storage_proxy/read_repair: Use partition_key instead of token key for mutation diff calculation hashmap.

This update addresses an issue in the mutation diff calculation algorithm used during read repair.
Previously, the algorithm used `token` as the hashmap key. Since `token` is calculated using
the Murmur3 hash function, different partition keys could map to the same token and collide in
the hashmap, causing corruption in the affected rows' values.

Fixes scylladb/scylladb#19101

(cherry picked from commit 39785c6f4e)
This commit is contained in:
Sergey Zolotukhin
2024-12-19 17:13:55 +01:00
parent 12ee41869a
commit b04b6aad9e
4 changed files with 14 additions and 16 deletions

View File

@@ -365,8 +365,7 @@ async def start_writes_to_cdc_table(cql: Session, concurrency: int = 3):
stream_to_timestamp = { stream: gen.time for gen in generations for stream in gen.streams}
# FIXME: Doesn't work with all_pages=True (https://github.com/scylladb/scylladb/issues/19101)
cdc_log = await cql.run_async(f"SELECT * FROM {ks_name}.tbl_scylla_cdc_log", all_pages=False)
cdc_log = await cql.run_async(f"SELECT * FROM {ks_name}.tbl_scylla_cdc_log", all_pages=True)
for log_entry in cdc_log:
assert log_entry.cdc_stream_id in stream_to_timestamp
timestamp = stream_to_timestamp[log_entry.cdc_stream_id]

View File

@@ -17,7 +17,6 @@ from test.pylib.util import wait_for_cql_and_get_hosts
logger = logging.getLogger(__name__)
@pytest.mark.xfail(reason="https://github.com/scylladb/scylladb/issues/19101")
@pytest.mark.asyncio
async def test_read_repair_with_conflicting_hash_keys(request: pytest.FixtureRequest, manager: ManagerClient) -> None:
"""