test/cluster/test_read_repair.py: improve trace logging test (again)

The test test_read_repair_with_trace_logging wants to test read repair
with trace logging. It turns out that node restart + trace-level logging
+ debug mode is too much: even with a 1-minute timeout, the read repair
sometimes times out.
Refactor the test to use an injection point instead of a restart. To make
sure the test still tests what it is supposed to test, use tracing to
assert that read repair did indeed happen.
This commit is contained in:
Botond Dénes
2025-05-02 01:57:14 -04:00
parent 51025de755
commit 29eedaa0e5

View File

@@ -15,9 +15,10 @@ from cassandra.cluster import ConsistencyLevel, Session # type: ignore
from cassandra.query import SimpleStatement # type: ignore
from cassandra.pool import Host # type: ignore
from test.pylib.util import wait_for_cql_and_get_hosts
from test.pylib.util import wait_for_cql_and_get_hosts, execute_with_tracing
from test.pylib.internal_types import ServerInfo
from test.pylib.manager_client import ManagerClient
from test.cluster.conftest import skip_mode
from test.cluster.util import new_test_keyspace
@@ -309,13 +310,13 @@ async def test_incremental_read_repair(data_class: DataClass, manager: ManagerCl
@pytest.mark.asyncio
@skip_mode('release', 'error injections are not supported in release mode')
async def test_read_repair_with_trace_logging(request, manager):
logger.info("Creating a new cluster")
cmdline = ["--hinted-handoff-enabled", "0", "--logger-log-level", "mutation_data=trace"]
cmdline = ["--hinted-handoff-enabled", "0", "--logger-log-level", "mutation_data=trace:debug_error_injection=trace"]
config = {"read_request_timeout_in_ms": 60000}
for i in range(2):
await manager.server_add(cmdline=cmdline, config=config)
[node1, node2] = await manager.servers_add(2, cmdline=cmdline, config=config)
cql = manager.get_cql()
srvs = await manager.running_servers()
@@ -326,13 +327,15 @@ async def test_read_repair_with_trace_logging(request, manager):
await cql.run_async(f"INSERT INTO {ks}.t (pk, c) VALUES (0, 0)")
await manager.server_stop(srvs[0].server_id)
prepared = cql.prepare(f"INSERT INTO {ks}.t (pk, c) VALUES (0, 1)")
prepared.consistency_level = ConsistencyLevel.ONE
await cql.run_async(prepared)
await manager.api.enable_injection(node1.ip_addr, "database_apply", one_shot=True)
await cql.run_async(SimpleStatement(f"INSERT INTO {ks}.t (pk, c) VALUES (0, 1)", consistency_level = ConsistencyLevel.ONE))
await manager.server_start(srvs[0].server_id)
tracing = execute_with_tracing(cql, SimpleStatement(f"SELECT * FROM {ks}.t WHERE pk = 0", consistency_level = ConsistencyLevel.ALL), log = True)
prepared = cql.prepare(f"SELECT * FROM {ks}.t WHERE pk = 0")
prepared.consistency_level = ConsistencyLevel.ALL
await cql.run_async(prepared)
assert len(tracing) == 1 # 1 page
found_read_repair = False
for event in tracing[0]:
found_read_repair |= "digest mismatch, starting read repair" == event.description
assert found_read_repair