test: bootstrap tombstone gc repair cluster sequentially

Avoid concurrent topology changes in the tombstone GC repair setup. In debug mode, nodes that are also running hinted handoff and materialized view startup work can time out while applying Raft entries during a concurrent bootstrap, so the failure occurs before the test itself starts.

Keep the sequential path opt-in so unrelated repair tests still exercise concurrent bootstrap behavior.

Closes scylladb/scylladb#29829
This commit is contained in:
Piotr Smaron
2026-05-11 12:29:16 +02:00
committed by Botond Dénes
parent 51c35c05e2
commit 0fcae72530
2 changed files with 13 additions and 4 deletions

View File

@@ -1200,9 +1200,11 @@ async def _setup_tombstone_gc_cluster(manager, *, tablets=2, extra_cmdline=None)
cmdline = ['--logger-log-level', 'repair=debug']
if extra_cmdline:
cmdline += extra_cmdline
# These tests enable hinted handoff and materialized views, which make debug-mode
# concurrent bootstrap occasionally exceed the topology timeout before the test starts.
servers, cql, hosts, ks, table_id = await create_table_insert_data_for_repair(
manager, nr_keys=0, cmdline=cmdline, tablets=tablets,
disable_flush_cache_time=True)
disable_flush_cache_time=True, sequential_server_add=True)
# Lower propagation_delay to 0 so gc_before = repair_time, making tombstones
# GC-eligible immediately after a successful repair rather than 1h later.
await cql.run_async(

View File

@@ -64,7 +64,9 @@ async def load_tablet_repair_task_infos(cql, host, table_id):
return repair_task_infos
async def create_table_insert_data_for_repair(manager, rf = 3 , tablets = 8, fast_stats_refresh = True, nr_keys = 256, disable_flush_cache_time = False, cmdline = None) -> (list[ServerInfo], CassandraSession, list[Host], str, str):
async def create_table_insert_data_for_repair(manager, rf=3, tablets=8, fast_stats_refresh=True, nr_keys=256,
disable_flush_cache_time=False, cmdline=None,
sequential_server_add=False) -> tuple[list[ServerInfo], CassandraSession, list[Host], str, str]:
assert rf <= 3, "A keyspace with RF > 3 will be RF-rack-invalid if there are fewer racks than the RF"
if fast_stats_refresh:
@@ -73,8 +75,13 @@ async def create_table_insert_data_for_repair(manager, rf = 3 , tablets = 8, fas
config = {}
if disable_flush_cache_time:
config.update({'repair_hints_batchlog_flush_cache_time_in_ms': 0})
servers = await manager.servers_add(3, config=config, cmdline=cmdline,
property_file=[{"dc": "dc1", "rack": f"r{i % rf}"} for i in range(rf)])
property_files = [{"dc": "dc1", "rack": f"r{i % rf}"} for i in range(3)]
if sequential_server_add:
servers = []
for property_file in property_files:
servers.append(await manager.server_add(config=config, cmdline=cmdline, property_file=property_file))
else:
servers = await manager.servers_add(len(property_files), config=config, cmdline=cmdline, property_file=property_files)
cql = manager.get_cql()
ks = await create_new_test_keyspace(cql, "WITH replication = {{'class': 'NetworkTopologyStrategy', "
"'replication_factor': {}}} AND tablets = {{'initial': {}}};".format(rf, tablets))