reduce test_backup.py and test_refresh.py datasets

backup and restore tests. This made the testing times explode
with both cluster/object_store/test_backup.py and
cluster/test_refresh.py taking more than an hour each to complete
under test.py and around 14 min under pytest directly.
This was painful especially in CI because it runs tests under test.py which
suffers from the issue of not being able to run test cases from within
the same file in parallel (a fix is attempted in 27618).

This patch reduces the datasets of these tests to the minimum and
gets rid of one of the tested topologies as it was redundant.
The test times are reduced to 2 min under pytest and 14 min under
test.py.

Signed-off-by: Robert Bindar <robert.bindar@scylladb.com>

Closes scylladb/scylladb#28280
This commit is contained in:
Robert Bindar
2026-01-20 19:15:18 +02:00
committed by Nadav Har'El
parent 8962093d90
commit ea8a661119
2 changed files with 11 additions and 11 deletions

View File

@@ -742,7 +742,7 @@ async def check_mutation_replicas(cql, manager, servers, keys, topology, logger,
for pk, frags in node_frags.items():
mutations[pk].append(frags)
for k in random.sample(keys, 17):
for k in random.sample(keys, 10):
if not str(k) in mutations:
logger.info(f'Mutations: {mutations}')
assert False, f"Key '{k}' not found in mutations. {topology=} {scope=} {primary_replica_only=}"
@@ -771,7 +771,6 @@ def create_schema(ks, cf, min_tablet_count=None):
(topo(rf = 3, nodes = 5, racks = 1, dcs = 1), False),
(topo(rf = 1, nodes = 4, racks = 2, dcs = 1), True),
(topo(rf = 3, nodes = 6, racks = 2, dcs = 1), False),
(topo(rf = 3, nodes = 6, racks = 3, dcs = 1), True),
(topo(rf = 2, nodes = 8, racks = 4, dcs = 2), True)
])
@@ -788,11 +787,12 @@ async def test_restore_with_streaming_scopes(build_mode: str, manager: ManagerCl
ks = 'ks'
cf = 'cf'
num_keys = 1000 if build_mode == 'debug' else 10000
num_keys = 10
scopes = ['rack', 'dc'] if build_mode == 'debug' else ['all', 'dc', 'rack', 'node']
restored_min_tablet_counts = [512] if build_mode == 'debug' else [256, 512, 1024]
restored_min_tablet_counts = [5] if build_mode == 'debug' else [2, 5, 10]
schema, keys, replication_opts = await create_dataset(manager, ks, cf, topology, logger, num_keys=num_keys, min_tablet_count=512)
schema, keys, replication_opts = await create_dataset(manager, ks, cf, topology, logger, num_keys=num_keys, min_tablet_count=5)
# validate replicas assertions hold on fresh dataset
await check_mutation_replicas(cql, manager, servers, keys, topology, logger, ks, cf, scope=None, primary_replica_only=False, expected_replicas = None)
@@ -809,10 +809,10 @@ async def test_restore_with_streaming_scopes(build_mode: str, manager: ManagerCl
if scope == 'rack' and topology.rf != topology.racks:
logger.info(f'Skipping scope={scope} test since rf={topology.rf} != racks={topology.racks} and it cannot be supported with numeric replication_factor')
continue
pros = [False] if scope == 'node' else [False, True]
pros = [False] if scope == 'node' else [True, False]
for pro in pros:
for restored_min_tablet_count in restored_min_tablet_counts:
logger.info(f'Re-initialize keyspace with min_tablet_count={restored_min_tablet_count} from min_tablet_count=512')
logger.info(f'Re-initialize keyspace with min_tablet_count={restored_min_tablet_count} from min_tablet_count=5')
cql.execute(f'DROP KEYSPACE {ks}')
cql.execute((f"CREATE KEYSPACE {ks} WITH REPLICATION = {replication_opts};"))
schema = create_schema(ks, cf, restored_min_tablet_count)

View File

@@ -33,10 +33,9 @@ logger = logging.getLogger(__name__)
(topo(rf = 3, nodes = 5, racks = 1, dcs = 1), False),
(topo(rf = 1, nodes = 4, racks = 2, dcs = 1), True),
(topo(rf = 3, nodes = 6, racks = 2, dcs = 1), False),
(topo(rf = 3, nodes = 6, racks = 3, dcs = 1), True),
(topo(rf = 2, nodes = 8, racks = 4, dcs = 2), True)
])
async def test_refresh_with_streaming_scopes(manager: ManagerClient, topology_rf_validity):
async def test_refresh_with_streaming_scopes(build_mode: str, manager: ManagerClient, topology_rf_validity):
'''
Check that refreshing a cluster with stream scopes works
@@ -64,7 +63,7 @@ async def test_refresh_with_streaming_scopes(manager: ManagerClient, topology_rf
ks = 'ks'
cf = 'cf'
_, keys, _ = await create_dataset(manager, ks, cf, topology, logger, num_keys=10000, min_tablet_count=512)
_, keys, _ = await create_dataset(manager, ks, cf, topology, logger, num_keys=10, min_tablet_count=5)
# validate replicas assertions hold on fresh dataset
await check_mutation_replicas(cql, manager, servers, keys, topology, logger, ks, cf, scope=None, primary_replica_only=False, expected_replicas = None)
@@ -110,7 +109,8 @@ async def test_refresh_with_streaming_scopes(manager: ManagerClient, topology_rf
logger.info(f'Refresh {s.ip_addr} with {toc_names}, scope={scope}')
await manager.api.load_new_sstables(s.ip_addr, ks, cf, scope=scope, primary_replica=primary_replica_only, load_and_stream=True)
for scope in ['all', 'dc', 'rack', 'node']:
scopes = ['rack', 'dc'] if build_mode == 'debug' else ['all', 'dc', 'rack', 'node']
for scope in scopes:
# We can support rack-aware restore with rack lists, if we restore the rack-list per dc as it was at backup time.
# Otherwise, with numeric replication_factor we'd pick arbitrary subset of the racks when the keyspace
# is initially created and an arbitrary subset or the rack at restore time.