From 0ea5a1fc488bc7baa221804c8320fa1a328ff5bc Mon Sep 17 00:00:00 2001 From: Ferenc Szili Date: Wed, 15 Jan 2025 15:32:15 +0100 Subject: [PATCH 1/2] table: run set_split_mode() on all storage groups during all_storage_groups_split() tablet_storage_group_manager::all_storage_groups_split() calls set_split_mode() for each of its storage groups to create split ready compaction groups. It does this by iterating through storage groups using std::ranges::all_of() which is not guaranteed to iterate through the entire range, and will stop iterating on the first occurrence of the predicate (set_split_mode()) returning false. set_split_mode() creates the split compaction groups and returns false if the storage group's main compaction group or merging groups are not empty. This means that in cases where the tablet storage group manager has non-empty storage groups, we could have a situation where split compaction groups are not created for all storage groups. The missing split compaction groups are later created in tablet_storage_group_manager::split_all_storage_groups() which also calls set_split_mode(), and that is the reason why split completes successfully. The problem is that tablet_storage_group_manager::all_storage_groups_split() runs under a group0 guard, and tablet_storage_group_manager::split_all_storage_groups() does not. This can cause problems with operations which should exclude with compaction group creation. i.e. 
DROP TABLE/DROP KEYSPACE (cherry picked from commit 24e8d2a55c72b8c2c2463fee525a57a406e58ea1) --- replica/table.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/replica/table.cc b/replica/table.cc index e106208526..b2b832ad0e 100644 --- a/replica/table.cc +++ b/replica/table.cc @@ -952,8 +952,10 @@ bool tablet_storage_group_manager::all_storage_groups_split() { return true; } - auto split_ready = std::ranges::all_of(_storage_groups | boost::adaptors::map_values, - std::mem_fn(&storage_group::set_split_mode)); + bool split_ready = true; + for (const storage_group_ptr& sg : _storage_groups | boost::adaptors::map_values) { + split_ready &= sg->set_split_mode(); + } // The table replica will say to coordinator that its split status is ready by // mirroring the sequence number from tablet metadata into its local state, From 5a74ded58219b2a703c9f6f9b13a13d8858c59df Mon Sep 17 00:00:00 2001 From: Ferenc Szili Date: Wed, 15 Jan 2025 16:23:07 +0100 Subject: [PATCH 2/2] test: add reproducer and test for fix to split ready CG creation This adds a reproducer for #22431 In cases where a tablet storage group manager had more than one storage group, it was possible to create compaction groups outside the group0 guard, which could create problems with operations which should exclude with compaction group creation. 
(cherry picked from commit 8bff7786a8041a430e707af65452e9211642e31f) --- replica/database.cc | 6 ++++ replica/table.cc | 6 ++++ test/topology_custom/test_tablets.py | 53 ++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+) diff --git a/replica/database.cc b/replica/database.cc index a6bd5f13e2..6a0836ac86 100644 --- a/replica/database.cc +++ b/replica/database.cc @@ -2452,6 +2452,12 @@ future<> database::truncate_table_on_all_shards(sharded& sharded_db, s }); }); + co_await utils::get_local_injector().inject("truncate_compaction_disabled_wait", [] (auto& handler) -> future<> { + dblog.info("truncate_compaction_disabled_wait: wait"); + co_await handler.wait_for_message(std::chrono::steady_clock::now() + std::chrono::minutes{5}); + dblog.info("truncate_compaction_disabled_wait: done"); + }, false); + const auto should_flush = with_snapshot && cf.can_flush(); dblog.trace("{} {}.{} and views on all shards", should_flush ? "Flushing" : "Clearing", s->ks_name(), s->cf_name()); std::function(replica::table&)> flush_or_clear = should_flush ? 
diff --git a/replica/table.cc b/replica/table.cc index b2b832ad0e..152795f189 100644 --- a/replica/table.cc +++ b/replica/table.cc @@ -983,6 +983,12 @@ sstables::compaction_type_options::split tablet_storage_group_manager::split_com future<> tablet_storage_group_manager::split_all_storage_groups() { sstables::compaction_type_options::split opt = split_compaction_options(); + co_await utils::get_local_injector().inject("split_storage_groups_wait", [] (auto& handler) -> future<> { + dblog.info("split_storage_groups_wait: waiting"); + co_await handler.wait_for_message(std::chrono::steady_clock::now() + std::chrono::minutes{5}); + dblog.info("split_storage_groups_wait: done"); + }, false); + co_await for_each_storage_group_gently([opt] (storage_group& storage_group) { return storage_group.split(opt); }); diff --git a/test/topology_custom/test_tablets.py b/test/topology_custom/test_tablets.py index ad25f8e29f..7673c71371 100644 --- a/test/topology_custom/test_tablets.py +++ b/test/topology_custom/test_tablets.py @@ -377,6 +377,59 @@ async def test_read_of_pending_replica_during_migration(manager: ManagerClient, assert len(list(rows)) == 1 +@pytest.mark.asyncio +@skip_mode('release', 'error injections are not supported in release mode') +async def test_drop_keyspace_while_split(manager: ManagerClient): + + # Reproducer for: https://github.com/scylladb/scylladb/issues/22431 + # This tests if the split ready compaction groups are correctly created + # on a shard with several storage groups for the same table + + logger.info("Bootstrapping cluster") + cmdline = [ '--target-tablet-size-in-bytes', '8192', + '--smp', '2' ] + config = { 'error_injections_at_startup': ['short_tablet_stats_refresh_interval'], + 'enable_tablets': True } + servers = [await manager.server_add(config=config, cmdline=cmdline)] + + s0_log = await manager.server_open_log(servers[0].server_id) + + cql = manager.get_cql() + await wait_for_cql_and_get_hosts(cql, [servers[0]], time.time() + 60) + + await 
manager.api.disable_tablet_balancing(servers[0].ip_addr)

    # create a table so that it has at least 2 tablets (and storage groups) per shard
    await cql.run_async("CREATE KEYSPACE test WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 1} AND tablets = {'initial': 4};")
    await cql.run_async("CREATE TABLE test.test (pk int PRIMARY KEY, c int);")

    await manager.api.disable_autocompaction(servers[0].ip_addr, 'test')

    keys = range(2048)
    await asyncio.gather(*[cql.run_async(f'INSERT INTO test.test (pk, c) VALUES ({k}, {k});') for k in keys])
    await manager.api.flush_keyspace(servers[0].ip_addr, 'test')

    await manager.api.enable_injection(servers[0].ip_addr, 'truncate_compaction_disabled_wait', one_shot=False)
    await manager.api.enable_injection(servers[0].ip_addr, 'split_storage_groups_wait', one_shot=False)

    # enable the load balancer which should emit a tablet split
    await manager.api.enable_tablet_balancing(servers[0].ip_addr)

    # wait for compaction groups to be created and split to begin
    await s0_log.wait_for('split_storage_groups_wait: wait')

    # start a DROP and wait for it to disable compaction
    drop_ks_task = cql.run_async('DROP KEYSPACE test;')
    await s0_log.wait_for('truncate_compaction_disabled_wait: wait')

    # release split
    await manager.api.message_injection(servers[0].ip_addr, "split_storage_groups_wait")

    # release drop and wait for it to complete
    await manager.api.message_injection(servers[0].ip_addr, "truncate_compaction_disabled_wait")
    await drop_ks_task


# This test checks that --enable-tablets option and the TABLETS parameters of the CQL CREATE KEYSPACE # statemement are mutually correct from the "the least surprising behavior" concept. See comments inside # the test code for more details.