Merge '[Backport 2025.1] split: run set_split_mode() on all storage groups during all_storage_groups_split()' from Scylladb[bot]

`tablet_storage_group_manager::all_storage_groups_split()` calls `set_split_mode()` for each of its storage groups to create split ready compaction groups. It does this by iterating through storage groups using `std::ranges::all_of()` which is not guaranteed to iterate through the entire range, and will stop iterating on the first occurrence of the predicate (`set_split_mode()`) returning false. `set_split_mode()` creates the split compaction groups and returns false if the storage group's main compaction group or merging groups are not empty. This means that in cases where the tablet storage group manager has non-empty storage groups, we could have a situation where split compaction groups are not created for all storage groups.

The missing split compaction groups are later created in `tablet_storage_group_manager::split_all_storage_groups()` which also calls `set_split_mode()`, and that is the reason why split completes successfully. The problem is that
`tablet_storage_group_manager::all_storage_groups_split()` runs under a group0 guard, but
`tablet_storage_group_manager::split_all_storage_groups()` does not. This can cause problems with operations which should exclude with compaction group creation. i.e. DROP TABLE/DROP KEYSPACE

Fixes #22431

This is a bugfix and should be back ported to versions with tablets: 6.1 6.2 and 2025.1

- (cherry picked from commit 24e8d2a55c)

- (cherry picked from commit 8bff7786a8)

Parent PR: #22330

Closes scylladb/scylladb#22560

* github.com:scylladb/scylladb:
  test: add reproducer and test for fix to split ready CG creation
  table: run set_split_mode() on all storage groups during all_storage_groups_split()
This commit is contained in:
Botond Dénes
2025-02-13 09:36:23 +02:00
3 changed files with 67 additions and 2 deletions

View File

@@ -2561,6 +2561,12 @@ future<> database::truncate_table_on_all_shards(sharded<database>& sharded_db, s
});
});
co_await utils::get_local_injector().inject("truncate_compaction_disabled_wait", [] (auto& handler) -> future<> {
dblog.info("truncate_compaction_disabled_wait: wait");
co_await handler.wait_for_message(std::chrono::steady_clock::now() + std::chrono::minutes{5});
dblog.info("truncate_compaction_disabled_wait: done");
}, false);
const auto should_flush = with_snapshot && cf.can_flush();
dblog.trace("{} {}.{} and views on all shards", should_flush ? "Flushing" : "Clearing", s->ks_name(), s->cf_name());
std::function<future<>(replica::table&)> flush_or_clear = should_flush ?

View File

@@ -1029,8 +1029,10 @@ bool tablet_storage_group_manager::all_storage_groups_split() {
return true;
}
auto split_ready = std::ranges::all_of(_storage_groups | std::views::values,
std::mem_fn(&storage_group::set_split_mode));
bool split_ready = true;
for (const storage_group_ptr& sg : _storage_groups | std::views::values) {
split_ready &= sg->set_split_mode();
}
// The table replica will say to coordinator that its split status is ready by
// mirroring the sequence number from tablet metadata into its local state,
@@ -1058,6 +1060,12 @@ sstables::compaction_type_options::split tablet_storage_group_manager::split_com
future<> tablet_storage_group_manager::split_all_storage_groups(tasks::task_info tablet_split_task_info) {
sstables::compaction_type_options::split opt = split_compaction_options();
co_await utils::get_local_injector().inject("split_storage_groups_wait", [] (auto& handler) -> future<> {
dblog.info("split_storage_groups_wait: waiting");
co_await handler.wait_for_message(std::chrono::steady_clock::now() + std::chrono::minutes{5});
dblog.info("split_storage_groups_wait: done");
}, false);
co_await for_each_storage_group_gently([opt, tablet_split_task_info] (storage_group& storage_group) {
return storage_group.split(opt, tablet_split_task_info);
});

View File

@@ -754,3 +754,54 @@ async def test_replace_with_no_normal_token_owners_in_dc(manager: ManagerClient,
assert len(rows) == len(keys)
for r in rows:
assert r.c == r.pk
@pytest.mark.asyncio
@skip_mode('release', 'error injections are not supported in release mode')
async def test_drop_keyspace_while_split(manager: ManagerClient):
# Reproducer for: https://github.com/scylladb/scylladb/issues/22431
# This tests if the split ready compaction groups are correctly created
# on a shard with several storage groups for the same table
logger.info("Bootstrapping cluster")
cmdline = [ '--target-tablet-size-in-bytes', '8192',
'--smp', '2' ]
config = { 'error_injections_at_startup': ['short_tablet_stats_refresh_interval'] }
servers = [await manager.server_add(config=config, cmdline=cmdline)]
s0_log = await manager.server_open_log(servers[0].server_id)
cql = manager.get_cql()
await wait_for_cql_and_get_hosts(cql, [servers[0]], time.time() + 60)
await manager.api.disable_tablet_balancing(servers[0].ip_addr)
# create a table so that it has at least 2 tablets (and storage groups) per shard
await cql.run_async("CREATE KEYSPACE test WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 1} AND tablets = {'initial': 4};")
await cql.run_async("CREATE TABLE test.test (pk int PRIMARY KEY, c int);")
await manager.api.disable_autocompaction(servers[0].ip_addr, 'test')
keys = range(2048)
await asyncio.gather(*[cql.run_async(f'INSERT INTO test.test (pk, c) VALUES ({k}, {k});') for k in keys])
await manager.api.flush_keyspace(servers[0].ip_addr, 'test')
await manager.api.enable_injection(servers[0].ip_addr, 'truncate_compaction_disabled_wait', one_shot=False)
await manager.api.enable_injection(servers[0].ip_addr, 'split_storage_groups_wait', one_shot=False)
# enable the load balancer which should emmit a tablet split
await manager.api.enable_tablet_balancing(servers[0].ip_addr)
# wait for compaction groups to be created and split to begin
await s0_log.wait_for('split_storage_groups_wait: wait')
# start a DROP and wait for it to disable compaction
drop_ks_task = cql.run_async('DROP KEYSPACE test;')
await s0_log.wait_for('truncate_compaction_disabled_wait: wait')
# release split
await manager.api.message_injection(servers[0].ip_addr, "split_storage_groups_wait")
# release drop and wait for it to complete
await manager.api.message_injection(servers[0].ip_addr, "truncate_compaction_disabled_wait")
await drop_ks_task