Compare commits

..

2 Commits

Author SHA1 Message Date
Alex
faba13d2b7 test/auth_cluster: cover empty legacy table in service level upgrade
Add a cluster test that upgrades to raft topology with an empty legacy
`system_distributed.service_levels` table and verifies that the
migration still marks `service_level_version` as `2`.
2026-04-05 19:46:15 +03:00
Alex
d00443f4b0 service_levels: mark v2 migration complete on empty legacy table
During raft-topology upgrade in 2026.1, service_level_controller::migrate_to_v2()
returns early when system_distributed.service_levels is empty.
This skips the service_level_version = 2 write, so the cluster is never marked
as upgraded to service levels v2 even though there is no data to migrate.
Subsequent upgrades may then fail the startup check which requires
service_level_version == 2.
Remove the early return and let the migration commit the version marker even
when there are no legacy service levels rows to copy.

Fixes: https://scylladb.atlassian.net/browse/SCYLLADB-1198

backport: only needed in 2026.1, because it's the critical upgrade before 2026.2, 2026.3 and 2026.4
2026-04-05 18:00:12 +03:00
2 changed files with 21 additions and 4 deletions

View File

@@ -948,10 +948,6 @@ future<> service_level_controller::migrate_to_v2(size_t nodes_count, db::system_
qs,
{},
cql3::query_processor::cache_internal::no);
if (rows->empty()) {
co_return;
}
auto col_names = schema->all_columns() | std::views::transform([] (const auto& col) {return col.name_as_cql_string(); }) | std::ranges::to<std::vector<sstring>>();
auto col_names_str = fmt::to_string(fmt::join(col_names, ", "));

View File

@@ -114,6 +114,27 @@ async def test_service_levels_upgrade(request, manager: ManagerClient, build_mod
result_with_sl_v2 = await cql.run_async(f"SELECT service_level FROM system.service_levels_v2")
assert set([sl.service_level for sl in result_with_sl_v2]) == set(sls + [DRIVER_SL_NAME] + [sl_v2])
@pytest.mark.asyncio
async def test_service_levels_upgrade_with_empty_legacy_table(manager: ManagerClient):
    """Upgrade to raft topology while the legacy service levels table is empty.

    Starts three nodes (the first one with `force_gossip_topology_changes`
    set), checks that `system_distributed.service_levels` has no rows,
    triggers the raft topology upgrade and then asserts that
    `service_level_version` in `system.scylla_local` is "2".
    """
    gossip_flag = "force_gossip_topology_changes"
    first_node_cfg = {**auth_config, gossip_flag: True, "tablets_mode_for_new_keyspaces": "disabled"}
    # The remaining nodes use the same config minus the gossip flag.
    other_nodes_cfg = {key: val for key, val in first_node_cfg.items() if key != gossip_flag}

    servers = [await manager.server_add(config=first_node_cfg)]
    for _ in range(2):
        servers.append(await manager.server_add(config=other_nodes_cfg))

    cql = manager.get_cql()
    assert cql
    hosts = await wait_for_cql_and_get_hosts(cql, servers, time.time() + 60)

    # Precondition of the scenario: there is nothing in the legacy table.
    legacy_rows = await cql.run_async("SELECT service_level FROM system_distributed.service_levels")
    assert list(legacy_rows) == []

    await manager.api.upgrade_to_raft_topology(hosts[0].address)
    pending_upgrades = [
        wait_until_topology_upgrade_finishes(manager, host.address, time.time() + 60)
        for host in hosts
    ]
    await asyncio.gather(*pending_upgrades)

    # The version marker must be written even though no rows were migrated.
    version_rows = await cql.run_async("SELECT value FROM system.scylla_local WHERE key = 'service_level_version'")
    assert version_rows[0].value == "2"
@pytest.mark.asyncio
async def test_service_levels_work_during_recovery(manager: ManagerClient):
# FIXME: move this test to the Raft-based recovery procedure or remove it if unneeded.