From d00443f4b0ea86a0f0730860f4f853e55a2c1036 Mon Sep 17 00:00:00 2001 From: Alex Date: Sun, 5 Apr 2026 17:53:15 +0300 Subject: [PATCH 1/2] service_levels: mark v2 migration complete on empty legacy table During raft-topology upgrade in 2026.1, service_level_controller::migrate_to_v2() returns early when system_distributed.service_levels is empty. This skips the service_level_version = 2 write, so the cluster is never marked as upgraded to service levels v2 even though there is no data to migrate. Subsequent upgrades may then fail the startup check which requires service_level_version == 2. Remove the early return and let the migration commit the version marker even when there are no legacy service levels rows to copy. Fixes: https://scylladb.atlassian.net/browse/SCYLLADB-1198 backport: only needed in 2026.1 because it's the critical upgrade before 2026.2,3,4 --- service/qos/service_level_controller.cc | 4 ---- 1 file changed, 4 deletions(-) diff --git a/service/qos/service_level_controller.cc b/service/qos/service_level_controller.cc index c3b33a27ad..13c42ecc4d 100644 --- a/service/qos/service_level_controller.cc +++ b/service/qos/service_level_controller.cc @@ -948,10 +948,6 @@ future<> service_level_controller::migrate_to_v2(size_t nodes_count, db::system_ qs, {}, cql3::query_processor::cache_internal::no); - if (rows->empty()) { - co_return; - } - auto col_names = schema->all_columns() | std::views::transform([] (const auto& col) {return col.name_as_cql_string(); }) | std::ranges::to>(); auto col_names_str = fmt::to_string(fmt::join(col_names, ", ")); From faba13d2b7486bffd7c1313f953d6febedc3d0a3 Mon Sep 17 00:00:00 2001 From: Alex Date: Sun, 5 Apr 2026 19:46:15 +0300 Subject: [PATCH 2/2] test/auth_cluster: cover empty legacy table in service level upgrade Add a cluster test that upgrades to raft topology with an empty legacy `system_distributed.service_levels` table and verifies that the migration still marks `service_level_version` as `2`. 
--- .../auth_cluster/test_raft_service_levels.py | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/test/cluster/auth_cluster/test_raft_service_levels.py b/test/cluster/auth_cluster/test_raft_service_levels.py index ae837c42f0..53f014a770 100644 --- a/test/cluster/auth_cluster/test_raft_service_levels.py +++ b/test/cluster/auth_cluster/test_raft_service_levels.py @@ -114,6 +114,27 @@ async def test_service_levels_upgrade(request, manager: ManagerClient, build_mod result_with_sl_v2 = await cql.run_async(f"SELECT service_level FROM system.service_levels_v2") assert set([sl.service_level for sl in result_with_sl_v2]) == set(sls + [DRIVER_SL_NAME] + [sl_v2]) +@pytest.mark.asyncio +async def test_service_levels_upgrade_with_empty_legacy_table(manager: ManagerClient): + cfg = {**auth_config, "force_gossip_topology_changes": True, "tablets_mode_for_new_keyspaces": "disabled"} + + servers = [await manager.server_add(config=cfg)] + cfg.pop("force_gossip_topology_changes") + servers += [await manager.server_add(config=cfg) for _ in range(2)] + + cql = manager.get_cql() + assert cql + hosts = await wait_for_cql_and_get_hosts(cql, servers, time.time() + 60) + + rows = await cql.run_async("SELECT service_level FROM system_distributed.service_levels") + assert list(rows) == [] + + await manager.api.upgrade_to_raft_topology(hosts[0].address) + await asyncio.gather(*(wait_until_topology_upgrade_finishes(manager, h.address, time.time() + 60) for h in hosts)) + + sl_version = await cql.run_async("SELECT value FROM system.scylla_local WHERE key = 'service_level_version'") + assert sl_version[0].value == "2" + @pytest.mark.asyncio async def test_service_levels_work_during_recovery(manager: ManagerClient): # FIXME: move this test to the Raft-based recovery procedure or remove it if unneeded.