From d4c28ee3178a25941f363a34e45a08cdae28fbd3 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 6 Apr 2026 14:07:48 +0300 Subject: [PATCH] Merge 'service_levels: mark v2 migration complete on empty legacy table' from Alex Dathskovsky During raft-topology upgrade in 2026.1, service_level_controller::migrate_to_v2() returns early when system_distributed.service_levels is empty. This skips the service_level_version = 2 write, so the cluster is never marked as upgraded to service levels v2 even though there is no data to migrate. Subsequent upgrades may then fail the startup check which requires service_level_version == 2. Remove the early return and let the migration commit the version marker even when there are no legacy service levels rows to copy. Fixes: https://scylladb.atlassian.net/browse/SCYLLADB-1198 backport: should be backported to all versions that can be upgraded to 2026.2 Closes scylladb/scylladb#29333 * github.com:scylladb/scylladb: test/auth_cluster: cover empty legacy table in service level upgrade service_levels: mark v2 migration complete on empty legacy table (cherry picked from commit 95e422db482b3441f8bdba6563a58d5463fa5060) Closes scylladb/scylladb#29352 --- service/qos/service_level_controller.cc | 4 ---- .../auth_cluster/test_raft_service_levels.py | 21 +++++++++++++++++++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/service/qos/service_level_controller.cc b/service/qos/service_level_controller.cc index dc874b0142..33d1683bfd 100644 --- a/service/qos/service_level_controller.cc +++ b/service/qos/service_level_controller.cc @@ -880,10 +880,6 @@ future<> service_level_controller::migrate_to_v2(size_t nodes_count, db::system_ qs, {}, cql3::query_processor::cache_internal::no); - if (rows->empty()) { - co_return; - } - auto col_names = schema->all_columns() | std::views::transform([] (const auto& col) {return col.name_as_cql_string(); }) | std::ranges::to<std::vector<sstring>>(); auto col_names_str = fmt::to_string(fmt::join(col_names, ", ")); diff 
--git a/test/cluster/auth_cluster/test_raft_service_levels.py b/test/cluster/auth_cluster/test_raft_service_levels.py index 2e6efd8916..cfa55e451f 100644 --- a/test/cluster/auth_cluster/test_raft_service_levels.py +++ b/test/cluster/auth_cluster/test_raft_service_levels.py @@ -109,6 +109,27 @@ async def test_service_levels_upgrade(request, manager: ManagerClient, build_mod result_with_sl_v2 = await cql.run_async(f"SELECT service_level FROM system.service_levels_v2") assert set([sl.service_level for sl in result_with_sl_v2]) == set(sls + [sl_v2]) +@pytest.mark.asyncio +async def test_service_levels_upgrade_with_empty_legacy_table(manager: ManagerClient): + cfg = {**auth_config, "force_gossip_topology_changes": True, "tablets_mode_for_new_keyspaces": "disabled"} + + servers = [await manager.server_add(config=cfg)] + cfg.pop("force_gossip_topology_changes") + servers += [await manager.server_add(config=cfg) for _ in range(2)] + + cql = manager.get_cql() + assert cql + hosts = await wait_for_cql_and_get_hosts(cql, servers, time.time() + 60) + + rows = await cql.run_async("SELECT service_level FROM system_distributed.service_levels") + assert list(rows) == [] + + await manager.api.upgrade_to_raft_topology(hosts[0].address) + await asyncio.gather(*(wait_until_topology_upgrade_finishes(manager, h.address, time.time() + 60) for h in hosts)) + + sl_version = await cql.run_async("SELECT value FROM system.scylla_local WHERE key = 'service_level_version'") + assert sl_version[0].value == "2" + @pytest.mark.asyncio async def test_service_levels_work_during_recovery(manager: ManagerClient): # FIXME: move this test to the Raft-based recovery procedure or remove it if unneeded.