Merge 'test: wait for read_barrier in wait_until_driver_service_level_created' from Andrzej Jackowski

Previously, `wait_until_driver_service_level_created` only waited for
the `driver` service level to appear in the output of
`LIST ALL SERVICE_LEVELS`. However, the fact that one node lists
`sl:driver` does not necessarily mean that all other nodes can see
it yet. This caused sporadic test failures, especially in DEBUG builds.

To prevent these failures, this change adds an extra wait for
a `raft/read_barrier` after the `driver` service level first appears.
This ensures the service level is globally visible across the cluster.

Fixes: https://github.com/scylladb/scylladb/issues/27019

No backport - test fix for `sl:driver` tests, and this is only available on `master`

Closes scylladb/scylladb#27076

* github.com:scylladb/scylladb:
  test: wait for read_barrier in wait_until_driver_service_level_created
  test: use ManagerClient in wait_until_driver_service_level_created
This commit is contained in:
Pavel Emelyanov
2025-11-28 16:47:29 +03:00
2 changed files with 6 additions and 3 deletions

View File

@@ -102,7 +102,7 @@ async def test_service_levels_upgrade(request, manager: ManagerClient, build_mod
logging.info("Waiting until upgrade finishes")
await asyncio.gather(*(wait_until_topology_upgrade_finishes(manager, h.address, time.time() + 60) for h in hosts))
await wait_until_driver_service_level_created(cql, time.time() + 60)
await wait_until_driver_service_level_created(manager, time.time() + 60)
result_v2 = await cql.run_async("SELECT service_level FROM system.service_levels_v2")
assert set([sl.service_level for sl in result_v2]) == set(sls + [DRIVER_SL_NAME])
@@ -174,7 +174,7 @@ async def test_service_levels_work_during_recovery(manager: ManagerClient):
await manager.servers_see_each_other(servers)
await manager.api.upgrade_to_raft_topology(hosts[0].address)
await asyncio.gather(*(wait_until_topology_upgrade_finishes(manager, h.address, time.time() + 60) for h in hosts))
await wait_until_driver_service_level_created(cql, time.time() + 60)
await wait_until_driver_service_level_created(manager, time.time() + 60)
logging.info("Validating service levels works in v2 mode after leaving recovery")
new_sl = "sl" + unique_name()

View File

@@ -203,11 +203,14 @@ async def wait_until_topology_upgrade_finishes(manager: ManagerClient, ip_addr:
return status == "done" or None
await wait_for(check, deadline=deadline, period=1.0)
async def wait_until_driver_service_level_created(cql: Session, deadline: float):
async def wait_until_driver_service_level_created(manager: ManagerClient, deadline: float):
cql = manager.get_cql()
async def check():
service_levels = await cql.run_async("LIST ALL SERVICE_LEVELS")
return ("driver" in [sl.service_level for sl in service_levels]) or None
await wait_for(check, deadline=deadline, period=1.0)
# sync driver service level on all nodes
await asyncio.gather(*(read_barrier(manager.api, s.ip_addr) for s in await manager.running_servers()))
async def delete_raft_topology_state(cql: Session, host: Host):
await cql.run_async("truncate table system.topology", host=host)