From 253fa9519f49c7d0b2fc9360538088cbb32bcb95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Botond=20D=C3=A9nes?= Date: Tue, 24 Mar 2026 12:13:20 +0200 Subject: [PATCH] test/encryption: wait for topology convergence after abrupt restart test_reboot uses a custom restart function that SIGKILLs and restarts nodes sequentially. After all nodes are back up, the test proceeded directly to reads after wait_for_cql_and_get_hosts(), which only confirms CQL reachability. While a node is restarted, other nodes might execute global token metadata barriers, which advance the topology fence version. The restarted node has to learn about the new version before it can send reads/writes to the other nodes. The test issues reads as soon as the CQL port is opened, which might happen before the last restarted node learns of the latest topology version. If this node acts as a coordinator for reads/writes before this happens, these will fail as the other nodes will reject the ops with the outdated topology fence version. Fix this by replacing wait_for_cql_and_get_hosts() on the abrupt-restart path with the more robust get_ready_cql(), which makes sure servers see each other before refreshing the CQL connection. This should ensure that nodes have exchanged gossip and converged on topology state before any reads are executed. The rolling_restart() path is unaffected as it handles this internally. 
Fixes: SCYLLADB-557 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Closes scylladb/scylladb#29211 (cherry picked from commit 854c374ebf2b6a309d786ae510108c2de1b618af) Closes scylladb/scylladb#29260 --- test/cluster/test_encryption.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/cluster/test_encryption.py b/test/cluster/test_encryption.py index fa69d524c8..49dd5e5d16 100644 --- a/test/cluster/test_encryption.py +++ b/test/cluster/test_encryption.py @@ -177,7 +177,7 @@ async def _smoke_test(manager: ManagerClient, key_provider: KeyProviderFactory, # restart the cluster if restart: await restart(manager, servers, cfs) - await wait_for_cql_and_get_hosts(cql, servers, time.time() + 60) + cql, _ = await manager.get_ready_cql(servers) else: await manager.rolling_restart(servers) for table_name in cfs: