From 46a712d6d41e60a0570d4ef6ee3080fc126cb95e Mon Sep 17 00:00:00 2001 From: Wojciech Mitros Date: Sun, 31 May 2026 09:59:59 +0200 Subject: [PATCH] test: get SC raft group leader info from raft group members To find the raft group leader of a group, a node must be a member of this group. In a few test cases in test_strong_consistency.py we try to get the leader info without knowing the raft group members, so we may initially fail and we retry on another node. This is inefficient - trying to get the leader from a non-member ends in a timeout for the leader check, so the test case takes an additional 60s. In this patch we avoid this by checking raft group members (i.e. tablet replicas) before trying to get the leader, and then only get the leader from one of the replicas. On average, this should speed up test_strong_consistency.py by 85s. Fixes: SCYLLADB-2266 Fixes: SCYLLADB-2268 Closes scylladb/scylladb#30165 --- test/cluster/test_strong_consistency.py | 55 +++++++++++-------------- 1 file changed, 25 insertions(+), 30 deletions(-) diff --git a/test/cluster/test_strong_consistency.py b/test/cluster/test_strong_consistency.py index c24c31f687..20d1964d1c 100644 --- a/test/cluster/test_strong_consistency.py +++ b/test/cluster/test_strong_consistency.py @@ -140,18 +140,17 @@ async def test_basic_write_read(manager: ManagerClient): logger.info("Select raft group id for the tablet") group_id = await get_table_raft_group_id(manager, ks, 'test') - logger.info(f"Get current leader for the group {group_id}") - try: - leader_host_id = await wait_for_leader(manager, servers[0], group_id) - except: - # We need to wait for leader on a replica, and first server might not be one - leader_host_id = await wait_for_leader(manager, servers[1], group_id) - leader_host = host_by_host_id(leader_host_id) - tablet_replicas = await get_tablet_replicas(manager, servers[0], ks, "test", 0) assert len(tablet_replicas) == 2 replica_host_ids = [replica[0] for replica in tablet_replicas] + 
logger.info(f"Get current leader for the group {group_id}") + if host_ids[0] in replica_host_ids: + leader_host_id = await wait_for_leader(manager, servers[0], group_id) + else: + leader_host_id = await wait_for_leader(manager, servers[1], group_id) + leader_host = host_by_host_id(leader_host_id) + logger.info(f"Get the non-leader replica for the group {group_id}") non_leader_replica_host_id = [host_id for host_id in replica_host_ids if str(host_id) != str(leader_host_id)][0] non_leader_replica_host = host_by_host_id(non_leader_replica_host_id) @@ -738,18 +737,18 @@ async def test_forward_cql_exception_passthrough(manager: ManagerClient): table_name = table.split('.')[-1] group_id = await get_table_raft_group_id(manager, ks, table_name) - logger.info(f"Get current leader for the group {group_id}") - try: - leader_host_id = await wait_for_leader(manager, servers[0], group_id) - except: - # We need to wait for leader on a replica, and first server might not be one - leader_host_id = await wait_for_leader(manager, servers[1], group_id) - leader_host = [host for host in hosts if str(host.host_id) == str(leader_host_id)][0] tablet_replicas = await get_tablet_replicas(manager, servers[0], ks, table_name, 0) assert len(tablet_replicas) == 2 replica_host_ids = [replica[0] for replica in tablet_replicas] + logger.info(f"Get current leader for the group {group_id}") + if host_ids[0] in replica_host_ids: + leader_host_id = await wait_for_leader(manager, servers[0], group_id) + else: + leader_host_id = await wait_for_leader(manager, servers[1], group_id) + leader_host = [host for host in hosts if str(host.host_id) == str(leader_host_id)][0] + logger.info(f"Get the non-leader replica for the group {group_id}") non_leader_replica_host_id = [host_id for host_id in replica_host_ids if str(host_id) != str(leader_host_id)][0] non_leader_replica_host = [host for host in hosts if str(host.host_id) == str(non_leader_replica_host_id)][0] @@ -1447,15 +1446,14 @@ async def 
test_leader_cache_eliminates_redirect(manager: ManagerClient): table_name = table.split('.')[-1] group_id = await get_table_raft_group_id(manager, ks, table_name) - try: - leader_host_id = await wait_for_leader(manager, servers[0], group_id) - except: - leader_host_id = await wait_for_leader(manager, servers[1], group_id) - tablet_replicas = await get_tablet_replicas(manager, servers[0], ks, table_name, 0) assert len(tablet_replicas) == 2 replica_host_ids = [replica[0] for replica in tablet_replicas] + if host_ids[0] in replica_host_ids: + leader_host_id = await wait_for_leader(manager, servers[0], group_id) + else: + leader_host_id = await wait_for_leader(manager, servers[1], group_id) # Find the rack of the leader leader_server = next(s for hid, s in zip(host_ids, servers) if str(hid) == leader_host_id) leader_rack = leader_server.rack @@ -1532,18 +1530,15 @@ async def test_read_forwarding(manager: ManagerClient): async with new_test_table(manager, ks, "pk int PRIMARY KEY, c int") as table: table_name = table.split('.')[-1] group_id = await get_table_raft_group_id(manager, ks, table_name) - - for server in servers: - try: - leader_host_id = await wait_for_leader(manager, server, group_id) - break - except: - continue - - leader_host = host_by_host_id(leader_host_id) - tablet_replicas = await get_tablet_replicas(manager, servers[0], ks, table_name, 0) replica_host_ids = [replica[0] for replica in tablet_replicas] + + for i in range(4): + if host_ids[i] in replica_host_ids: + leader_host_id = await wait_for_leader(manager, servers[i], group_id) + break + leader_host = host_by_host_id(leader_host_id) + non_leader_replica_host_id = [hid for hid in replica_host_ids if str(hid) != str(leader_host_id)][0] non_leader_replica_host = host_by_host_id(non_leader_replica_host_id) non_replica_host_id = [hid for hid in host_ids if str(hid) not in [str(r) for r in replica_host_ids]][0]