test: fix cql connection problem in test_auth_raft_command_split

This is a speculative fix as the problem is observed only on CI.
When run_async is called right after driver_connect and get_cql
it fails with ConnectionException('Host has been marked down or
removed').

If the approach proves to be successful, we can start to deprecate
the base get_cql in favor of get_ready_cql. It's better to have robust
testing helper libraries than try to take care of it in every test
case separately.

Fixes #17713

Closes scylladb/scylladb#17772
This commit is contained in:
Marcin Maliszkiewicz
2024-03-13 08:39:03 +01:00
committed by Kamil Braun
parent 4d83a8c12c
commit 7b60752e47
2 changed files with 15 additions and 8 deletions

View File

@@ -5,7 +5,6 @@
#
import asyncio
import time
from test.pylib.manager_client import ManagerClient
import pytest
from test.pylib.rest_client import inject_error, inject_error_one_shot
@@ -18,10 +17,7 @@ Tests case when bigger auth operation is split into multiple raft commands.
@pytest.mark.asyncio
async def test_auth_raft_command_split(manager: ManagerClient) -> None:
servers = await manager.servers_add(3)
cql = manager.get_cql()
hosts = await wait_for_cql_and_get_hosts(cql, servers, time.time() + 60)
await manager.servers_see_each_other(servers)
cql, hosts = await manager.get_ready_cql(servers)
initial_perms = await cql.run_async("SELECT * FROM system_auth_v2.role_permissions")
@@ -37,11 +33,13 @@ async def test_auth_raft_command_split(manager: ManagerClient) -> None:
# this will trigger cascade of deletes which should be packed
# into raft commands in a way that none exceeds max_command_size
await manager.driver_connect(server=servers[0])
cql = manager.get_cql()
cql, _ = await manager.get_ready_cql([servers[0]])
async with inject_error(manager.api, servers[0].ip_addr,
'auth_announce_mutations_command_max_size'):
await cql.run_async(f"DROP ROLE IF EXISTS {shared_role}", execution_profile='whitelist')
cql, hosts = await manager.get_ready_cql(servers)
# auth reads are eventually consistent so we need to sync all nodes
await asyncio.gather(*(read_barrier(cql, host) for host in hosts))

View File

@@ -15,8 +15,7 @@ from time import time
import logging
from test.pylib.log_browsing import ScyllaLogFile
from test.pylib.rest_client import UnixRESTClient, ScyllaRESTAPIClient, ScyllaMetricsClient
from test.pylib.util import wait_for
from test.pylib.util import wait_for_cql_and_get_hosts
from test.pylib.util import wait_for, wait_for_cql_and_get_hosts, Host
from test.pylib.internal_types import ServerNum, IPAddress, HostID, ServerInfo
from test.pylib.scylla_cluster import ReplaceConfig, ScyllaServer
from cassandra.cluster import Session as CassandraSession # type: ignore # pylint: disable=no-name-in-module
@@ -81,6 +80,16 @@ class ManagerClient():
assert self.cql
return self.cql
# More robust version of get_cql, when topology changes
# or cql statement is executed immediately after driver_connect
# it may fail unless we perform additional readiness checks
async def get_ready_cql(self, servers: List[ServerInfo]) -> tuple[CassandraSession, list[Host]]:
    """Return a CQL session that is ready to execute statements on *servers*.

    Precondition: driver is connected (e.g. driver_connect was awaited).

    Unlike plain get_cql, this first waits until the given servers see
    each other and until the driver has live Host objects for each of
    them (up to a 60-second deadline), avoiding sporadic
    "Host has been marked down or removed" failures right after
    (re)connecting.

    Returns a tuple of (session, hosts) where hosts are the driver's
    Host objects corresponding to *servers*.
    """
    cql = self.get_cql()
    # Ensure cluster membership has converged before issuing statements.
    await self.servers_see_each_other(servers)
    # Wait (deadline: now + 60s) for the driver to mark all servers up.
    hosts = await wait_for_cql_and_get_hosts(cql, servers, time() + 60)
    return cql, hosts
# Make driver update endpoints from remote connection
def _driver_update(self) -> None:
if self.ccluster is not None: