Files
scylladb/test/cluster/test_node_isolation.py
Gleb Natapov 39cec4ae45 topology: let banned node know that it is banned
Currently if a banned node tries to connect to a cluster it fails to
create connections, but has no idea why, so from inside the node it
looks like it has communication problems. This patch adds new rpc
NOTIFY_BANNED which is sent back to the node when its connection is
dropped. On receiving the rpc the node isolates itself and print an
informative message about why it did so.

Closes scylladb/scylladb#26943
2025-11-24 17:12:13 +01:00

51 lines
2.1 KiB
Python

#
# Copyright (C) 2023-present ScyllaDB
#
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
#
import logging
import pytest
from cassandra.cluster import ConsistencyLevel, ExecutionProfile, EXEC_PROFILE_DEFAULT # type: ignore
from cassandra.cluster import NoHostAvailable, OperationTimedOut # type: ignore
from cassandra.cluster import Cluster # type: ignore
from cassandra.query import SimpleStatement # type: ignore
from cassandra.policies import WhiteListRoundRobinPolicy # type: ignore
from test.pylib.manager_client import ManagerClient
from test.pylib.rest_client import read_barrier
from test.cluster.util import create_new_test_keyspace
logger = logging.getLogger(__name__)
@pytest.mark.asyncio
@pytest.mark.nightly
async def test_banned_node_notification(manager: ManagerClient) -> None:
"""Test that a node banned from the cluster get notification about been banned"""
# Decrease the failure detector threshold so we don't have to wait for too long.
config = {
'failure_detector_timeout_in_ms': 2000
}
srvs = await manager.servers_add(3, config=config)
cql = manager.get_cql()
# Pause one of the servers so other nodes mark it as dead and we can remove it.
# We deliberately don't shut it down, but only pause it - we want to test
# that we solved the harder problem of safely removing nodes which didn't shut down.
logger.info(f"Pausing server {srvs[2]}")
await manager.server_pause(srvs[2].server_id)
logger.info(f"Removing {srvs[2]} using {srvs[0]}")
await manager.remove_node(srvs[0].server_id, srvs[2].server_id)
# Perform a read barrier on srvs[1] so it learns about the ban.
logger.info(f"Performing read barrier on server {srvs[1]}")
await read_barrier(manager.api, srvs[1].ip_addr)
logger.info(f"Unpausing {srvs[2]}")
await manager.server_unpause(srvs[2].server_id)
log = await manager.server_open_log(srvs[2].server_id)
_, matches = await log.wait_for(f"received notification of being banned from the cluster from")
# check that the node is notified about being banned
assert len(matches) != 0, "The node did not log being banned from the cluster"