#
# Copyright (C) 2025-present ScyllaDB
#
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
#
import logging

import pytest

from test.pylib.manager_client import ManagerClient
from test.cluster.util import check_token_ring_and_group0_consistency

logger = logging.getLogger(__name__)


@pytest.mark.asyncio
async def test_left_node_notification(manager: ManagerClient) -> None:
    """
    Create a 3-node multi-DC cluster with 2 nodes in dc1 and 1 node in dc2.
    Then decommission both dc1 nodes, ensuring that the topology remains
    consistent, that the remaining node belongs to dc2, and that only two
    'left the cluster' notifications were issued.
    """
    # Bootstrap 2 nodes in dc1
    logger.info("Bootstrapping dc1 nodes")
    dc1_node_a = await manager.server_add(property_file={"dc": "dc1", "rack": "r1"})
    dc1_node_b = await manager.server_add(property_file={"dc": "dc1", "rack": "r2"})

    # Bootstrap 1 node in dc2 with storage_service debug logging
    logger.info("Bootstrapping dc2 node with storage_service=debug")
    dc2_node = await manager.server_add(
        cmdline=["--logger-log-level", "storage_service=debug"],
        property_file={"dc": "dc2", "rack": "r1"})

    # When table audit is enabled, Scylla creates the "audit" keyspace with
    # NetworkTopologyStrategy and RF=3 in dc1 only. To avoid decommission
    # failures due to "zero replica after the removal" or "can not find new
    # node in local dc" errors when removing dc1 nodes, we alter the audit
    # keyspace to have replicas only in dc2. Only alter if the audit keyspace
    # exists (it might not exist if audit is disabled).
    cql = manager.get_cql()
    result = await cql.run_async("SELECT * FROM system_schema.keyspaces WHERE keyspace_name = 'audit'")
    if result:
        await cql.run_async("ALTER KEYSPACE audit WITH REPLICATION = {'class': 'NetworkTopologyStrategy', 'dc2': 1}")

    # Ensure ring and group0 are consistent before operations
    await check_token_ring_and_group0_consistency(manager)

    # Decommission both dc1 nodes
    logger.info(f"Decommissioning dc1 node {dc1_node_b}")
    await manager.decommission_node(dc1_node_b.server_id)
    await check_token_ring_and_group0_consistency(manager)

    logger.info(f"Decommissioning dc1 node {dc1_node_a}")
    await manager.decommission_node(dc1_node_a.server_id)
    await check_token_ring_and_group0_consistency(manager)

    # Verify that only the dc2 node remains running
    running = await manager.running_servers()
    assert len(running) == 1, f"Expected 1 running server, found {len(running)}: {running}"
    assert running[0].datacenter == "dc2", f"Remaining node should be in dc2, got {running[0].datacenter}"
    logger.info("Successfully decommissioned both dc1 nodes; dc2 node remains running")

    # Check that the remaining node's log contains exactly two
    # 'Notify node ... has left the cluster' messages
    log = await manager.server_open_log(dc2_node.server_id)
    left_msgs = await log.grep(r"Notify node .* has left the cluster")
    assert len(left_msgs) == 2, f"Expected exactly 2 'left the cluster' notifications, got {len(left_msgs)}"