diff --git a/test/cluster/test_multidc.py b/test/cluster/test_multidc.py
index 5b57c345cd..c361cbce2c 100644
--- a/test/cluster/test_multidc.py
+++ b/test/cluster/test_multidc.py
@@ -20,6 +20,7 @@ from cassandra.query import SimpleStatement
 from test.pylib.async_cql import _wrap_future
 from test.pylib.manager_client import ManagerClient
 from test.pylib.random_tables import RandomTables, TextType, Column
+from test.pylib.rest_client import read_barrier
 from test.pylib.util import unique_name
 
 from test.cluster.conftest import cluster_con
@@ -403,6 +404,7 @@ async def test_arbiter_dc_rf_rack_valid_keyspaces(manager: ManagerClient):
     for task in [*valid_keyspaces, *invalid_keyspaces]:
         _ = tg.create_task(task)
 
+@pytest.mark.asyncio
 async def test_startup_with_keyspaces_violating_rf_rack_valid_keyspaces(manager: ManagerClient):
     """
     This test verifies that starting a Scylla node fails when there's an RF-rack-invalid keyspace.
@@ -464,22 +466,50 @@ async def test_startup_with_keyspaces_violating_rf_rack_valid_keyspaces(manager:
     for rfs, tablets in valid_keyspaces:
         _ = tg.create_task(create_keyspace(rfs, tablets))
 
-    await manager.server_stop_gracefully(s1.server_id)
-    await manager.server_update_config(s1.server_id, "rf_rack_valid_keyspaces", "true")
-
+    # Precondition: s1 has rf_rack_valid_keyspaces set to false.
+    # Postcondition: s1 still has rf_rack_valid_keyspaces set to false.
     async def try_fail(rfs: List[int], dc: str, rf: int, rack_count: int):
+        running_servers = await manager.running_servers()
+        should_start = s1.server_id not in [server.server_id for server in running_servers]
+        if should_start:
+            await manager.server_start(s1.server_id)
+
         ks = await create_keyspace(rfs, True)
 
+        # We need to wait for the new schema to propagate.
+        # Otherwise, it's not clear when the mutation
+        # corresponding to the created keyspace will
+        # arrive at server 1.
+        # It could happen only after the node performs
+        # the check upon start-up, effectively leading
+        # to a successful start-up, which we don't want.
+        # For more context, see issue: SCYLLADB-1137.
+        await read_barrier(manager.api, s1.ip_addr)
+
+        await manager.server_stop_gracefully(s1.server_id)
+        await manager.server_update_config(s1.server_id, "rf_rack_valid_keyspaces", "true")
+
         err = f"The keyspace '{ks}' is required to be RF-rack-valid. " \
               f"That condition is violated for DC '{dc}': RF={rf} vs. rack count={rack_count}."
-        _ = await manager.server_start(s1.server_id, expected_error=err)
+        await manager.server_start(s1.server_id, expected_error=err)
         await cql.run_async(f"DROP KEYSPACE {ks}")
+        await manager.server_update_config(s1.server_id, "rf_rack_valid_keyspaces", "false")
+
     # Test RF-rack-invalid keyspaces.
     await try_fail([2, 0], "dc1", 2, 3)
     await try_fail([3, 2], "dc2", 2, 1)
     await try_fail([4, 1], "dc1", 4, 3)
 
-    _ = await manager.server_start(s1.server_id)
+    # We need to perform a read barrier on the node to make
+    # sure that it processes the last DROP KEYSPACE.
+    # Otherwise, the node could think the RF-rack-invalid
+    # keyspace still exists.
+    await manager.server_start(s1.server_id)
+    await read_barrier(manager.api, s1.ip_addr)
+    await manager.server_stop_gracefully(s1.server_id)
+
+    await manager.server_update_config(s1.server_id, "rf_rack_valid_keyspaces", "true")
+    await manager.server_start(s1.server_id)
 
 @pytest.mark.asyncio
 async def test_startup_with_keyspaces_violating_rf_rack_valid_keyspaces_but_not_enforced(manager: ManagerClient):