mirror of
https://github.com/scylladb/scylladb.git
synced 2026-05-25 09:11:10 +00:00
In `ks_prop_defs::as_ks_metadata(...)` a default initial tablets count
is set to 0, when tablets are enabled and the replication strategy
is NetworkReplicationStrategy.
This effectively sets _uses_tablets = false in abstract_replication_strategy
for the remaining strategies when no `tablets = {...}` options are specified.
As a consequence, it is possible to create vnode-based keyspaces even
when tablets are enforced with `tablets_mode_for_new_keyspaces`.
The patch sets a default initial tablets count to zero regardless of
the chosen replication strategy. Then each of the replication strategy
validates the options and raises a configuration exception when tablets
are not supported.
All tests are altered in the following way:
+ whenever it was correct, SimpleStrategy was replaced with NetworkTopologyStrategy
+ otherwise, tablets were explicitly disabled with ` AND tablets = {'enabled': false}`
Fixes https://github.com/scylladb/scylladb/issues/25340
Closes scylladb/scylladb#25342
276 lines
13 KiB
Python
276 lines
13 KiB
Python
#
|
|
# Copyright (C) 2024-present ScyllaDB
|
|
#
|
|
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
|
|
#
|
|
import logging
|
|
|
|
import pytest
|
|
import asyncio
|
|
from test.pylib.internal_types import ServerNum
|
|
from test.pylib.manager_client import ManagerClient
|
|
from test.pylib.rest_client import inject_error_one_shot, InjectionHandler, read_barrier
|
|
from test.cluster.util import create_new_test_keyspace
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@pytest.fixture(name="raft_op_timeout") # avoid the W0621:redefined-outer-name pylint warning
|
|
def fixture_raft_op_timeout(build_mode):
|
|
return 10000 if build_mode == 'debug' else 1000
|
|
|
|
|
|
async def update_group0_raft_op_timeout(server_id: ServerNum, manager: ManagerClient, timeout: int) -> None:
|
|
logger.info(f"Updating group0_raft_op_timeout_in_ms on server {server_id} to {timeout}")
|
|
running_ids = [srv.server_id for srv in await manager.running_servers()]
|
|
if server_id in running_ids:
|
|
# If the node is alive, server_update_config only sends the SIGHUP signal to the Scylla process, so awaiting it
|
|
# doesn't guarantee that the new config file is active. Work around this by looking at the logs.
|
|
log_file = await manager.server_open_log(server_id)
|
|
mark = await log_file.mark()
|
|
await manager.server_update_config(server_id, 'group0_raft_op_timeout_in_ms', timeout)
|
|
await log_file.wait_for("completed re-reading configuration file", from_mark=mark, timeout=60)
|
|
else:
|
|
await manager.server_update_config(server_id, 'group0_raft_op_timeout_in_ms', timeout)
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
@pytest.mark.skip_mode(mode='release', reason='error injections are not supported in release mode')
|
|
@pytest.mark.skip_mode(mode='debug', reason='aarch64/debug is unpredictably slow', platform_key='aarch64')
|
|
async def test_cannot_add_new_node(manager: ManagerClient, raft_op_timeout: int) -> None:
|
|
# This test makes sure that trying to add a new node fails with timeout
|
|
# if the majority of the cluster is not available.
|
|
# To exercise this, we start with a cluster of five nodes. This setup lets us check two situations:
|
|
# one where the new node's join request goes to the leader of the cluster, and
|
|
# another where it goes to a non-leader. Initially, the first node we start
|
|
# becomes the leader. Then, we shut down the last three nodes.
|
|
# This means the new node's request could be handled by either of the
|
|
# first two nodes, depending on which one responds first to the discovery request
|
|
# in persistent_discovery::run.
|
|
# In the second case we rely on a leader in Raft to steps down
|
|
# if an election timeout elapses without a successful round of heartbeats to a majority
|
|
# of its cluster (fsm::tick_leader).
|
|
# This is important since execute_read_barrier_on_leader doesn't take the abort_source,
|
|
# it just returns 'not_a_leader' and read_barrier rechecks the abort_source in the
|
|
# loop inside do_on_leader_with_retries.
|
|
|
|
config = {
|
|
'direct_failure_detector_ping_timeout_in_ms': 300,
|
|
'error_injections_at_startup': [
|
|
{
|
|
'name': 'raft-group-registry-fd-threshold-in-ms',
|
|
'value': '500'
|
|
}
|
|
]
|
|
}
|
|
logger.info("starting a first node (the leader)")
|
|
servers = [await manager.server_add(config=config)]
|
|
|
|
logger.info("starting a second node (a follower)")
|
|
servers += [await manager.server_add(config=config)]
|
|
|
|
logger.info("starting other three nodes")
|
|
servers += await manager.servers_add(servers_num=3)
|
|
|
|
logger.info("stopping the last three nodes")
|
|
await asyncio.gather(manager.server_stop_gracefully(servers[2].server_id),
|
|
manager.server_stop_gracefully(servers[3].server_id),
|
|
manager.server_stop_gracefully(servers[4].server_id))
|
|
|
|
# Do it here to prevent unexpected timeouts before quorum loss.
|
|
await asyncio.gather(*(update_group0_raft_op_timeout(srv.server_id, manager, raft_op_timeout)
|
|
for srv in servers[:2]))
|
|
|
|
logger.info("starting a sixth node with no quorum")
|
|
await manager.server_add(expected_error="raft operation \\[read_barrier\\] timed out, there is no raft quorum",
|
|
timeout=60)
|
|
|
|
logger.info("done")
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
@pytest.mark.skip_mode(mode='release', reason='error injections are not supported in release mode')
|
|
@pytest.mark.skip_mode(mode='debug', reason='aarch64/debug is unpredictably slow', platform_key='aarch64')
|
|
async def test_quorum_lost_during_node_join(manager: ManagerClient, raft_op_timeout: int) -> None:
|
|
config = {
|
|
'error_injections_at_startup': [
|
|
{
|
|
'name': 'raft-group-registry-fd-threshold-in-ms',
|
|
'value': '500'
|
|
}
|
|
]
|
|
}
|
|
logger.info("starting a first node (the leader)")
|
|
servers = [await manager.server_add(config=config)]
|
|
|
|
logger.info("starting second and third nodes (followers)")
|
|
servers += await manager.servers_add(servers_num=2)
|
|
|
|
logger.info(f"injecting join-node-before-add-entry into the leader node {servers[0]}")
|
|
injection_handler = await inject_error_one_shot(manager.api, servers[0].ip_addr, 'join-node-before-add-entry')
|
|
|
|
logger.info("starting a fourth node")
|
|
fourth_node_future = asyncio.create_task(manager.server_add(
|
|
seeds=[servers[0].ip_addr],
|
|
expected_error="raft operation \\[add_entry\\] timed out, there is no raft quorum",
|
|
timeout=60))
|
|
|
|
logger.info(f"waiting for the leader node {servers[0]} to start handling the join request")
|
|
log_file = await manager.server_open_log(servers[0].server_id)
|
|
await log_file.wait_for("join-node-before-add-entry: waiting", timeout=60)
|
|
|
|
logger.info("stopping the second and third nodes")
|
|
await asyncio.gather(manager.server_stop_gracefully(servers[1].server_id),
|
|
manager.server_stop_gracefully(servers[2].server_id))
|
|
|
|
# Do it here to prevent unexpected timeouts before quorum loss.
|
|
await update_group0_raft_op_timeout(servers[0].server_id, manager, raft_op_timeout)
|
|
|
|
logger.info("release join-node-before-add-entry injection")
|
|
await injection_handler.message()
|
|
|
|
logger.info("waiting for fourth node joining process to fail")
|
|
await fourth_node_future
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
@pytest.mark.skip_mode(mode='release', reason='error injections are not supported in release mode')
|
|
@pytest.mark.skip_mode(mode='debug', reason='aarch64/debug is unpredictably slow', platform_key='aarch64')
|
|
async def test_quorum_lost_during_node_join_response_handler(manager: ManagerClient, raft_op_timeout: int) -> None:
|
|
logger.info("starting a first node (the leader)")
|
|
servers = [await manager.server_add()]
|
|
|
|
logger.info("starting second and third nodes (followers)")
|
|
servers += await manager.servers_add(servers_num=2)
|
|
|
|
logger.info("adding a fourth node")
|
|
servers += [await manager.server_add(config={
|
|
'error_injections_at_startup': [
|
|
{
|
|
'name': 'raft-group-registry-fd-threshold-in-ms',
|
|
'value': '500'
|
|
},
|
|
{
|
|
'name': 'join-node-response_handler-before-read-barrier'
|
|
}
|
|
]
|
|
}, start=False)]
|
|
|
|
logger.info("starting a fourth node")
|
|
fourth_node_future = asyncio.create_task(
|
|
manager.server_start(servers[3].server_id,
|
|
expected_error="raft operation \\[read_barrier\\] timed out, there is no raft quorum",
|
|
timeout=60))
|
|
|
|
logger.info(
|
|
f"waiting for the fourth node {servers[3]} to hit join-node-response_handler-before-read-barrier")
|
|
log_file = await manager.server_open_log(servers[3].server_id)
|
|
await log_file.wait_for("join-node-response_handler-before-read-barrier: waiting", timeout=60)
|
|
|
|
logger.info("stopping the second and third nodes")
|
|
await asyncio.gather(manager.server_stop_gracefully(servers[1].server_id),
|
|
manager.server_stop_gracefully(servers[2].server_id))
|
|
|
|
# Do it here to prevent unexpected timeouts before quorum loss.
|
|
await update_group0_raft_op_timeout(servers[3].server_id, manager, raft_op_timeout)
|
|
|
|
logger.info("release join-node-response_handler-before-read-barrier injection")
|
|
injection_handler = InjectionHandler(manager.api,
|
|
'join-node-response_handler-before-read-barrier',
|
|
servers[3].ip_addr)
|
|
await injection_handler.message()
|
|
|
|
logger.info("waiting for fourth node joining process to fail")
|
|
await fourth_node_future
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
@pytest.mark.skip_mode(mode='release', reason='error injections are not supported in release mode')
|
|
@pytest.mark.skip_mode(mode='debug', reason='aarch64/debug is unpredictably slow', platform_key='aarch64')
|
|
async def test_cannot_run_operations(manager: ManagerClient, raft_op_timeout: int) -> None:
|
|
logger.info("starting a first node (the leader)")
|
|
servers = [await manager.server_add(config={
|
|
'error_injections_at_startup': [
|
|
{
|
|
'name': 'raft-group-registry-fd-threshold-in-ms',
|
|
'value': '500'
|
|
}
|
|
]
|
|
}, property_file={"dc": "dc1", "rack": "rack1"})]
|
|
|
|
logger.info("starting second and third nodes (followers)")
|
|
servers += await manager.servers_add(servers_num=2, property_file={"dc": "dc1", "rack": "rack2"})
|
|
|
|
logger.info('create keyspace and table')
|
|
ks = await create_new_test_keyspace(manager.get_cql(), "with replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 2}")
|
|
await manager.get_cql().run_async(f'create table {ks}.test_table (pk int primary key)')
|
|
|
|
logger.info("stopping the second and third nodes")
|
|
await asyncio.gather(manager.server_stop_gracefully(servers[1].server_id),
|
|
manager.server_stop_gracefully(servers[2].server_id))
|
|
|
|
# Do it here to prevent unexpected timeouts before quorum loss.
|
|
await update_group0_raft_op_timeout(servers[0].server_id, manager, raft_op_timeout)
|
|
|
|
logger.info("attempting removenode for the second node")
|
|
await manager.remove_node(servers[0].server_id, servers[1].server_id,
|
|
expected_error="raft operation [read_barrier] timed out, there is no raft quorum",
|
|
timeout=60)
|
|
|
|
logger.info("attempting decommission_node for the first node")
|
|
await manager.decommission_node(servers[0].server_id,
|
|
expected_error="raft operation [read_barrier] timed out, there is no raft quorum",
|
|
timeout=60)
|
|
|
|
logger.info("attempting rebuild_node for the first node")
|
|
await manager.rebuild_node(servers[0].server_id,
|
|
expected_error="raft operation [read_barrier] timed out, there is no raft quorum",
|
|
timeout=60)
|
|
|
|
with pytest.raises(Exception, match="raft operation \\[read_barrier\\] timed out, "
|
|
"there is no raft quorum, total voters count 3, alive voters count 1"):
|
|
await manager.get_cql().run_async(f'drop table {ks}.test_table', timeout=60)
|
|
|
|
logger.info("done")
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
@pytest.mark.skip_mode(mode='release', reason='dev mode is sufficient for this test')
|
|
@pytest.mark.skip_mode(mode='debug', reason='dev mode is sufficient for this test')
|
|
async def test_can_restart(manager: ManagerClient, raft_op_timeout: int) -> None:
|
|
"""
|
|
Test that restarts work without group 0 quorum. Stop all five nodes and restart them one by one.
|
|
|
|
The purpose of this test is to catch regressions that introduce a group 0 quorum requirement on the restart path.
|
|
This could happen if we added code that, for example, adds a group 0 command or executes a group 0 read barrier.
|
|
|
|
Note that a restarting node can add a group 0 command if it's upgrading (see e.g.
|
|
system_distributed_keyspace::create_tables that can add a new table). However, we can safely assume that nodes never
|
|
upgrade in quorum loss scenarios.
|
|
"""
|
|
logger.info("Adding servers")
|
|
servers = await manager.servers_add(5)
|
|
|
|
logger.info(f"Stopping {servers}")
|
|
await asyncio.gather(*(manager.server_stop(srv.server_id) for srv in servers))
|
|
|
|
# This ensures the read barriers below fail quickly without group 0 quorum.
|
|
await asyncio.gather(*(update_group0_raft_op_timeout(srv.server_id, manager, raft_op_timeout) for srv in servers))
|
|
|
|
logger.info(f"Restarting {servers[:2]} with no group 0 quorum")
|
|
for idx, srv in enumerate(servers[:2]):
|
|
await manager.server_start(srv.server_id)
|
|
with pytest.raises(Exception, match="raft operation \\[read_barrier\\] timed out, "
|
|
"there is no raft quorum, total voters count 5, "
|
|
f"alive voters count {idx + 1}"):
|
|
await read_barrier(manager.api, srv.ip_addr)
|
|
|
|
# Increase the timeout back to 300s to ensure the new group 0 leader is elected before the first read barrier below
|
|
# times out.
|
|
await asyncio.gather(*(update_group0_raft_op_timeout(srv.server_id, manager, 300000) for srv in servers))
|
|
|
|
logger.info(f"Restarting {servers[2:]} with group 0 quorum")
|
|
for srv in servers[2:]:
|
|
await manager.server_start(srv.server_id)
|
|
await read_barrier(manager.api, srv.ip_addr)
|