Files
scylladb/test/cluster/test_not_enough_token_owners.py
Tomasz Grabiec c9f0a9d0eb tablets: scheduler: Balance racks separately when rf_rack_valid_keyspaces is true
Greatly improves performance of plan making, because we don't consider
candidates in other racks, most of which will fail to be selected due
to replication constraints (no rack overload). Also (but minor)
reduces the overhead of candidate evaluation, as we don't have to
evaluate rack load.

Enabled only for rf_rack_valid_keyspaces because such setups guarantee
that we will not need (because we must not) move tablets across racks,
and we don't need to execute the general algorithm for the whole DC.

Tested with perf-load-balancing, which performs a single scale-out
operation on a cluster which initially has 10 nodes 88 shards each, 2
racks, RF=2, 70 tables, 256 tablets per table. Scale out adds 6 new
nodes (same shard count). Time to rebalance the cluster (plan making
only, sum of all iterations, no streaming):

Before: 16 min 25 s
After: 0 min 25 s

Before, plan making cost (single incremental iteration) alternated
between fast (0.1 [s]) and slow (14.1 [s]):

  Rebalance iteration 7 took 14.156 [s]: mig=88, bad=88, first_bad=17741, eval=93874484, skiplist=0, skip: (load=0, rack=17653, node=0)
  Rebalance iteration 8 took 0.143 [s]: mig=88, bad=88, first_bad=88, eval=865407, skiplist=0, skip: (load=0, rack=0, node=0)

The slow run chose min and max nodes in different racks, hence the
fast path failed to find any candidates and we switched to exhaustive
search of candidates in other nodes.

After, all iterations are fast (0.1 [s] per rack, 0.2 [s] per plan-making).
The plan is twice as large because it combines the output of two subsequent (pre-patch)
plan-making calls.

Fixes #26016
2025-09-23 00:30:37 +02:00

72 lines
3.4 KiB
Python

#
# Copyright (C) 2024-present ScyllaDB
#
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
#
import pytest
import logging
import time
from test.pylib.manager_client import ManagerClient
from test.pylib.util import unique_name, wait_for_cql_and_get_hosts
from test.cluster.util import new_test_keyspace
@pytest.mark.asyncio
async def test_not_enough_token_owners(manager: ManagerClient):
"""
Test that:
- the first node in the cluster cannot be a zero-token node
- removenode and decommission of the only token owner fail in the presence of zero-token nodes
- removenode and decommission of a token owner fail in the presence of zero-token nodes if the number of token
owners would fall below the RF of some keyspace using tablets
"""
logging.info('Trying to add a zero-token server as the first server in the cluster')
await manager.server_add(config={'join_ring': False},
property_file={"dc": "dc1", "rack": "rz"},
expected_error='Cannot start the first node in the cluster as zero-token')
logging.info('Adding the first server')
server_a = await manager.server_add(property_file={"dc": "dc1", "rack": "r1"})
logging.info('Adding two zero-token servers')
# The second server is needed only to preserve the Raft majority.
server_b = (await manager.servers_add(2, config={'join_ring': False}, property_file={"dc": "dc1", "rack": "rz"}))[0]
logging.info(f'Trying to decommission the only token owner {server_a}')
await manager.decommission_node(server_a.server_id,
expected_error='Cannot decommission the last token-owning node in the cluster')
logging.info(f'Stopping {server_a}')
await manager.server_stop_gracefully(server_a.server_id)
logging.info(f'Trying to remove the only token owner {server_a} by {server_b}')
await manager.remove_node(server_b.server_id, server_a.server_id,
expected_error='cannot be removed because it is the last token-owning node in the cluster')
logging.info(f'Starting {server_a}')
await manager.server_start(server_a.server_id)
logging.info('Adding a normal server')
await manager.server_add(property_file={"dc": "dc1", "rack": "r2"})
cql = manager.get_cql()
await wait_for_cql_and_get_hosts(cql, [server_a], time.time() + 60)
async with new_test_keyspace(manager, "WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 2} AND tablets = { 'enabled': true }") as ks_name:
await cql.run_async(f'CREATE TABLE {ks_name}.tbl (pk int PRIMARY KEY, v int)')
await cql.run_async(f'INSERT INTO {ks_name}.tbl (pk, v) VALUES (1, 1)')
# FIXME: Once scylladb/scylladb#16195 is fixed, we will have to replace the expected error message.
# A similar change may be needed for remove_node below.
logging.info(f'Trying to decommission {server_a} - one of the two token owners')
await manager.decommission_node(server_a.server_id, expected_error='Decommission failed')
logging.info(f'Stopping {server_a}')
await manager.server_stop_gracefully(server_a.server_id)
logging.info(f'Trying to remove {server_a}, one of the two token owners, by {server_b}')
await manager.remove_node(server_b.server_id, server_a.server_id,
expected_error='Removenode failed')