Greatly improves performance of plan making, because we don't consider candidates in other racks, most of which will fail to be selected due to replication constraints (no rack overload). Also (but minor) reduces the overhead of candidate evaluation, as we don't have to evaluate rack load. Enabled only for rf_rack_valid_keyspaces because such setups guarantee that we will not need (because we must not) move tablets across racks, and we don't need to execute the general algorithm for the whole DC. Tested with perf-load-balancing, which performs a single scale-out operation on a cluster which initially has 10 nodes 88 shards each, 2 racks, RF=2, 70 tables, 256 tablets per table. Scale out adds 6 new nodes (same shard count). Time to rebalance the cluster (plan making only, sum of all iterations, no streaming): Before: 16 min 25 s After: 0 min 25 s Before, plan making cost (single incremental iteration) alternated between fast (0.1 [s]) and slow (14.1 [s]): Rebalance iteration 7 took 14.156 [s]: mig=88, bad=88, first_bad=17741, eval=93874484, skiplist=0, skip: (load=0, rack=17653, node=0) Rebalance iteration 8 took 0.143 [s]: mig=88, bad=88, first_bad=88, eval=865407, skiplist=0, skip: (load=0, rack=0, node=0) The slow run chose min and max nodes in different racks, hence the fast path failed to find any candidates and we switched to exhaustive search of candidates in other nodes. After, all iterations are fast (0.1 [s] per rack, 0.2 [s] per plan-making). The plan is twice as large because it combines the output of two subsequent (pre-patch) plan-making calls. Fixes #26016
72 lines
3.4 KiB
Python
#
# Copyright (C) 2024-present ScyllaDB
#
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
#
import pytest
import logging
import time

from test.pylib.manager_client import ManagerClient
from test.pylib.util import unique_name, wait_for_cql_and_get_hosts
from test.cluster.util import new_test_keyspace


@pytest.mark.asyncio
async def test_not_enough_token_owners(manager: ManagerClient):
    """
    Test that:
    - the first node in the cluster cannot be a zero-token node
    - removenode and decommission of the only token owner fail in the presence of zero-token nodes
    - removenode and decommission of a token owner fail in the presence of zero-token nodes if the number of token
      owners would fall below the RF of some keyspace using tablets
    """
    # A zero-token node (join_ring=False) must be rejected as the very first node,
    # since the cluster would then have no token owners at all.
    logging.info('Trying to add a zero-token server as the first server in the cluster')
    await manager.server_add(config={'join_ring': False},
                             property_file={"dc": "dc1", "rack": "rz"},
                             expected_error='Cannot start the first node in the cluster as zero-token')

    logging.info('Adding the first server')
    server_a = await manager.server_add(property_file={"dc": "dc1", "rack": "r1"})

    logging.info('Adding two zero-token servers')
    # The second server is needed only to preserve the Raft majority.
    server_b = (await manager.servers_add(2, config={'join_ring': False}, property_file={"dc": "dc1", "rack": "rz"}))[0]

    # server_a is the sole token owner; both decommission and removenode of it must fail.
    logging.info(f'Trying to decommission the only token owner {server_a}')
    await manager.decommission_node(server_a.server_id,
                                    expected_error='Cannot decommission the last token-owning node in the cluster')

    # removenode requires the target to be down, so stop server_a first.
    logging.info(f'Stopping {server_a}')
    await manager.server_stop_gracefully(server_a.server_id)

    logging.info(f'Trying to remove the only token owner {server_a} by {server_b}')
    await manager.remove_node(server_b.server_id, server_a.server_id,
                              expected_error='cannot be removed because it is the last token-owning node in the cluster')

    logging.info(f'Starting {server_a}')
    await manager.server_start(server_a.server_id)

    # Add a second token owner so there are exactly two; with an RF=2 tablets
    # keyspace below, losing either owner would drop token owners below RF.
    logging.info('Adding a normal server')
    await manager.server_add(property_file={"dc": "dc1", "rack": "r2"})

    cql = manager.get_cql()

    await wait_for_cql_and_get_hosts(cql, [server_a], time.time() + 60)

    async with new_test_keyspace(manager, "WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 2} AND tablets = { 'enabled': true }") as ks_name:
        await cql.run_async(f'CREATE TABLE {ks_name}.tbl (pk int PRIMARY KEY, v int)')
        await cql.run_async(f'INSERT INTO {ks_name}.tbl (pk, v) VALUES (1, 1)')

        # FIXME: Once scylladb/scylladb#16195 is fixed, we will have to replace the expected error message.
        # A similar change may be needed for remove_node below.
        logging.info(f'Trying to decommission {server_a} - one of the two token owners')
        await manager.decommission_node(server_a.server_id, expected_error='Decommission failed')

        logging.info(f'Stopping {server_a}')
        await manager.server_stop_gracefully(server_a.server_id)

        logging.info(f'Trying to remove {server_a}, one of the two token owners, by {server_b}')
        await manager.remove_node(server_b.server_id, server_a.server_id,
                                  expected_error='Removenode failed')