raft: ensure topology coordinator retains votership
The limited voters feature did not account for the existing topology coordinator (the Raft leader) when selecting voters for removal. As a result, the limited voters calculator could inadvertently remove the votership of the current topology coordinator, triggering an unnecessary Raft leader re-election.

This change ensures that the existing topology coordinator's votership is preserved unless absolutely necessary: when choosing between otherwise equivalent voters, a node other than the topology coordinator is prioritized for removal. This helps maintain cluster stability by avoiding unnecessary leader re-elections.

Additionally, only an alive leader node is considered relevant for this logic. A dead existing leader (topology coordinator) is excluded from consideration, as it is already in the process of losing leadership.

Fixes: scylladb/scylladb#23588
Fixes: scylladb/scylladb#23786
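To illustrate the selection rule, here is a minimal sketch of a leader-aware removal tie-break. The names node_info and pick_voter_to_remove are hypothetical, and this is not the actual group0_voter_calculator implementation — only the behavior described above: among otherwise equivalent voters, prefer removing a node that is not an alive leader.

#include <vector>

// Hypothetical, simplified node descriptor (not the real
// group0_voter_calculator types).
struct node_info {
    bool is_voter = false;
    bool is_alive = false;
    bool is_leader = false; // current topology coordinator (Raft leader)
};

// Among otherwise equivalent removal candidates, prefer a node that is
// not an alive leader. A dead leader gets no special treatment, since it
// is already in the process of losing leadership.
const node_info* pick_voter_to_remove(const std::vector<node_info>& candidates) {
    const node_info* alive_leader = nullptr;
    for (const auto& node : candidates) {
        if (!node.is_voter) {
            continue; // only voters can lose votership
        }
        if (node.is_leader && node.is_alive) {
            alive_leader = &node; // remove only as a last resort
            continue;
        }
        return &node; // non-leader (or dead leader): remove this one first
    }
    return alive_leader; // nullptr if there were no voters at all
}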
@@ -6,6 +6,7 @@
 * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
 */

#include <boost/test/data/test_case.hpp>
#include <boost/test/unit_test.hpp>

#include "service/raft/group0_voter_calculator.hh"

@@ -939,4 +940,106 @@ BOOST_AUTO_TEST_CASE(existing_voters_are_kept_across_racks) {
}

BOOST_DATA_TEST_CASE(leader_is_retained_as_voter, boost::unit_test::data::make({0, 1, 2}), leader_node_idx) {
    // Arrange: Set the voters limit and create the voter calculator.

    constexpr size_t max_voters = 3;
    const service::group0_voter_calculator voter_calc{max_voters};

    // Act: Add a third DC to a 2 DC cluster.

    const std::array ids = {raft::server_id::create_random_id(), raft::server_id::create_random_id(), raft::server_id::create_random_id(),
            raft::server_id::create_random_id(), raft::server_id::create_random_id(), raft::server_id::create_random_id()};

    const service::group0_voter_calculator::nodes_list_t nodes = {
            // The initial nodes (just 2 DCs)
            {ids[0], {.datacenter = "dc-1", .rack = "rack", .is_voter = true, .is_alive = true, .is_leader = leader_node_idx == 0}},
            {ids[1], {.datacenter = "dc-1", .rack = "rack", .is_voter = true, .is_alive = true, .is_leader = leader_node_idx == 1}},
            {ids[2], {.datacenter = "dc-2", .rack = "rack", .is_voter = true, .is_alive = true, .is_leader = leader_node_idx == 2}},
            {ids[3], {.datacenter = "dc-2", .rack = "rack", .is_voter = false, .is_alive = true}},
            // The new nodes (3rd DC)
            // - this will lead to 1 voter being removed from DC-1
            {ids[4], {.datacenter = "dc-3", .rack = "rack", .is_voter = false, .is_alive = true}},
            {ids[5], {.datacenter = "dc-3", .rack = "rack", .is_voter = false, .is_alive = true}},
    };

    const auto& voters = voter_calc.distribute_voters(nodes);

    // Assert: The current leader is retained as a voter, even after adding
    // the third datacenter and redistributing voters.

    BOOST_CHECK_EQUAL(voters.size(), max_voters);
    BOOST_CHECK(voters.contains(ids[leader_node_idx]));
}

BOOST_DATA_TEST_CASE(leader_is_retained_as_voter_in_racks, boost::unit_test::data::make({0, 1, 3}), leader_node_idx) {
    // Arrange: Set the voters limit and create the voter calculator.

    constexpr size_t max_voters = 3;
    const service::group0_voter_calculator voter_calc{max_voters};

    // Act: Add a third DC to a 2 DC cluster.

    const std::array ids = {raft::server_id::create_random_id(), raft::server_id::create_random_id(), raft::server_id::create_random_id(),
            raft::server_id::create_random_id(), raft::server_id::create_random_id(), raft::server_id::create_random_id()};

    const service::group0_voter_calculator::nodes_list_t nodes = {
            // The initial nodes (just 2 DCs)
            {ids[0], {.datacenter = "dc-1", .rack = "rack-1", .is_voter = true, .is_alive = true, .is_leader = leader_node_idx == 0}},
            {ids[1], {.datacenter = "dc-1", .rack = "rack-2", .is_voter = true, .is_alive = true, .is_leader = leader_node_idx == 1}},
            {ids[2], {.datacenter = "dc-2", .rack = "rack-3", .is_voter = false, .is_alive = true}},
            {ids[3], {.datacenter = "dc-2", .rack = "rack-4", .is_voter = true, .is_alive = true, .is_leader = leader_node_idx == 3}},
            // The new nodes (3rd DC)
            // - this will lead to 1 voter being removed from DC-1
            {ids[4], {.datacenter = "dc-3", .rack = "rack-5", .is_voter = false, .is_alive = true}},
            {ids[5], {.datacenter = "dc-3", .rack = "rack-6", .is_voter = false, .is_alive = true}},
    };

    const auto& voters = voter_calc.distribute_voters(nodes);

    // Assert: The current leader is retained as a voter, even after adding
    // the third datacenter and redistributing voters.

    BOOST_CHECK_EQUAL(voters.size(), max_voters);
    BOOST_CHECK(voters.contains(ids[leader_node_idx]));
}

BOOST_DATA_TEST_CASE(leader_is_retained_as_voter_in_two_dc_asymmetric_setup, boost::unit_test::data::make({0, 1}), leader_node_idx) {
    // Arrange: Set the voters limit and create the voter calculator.

    constexpr size_t max_voters = 3;
    const service::group0_voter_calculator voter_calc{max_voters};

    // Act: Remove nodes from a 2 DC cluster so that each DC has 1 node left.
    // - this will lead to 1 voter being removed from one DC
    // - the leader should be retained as a voter

    const std::array ids = {raft::server_id::create_random_id(), raft::server_id::create_random_id()};

    const service::group0_voter_calculator::nodes_list_t nodes = {
            // The remaining nodes (just one per DC)
            {ids[0], {.datacenter = "dc-1", .rack = "rack", .is_voter = true, .is_alive = true, .is_leader = leader_node_idx == 0}},
            {ids[1], {.datacenter = "dc-2", .rack = "rack", .is_voter = true, .is_alive = true, .is_leader = leader_node_idx == 1}},
            // no other nodes - all removed
    };

    const auto& voters = voter_calc.distribute_voters(nodes);

    // Assert: The current leader is retained as a voter.

    // TODO (scylladb/scylladb#23266): This should be equal to 1 once we enforce an odd number of voters
    BOOST_CHECK_GE(voters.size(), 1);
    BOOST_CHECK(voters.contains(ids[leader_node_idx]));
}

BOOST_AUTO_TEST_SUITE_END()
@@ -15,7 +15,7 @@ from test.pylib.internal_types import ServerInfo
from test.pylib.manager_client import ManagerClient
from test.pylib.rest_client import read_barrier
from test.cluster.conftest import cluster_con, skip_mode
-from test.cluster.util import get_current_group0_config
+from test.cluster.util import get_coordinator_host_ids, get_current_group0_config

GROUP0_VOTERS_LIMIT = 5

@@ -176,3 +176,60 @@ async def test_raft_limited_voters_upgrade(manager: ManagerClient):

    num_voters = await get_number_of_voters(manager, servers[0])
    assert num_voters == GROUP0_VOTERS_LIMIT, f"The number of voters should be limited to {GROUP0_VOTERS_LIMIT} (but is {num_voters})"


@pytest.mark.asyncio
async def test_raft_limited_voters_retain_coordinator(manager: ManagerClient):
    """
    Test that the topology coordinator is retained as a voter when possible.

    This test ensures that if a voter needs to be removed and the topology coordinator
    is among the candidates for removal, the coordinator is prioritized to remain a voter.

    Arrange:
    - Create 2 DCs with 3 nodes each.
    - Retrieve the current topology coordinator.
    Act:
    - Add a third DC with 1 node.
    - This will require removing one voter from either DC1 or DC2.
    Assert:
    - Verify that the topology coordinator is retained as a voter.
    """

    # Arrange: Create 2 DCs with 3 nodes each and find the current topology coordinator

    dc_setup = [
        {
            'property_file': {'dc': 'dc1', 'rack': 'rack1'},
            'num_nodes': 3,
        },
        {
            'property_file': {'dc': 'dc2', 'rack': 'rack2'},
            'num_nodes': 3,
        },
    ]

    dc_servers = []
    for dc in dc_setup:
        logging.info(
            f"Creating {dc['property_file']['dc']} with {dc['num_nodes']} nodes")
        dc_servers.append(await manager.servers_add(dc['num_nodes'],
                                                    property_file=dc['property_file']))

    assert len(dc_servers) == len(dc_setup)

    coordinator_ids = await get_coordinator_host_ids(manager)
    assert coordinator_ids, "At least 1 coordinator id should be found"

    coordinator_id = coordinator_ids[0]

    # Act: Add a new DC with one node

    logging.info('Adding a new DC with one node')
    await manager.servers_add(1, property_file={'dc': 'dc3', 'rack': 'rack3'})

    # Assert: Verify that the topology coordinator is still a voter

    group0_members = await get_current_group0_config(manager, dc_servers[0][0])
    assert any(m[0] == coordinator_id and m[1] for m in group0_members), \
        f"The coordinator {coordinator_id} should be a voter (but is not)"