topology.cc: unindex_node: _dc_racks removal fix
The eps reference was reused to manipulate the racks dictionary. This resulted in assigning a set of nodes from the racks dictionary to an element of the _dc_endpoints dictionary. The problem was demonstrated by the dtest test_decommission_last_node_in_rack (scylladb/scylla-dtest#3299). The test set up four nodes, three on one rack and one on another, all within a single data center (dc). It then switched to a 'network_topology_strategy' for one keyspace and tried to decommission the single node on the second rack. This decommission command failed with the error message 'zero replica after the removal'. This happened because unindex_node assigned the empty list from the second rack as the value for the single dc in the _dc_endpoints dictionary. As a result, we got an empty node list for the single dc in natural_endpoints_tracker::_all_endpoints, node_count == 0 in data_center_endpoints, and _rf_left == 0, so network_topology_strategy::calculate_natural_endpoints rejected all the endpoints and returned an empty endpoint_set. In repair_service::do_decommission_removenode_with_repair this caused the 'zero replica after the removal' error. With this fix the test passes both with the --consistent-cluster-management option and without it. A specific unit test for this problem was added. Fixes: #14184
This commit is contained in:
@@ -362,9 +362,9 @@ void topology::unindex_node(const node* node) {
|
||||
_dc_rack_nodes[dc][rack].erase(node);
|
||||
auto& racks = _dc_racks[dc];
|
||||
if (auto rit = racks.find(rack); rit != racks.end()) {
|
||||
eps = rit->second;
|
||||
eps.erase(ep);
|
||||
if (eps.empty()) {
|
||||
auto& rack_eps = rit->second;
|
||||
rack_eps.erase(ep);
|
||||
if (rack_eps.empty()) {
|
||||
racks.erase(rit);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -178,6 +178,51 @@ SEASTAR_THREAD_TEST_CASE(test_update_node) {
|
||||
BOOST_REQUIRE_EQUAL(node->get_state(), locator::node::state::left);
|
||||
}
|
||||
|
||||
// Regression test for #14184: removing the last endpoint of a rack must
// only drop that rack's entry from the per-DC rack index — it must not
// clobber the datacenter's endpoint set in _dc_endpoints.
SEASTAR_THREAD_TEST_CASE(test_remove_endpoint) {
    using dc_endpoints_t = std::unordered_map<sstring, std::unordered_set<inet_address>>;
    using dc_racks_t = std::unordered_map<sstring, std::unordered_map<sstring, std::unordered_set<inet_address>>>;
    using dcs_t = std::unordered_set<sstring>;

    const auto host1 = host_id::create_random_id();
    const auto addr1 = gms::inet_address("127.0.0.1");
    const auto host2 = host_id::create_random_id();
    const auto addr2 = gms::inet_address("127.0.0.2");

    // Two distinct racks inside one datacenter, one node each.
    const auto rack1_loc = endpoint_dc_rack{
        .dc = "dc1",
        .rack = "rack1"
    };
    const auto rack2_loc = endpoint_dc_rack{
        .dc = "dc1",
        .rack = "rack2"
    };

    utils::fb_utilities::set_broadcast_address(addr1);
    topology::config cfg = {
        .this_host_id = host1,
        .this_endpoint = addr1,
        .local_dc_rack = rack1_loc
    };

    auto topo = topology(cfg);

    topo.add_node(host1, addr1, rack1_loc, node::state::normal);
    topo.add_node(host2, addr2, rack2_loc, node::state::normal);

    // Baseline: both endpoints indexed under dc1, one per rack.
    BOOST_REQUIRE_EQUAL(topo.get_datacenter_endpoints(), (dc_endpoints_t{{"dc1", {addr1, addr2}}}));
    BOOST_REQUIRE_EQUAL(topo.get_datacenter_racks(), (dc_racks_t{{"dc1", {{"rack1", {addr1}}, {"rack2", {addr2}}}}}));
    BOOST_REQUIRE_EQUAL(topo.get_datacenters(), (dcs_t{"dc1"}));

    // Remove the sole node of rack2: rack2 disappears, but dc1 must
    // still list addr1 (the #14184 bug emptied the whole dc instead).
    topo.remove_endpoint(addr2);
    BOOST_REQUIRE_EQUAL(topo.get_datacenter_endpoints(), (dc_endpoints_t{{"dc1", {addr1}}}));
    BOOST_REQUIRE_EQUAL(topo.get_datacenter_racks(), (dc_racks_t{{"dc1", {{"rack1", {addr1}}}}}));
    BOOST_REQUIRE_EQUAL(topo.get_datacenters(), (dcs_t{"dc1"}));

    // Remove the last endpoint: endpoint and rack indexes drain empty,
    // while the datacenter name itself remains registered.
    topo.remove_endpoint(addr1);
    BOOST_REQUIRE_EQUAL(topo.get_datacenter_endpoints(), (dc_endpoints_t{}));
    BOOST_REQUIRE_EQUAL(topo.get_datacenter_racks(), (dc_racks_t{}));
    BOOST_REQUIRE_EQUAL(topo.get_datacenters(), (dcs_t{"dc1"}));
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_load_sketch) {
|
||||
inet_address ip1("192.168.0.1");
|
||||
inet_address ip2("192.168.0.2");
|
||||
|
||||
Reference in New Issue
Block a user