diff --git a/service/raft/group0_voter_handler.cc b/service/raft/group0_voter_handler.cc index 611ad830d9..73ed792690 100644 --- a/service/raft/group0_voter_handler.cc +++ b/service/raft/group0_voter_handler.cc @@ -496,6 +496,20 @@ future<> group0_voter_handler::update_nodes( group0_voter_calculator::nodes_list_t nodes; + auto get_replica_state = [this](const raft::server_id& id, bool allow_transitioning) -> const replica_state* { + if (const auto it = _topology.normal_nodes.find(id); it != _topology.normal_nodes.end()) { + return &it->second; + } + rvlogger.debug("Node {} is not present in the normal nodes", id); + if (allow_transitioning) { + if (const auto it = _topology.transition_nodes.find(id); it != _topology.transition_nodes.end()) { + return &it->second; + } + rvlogger.debug("Node {} is not present in the transitioning nodes", id); + } + return nullptr; + }; + // Helper for adding a single node to the nodes list auto add_node = [&nodes, &group0_config, &leader_id](const raft::server_id& id, const replica_state& rs, bool is_alive) { const auto is_voter = group0_config.can_vote(id); @@ -510,17 +524,16 @@ future<> group0_voter_handler::update_nodes( }; // Helper function to add nodes from a gossiper node set - auto add_nodes_from_list = [this, &add_node](const std::set& nodes_set, bool is_alive) { + auto add_nodes_from_list = [&add_node, &get_replica_state, &group0_config](const std::set& nodes_set, bool is_alive) { for (const auto& host_id : nodes_set) { const raft::server_id id{host_id.uuid()}; - const auto it = _topology.normal_nodes.find(id); - if (it == _topology.normal_nodes.end()) { - // This is expected, as nodes present in the gossiper may not be present - // in the topology yet or anymore. - rvlogger.debug("Node {} not found in the topology", id); + // We only allow transitioning nodes to be added if they are already voters. + const bool allow_transitioning = group0_config.can_vote(id); + const auto* node = get_replica_state(id, allow_transitioning); + if (!node) { continue; } - add_node(id, it->second, is_alive); + add_node(id, *node, is_alive); } }; @@ -535,12 +548,14 @@ future<> group0_voter_handler::update_nodes( rvlogger.debug("Node {} to be added is already a member", id); continue; } - const auto it = _topology.normal_nodes.find(id); - if (it == _topology.normal_nodes.end()) { + // When adding a node, it may not be present in normal nodes yet, but might be joining + // and be in the transitioning state. + const auto* node = get_replica_state(id, true); + if (!node) { rvlogger.warn("Node {} was not found in the topology, can't add as a voter candidate", id); continue; } - add_node(id, it->second, _gossiper.is_alive(locator::host_id{id.uuid()})); + add_node(id, *node, _gossiper.is_alive(locator::host_id{id.uuid()})); } std::unordered_set voters_add; diff --git a/service/topology_coordinator.cc b/service/topology_coordinator.cc index 98b18aa34e..e16cf4f295 100644 --- a/service/topology_coordinator.cc +++ b/service/topology_coordinator.cc @@ -2137,8 +2137,8 @@ class topology_coordinator : public endpoint_lifecycle_subscriber { .set("node_state", node_state::normal); rtbuilder.done(); auto reason = ::format("bootstrap: joined a zero-token node {}", node.id); - co_await update_topology_state(std::move(guard), {builder.build(), rtbuilder.build()}, reason); co_await _voter_handler.on_node_added(node.id, _as); + co_await update_topology_state(std::move(guard), {builder.build(), rtbuilder.build()}, reason); break; } @@ -2191,9 +2191,9 @@ class topology_coordinator : public endpoint_lifecycle_subscriber { builder2.with_node(replaced_id) .set("node_state", node_state::left); rtbuilder.done(); + co_await _voter_handler.on_node_added(node.id, _as); co_await update_topology_state(take_guard(std::move(node)), {builder.build(), builder2.build(), rtbuilder.build()}, fmt::format("replace: replaced node {} with the new zero-token node {}", replaced_id, node.id)); - co_await _voter_handler.on_node_added(node.id, _as); break; } @@ -2455,14 +2455,13 @@ class topology_coordinator : public endpoint_lifecycle_subscriber { .set("node_state", node_state::normal); muts.emplace_back(builder.build()); muts.emplace_back(rtbuilder.build()); - co_await update_topology_state(take_guard(std::move(node)), std::move(muts), - "bootstrap: read fence completed"); + co_await _voter_handler.on_node_added(node.id, _as); + co_await update_topology_state(take_guard(std::move(node)), std::move(muts), "bootstrap: read fence completed"); // Make sure the load balancer knows the capacity for the new node immediately. (void)_tablet_load_stats_refresh.trigger().handle_exception([] (auto ep) { rtlogger.warn("Error during tablet load stats refresh: {}", ep); }); } - co_await _voter_handler.on_node_added(node.id, _as); break; case node_state::removing: { co_await utils::get_local_injector().inject("delay_node_removal", utils::wait_for_message(std::chrono::minutes(5))); @@ -2517,10 +2516,10 @@ class topology_coordinator : public endpoint_lifecycle_subscriber { muts.push_back(rtbuilder.build()); co_await db::view::view_builder::generate_mutations_on_node_left(_db, _sys_ks, node.guard.write_timestamp(), locator::host_id(replaced_node_id.uuid()), muts); + co_await _voter_handler.on_node_added(node.id, _as); co_await update_topology_state(take_guard(std::move(node)), std::move(muts), "replace: read fence completed"); } - co_await _voter_handler.on_node_added(node.id, _as); break; default: on_fatal_internal_error(rtlogger, ::format( @@ -2669,10 +2668,10 @@ class topology_coordinator : public endpoint_lifecycle_subscriber { auto str = fmt::format("complete rollback of {} to state normal after {} failure", node.id, node.rs->state); + co_await _voter_handler.on_node_added(node.id, _as); + rtlogger.info("{}", str); co_await update_topology_state(std::move(node.guard), {builder.build(), rtbuilder.build()}, str); - - co_await _voter_handler.on_node_added(node.id, _as); } break; case topology::transition_state::truncate_table: