From 1f58c1e76216104fdce28c68cfa6724091b444f1 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Mon, 12 Jun 2023 18:17:50 +0200 Subject: [PATCH] storage_service: remove optimization in cleanup_group0_config_if_needed The `topology_coordinator::cleanup_group0_config_if_needed` function first checks whether the number of group 0 members is larger than the number of non-left entries in the topology table, then attempts to remove nodes in left state from group 0 and prints a warning if no such nodes are found. There are some problems with this check: - Currently, a node is added to group 0 before it inserts its entry to the topology table. Such a node may cause the check to succeed but no nodes will be removed, which will cause the warning to be printed needlessly. - Cluster features on raft will reverse the situation and it will be possible for an entry in system.topology to exist without the corresponding node being a part of group 0. This, in turn, may cause the check not to pass when it should and nodes could be removed later than necessary. This commit gets rid of the optimization and the warning, and the topology coordinator will always compute the set of nodes that should be removed. Additionally, the set of nodes to remove is now computed differently: instead of iterating over left nodes and including only those that are in group 0, we now iterate over group 0 members and include those that are in `left` state. As the number of left nodes can potentially grow unbounded and the number of group 0 members is more likely to be bounded, this should give better performance in long-running clusters. --- service/storage_service.cc | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/service/storage_service.cc b/service/storage_service.cc index 68294d8016..3df5ab8bcb 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -714,27 +714,22 @@ class topology_coordinator { future<> cleanup_group0_config_if_needed() { auto& topo = _topo_sm._topology; auto rconf = _group0.group0_server().get_configuration(); - if (rconf.current.size() > topo.normal_nodes.size() + topo.new_nodes.size() + topo.transition_nodes.size()) { - // Raft config is larger than the sum of all nodes not in 'left' state. + if (!rconf.is_joint()) { // Find nodes that 'left' but still in the config and remove them - size_t found = 0; - co_await coroutine::parallel_for_each(topo.left_nodes | boost::adaptors::filtered( - [&rconf] (raft::server_id id) { return rconf.contains(id); }), - [&] (raft::server_id id) -> future<> { - found++; + auto to_remove = boost::copy_range>( + rconf.current + | boost::adaptors::transformed([&] (const raft::config_member& m) { return m.addr.id; }) + | boost::adaptors::filtered([&] (const raft::server_id& id) { return topo.left_nodes.contains(id); })); + if (!to_remove.empty()) { // Remove from group 0 nodes that left. They may failed to do so by themselves try { slogger.trace("raft topology: topology coordinator fiber removing {}" - " from the raft since it is in `left` state", id); - co_await _group0.group0_server().modify_config({}, {id}, &_as); + " from raft since they are in `left` state", to_remove); + co_await _group0.group0_server().modify_config({}, to_remove, &_as); } catch (const raft::commit_status_unknown&) { slogger.trace("raft topology: topology coordinator fiber got unknown status" - " while removing {} from the raft", id); + " while removing {} from raft", to_remove); } - }); - if (!found) { - slogger.warn("raft topology: raft config is larger then sum of all nodes" - " in non left state but no nodes in left state were found"); } } }