From d28c841fa93fea5e42da89ef8d352a91fef57693 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Tue, 27 Jan 2026 15:48:05 +0100 Subject: [PATCH] raft topology: generate notification about released nodes only once Hints destined for some other node can only be drained after the other node is no longer a replica of any vnode or tablet. In case when tablets are present, a node might still technically be a replica of some tablets after it moved to left state. When it no longer is a replica of any tablet, it becomes "released" and storage service generates a notification about it. Hinted handoff listens to this notification and kicks off draining hints after getting it. The current implementation of the "released" notification would trigger every time raft topology state is reloaded and a left node without any tokens is present in the raft topology. Although draining hints is idempotent, generating duplicate notifications is wasteful and recently became very noisy after in 44de563 verbosity of the draining-related log messages have been increased. The verbosity increase itself makes sense as draining is supposed to be a rare operation, but the duplicate notification bug now needs to be addressed. Fix the duplicate notification problem by passing the list of previously released nodes to the `storage_service::raft_topology_update_ip` function and filtering based on it. If this function processes the topology state for the first time, it will not produce any notifications. This is fine as hinted handoff is prepared to detect "released" nodes during the startup sequence in main.cc and start draining the hints there, if needed. Fixes: #28301 Refs: #25031 --- service/storage_service.cc | 13 ++++++++++--- service/storage_service.hh | 2 +- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/service/storage_service.cc b/service/storage_service.cc index 059ec0966d..1554326bef 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -541,7 +541,7 @@ static std::unordered_set get_released_nodes(const service::to // Synchronizes the local node state (token_metadata, system.peers/system.local tables, // gossiper) to align it with the other raft topology nodes. -future storage_service::sync_raft_topology_nodes(mutable_token_metadata_ptr tmptr, std::unordered_set prev_normal) { +future storage_service::sync_raft_topology_nodes(mutable_token_metadata_ptr tmptr, std::unordered_set prev_normal, std::optional> prev_released) { nodes_to_notify_after_sync nodes_to_notify; rtlogger.trace("Start sync_raft_topology_nodes"); @@ -695,8 +695,11 @@ future storage_service::sync_raft_t } } - const auto nodes_to_release = get_released_nodes(t, *tmptr); + if (prev_released) { + auto nodes_to_release = get_released_nodes(t, *tmptr); + std::erase_if(nodes_to_release, [&] (const auto& host_id) { return prev_released->contains(host_id); }); std::copy(nodes_to_release.begin(), nodes_to_release.end(), std::back_inserter(nodes_to_notify.released)); + } co_await when_all_succeed(sys_ks_futures.begin(), sys_ks_futures.end()).discard_result(); @@ -733,6 +736,10 @@ future<> storage_service::topology_state_load(state_change_hint hint) { rtlogger.debug("reload raft topology state"); std::unordered_set prev_normal = _topology_state_machine._topology.normal_nodes | std::views::keys | std::ranges::to(); + std::optional> prev_released; + if (!_topology_state_machine._topology.is_empty()) { + prev_released = get_released_nodes(_topology_state_machine._topology, get_token_metadata()); + } std::unordered_set tablet_hosts = co_await replica::read_required_hosts(_qp); @@ -833,7 +840,7 @@ future<> storage_service::topology_state_load(state_change_hint hint) { }, topology.tstate); tmptr->set_read_new(read_new); - auto nodes_to_notify = co_await sync_raft_topology_nodes(tmptr, std::move(prev_normal)); + auto nodes_to_notify = co_await sync_raft_topology_nodes(tmptr, std::move(prev_normal), std::move(prev_released)); std::optional tablets; if (hint.tablets_hint) { diff --git a/service/storage_service.hh b/service/storage_service.hh index 02852b8bfb..b29ff5f536 100644 --- a/service/storage_service.hh +++ b/service/storage_service.hh @@ -1115,7 +1115,7 @@ private: // gossiper) to align it with the other raft topology nodes. // Optional target_node can be provided to restrict the synchronization to the specified node. // Returns a structure that describes which notifications to trigger after token metadata is updated. - future sync_raft_topology_nodes(mutable_token_metadata_ptr tmptr, std::unordered_set prev_normal); + future sync_raft_topology_nodes(mutable_token_metadata_ptr tmptr, std::unordered_set prev_normal, std::optional> prev_released); // Triggers notifications (on_joined, on_left) based on the recent changes to token metadata, as described by the passed in structure. // This function should be called on the result of `sync_raft_topology_nodes`, after the global token metadata is updated. future<> notify_nodes_after_sync(nodes_to_notify_after_sync&& nodes_to_notify);