From aa87fecce2ea5beb34d8c2776f7e2161e6b3ecba Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 30 Oct 2024 15:51:21 +0200 Subject: [PATCH] gossiper: add is_alive that works on host_id The function checks whether the node with the provided host ID is alive. If the ID cannot be mapped to an IP address, or no endpoint state is found for that IP, the node is considered dead. The node is also considered dead if the endpoint state found for that IP carries a different host ID, which means the node was replaced by a new node with the same IP address. --- gms/gossiper.cc | 24 ++++++++++++++++++++++++ gms/gossiper.hh | 3 +++ 2 files changed, 27 insertions(+) diff --git a/gms/gossiper.cc b/gms/gossiper.cc index 9671b50a95..ec642f3f89 100644 --- a/gms/gossiper.cc +++ b/gms/gossiper.cc @@ -2410,6 +2410,30 @@ bool gossiper::is_alive(inet_address ep) const { return is_alive; } +bool gossiper::is_alive(locator::host_id id) const { + auto ip_opt = _address_map.find(id); + + if (!ip_opt) { + // if host ID is not in the gossiper state (and hence not in the address map) it is dead + return false; + } + + auto ep = get_endpoint_state_ptr(*ip_opt); + if (!ep) { + // _address_map may have stale entry since we rely on gc to remove entries there + // FIXME: add function to address_map to remove immediately + return false; + } + + if (id != ep->get_host_id()) { + // If IDs do not match it means that the node with provided ID was replaced + // with a new node with same IP address and hence it is dead + return false; + } + + return is_alive(*ip_opt); +} + future<> gossiper::wait_alive(std::vector nodes, std::chrono::milliseconds timeout) { return wait_alive([nodes = std::move(nodes)] { return nodes; }, timeout); } diff --git a/gms/gossiper.hh b/gms/gossiper.hh index 5c5c44fd9b..a7cee90935 100644 --- a/gms/gossiper.hh +++ b/gms/gossiper.hh @@ -13,6 +13,7 @@ #include #include #include +#include "locator/host_id.hh" #include "utils/atomic_vector.hh" #include "utils/UUID.hh" #include "gms/generation-number.hh" @@ -516,6 +517,8 @@ private: public: bool is_alive(inet_address ep) const; + bool is_alive(locator::host_id id) const; + bool is_dead_state(const endpoint_state& eps) const; // Wait for 
nodes to be alive on all shards future<> wait_alive(std::vector nodes, std::chrono::milliseconds timeout);