From 50ee9620334fcc45e442c6258001c4941adff84a Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 12 Jan 2025 11:09:02 +0200 Subject: [PATCH 01/57] service: address_map: add lookup function that expects address to exist We will add code that expects id to ip mapping to exist. If it does not it is better to fail earlier during testing, so add a function that calls internal error in case there is no mapping. --- service/address_map.hh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/service/address_map.hh b/service/address_map.hh index e13c01f0d5..c3716ed82c 100644 --- a/service/address_map.hh +++ b/service/address_map.hh @@ -298,6 +298,15 @@ public: return entry._addr; } + // Same as find() above but expects mapping to exist + gms::inet_address get(locator::host_id id) const { + try { + return find(id).value(); + } catch (std::bad_optional_access& err) { + on_internal_error(rslog, fmt::format("No ip address for {} when one is expected", id)); + } + } + // Find an id with a given mapping. // // If a mapping is expiring, the last access timestamp is updated automatically. From 0d4d066fe39aa0c3a24141dc6cb4954f31b764b9 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 5 Dec 2024 16:08:33 +0200 Subject: [PATCH 02/57] hints: simplify can_send() function Since there is gossiper::is_alive version that works on host_id now there is no need to convert _ep_key to ip which simplifies the code a lot. --- db/hints/internal/hint_sender.cc | 33 ++++++++------------------------ 1 file changed, 8 insertions(+), 25 deletions(-) diff --git a/db/hints/internal/hint_sender.cc b/db/hints/internal/hint_sender.cc index b6dc0cf033..cc15c54ff7 100644 --- a/db/hints/internal/hint_sender.cc +++ b/db/hints/internal/hint_sender.cc @@ -82,33 +82,16 @@ bool hint_sender::can_send() noexcept { } const auto tmptr = _shard_manager._proxy.get_token_metadata_ptr(); - const auto maybe_ep = std::invoke([&] () noexcept -> std::optional { - try { - return tmptr->get_endpoint_for_host_id_if_known(_ep_key); - } catch (...) { - return std::nullopt; - } - }); - try { - // `hint_sender` can never target this node, so if the returned optional is empty, - // that must mean the current locator::token_metadata doesn't store the information - // about the target node. - if (maybe_ep && _gossiper.is_alive(*maybe_ep)) { - _state.remove(state::ep_state_left_the_ring); - return true; - } else { - if (!_state.contains(state::ep_state_left_the_ring)) { - _state.set_if(!tmptr->is_normal_token_owner(_ep_key)); - } - // If the node is not part of the ring, we will send hints to all new replicas. - // Note that if the optional -- `maybe_ep` -- is empty, that could mean that `_ep_key` - // is the locator::host_id of THIS node. However, that's impossible because instances - // of `hint_sender` are only created for OTHER nodes, so this logic is correct. - return _state.contains(state::ep_state_left_the_ring); + if (_gossiper.is_alive(_ep_key)) { + _state.remove(state::ep_state_left_the_ring); + return true; + } else { + if (!_state.contains(state::ep_state_left_the_ring)) { + _state.set_if(!tmptr->is_normal_token_owner(_ep_key)); } - } catch (...) { - return false; + // If the node is not part of the ring, we will send hints to all new replicas. + return _state.contains(state::ep_state_left_the_ring); } } From 755ee9a2c565ded8770c6f23f359ca15fdf6ded6 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 5 Dec 2024 16:30:53 +0200 Subject: [PATCH 03/57] api: do not use token_metadata to retrieve ip to id mapping in token_metadata RESTful endpoints We want to drop ip knowledge from the token_metadata, so use gossiper to retrieve the mapping instead. --- api/api.cc | 4 ++-- api/api_init.hh | 2 +- api/token_metadata.cc | 45 ++++++++++++++++++++++++++------------- api/token_metadata.hh | 3 ++- locator/token_metadata.cc | 30 +++++++++----------------- locator/token_metadata.hh | 2 +- main.cc | 2 +- 7 files changed, 47 insertions(+), 41 deletions(-) diff --git a/api/api.cc b/api/api.cc index 86be4257ef..1753153f87 100644 --- a/api/api.cc +++ b/api/api.cc @@ -188,8 +188,8 @@ future<> unset_server_snapshot(http_context& ctx) { return ctx.http_server.set_routes([&ctx] (routes& r) { unset_snapshot(ctx, r); }); } -future<> set_server_token_metadata(http_context& ctx, sharded& tm) { - return ctx.http_server.set_routes([&ctx, &tm] (routes& r) { set_token_metadata(ctx, r, tm); }); +future<> set_server_token_metadata(http_context& ctx, sharded& tm, sharded& g) { + return ctx.http_server.set_routes([&ctx, &tm, &g] (routes& r) { set_token_metadata(ctx, r, tm, g); }); } future<> unset_server_token_metadata(http_context& ctx) { diff --git a/api/api_init.hh b/api/api_init.hh index 7c74f69ba9..9cd9193645 100644 --- a/api/api_init.hh +++ b/api/api_init.hh @@ -112,7 +112,7 @@ future<> set_server_authorization_cache(http_context& ctx, sharded unset_server_authorization_cache(http_context& ctx); future<> set_server_snapshot(http_context& ctx, sharded& snap_ctl); future<> unset_server_snapshot(http_context& ctx); -future<> set_server_token_metadata(http_context& ctx, sharded& tm); +future<> set_server_token_metadata(http_context& ctx, sharded& tm, sharded& g); future<> unset_server_token_metadata(http_context& ctx); future<> set_server_gossip(http_context& ctx, sharded& g); future<> unset_server_gossip(http_context& ctx); diff --git a/api/token_metadata.cc b/api/token_metadata.cc index a8c3234bef..081388d329 100644 --- a/api/token_metadata.cc +++ b/api/token_metadata.cc @@ -10,6 +10,7 @@ #include "api/api-doc/storage_service.json.hh" #include "api/api-doc/endpoint_snitch_info.json.hh" #include "locator/token_metadata.hh" +#include "gms/gossiper.hh" using namespace seastar::httpd; @@ -18,7 +19,7 @@ namespace api { namespace ss = httpd::storage_service_json; using namespace json; -void set_token_metadata(http_context& ctx, routes& r, sharded& tm) { +void set_token_metadata(http_context& ctx, routes& r, sharded& tm, sharded& g) { ss::local_hostid.set(r, [&tm](std::unique_ptr req) { auto id = tm.local().get()->get_my_id(); if (!bool(id)) { @@ -33,22 +34,25 @@ void set_token_metadata(http_context& ctx, routes& r, sharded req) { + ss::get_node_tokens.set(r, [&tm, &g] (std::unique_ptr req) { gms::inet_address addr(req->get_path_param("endpoint")); auto& local_tm = *tm.local().get(); - const auto host_id = local_tm.get_host_id_if_known(addr); + std::optional host_id; + try { + host_id = g.local().get_host_id(addr); + } catch (...) {} return make_ready_future(stream_range_as_array(host_id ? local_tm.get_tokens(*host_id): std::vector{}, [](const dht::token& i) { return fmt::to_string(i); })); }); - ss::get_leaving_nodes.set(r, [&tm](const_req req) { + ss::get_leaving_nodes.set(r, [&tm, &g](const_req req) { const auto& local_tm = *tm.local().get(); const auto& leaving_host_ids = local_tm.get_leaving_endpoints(); std::unordered_set eps; eps.reserve(leaving_host_ids.size()); for (const auto host_id: leaving_host_ids) { - eps.insert(local_tm.get_endpoint_for_host_id(host_id)); + eps.insert(g.local().get_address_map().get(host_id)); } return container_to_vec(eps); }); @@ -58,20 +62,23 @@ void set_token_metadata(http_context& ctx, routes& r, sharded eps; eps.reserve(points.size()); for (const auto& [token, host_id]: points) { - eps.insert(local_tm.get_endpoint_for_host_id(host_id)); + eps.insert(g.local().get_address_map().get(host_id)); } return container_to_vec(eps); }); - ss::get_host_id_map.set(r, [&tm](const_req req) { + ss::get_host_id_map.set(r, [&tm, &g](const_req req) { std::vector res; - return map_to_key_value(tm.local().get()->get_endpoint_to_host_id_map(), res); + auto map = tm.local().get()->get_host_ids() | + std::views::transform([&g] (locator::host_id id) { return std::make_pair(g.local().get_address_map().get(id), id); }) | + std::ranges::to(); + return map_to_key_value(std::move(map), res); }); static auto host_or_broadcast = [&tm](const_req req) { @@ -79,26 +86,34 @@ void set_token_metadata(http_context& ctx, routes& r, shardedget_topology().my_address() : gms::inet_address(host); }; - httpd::endpoint_snitch_info_json::get_datacenter.set(r, [&tm](const_req req) { + httpd::endpoint_snitch_info_json::get_datacenter.set(r, [&tm, &g](const_req req) { auto& topology = tm.local().get()->get_topology(); auto ep = host_or_broadcast(req); - if (!topology.has_endpoint(ep)) { + std::optional host_id; + try { + host_id = g.local().get_host_id(ep); + } catch (...) {} + if (!host_id || !topology.has_node(*host_id)) { // Cannot return error here, nodetool status can race, request // info about just-left node and not handle it nicely return locator::endpoint_dc_rack::default_location.dc; } - return topology.get_datacenter(ep); + return topology.get_datacenter(*host_id); }); - httpd::endpoint_snitch_info_json::get_rack.set(r, [&tm](const_req req) { + httpd::endpoint_snitch_info_json::get_rack.set(r, [&tm, &g](const_req req) { auto& topology = tm.local().get()->get_topology(); auto ep = host_or_broadcast(req); - if (!topology.has_endpoint(ep)) { + std::optional host_id; + try { + host_id = g.local().get_host_id(ep); + } catch (...) {} + if (!host_id || !topology.has_node(*host_id)) { // Cannot return error here, nodetool status can race, request // info about just-left node and not handle it nicely return locator::endpoint_dc_rack::default_location.rack; } - return topology.get_rack(ep); + return topology.get_rack(*host_id); }); } diff --git a/api/token_metadata.hh b/api/token_metadata.hh index 0bab6d999f..3e804050fc 100644 --- a/api/token_metadata.hh +++ b/api/token_metadata.hh @@ -15,10 +15,11 @@ class routes; } namespace locator { class shared_token_metadata; } +namespace gms { class gossiper; } namespace api { struct http_context; -void set_token_metadata(http_context& ctx, seastar::httpd::routes& r, seastar::sharded& tm); +void set_token_metadata(http_context& ctx, seastar::httpd::routes& r, seastar::sharded& tm, seastar::sharded& g); void unset_token_metadata(http_context& ctx, seastar::httpd::routes& r); } diff --git a/locator/token_metadata.cc b/locator/token_metadata.cc index 35c7efd792..3c369e44ce 100644 --- a/locator/token_metadata.cc +++ b/locator/token_metadata.cc @@ -162,8 +162,8 @@ public: /** Return the end-point for a unique host ID.*/ inet_address get_endpoint_for_host_id(host_id) const; - /** @return a copy of the endpoint-to-id map for read-only operations */ - std::unordered_map get_endpoint_to_host_id_map() const; + /** @return a copy of host id set for read-only operations */ + std::unordered_set get_host_ids() const; void add_bootstrap_token(token t, host_id endpoint); @@ -567,21 +567,11 @@ inet_address token_metadata_impl::get_endpoint_for_host_id(host_id host_id) cons } } -std::unordered_map token_metadata_impl::get_endpoint_to_host_id_map() const { - const auto& nodes = _topology.get_nodes_by_endpoint(); - std::unordered_map map; - map.reserve(nodes.size()); - for (const auto& [endpoint, node] : nodes) { - if (node.get().left() || node.get().is_none()) { - continue; - } - if (const auto& host_id = node.get().host_id()) { - map[endpoint] = host_id; - } else { - tlogger.info("get_endpoint_to_host_id_map: endpoint {} has null host_id: state={}", endpoint, node.get().get_state()); - } - } - return map; +std::unordered_set token_metadata_impl::get_host_ids() const { + return _topology.get_nodes() | + std::views::filter([&] (const node& n) { return !n.left() && !n.is_none(); }) | + std::views::transform([] (const node& n) { return n.host_id(); }) | + std::ranges::to(); } bool token_metadata_impl::is_normal_token_owner(host_id endpoint) const { @@ -1067,9 +1057,9 @@ token_metadata::get_endpoint_for_host_id(host_id host_id) const { return _impl->get_endpoint_for_host_id(host_id); } -std::unordered_map -token_metadata::get_endpoint_to_host_id_map() const { - return _impl->get_endpoint_to_host_id_map(); +std::unordered_set +token_metadata::get_host_ids() const { + return _impl->get_host_ids(); } void diff --git a/locator/token_metadata.hh b/locator/token_metadata.hh index 0f604bdf85..541c91f086 100644 --- a/locator/token_metadata.hh +++ b/locator/token_metadata.hh @@ -245,7 +245,7 @@ public: inet_address get_endpoint_for_host_id(locator::host_id host_id) const; /** @return a copy of the endpoint-to-id map for read-only operations */ - std::unordered_map get_endpoint_to_host_id_map() const; + std::unordered_set get_host_ids() const; /// Returns host_id of the local node. host_id get_my_id() const; diff --git a/main.cc b/main.cc index fbc7ac992e..6a7a3b66d0 100644 --- a/main.cc +++ b/main.cc @@ -1093,7 +1093,7 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl // token_metadata.stop().get(); //}); - api::set_server_token_metadata(ctx, token_metadata).get(); + api::set_server_token_metadata(ctx, token_metadata, gossiper).get(); auto stop_tokens_api = defer_verbose_shutdown("token metadata API", [&ctx] { api::unset_server_token_metadata(ctx).get(); }); From 4d7c05ad8207e9e5f3ccbc8119aee8a9301e75be Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 8 Dec 2024 12:05:37 +0200 Subject: [PATCH 04/57] hints: move create_hint_sync_point function to host ids One of its caller is in the RESTful API which gets ips from the user, so we convert ips to ids inside the API handler using gossiper before calling the function. We need to deprecate ip based API and move to host id based. --- api/api.cc | 6 +++--- api/api_init.hh | 2 +- api/hinted_handoff.cc | 13 +++++++------ api/hinted_handoff.hh | 3 ++- db/hints/manager.cc | 13 +++---------- db/hints/manager.hh | 2 +- main.cc | 2 +- repair/row_level.cc | 2 +- service/storage_proxy.cc | 2 +- service/storage_proxy.hh | 2 +- 10 files changed, 21 insertions(+), 26 deletions(-) diff --git a/api/api.cc b/api/api.cc index 1753153f87..670750e684 100644 --- a/api/api.cc +++ b/api/api.cc @@ -273,10 +273,10 @@ future<> unset_server_cache(http_context& ctx) { return ctx.http_server.set_routes([&ctx] (routes& r) { unset_cache_service(ctx, r); }); } -future<> set_hinted_handoff(http_context& ctx, sharded& proxy) { +future<> set_hinted_handoff(http_context& ctx, sharded& proxy, sharded& g) { return register_api(ctx, "hinted_handoff", - "The hinted handoff API", [&proxy] (http_context& ctx, routes& r) { - set_hinted_handoff(ctx, r, proxy); + "The hinted handoff API", [&proxy, &g] (http_context& ctx, routes& r) { + set_hinted_handoff(ctx, r, proxy, g); }); } diff --git a/api/api_init.hh b/api/api_init.hh index 9cd9193645..205090c1cb 100644 --- a/api/api_init.hh +++ b/api/api_init.hh @@ -124,7 +124,7 @@ future<> set_server_storage_proxy(http_context& ctx, sharded unset_server_storage_proxy(http_context& ctx); future<> set_server_stream_manager(http_context& ctx, sharded& sm); future<> unset_server_stream_manager(http_context& ctx); -future<> set_hinted_handoff(http_context& ctx, sharded& p); +future<> set_hinted_handoff(http_context& ctx, sharded& p, sharded& g); future<> unset_hinted_handoff(http_context& ctx); future<> set_server_cache(http_context& ctx); future<> unset_server_cache(http_context& ctx); diff --git a/api/hinted_handoff.cc b/api/hinted_handoff.cc index 73cd41baff..00fc513bf2 100644 --- a/api/hinted_handoff.cc +++ b/api/hinted_handoff.cc @@ -14,6 +14,7 @@ #include "gms/inet_address.hh" #include "service/storage_proxy.hh" +#include "gms/gossiper.hh" namespace api { @@ -21,18 +22,18 @@ using namespace json; using namespace seastar::httpd; namespace hh = httpd::hinted_handoff_json; -void set_hinted_handoff(http_context& ctx, routes& r, sharded& proxy) { - hh::create_hints_sync_point.set(r, [&proxy] (std::unique_ptr req) -> future { - auto parse_hosts_list = [] (sstring arg) { +void set_hinted_handoff(http_context& ctx, routes& r, sharded& proxy, sharded& g) { + hh::create_hints_sync_point.set(r, [&proxy, &g] (std::unique_ptr req) -> future { + auto parse_hosts_list = [&g] (sstring arg) { std::vector hosts_str = split(arg, ","); - std::vector hosts; + std::vector hosts; hosts.reserve(hosts_str.size()); for (const auto& host_str : hosts_str) { try { gms::inet_address host; host = gms::inet_address(host_str); - hosts.push_back(host); + hosts.push_back(g.local().get_host_id(host)); } catch (std::exception& e) { throw httpd::bad_param_exception(format("Failed to parse host address {}: {}", host_str, e.what())); } @@ -41,7 +42,7 @@ void set_hinted_handoff(http_context& ctx, routes& r, sharded target_hosts = parse_hosts_list(req->get_query_param("target_hosts")); + std::vector target_hosts = parse_hosts_list(req->get_query_param("target_hosts")); return proxy.local().create_hint_sync_point(std::move(target_hosts)).then([] (db::hints::sync_point sync_point) { return json::json_return_type(sync_point.encode()); }); diff --git a/api/hinted_handoff.hh b/api/hinted_handoff.hh index 7a83daa30d..13b9f5acb6 100644 --- a/api/hinted_handoff.hh +++ b/api/hinted_handoff.hh @@ -10,12 +10,13 @@ #include #include "api/api_init.hh" +#include "gms/gossiper.hh" namespace service { class storage_proxy; } namespace api { -void set_hinted_handoff(http_context& ctx, httpd::routes& r, sharded& p); +void set_hinted_handoff(http_context& ctx, httpd::routes& r, sharded& p, sharded& g); void unset_hinted_handoff(http_context& ctx, httpd::routes& r); } diff --git a/db/hints/manager.cc b/db/hints/manager.cc index 95d5ea0f2a..17332a0f52 100644 --- a/db/hints/manager.cc +++ b/db/hints/manager.cc @@ -266,21 +266,14 @@ void manager::forbid_hints_for_eps_with_pending_hints() { } } -sync_point::shard_rps manager::calculate_current_sync_point(std::span target_eps) const { +sync_point::shard_rps manager::calculate_current_sync_point(std::span target_eps) const { sync_point::shard_rps rps; - const auto tmptr = _proxy.get_token_metadata_ptr(); for (auto addr : target_eps) { - const auto hid = tmptr->get_host_id_if_known(addr); - // Ignore the IPs that we cannot map. - if (!hid) { - continue; - } - - auto it = _ep_managers.find(*hid); + auto it = _ep_managers.find(addr); if (it != _ep_managers.end()) { const hint_endpoint_manager& ep_man = it->second; - rps[*hid] = ep_man.last_written_replay_position(); + rps[addr] = ep_man.last_written_replay_position(); } } diff --git a/db/hints/manager.hh b/db/hints/manager.hh index 69d18199ed..afe7e96648 100644 --- a/db/hints/manager.hh +++ b/db/hints/manager.hh @@ -278,7 +278,7 @@ public: /// /// \param target_eps The list of endpoints the sync point should correspond to. When empty, the function assumes all endpoints. /// \return Sync point corresponding to the specified endpoints. - sync_point::shard_rps calculate_current_sync_point(std::span target_eps) const; + sync_point::shard_rps calculate_current_sync_point(std::span target_eps) const; /// \brief Waits until hint replay reach replay positions described in `rps`. future<> wait_for_sync_point(abort_source& as, const sync_point::shard_rps& rps); diff --git a/main.cc b/main.cc index 6a7a3b66d0..8ccb6d846c 100644 --- a/main.cc +++ b/main.cc @@ -2262,7 +2262,7 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl supervisor::notify("allow replaying hints"); proxy.invoke_on_all(&service::storage_proxy::allow_replaying_hints).get(); - api::set_hinted_handoff(ctx, proxy).get(); + api::set_hinted_handoff(ctx, proxy, gossiper).get(); auto stop_hinted_handoff_api = defer_verbose_shutdown("hinted handoff API", [&ctx] { api::unset_hinted_handoff(ctx).get(); }); diff --git a/repair/row_level.cc b/repair/row_level.cc index ac05787d91..ca70ae15f1 100644 --- a/repair/row_level.cc +++ b/repair/row_level.cc @@ -2317,7 +2317,7 @@ future repair_service::repair_flush_hints_ auto flush_time = now; if (cache_disabled || (now - _flush_hints_batchlog_time > cache_time)) { // Empty targets meants all nodes - db::hints::sync_point sync_point = co_await _sp.local().create_hint_sync_point(std::vector{}); + db::hints::sync_point sync_point = co_await _sp.local().create_hint_sync_point(std::vector{}); lowres_clock::time_point deadline = lowres_clock::now() + req.hints_timeout; try { bool bm_throw = utils::get_local_injector().enter("repair_flush_hints_batchlog_handler_bm_uninitialized"); diff --git a/service/storage_proxy.cc b/service/storage_proxy.cc index da28f1059d..818de6f8e6 100644 --- a/service/storage_proxy.cc +++ b/service/storage_proxy.cc @@ -6795,7 +6795,7 @@ const db::hints::host_filter& storage_proxy::get_hints_host_filter() const { return _hints_manager.get_host_filter(); } -future storage_proxy::create_hint_sync_point(std::vector target_hosts) const { +future storage_proxy::create_hint_sync_point(std::vector target_hosts) const { db::hints::sync_point spoint; spoint.regular_per_shard_rps.resize(smp::count); spoint.mv_per_shard_rps.resize(smp::count); diff --git a/service/storage_proxy.hh b/service/storage_proxy.hh index 85756c3a09..6c1b0812df 100644 --- a/service/storage_proxy.hh +++ b/service/storage_proxy.hh @@ -710,7 +710,7 @@ public: future<> change_hints_host_filter(db::hints::host_filter new_filter); const db::hints::host_filter& get_hints_host_filter() const; - future create_hint_sync_point(std::vector target_hosts) const; + future create_hint_sync_point(std::vector target_hosts) const; future<> wait_for_hint_sync_point(const db::hints::sync_point spoint, clock_type::time_point deadline); const stats& get_stats() const { From f685c7d0afddc46e1159db6299212f1fc1e08ca9 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 8 Dec 2024 12:38:01 +0200 Subject: [PATCH 05/57] hints: use gossiper to map ip to id in wait_for_sync_point We want to drop ips from token_metadata so move to different API to map ip to id. --- db/hints/manager.cc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/db/hints/manager.cc b/db/hints/manager.cc index 17332a0f52..7cdb515352 100644 --- a/db/hints/manager.cc +++ b/db/hints/manager.cc @@ -335,10 +335,11 @@ future<> manager::wait_for_sync_point(abort_source& as, const sync_point::shard_ for (const auto& [addr, rp] : rps) { if (std::holds_alternative(addr)) { - const auto maybe_hid = tmptr->get_host_id_if_known(std::get(addr)); - // Ignore the IPs we cannot map. - if (maybe_hid) [[likely]] { - hid_rps.emplace(*maybe_hid, rp); + try { + const auto hid = _gossiper_anchor->get_host_id(std::get(addr)); + hid_rps.emplace(hid, rp); + } catch (...) { + // Ignore the IPs we cannot map. } } else { hid_rps.emplace(std::get(addr), rp); From 844cb090bf7a2761aed8ddcf025e013ee4470e0f Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 8 Dec 2024 13:25:24 +0200 Subject: [PATCH 06/57] view: do not use get_endpoint_for_host_id_if_known to check if a node is part of the topology Check directly in the topology instead. --- db/view/view.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/db/view/view.cc b/db/view/view.cc index 03e654d2df..7f73336234 100644 --- a/db/view/view.cc +++ b/db/view/view.cc @@ -2671,7 +2671,7 @@ future<> view_builder::migrate_to_v2(locator::token_metadata_ptr tmptr, db::syst // In the v1 table we may have left over rows that belong to nodes that were removed // and we didn't clean them, so do that now. auto host_id = row.get_as("host_id"); - if (!tmptr->get_endpoint_for_host_id_if_known(locator::host_id(host_id))) { + if (!tmptr->get_topology().find_node(locator::host_id(host_id))) { vlogger.warn("Dropping a row from view_build_status: host {} does not exist", host_id); continue; } @@ -3151,7 +3151,7 @@ future view_builder::check_view_build_ongoing(const locator::token_metadat return view_status(ks_name, cf_name).then([&tm] (view_statuses_type&& view_statuses) { return std::ranges::any_of(view_statuses, [&tm] (const view_statuses_type::value_type& view_status) { // Only consider status of known hosts. - return view_status.second == "STARTED" && tm.get_endpoint_for_host_id_if_known(view_status.first); + return view_status.second == "STARTED" && tm.get_topology().find_node(view_status.first); }); }); } From 8c85350d4b790806f8ced00a6936730e278138cd Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 8 Dec 2024 13:35:30 +0200 Subject: [PATCH 07/57] db/virtual_tables: use host id from the gossiper endpoint state in cluster_status table The state always has host id now, so there is no point to looks it up in the token metadata. --- db/virtual_tables.cc | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/db/virtual_tables.cc b/db/virtual_tables.cc index b42be72948..b9b3e0d5f0 100644 --- a/db/virtual_tables.cc +++ b/db/virtual_tables.cc @@ -75,7 +75,7 @@ public: std::vector muts; muts.reserve(gossiper.num_endpoints()); - gossiper.for_each_endpoint_state([&] (const gms::inet_address& endpoint, const gms::endpoint_state&) { + gossiper.for_each_endpoint_state([&] (const gms::inet_address& endpoint, const gms::endpoint_state& eps) { static thread_local auto s = build_schema(); mutation m(s, partition_key::from_single_value(*s, data_value(endpoint).serialize_nonnull())); row& cr = m.partition().clustered_row(*schema(), clustering_key::make_empty()).cells(); @@ -86,24 +86,20 @@ public: } set_cell(cr, "load", gossiper.get_application_state_value(endpoint, gms::application_state::LOAD)); - auto hostid = tm.get_host_id_if_known(endpoint); - if (hostid) { - if (ss.raft_topology_change_enabled() && !gossiper.is_shutdown(endpoint)) { - set_cell(cr, "status", boost::to_upper_copy(fmt::format("{}", ss.get_node_state(*hostid)))); - } - set_cell(cr, "host_id", hostid->uuid()); + auto hostid = eps.get_host_id(); + if (ss.raft_topology_change_enabled() && !gossiper.is_shutdown(endpoint)) { + set_cell(cr, "status", boost::to_upper_copy(fmt::format("{}", ss.get_node_state(hostid)))); } + set_cell(cr, "host_id", hostid.uuid()); - if (hostid) { - sstring dc = tm.get_topology().get_location(endpoint).dc; - set_cell(cr, "dc", dc); - } + sstring dc = tm.get_topology().get_location(endpoint).dc; + set_cell(cr, "dc", dc); if (ownership.contains(endpoint)) { set_cell(cr, "owns", ownership[endpoint]); } - set_cell(cr, "tokens", int32_t(hostid ? tm.get_tokens(*hostid).size() : 0)); + set_cell(cr, "tokens", int32_t(tm.get_tokens(hostid).size())); muts.push_back(freeze(std::move(m))); }); From ae821ba07a83380aa54c4407671ad5af9350bb1a Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 8 Dec 2024 14:13:03 +0200 Subject: [PATCH 08/57] repair: use gossiper to map ip to host id instead of token_metadata We want to drop ips from token_metadata so move to different API to map ip to id. --- repair/repair.cc | 26 +++++++++++++++----------- repair/task_manager_module.hh | 4 +++- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/repair/repair.cc b/repair/repair.cc index 67b68eb806..51c0aa8d7d 100644 --- a/repair/repair.cc +++ b/repair/repair.cc @@ -226,6 +226,7 @@ repair_neighbors::repair_neighbors(std::vector nodes, std::vec // Return all of the neighbors with whom we share the provided range. static std::vector get_neighbors( + const gms::gossiper& gossiper, const locator::effective_replication_map& erm, const sstring& ksname, query::range range, const std::vector& data_centers, @@ -282,17 +283,19 @@ static std::vector get_neighbors( } catch(...) { throw std::runtime_error(format("Unknown host specified: {}", host)); } - auto endpoint = erm.get_token_metadata().get_host_id_if_known(ip); - if (endpoint) { + + try { + auto endpoint = gossiper.get_host_id(ip); + if (endpoint == my_address) { found_me = true; - } else if (neighbor_set.contains(*endpoint)) { - ret.push_back(*endpoint); + } else if (neighbor_set.contains(endpoint)) { + ret.push_back(endpoint); // If same host is listed twice, don't add it again later - neighbor_set.erase(*endpoint); - } else { - rlogger.warn("Provided host ip {} has no corresponding host id", ip); + neighbor_set.erase(endpoint); } + } catch (...) { + rlogger.warn("Provided host ip {} has no corresponding host id", ip); } // Nodes which aren't neighbors for this range are ignored. // This allows the user to give a list of "good" nodes, where @@ -329,6 +332,7 @@ static std::vector get_neighbors( } static future> get_hosts_participating_in_repair( + const gms::gossiper& gossiper, const locator::effective_replication_map& erm, const sstring& ksname, const dht::token_range_vector& ranges, @@ -344,7 +348,7 @@ static future> get_hosts_participating_in_repair( participating_hosts.insert(my_address); co_await do_for_each(ranges, [&] (const dht::token_range& range) { - const auto nbs = get_neighbors(erm, ksname, range, data_centers, hosts, ignore_nodes); + const auto nbs = get_neighbors(gossiper, erm, ksname, range, data_centers, hosts, ignore_nodes); for (const auto& nb : nbs) { participating_hosts.insert(nb); } @@ -676,7 +680,7 @@ void repair::shard_repair_task_impl::check_in_abort_or_shutdown() { repair_neighbors repair::shard_repair_task_impl::get_repair_neighbors(const dht::token_range& range) { return neighbors.empty() ? - repair_neighbors(get_neighbors(*erm, _status.keyspace, range, data_centers, hosts, ignore_nodes, _small_table_optimization)) : + repair_neighbors(get_neighbors(gossiper, *erm, _status.keyspace, range, data_centers, hosts, ignore_nodes, _small_table_optimization)) : neighbors[range]; } @@ -1306,7 +1310,7 @@ future repair_service::do_repair_start(gms::gossip_address_map& addr_map, s } auto ranges_parallelism = options.ranges_parallelism == -1 ? std::nullopt : std::optional(options.ranges_parallelism); - auto task = co_await _repair_module->make_and_start_task({}, id, std::move(keyspace), "", germs, std::move(cfs), std::move(ranges), std::move(options.hosts), std::move(options.data_centers), std::move(ignore_nodes), small_table_optimization, ranges_parallelism); + auto task = co_await _repair_module->make_and_start_task({}, id, std::move(keyspace), "", germs, std::move(cfs), std::move(ranges), std::move(options.hosts), std::move(options.data_centers), std::move(ignore_nodes), small_table_optimization, ranges_parallelism, _gossiper.local()); co_return id.id; } @@ -1331,7 +1335,7 @@ future<> repair::user_requested_repair_task_impl::run() { auto normal_nodes = germs->get().get_token_metadata().get_normal_token_owners(); participants = std::list(normal_nodes.begin(), normal_nodes.end()); } else { - participants = get_hosts_participating_in_repair(germs->get(), keyspace, ranges, data_centers, hosts, ignore_nodes).get(); + participants = get_hosts_participating_in_repair(_gossiper, germs->get(), keyspace, ranges, data_centers, hosts, ignore_nodes).get(); } auto [hints_batchlog_flushed, flush_time] = rs.flush_hints(id, keyspace, cfs, ignore_nodes).get(); diff --git a/repair/task_manager_module.hh b/repair/task_manager_module.hh index c801ca1c3e..1fbad623a3 100644 --- a/repair/task_manager_module.hh +++ b/repair/task_manager_module.hh @@ -49,8 +49,9 @@ private: std::unordered_set _ignore_nodes; bool _small_table_optimization; std::optional _ranges_parallelism; + gms::gossiper& _gossiper; public: - user_requested_repair_task_impl(tasks::task_manager::module_ptr module, repair_uniq_id id, std::string keyspace, std::string entity, lw_shared_ptr germs, std::vector cfs, dht::token_range_vector ranges, std::vector hosts, std::vector data_centers, std::unordered_set ignore_nodes, bool small_table_optimization, std::optional ranges_parallelism) noexcept + user_requested_repair_task_impl(tasks::task_manager::module_ptr module, repair_uniq_id id, std::string keyspace, std::string entity, lw_shared_ptr germs, std::vector cfs, dht::token_range_vector ranges, std::vector hosts, std::vector data_centers, std::unordered_set ignore_nodes, bool small_table_optimization, std::optional ranges_parallelism, gms::gossiper& gossiper) noexcept : repair_task_impl(module, id.uuid(), id.id, "keyspace", std::move(keyspace), "", std::move(entity), tasks::task_id::create_null_id(), streaming::stream_reason::repair) , _germs(germs) , _cfs(std::move(cfs)) @@ -60,6 +61,7 @@ public: , _ignore_nodes(std::move(ignore_nodes)) , _small_table_optimization(small_table_optimization) , _ranges_parallelism(ranges_parallelism) + , _gossiper(gossiper) {} virtual tasks::is_abortable is_abortable() const noexcept override { From 448282dc93b080f27a011b503491a2bb6d43b438 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 8 Dec 2024 14:19:20 +0200 Subject: [PATCH 09/57] storage_proxy: used gossiper for map ip to host id in connection_dropped callback We want to drop ips from token_metadata so move to different API to map ip to id. --- service/storage_proxy.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/service/storage_proxy.cc b/service/storage_proxy.cc index 818de6f8e6..86a7958bcc 100644 --- a/service/storage_proxy.cc +++ b/service/storage_proxy.cc @@ -1058,7 +1058,9 @@ private: void connection_dropped(gms::inet_address addr, std::optional id) { slogger.debug("Drop hit rate info for {} because of disconnect", addr); if (!id) { - id = _sp.get_token_metadata_ptr()->get_host_id_if_known(addr); + try { + id = _gossiper.get_host_id(addr); + } catch (...) {} } if (!id) { return; From b3f8b579c0ce97a1063f03ca71acc005896be93e Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 8 Dec 2024 14:57:20 +0200 Subject: [PATCH 10/57] gossiper: add get_endpoint_state_ptr() function that works on host id Will be used later to simplify code. --- gms/gossiper.cc | 8 ++++++++ gms/gossiper.hh | 1 + 2 files changed, 9 insertions(+) diff --git a/gms/gossiper.cc b/gms/gossiper.cc index 19747be504..c303230316 100644 --- a/gms/gossiper.cc +++ b/gms/gossiper.cc @@ -1505,6 +1505,14 @@ endpoint_state_ptr gossiper::get_endpoint_state_ptr(inet_address ep) const noexc } } +endpoint_state_ptr gossiper::get_endpoint_state_ptr(locator::host_id id) const noexcept { + auto ip = _address_map.find(id); + if (!ip) { + return nullptr; + } + return get_endpoint_state_ptr(*ip); +} + void gossiper::update_timestamp(const endpoint_state_ptr& eps) noexcept { const_cast(*eps).update_timestamp(); } diff --git a/gms/gossiper.hh b/gms/gossiper.hh index 2ff817f700..595f7103ca 100644 --- a/gms/gossiper.hh +++ b/gms/gossiper.hh @@ -435,6 +435,7 @@ public: // The endpoint_state is immutable (except for its update_timestamp), guaranteed not to change while // the endpoint_state_ptr is held. endpoint_state_ptr get_endpoint_state_ptr(inet_address ep) const noexcept; + endpoint_state_ptr get_endpoint_state_ptr(locator::host_id ep) const noexcept; // Return this node's endpoint_state_ptr endpoint_state_ptr get_this_endpoint_state_ptr() const noexcept { From f5fa4d97425cfd5b44d31f90521ad06938007b8c Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 8 Dec 2024 14:58:06 +0200 Subject: [PATCH 11/57] topology coordinator: drop get_endpoint_for_host_id_if_known usage Now that we have gossiper::get_endpoint_state_ptr that works on host ids there is no need to translate id to ip at all. --- service/topology_coordinator.cc | 35 ++++++++++++++------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/service/topology_coordinator.cc b/service/topology_coordinator.cc index 4e8ca60ef2..a50f8838d5 100644 --- a/service/topology_coordinator.cc +++ b/service/topology_coordinator.cc @@ -3057,11 +3057,11 @@ future<> topology_coordinator::build_coordinator_state(group0_guard guard) { rtlogger.info("building initial raft topology state and CDC generation"); guard = co_await start_operation(); - auto get_application_state = [&] (locator::host_id host_id, gms::inet_address ep, const gms::application_state_map& epmap, gms::application_state app_state) -> sstring { + auto get_application_state = [&] (locator::host_id host_id, const gms::application_state_map& epmap, gms::application_state app_state) -> sstring { const auto it = epmap.find(app_state); if (it == epmap.end()) { - throw std::runtime_error(format("failed to build initial raft topology state from gossip for node {}/{}: application state {} is missing in gossip", - host_id, ep, app_state)); + throw std::runtime_error(format("failed to build initial raft topology state from gossip for node {}: application state {} is missing in gossip", + host_id, app_state)); } // it's versioned_value::value(), not std::optional::value() - it does not throw return it->second.value(); @@ -3069,17 +3069,13 @@ future<> topology_coordinator::build_coordinator_state(group0_guard guard) { // Create a new CDC generation auto get_sharding_info_for_host_id = [&] (locator::host_id host_id) -> std::pair { - const auto ep = tmptr->get_endpoint_for_host_id_if_known(host_id); - if (!ep) { - throw std::runtime_error(format("IP of node with ID {} is not known", host_id)); - } - const auto eptr = _gossiper.get_endpoint_state_ptr(*ep); + const auto eptr = _gossiper.get_endpoint_state_ptr(host_id); if (!eptr) { - throw std::runtime_error(format("no gossiper endpoint state for node {}/{}", host_id, *ep)); + throw std::runtime_error(format("no gossiper endpoint state for node {}", host_id)); } const auto& epmap = eptr->get_application_state_map(); - const auto shard_count = std::stoi(get_application_state(host_id, *ep, epmap, gms::application_state::SHARD_COUNT)); - const auto ignore_msb = std::stoi(get_application_state(host_id, *ep, epmap, gms::application_state::IGNORE_MSB_BITS)); + const auto shard_count = std::stoi(get_application_state(host_id, epmap, gms::application_state::SHARD_COUNT)); + const auto ignore_msb = std::stoi(get_application_state(host_id, epmap, gms::application_state::IGNORE_MSB_BITS)); return std::make_pair(shard_count, ignore_msb); }; auto [cdc_gen_uuid, guard_, mutation] = co_await prepare_and_broadcast_cdc_generation_data(tmptr, std::move(guard), std::nullopt, get_sharding_info_for_host_id); @@ -3096,23 +3092,22 @@ future<> topology_coordinator::build_coordinator_state(group0_guard guard) { } const auto& host_id = node.get().host_id(); - const auto& ep = node.get().endpoint(); - const auto eptr = _gossiper.get_endpoint_state_ptr(ep); + const auto eptr = _gossiper.get_endpoint_state_ptr(host_id); if (!eptr) { - throw std::runtime_error(format("failed to build initial raft topology state from gossip for node {}/{} as gossip contains no data for it", host_id, ep)); + throw std::runtime_error(format("failed to build initial raft topology state from gossip for node {} as gossip contains no data for it", host_id)); } const auto& epmap = eptr->get_application_state_map(); - const auto datacenter = get_application_state(host_id, ep, epmap, gms::application_state::DC); - const auto rack = get_application_state(host_id, ep, epmap, gms::application_state::RACK); + const auto datacenter = get_application_state(host_id, epmap, gms::application_state::DC); + const auto rack = get_application_state(host_id, epmap, gms::application_state::RACK); const auto tokens_v = tmptr->get_tokens(host_id); const std::unordered_set tokens(tokens_v.begin(), tokens_v.end()); - const auto release_version = get_application_state(host_id, ep, epmap, gms::application_state::RELEASE_VERSION); + const auto release_version = get_application_state(host_id, epmap, gms::application_state::RELEASE_VERSION); const auto num_tokens = tokens.size(); - const auto shard_count = get_application_state(host_id, ep, epmap, gms::application_state::SHARD_COUNT); - const auto ignore_msb = get_application_state(host_id, ep, epmap, gms::application_state::IGNORE_MSB_BITS); - const auto supported_features_s = get_application_state(host_id, ep, epmap, gms::application_state::SUPPORTED_FEATURES); + const auto shard_count = get_application_state(host_id, epmap, gms::application_state::SHARD_COUNT); + const auto ignore_msb = get_application_state(host_id, epmap, gms::application_state::IGNORE_MSB_BITS); + const auto supported_features_s = get_application_state(host_id, epmap, gms::application_state::SUPPORTED_FEATURES); const auto supported_features = gms::feature_service::to_feature_set(supported_features_s); if (enabled_features.empty()) { From 5d4d9fd31dd7c65c3a085047edc4b4ced88a039c Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 9 Dec 2024 12:19:35 +0200 Subject: [PATCH 12/57] storage_service: force_remove_completion use address map to resolve id to ip instead of token metadata We want to drop ips from token_metadata so move to different API to map ip to id. --- service/storage_service.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/service/storage_service.cc b/service/storage_service.cc index ac0095f397..22cdaf339e 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -7227,7 +7227,7 @@ future<> storage_service::force_remove_completion() { auto leaving = tm.get_leaving_endpoints(); slogger.warn("Removal not confirmed, Leaving={}", leaving); for (auto host_id : leaving) { - const auto endpoint = tm.get_endpoint_for_host_id_if_known(host_id); + const auto endpoint = ss._address_map.find(host_id); if (!endpoint) { slogger.warn("No endpoint is found for host_id {}", host_id); continue; From 0c930199f817675fbf185861933638047f39e90a Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 9 Dec 2024 12:49:51 +0200 Subject: [PATCH 13/57] storage_service: use gossiper to map id to ip instead of token_metadata in node_ops_cmd_handler We want to drop ips from token_metadata so move to different API to map ip to id. --- service/storage_service.cc | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/service/storage_service.cc b/service/storage_service.cc index 22cdaf339e..e7374af720 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -4469,11 +4469,11 @@ future storage_service::node_ops_cmd_handler(gms::inet_ad // and waits for ring_delay. It's possible the node being decommissioned might // die after it has sent this notification. If this happens, the node would // have already been removed from this token_metadata, so we wouldn't find it here. - const auto node_id = tmptr->get_host_id_if_known(node); - slogger.info("decommission[{}]: Removed node={} as leaving node, coordinator={}", req.ops_uuid, node, coordinator); - if (node_id) { - tmptr->del_leaving_endpoint(*node_id); - } + try { + const auto node_id = _gossiper.get_host_id(node); + slogger.info("decommission[{}]: Removed node={} as leaving node, coordinator={}", req.ops_uuid, node, coordinator); + tmptr->del_leaving_endpoint(node_id); + } catch (...) {} } return update_topology_change_info(tmptr, ::format("decommission {}", req.leaving_nodes)); }); @@ -4489,7 +4489,10 @@ future storage_service::node_ops_cmd_handler(gms::inet_ad check_again = false; for (auto& node : req.leaving_nodes) { auto tmptr = get_token_metadata_ptr(); - const auto host_id = tmptr->get_host_id_if_known(node); + std::optional host_id; + try { + host_id = _gossiper.get_host_id(node); + } catch(...) {}; if (host_id && tmptr->is_normal_token_owner(*host_id)) { check_again = true; if (std::chrono::steady_clock::now() > start_time + std::chrono::seconds(60)) { From 58f8395bc2e64f52e43d24f0861ff8d6b16d7d1a Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 16 Jan 2025 16:35:13 +0200 Subject: [PATCH 14/57] storage_service: use gossiper instead of token_metadata to map ip to id in gossiper notifications We want to drop ips from token_metadata so move to different API to map ip to id. --- service/storage_service.cc | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/service/storage_service.cc b/service/storage_service.cc index e7374af720..b71e55d8bd 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -2637,19 +2637,18 @@ future<> storage_service::on_join(gms::inet_address endpoint, gms::endpoint_stat future<> storage_service::on_alive(gms::inet_address endpoint, gms::endpoint_state_ptr state, gms::permit_id pid) { const auto& tm = get_token_metadata(); - const auto tm_host_id_opt = tm.get_host_id_if_known(endpoint); - slogger.debug("endpoint={}/{} on_alive: permit_id={}", endpoint, tm_host_id_opt, pid); + const auto host_id = state->get_host_id(); + slogger.debug("endpoint={}/{} on_alive: permit_id={}", endpoint, host_id, pid); const auto* node = tm.get_topology().find_node(endpoint); if (node && node->is_member()) { co_await notify_up(endpoint); } else if (raft_topology_change_enabled()) { slogger.debug("ignore on_alive since topology changes are using raft and " - "endpoint {}/{} is not a topology member", endpoint, tm_host_id_opt); + "endpoint {}/{} is not a topology member", endpoint, host_id); } else { auto tmlock = co_await get_token_metadata_lock(); auto tmptr = co_await get_mutable_token_metadata_ptr(); const auto dc_rack = get_dc_rack_for(endpoint); - const auto host_id = _gossiper.get_host_id(endpoint); tmptr->update_host_id(host_id, endpoint); tmptr->update_topology(host_id, dc_rack); co_await replicate_to_all_cores(std::move(tmptr)); @@ -2740,15 +2739,26 @@ future<> storage_service::on_remove(gms::inet_address endpoint, gms::permit_id p co_return; } + locator::host_id host_id; + + try { + // It seems gossiper does not check for endpoint existence before calling the callback + // so the lookup may fail, but there is nothing to do in this case. + host_id = _gossiper.get_host_id(endpoint); + } catch (...) { + co_return; + } + + // We should handle the case when the host id is mapped to a different address. + // This could happen when an address for the host id changes and the callback here is called + // with the old ip. We should just skip the remove in that case. + if (_address_map.get(host_id) != endpoint) { + co_return; + } + auto tmlock = co_await get_token_metadata_lock(); auto tmptr = co_await get_mutable_token_metadata_ptr(); - // We should handle the case when we aren't able to find endpoint -> ip mapping in token_metadata. - // This could happen e.g. when the new endpoint has bigger generation in handle_state_normal - the code - // in handle_state_normal will remap host_id to the new IP and we won't find - // old IP here. We should just skip the remove in that case. - if (const auto host_id = tmptr->get_host_id_if_known(endpoint); host_id) { - tmptr->remove_endpoint(*host_id); - } + tmptr->remove_endpoint(host_id); co_await update_topology_change_info(tmptr, ::format("on_remove {}", endpoint)); co_await replicate_to_all_cores(std::move(tmptr)); } From 6e6b2cfa6369dae83dd0546655dee92883bc46a9 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Tue, 10 Dec 2024 18:40:43 +0200 Subject: [PATCH 15/57] storage_service: use existing util function instead of re-iplementing it locator/util.hh already has get_range_to_address_map which is exactly like the one in the storage_service. So remove the later one and use the former instead. --- locator/util.cc | 2 +- locator/util.hh | 5 +++++ service/storage_service.cc | 24 +----------------------- service/storage_service.hh | 11 ----------- 4 files changed, 7 insertions(+), 35 deletions(-) diff --git a/locator/util.cc b/locator/util.cc index fc1cba5394..d7f8768cda 100644 --- a/locator/util.cc +++ b/locator/util.cc @@ -47,7 +47,7 @@ get_all_ranges(const std::vector& sorted_tokens) { } // Caller is responsible to hold token_metadata valid until the returned future is resolved -static future> +future> get_range_to_address_map(locator::effective_replication_map_ptr erm, const std::vector& sorted_tokens) { co_return co_await construct_range_to_endpoint_map(erm, co_await get_all_ranges(sorted_tokens)); diff --git a/locator/util.hh b/locator/util.hh index e85c04048e..3a3360d3d9 100644 --- a/locator/util.hh +++ b/locator/util.hh @@ -9,6 +9,9 @@ #pragma once #include "dht/token_range_endpoints.hh" +#include "dht/i_partitioner_fwd.hh" +#include "inet_address_vectors.hh" +#include "locator/abstract_replication_strategy.hh" namespace replica { class database; @@ -20,4 +23,6 @@ namespace gms { namespace locator { future> describe_ring(const replica::database& db, const gms::gossiper& gossiper, const sstring& keyspace, bool include_only_local_dc = false); + future> get_range_to_address_map( + locator::effective_replication_map_ptr erm, const std::vector& sorted_tokens); } \ No newline at end of file diff --git a/service/storage_service.cc b/service/storage_service.cc index b71e55d8bd..b99260d8b4 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -2303,14 +2303,7 @@ future<> storage_service::bootstrap(std::unordered_set& bootstrap_tokens, future> storage_service::get_range_to_address_map(locator::effective_replication_map_ptr erm) const { - return get_range_to_address_map(erm, erm->get_token_metadata_ptr()->sorted_tokens()); -} - -// Caller is responsible to hold token_metadata valid until the returned future is resolved -future> -storage_service::get_range_to_address_map(locator::effective_replication_map_ptr erm, - const std::vector& sorted_tokens) const { - co_return co_await construct_range_to_endpoint_map(erm, co_await get_all_ranges(sorted_tokens)); + return locator::get_range_to_address_map(erm, erm->get_token_metadata_ptr()->sorted_tokens()); } future<> storage_service::handle_state_bootstrap(inet_address endpoint, gms::permit_id pid) { @@ -5282,21 +5275,6 @@ storage_service::describe_ring_for_table(const sstring& keyspace_name, const sst co_return ranges; } -future> -storage_service::construct_range_to_endpoint_map( - locator::effective_replication_map_ptr erm, - const dht::token_range_vector& ranges) const { - std::unordered_map res; - res.reserve(ranges.size()); - for (auto r : ranges) { - res[r] = erm->get_natural_endpoints( - r.end() ? r.end()->value() : dht::maximum_token()); - co_await coroutine::maybe_yield(); - } - co_return res; -} - - std::map storage_service::get_token_to_endpoint_map() { const auto& tm = get_token_metadata(); std::map result; diff --git a/service/storage_service.hh b/service/storage_service.hh index 549f67c2f6..40da8c869c 100644 --- a/service/storage_service.hh +++ b/service/storage_service.hh @@ -428,8 +428,6 @@ private: public: future> get_range_to_address_map(locator::effective_replication_map_ptr erm) const; - future> get_range_to_address_map(locator::effective_replication_map_ptr erm, - const std::vector& sorted_tokens) const; /** * The same as {@code describeRing(String)} but converts TokenRange to the String for JMX compatibility @@ -466,15 +464,6 @@ public: */ future> get_tablet_to_endpoint_map(table_id table); - /** - * Construct the range to endpoint mapping based on the true view - * of the world. - * @param ranges - * @return mapping of ranges to the replicas responsible for them. - */ - future> construct_range_to_endpoint_map( - locator::effective_replication_map_ptr erm, - const dht::token_range_vector& ranges) const; public: virtual future<> on_join(gms::inet_address endpoint, gms::endpoint_state_ptr ep_state, gms::permit_id) override; /* From f03a575f3d73a44547443e5b185ec5afa17c6ec7 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 15 Dec 2024 12:46:08 +0200 Subject: [PATCH 16/57] storage_service: move storage_service::get_natural_endpoints to use host ids internally and translate to ips before returning The function is called by RESful API so has to return ips for backwards compatibility, but internally we can use host ids as long as possible and convert to ips just before returning. This also drops usage of ip based erm function which we want to get rid of. --- service/storage_service.cc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/service/storage_service.cc b/service/storage_service.cc index b99260d8b4..afac37dabf 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -7342,10 +7342,13 @@ storage_service::get_natural_endpoints(const sstring& keyspace, partition_key pk = partition_key::from_nodetool_style_string(schema, key); dht::token token = schema->get_partitioner().get_token(*schema, pk.view()); const auto& ks = _db.local().find_keyspace(keyspace); + host_id_vector_replica_set replicas; if (ks.uses_tablets()) { - return table.get_effective_replication_map()->get_natural_endpoints(token); + replicas = table.get_effective_replication_map()->get_natural_replicas(token); + } else { + replicas = ks.get_vnode_effective_replication_map()->get_natural_replicas(token); } - return ks.get_vnode_effective_replication_map()->get_natural_endpoints(token); + return replicas | std::views::transform([&] (locator::host_id id) { return _address_map.get(id); }) | std::ranges::to(); } future<> endpoint_lifecycle_notifier::notify_down(gms::inet_address endpoint) { From 9ea53a86562e9a0311f5eab87678e35f0588fb7a Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 15 Dec 2024 13:40:33 +0200 Subject: [PATCH 17/57] storage_service: move describe ring and get_range_to_endpoint_map to use host ids inside and translate to ips at the last moment The functions are called from RESful API so has to return ips for backwards compatibility, but internally we can use host ids as long as possible and convert to ips just before returning. This also drops usage of ip based erm function which we want to get rid of. --- locator/util.cc | 20 ++++++++++---------- locator/util.hh | 2 +- service/storage_service.cc | 6 +++++- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/locator/util.cc b/locator/util.cc index d7f8768cda..55c0e0d491 100644 --- a/locator/util.cc +++ b/locator/util.cc @@ -12,14 +12,14 @@ namespace locator { -static future> +static future> construct_range_to_endpoint_map( locator::effective_replication_map_ptr erm, const dht::token_range_vector& ranges) { - std::unordered_map res; + std::unordered_map res; res.reserve(ranges.size()); for (auto r : ranges) { - res[r] = erm->get_natural_endpoints( + res[r] = erm->get_natural_replicas( r.end() ? r.end()->value() : dht::maximum_token()); co_await coroutine::maybe_yield(); } @@ -47,7 +47,7 @@ get_all_ranges(const std::vector& sorted_tokens) { } // Caller is responsible to hold token_metadata valid until the returned future is resolved -future> +future> get_range_to_address_map(locator::effective_replication_map_ptr erm, const std::vector& sorted_tokens) { co_return co_await construct_range_to_endpoint_map(erm, co_await get_all_ranges(sorted_tokens)); @@ -67,12 +67,12 @@ get_tokens_in_local_dc(const locator::token_metadata& tm) { co_return filtered_tokens; } -static future> +static future> get_range_to_address_map_in_local_dc( locator::effective_replication_map_ptr erm) { auto tmptr = erm->get_token_metadata_ptr(); auto orig_map = co_await get_range_to_address_map(erm, co_await get_tokens_in_local_dc(*tmptr)); - std::unordered_map filtered_map; + std::unordered_map filtered_map; filtered_map.reserve(orig_map.size()); auto local_dc_filter = tmptr->get_topology().get_local_dc_filter(); for (auto entry : orig_map) { @@ -90,7 +90,7 @@ get_range_to_address_map_in_local_dc( // return get_range_to_address_map(db.find_keyspace(keyspace).get_vnode_effective_replication_map()); // } -static future> +future> get_range_to_address_map(locator::effective_replication_map_ptr erm) { return get_range_to_address_map(erm, erm->get_token_metadata_ptr()->sorted_tokens()); } @@ -100,7 +100,7 @@ describe_ring(const replica::database& db, const gms::gossiper& gossiper, const std::vector ranges; auto erm = db.find_keyspace(keyspace).get_vnode_effective_replication_map(); - std::unordered_map range_to_address_map = co_await ( + std::unordered_map range_to_address_map = co_await ( include_only_local_dc ? get_range_to_address_map_in_local_dc(erm) : get_range_to_address_map(erm) @@ -119,10 +119,10 @@ describe_ring(const replica::database& db, const gms::gossiper& gossiper, const } for (auto endpoint : addresses) { dht::endpoint_details details; - details._host = endpoint; + details._host = gossiper.get_address_map().get(endpoint); details._datacenter = topology.get_datacenter(endpoint); details._rack = topology.get_rack(endpoint); - tr._rpc_endpoints.push_back(gossiper.get_rpc_address(endpoint)); + tr._rpc_endpoints.push_back(gossiper.get_rpc_address(details._host)); tr._endpoints.push_back(fmt::to_string(details._host)); tr._endpoint_details.push_back(details); } diff --git a/locator/util.hh b/locator/util.hh index 3a3360d3d9..c66e400680 100644 --- a/locator/util.hh +++ b/locator/util.hh @@ -23,6 +23,6 @@ namespace gms { namespace locator { future> describe_ring(const replica::database& db, const gms::gossiper& gossiper, const sstring& keyspace, bool include_only_local_dc = false); - future> get_range_to_address_map( + future> get_range_to_address_map( locator::effective_replication_map_ptr erm, const std::vector& sorted_tokens); } \ No newline at end of file diff --git a/service/storage_service.cc b/service/storage_service.cc index afac37dabf..c19e593c94 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -13,6 +13,7 @@ #include "compaction/task_manager_module.hh" #include "gc_clock.hh" #include "raft/raft.hh" +#include #include #include "service/qos/raft_service_level_distributed_data_accessor.hh" #include "service/qos/service_level_controller.hh" @@ -2303,7 +2304,10 @@ future<> storage_service::bootstrap(std::unordered_set& bootstrap_tokens, future> storage_service::get_range_to_address_map(locator::effective_replication_map_ptr erm) const { - return locator::get_range_to_address_map(erm, erm->get_token_metadata_ptr()->sorted_tokens()); + co_return (co_await locator::get_range_to_address_map(erm, erm->get_token_metadata_ptr()->sorted_tokens())) | + std::views::transform([&] (auto tid) { return std::make_pair(tid.first, + tid.second | std::views::transform([&] (auto id) { return _address_map.get(id); }) | std::ranges::to()); }) | + std::ranges::to(); } future<> storage_service::handle_state_bootstrap(inet_address endpoint, gms::permit_id pid) { From 542360e825d9087ef98b0c089bd639e6383f9c07 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 16 Dec 2024 13:22:20 +0200 Subject: [PATCH 18/57] test: drop inet_address usage from network_topology_strategy_test Move the test to work on host ids. IPs will be dropped eventually. --- test/boost/network_topology_strategy_test.cc | 32 ++++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/test/boost/network_topology_strategy_test.cc b/test/boost/network_topology_strategy_test.cc index 58cbb556d7..a4369457c6 100644 --- a/test/boost/network_topology_strategy_test.cc +++ b/test/boost/network_topology_strategy_test.cc @@ -50,7 +50,7 @@ struct ring_point { host_id id = host_id::create_random_id(); }; -void print_natural_endpoints(double point, const inet_address_vector_replica_set v) { +void print_natural_endpoints(double point, const host_id_vector_replica_set v) { testlog.debug("Natural endpoints for a token {}:", point); std::string str; std::ostringstream strm(str); @@ -104,11 +104,11 @@ void strategy_sanity_check( void endpoints_check( replication_strategy_ptr ars_ptr, const token_metadata_ptr& tm, - const inet_address_vector_replica_set& endpoints, + const host_id_vector_replica_set& endpoints, const locator::topology& topo, bool strict_dc_rf = false) { - auto&& nodes_per_dc = tm->get_datacenter_token_owners_ips(); + auto&& nodes_per_dc = tm->get_datacenter_token_owners(); const network_topology_strategy* nts_ptr = dynamic_cast(ars_ptr.get()); @@ -123,7 +123,7 @@ void endpoints_check( BOOST_CHECK_LE(total_rf, ars_ptr->get_replication_factor(*tm)); // Check the uniqueness - std::unordered_set ep_set(endpoints.begin(), endpoints.end()); + std::unordered_set ep_set(endpoints.begin(), endpoints.end()); BOOST_CHECK_EQUAL(endpoints.size(), ep_set.size()); // Check the per-DC RF @@ -166,7 +166,7 @@ void full_ring_check(const std::vector& ring_points, for (auto& rp : ring_points) { double cur_point1 = rp.point - 0.5; token t1(tests::d2t(cur_point1 / ring_points.size())); - auto endpoints1 = erm->get_natural_endpoints(t1); + auto endpoints1 = erm->get_natural_replicas(t1); endpoints_check(ars_ptr, tmptr, endpoints1, topo); @@ -179,7 +179,7 @@ void full_ring_check(const std::vector& ring_points, // double cur_point2 = rp.point - 0.2; token t2(tests::d2t(cur_point2 / ring_points.size())); - auto endpoints2 = erm->get_natural_endpoints(t2); + auto endpoints2 = erm->get_natural_replicas(t2); endpoints_check(ars_ptr, tmptr, endpoints2, topo); check_ranges_are_sorted(erm, rp.id).get(); @@ -193,17 +193,17 @@ void full_ring_check(const tablet_map& tmap, auto& tm = *tmptr; const auto& topo = tm.get_topology(); - auto to_endpoint_set = [&] (const tablet_replica_set& replicas) { - inet_address_vector_replica_set result; + auto to_replica_set = [&] (const tablet_replica_set& replicas) { + host_id_vector_replica_set result; result.reserve(replicas.size()); for (auto&& replica : replicas) { - result.emplace_back(tm.get_endpoint_for_host_id(replica.host)); + result.emplace_back(replica.host); } return result; }; for (tablet_id tb : tmap.tablet_ids()) { - endpoints_check(rs_ptr, tmptr, to_endpoint_set(tmap.get_tablet_info(tb).replicas), topo, true); + endpoints_check(rs_ptr, tmptr, to_replica_set(tmap.get_tablet_info(tb).replicas), topo, true); } } @@ -672,7 +672,7 @@ static size_t get_replication_factor(const sstring& dc, static bool has_sufficient_replicas(const sstring& dc, const std::unordered_map>& dc_replicas, - const std::unordered_map>& all_endpoints, + const std::unordered_map>& all_endpoints, const std::unordered_map& datacenters) noexcept { auto dc_replicas_it = dc_replicas.find(dc); if (dc_replicas_it == dc_replicas.end()) { @@ -690,7 +690,7 @@ static bool has_sufficient_replicas(const sstring& dc, static bool has_sufficient_replicas( const std::unordered_map>& dc_replicas, - const std::unordered_map>& all_endpoints, + const std::unordered_map>& all_endpoints, const std::unordered_map& datacenters) noexcept { for (auto& dc : datacenters | std::views::keys) { @@ -741,16 +741,16 @@ static locator::host_id_set calculate_natural_endpoints( // the token-owning members of a DC // const std::unordered_map> - all_endpoints = tm.get_datacenter_token_owners_ips(); + std::unordered_set> + all_endpoints = tm.get_datacenter_token_owners(); // // all racks (with non-token owners filtered out) in a DC so we can check // when we have exhausted all racks in a DC // const std::unordered_map>> - racks = tm.get_datacenter_racks_token_owners_ips(); + std::unordered_set>> + racks = tm.get_datacenter_racks_token_owners(); // not aware of any cluster members SCYLLA_ASSERT(!all_endpoints.empty() && !racks.empty()); From 5262bbafffe575000c2ab5f09433500d0cb9923f Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 15 Dec 2024 13:56:12 +0200 Subject: [PATCH 19/57] locator: drop no longer used ip based functions --- locator/abstract_replication_strategy.cc | 23 ----------------------- locator/abstract_replication_strategy.hh | 5 ----- locator/tablets.cc | 4 ---- 3 files changed, 32 deletions(-) diff --git a/locator/abstract_replication_strategy.cc b/locator/abstract_replication_strategy.cc index 468426e8ff..c48a0a80ac 100644 --- a/locator/abstract_replication_strategy.cc +++ b/locator/abstract_replication_strategy.cc @@ -56,19 +56,6 @@ auto& ring_mapping::operator*() const { return std::as_const(_impl->map); } -template -static ResultSet resolve_endpoints(const SourceSet& host_ids, const token_metadata& tm) { - ResultSet result{}; - result.reserve(host_ids.size()); - for (const auto& host_id: host_ids) { - // Empty host_id is used as a marker for local address. - // The reason for this hack is that we need local_strategy to - // work before the local host_id is loaded from the system.local table. - result.push_back(host_id ? tm.get_endpoint_for_host_id(host_id) : tm.get_topology().my_address()); - } - return result; -} - logging::logger rslogger("replication_strategy"); abstract_replication_strategy::abstract_replication_strategy( @@ -507,20 +494,10 @@ host_id_vector_replica_set vnode_effective_replication_map::do_get_replicas(cons return it->second; } -inet_address_vector_replica_set vnode_effective_replication_map::do_get_natural_endpoints(const token& tok, - bool is_vnode) const -{ - return resolve_endpoints(do_get_replicas(tok, is_vnode), *_tmptr); -} - host_id_vector_replica_set vnode_effective_replication_map::get_replicas(const token& tok) const { return do_get_replicas(tok, false); } -inet_address_vector_replica_set vnode_effective_replication_map::get_natural_endpoints(const token& search_token) const { - return do_get_natural_endpoints(search_token, false); -} - host_id_vector_replica_set vnode_effective_replication_map::get_natural_replicas(const token& search_token) const { return get_replicas(search_token); } diff --git a/locator/abstract_replication_strategy.hh b/locator/abstract_replication_strategy.hh index 34ed725f2c..df13168195 100644 --- a/locator/abstract_replication_strategy.hh +++ b/locator/abstract_replication_strategy.hh @@ -231,9 +231,6 @@ public: /// new replica. /// /// The returned addresses are present in the topology object associated with this instance. - virtual inet_address_vector_replica_set get_natural_endpoints(const token& search_token) const = 0; - - /// Same as above but returns host ids instead of addresses virtual host_id_vector_replica_set get_natural_replicas(const token& search_token) const = 0; /// Same as above but returns host ids instead of addresses @@ -333,7 +330,6 @@ private: friend class abstract_replication_strategy; friend class effective_replication_map_factory; public: // effective_replication_map - inet_address_vector_replica_set get_natural_endpoints(const token& search_token) const override; host_id_vector_replica_set get_natural_replicas(const token& search_token) const override; host_id_vector_topology_change get_pending_replicas(const token& search_token) const override; host_id_vector_replica_set get_replicas_for_reading(const token& token) const override; @@ -397,7 +393,6 @@ public: private: future do_get_ranges(noncopyable_function consider_range_for_endpoint) const; - inet_address_vector_replica_set do_get_natural_endpoints(const token& tok, bool is_vnode) const; host_id_vector_replica_set do_get_replicas(const token& tok, bool is_vnode) const; stop_iteration for_each_natural_endpoint_until(const token& vnode_tok, const noncopyable_function& func) const; diff --git a/locator/tablets.cc b/locator/tablets.cc index cafe0fe22a..935a171f88 100644 --- a/locator/tablets.cc +++ b/locator/tablets.cc @@ -879,10 +879,6 @@ public: return to_host_set(get_replicas_for_write(search_token)); } - virtual inet_address_vector_replica_set get_natural_endpoints(const token& search_token) const override { - return to_replica_set(get_replicas_for_write(search_token)); - } - virtual host_id_vector_replica_set get_natural_replicas(const token& search_token) const override { return to_host_set(get_replicas_for_write(search_token)); } From 8cc09f435806f67023bc59af7e8113135f4707d4 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 15 Dec 2024 18:31:37 +0200 Subject: [PATCH 20/57] storage_service: do not use ip addresses from token_metadata in handling of a normal state Instead use gossiper and peers table to retrieve same information. Token_metadata is created from the mix of those two anyway. The goal is to drop ips from token_metadata entirely. --- service/storage_service.cc | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/service/storage_service.cc b/service/storage_service.cc index c19e593c94..ad9cc6ddee 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -2370,11 +2370,26 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit if (tmptr->is_normal_token_owner(host_id)) { slogger.info("handle_state_normal: node {}/{} was already a normal token owner", endpoint, host_id); } - auto existing = tmptr->get_endpoint_for_host_id_if_known(host_id); // Old node in replace-with-same-IP scenario. std::optional replaced_id; + auto ips = _gossiper.get_nodes_with_host_id(host_id); + + std::optional existing; + + if (tmptr->get_topology().find_node(host_id)) { + // If node is not in the topology there is no existsing address + // If there are two addresses for the same id the "other" one is existing + // If there is only one it is existing + if (ips.size() == 2) { + if (ips.erase(endpoint) == 0) { + on_internal_error(slogger, fmt::format("Gossiper has two ips {} for host id {} but none of them is {}", ips, endpoint, host_id)); + } + } + existing = *ips.begin(); + } + if (existing && *existing != endpoint) { // This branch in taken when a node changes its IP address. @@ -2420,11 +2435,12 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit // We do this after update_normal_tokens, allowing for tokens to be properly // migrated to the new host_id. - slogger.info("Host ID {} continues to be owned by {}", host_id, endpoint); - if (const auto old_host_id = tmptr->get_host_id_if_known(endpoint); old_host_id && *old_host_id != host_id) { - // Replace with same IP scenario - slogger.info("The IP {} previously owned host ID {}", endpoint, *old_host_id); - replaced_id = *old_host_id; + auto peers = co_await _sys_ks.local().load_host_ids(); + if (peers.contains(endpoint) && peers[endpoint] != host_id) { + replaced_id = peers[endpoint]; + slogger.info("The IP {} previously owned host ID {}", endpoint, *replaced_id); + } else { + slogger.info("Host ID {} continues to be owned by {}", host_id, endpoint); } } else { // This branch is taken if this node wasn't involved in node_ops From 7a3237c6879b5eba391b8d2cd8a3c39a10209cb0 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 18 Dec 2024 17:05:40 +0200 Subject: [PATCH 21/57] messaging_service: drop get_raw_version and knows_version The are unused. The version is always fixed. --- api/messaging_service.cc | 2 +- message/messaging_service.cc | 10 ---------- message/messaging_service.hh | 4 ---- service/migration_manager.cc | 4 +--- 4 files changed, 2 insertions(+), 18 deletions(-) diff --git a/api/messaging_service.cc b/api/messaging_service.cc index b1665c8c5c..3fc38b127c 100644 --- a/api/messaging_service.cc +++ b/api/messaging_service.cc @@ -114,7 +114,7 @@ void set_messaging_service(http_context& ctx, routes& r, sharded req) { diff --git a/message/messaging_service.cc b/message/messaging_service.cc index eb7b814d8f..22b6e35eab 100644 --- a/message/messaging_service.cc +++ b/message/messaging_service.cc @@ -257,16 +257,6 @@ const uint64_t* messaging_service::get_dropped_messages() const { return _dropped_messages; } -int32_t messaging_service::get_raw_version(const gms::inet_address& endpoint) const { - // FIXME: messaging service versioning - return current_version; -} - -bool messaging_service::knows_version(const gms::inet_address& endpoint) const { - // FIXME: messaging service versioning - return true; -} - future<> messaging_service::unregister_handler(messaging_verb verb) { return _rpc->unregister_handler(verb); } diff --git a/message/messaging_service.hh b/message/messaging_service.hh index eb5c52613c..873aa83c90 100644 --- a/message/messaging_service.hh +++ b/message/messaging_service.hh @@ -272,10 +272,6 @@ public: const uint64_t* get_dropped_messages() const; - int32_t get_raw_version(const gms::inet_address& endpoint) const; - - bool knows_version(const gms::inet_address& endpoint) const; - enum class encrypt_what { none, rack, diff --git a/service/migration_manager.cc b/service/migration_manager.cc index 0a1acd0f12..6f4d6d5d6c 100644 --- a/service/migration_manager.cc +++ b/service/migration_manager.cc @@ -934,9 +934,7 @@ future<> migration_manager::announce_without_raft(std::vector schema, auto all_live = _gossiper.get_live_members(); auto live_members = all_live | std::views::filter([this, my_address = _messaging.broadcast_address()] (const gms::inet_address& endpoint) { // only push schema to nodes with known and equal versions - return endpoint != my_address && - _messaging.knows_version(endpoint) && - _messaging.get_raw_version(endpoint) == netw::messaging_service::current_version; + return endpoint != my_address; }); // FIXME: gossiper should return host id set auto live_host_ids = live_members | std::views::transform([&] (const gms::inet_address& ip) { return _gossiper.get_host_id(ip); }); From 36ccc897e809d867325452c7372d72d018c0a618 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 18 Dec 2024 17:06:21 +0200 Subject: [PATCH 22/57] gossiper: change get_live_members and all its users to work on host ids --- gms/gossiper.cc | 8 ++++++-- gms/gossiper.hh | 3 ++- service/migration_manager.cc | 6 ++---- service/storage_service.cc | 10 +++++----- 4 files changed, 15 insertions(+), 12 deletions(-) diff --git a/gms/gossiper.cc b/gms/gossiper.cc index c303230316..a8ec00dc81 100644 --- a/gms/gossiper.cc +++ b/gms/gossiper.cc @@ -924,7 +924,7 @@ future> gossiper::get_live_members_synchronized() { return container().invoke_on(0, [] (gms::gossiper& g) -> future> { // Make sure the value we return is synchronized on all shards auto lock = co_await g.lock_endpoint_update_semaphore(); - co_return g.get_live_members(); + co_return g.get_live_members_helper(); }); } @@ -1175,7 +1175,7 @@ future<> gossiper::unregister_(shared_ptr su return _subscribers.remove(subscriber); } -std::set gossiper::get_live_members() const { +std::set gossiper::get_live_members_helper() const { std::set live_members(_live_endpoints.begin(), _live_endpoints.end()); auto myip = get_broadcast_address(); logger.debug("live_members before={}", live_members); @@ -1186,6 +1186,10 @@ std::set gossiper::get_live_members() const { return live_members; } +std::set gossiper::get_live_members() const { + return get_live_members_helper() | std::views::transform([this] (inet_address ip) { return get_host_id(ip); }) | std::ranges::to(); +} + std::set gossiper::get_live_token_owners() const { std::set token_owners; auto normal_token_owners = get_token_metadata_ptr()->get_normal_token_owners(); diff --git a/gms/gossiper.hh b/gms/gossiper.hh index 595f7103ca..1b647370ff 100644 --- a/gms/gossiper.hh +++ b/gms/gossiper.hh @@ -296,7 +296,7 @@ public: */ future<> unregister_(shared_ptr subscriber); - std::set get_live_members() const; + std::set get_live_members() const; std::set get_live_token_owners() const; @@ -528,6 +528,7 @@ public: future<> wait_alive(std::vector nodes, std::chrono::milliseconds timeout); future<> wait_alive(std::vector nodes, std::chrono::milliseconds timeout); future<> wait_alive(noncopyable_function()> get_nodes, std::chrono::milliseconds timeout); + std::set get_live_members_helper() const; // Wait for `n` live nodes to show up in gossip (including ourself). future<> wait_for_live_nodes_to_show_up(size_t n); diff --git a/service/migration_manager.cc b/service/migration_manager.cc index 6f4d6d5d6c..ef3ffa98cf 100644 --- a/service/migration_manager.cc +++ b/service/migration_manager.cc @@ -932,13 +932,11 @@ future<> migration_manager::announce_without_raft(std::vector schema, try { using namespace std::placeholders; auto all_live = _gossiper.get_live_members(); - auto live_members = all_live | std::views::filter([this, my_address = _messaging.broadcast_address()] (const gms::inet_address& endpoint) { + auto live_members = all_live | std::views::filter([my_address = _gossiper.my_host_id()] (const locator::host_id& endpoint) { // only push schema to nodes with known and equal versions return endpoint != my_address; }); - // FIXME: gossiper should return host id set - auto live_host_ids = live_members | std::views::transform([&] (const gms::inet_address& ip) { return _gossiper.get_host_id(ip); }); - co_await coroutine::parallel_for_each(live_host_ids, + co_await coroutine::parallel_for_each(live_members, std::bind(std::mem_fn(&migration_manager::push_schema_mutation), this, std::placeholders::_1, schema)); } catch (...) { mlogger.error("failed to announce migration to all nodes: {}", std::current_exception()); diff --git a/service/storage_service.cc b/service/storage_service.cc index ad9cc6ddee..8a446813d3 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -3609,17 +3609,17 @@ future>> storage_service::descr std::unordered_map> results; netw::messaging_service& ms = _messaging.local(); return map_reduce(std::move(live_hosts), [&ms, as = abort_source()] (auto host) mutable { - auto f0 = ser::migration_manager_rpc_verbs::send_schema_check(&ms, netw::msg_addr{ host, 0 }, as); + auto f0 = ser::migration_manager_rpc_verbs::send_schema_check(&ms, host, as); return std::move(f0).then_wrapped([host] (auto f) { if (f.failed()) { f.ignore_ready_future(); - return std::pair>(host, std::nullopt); + return std::pair>(host, std::nullopt); } - return std::pair>(host, f.get()); + return std::pair>(host, f.get()); }); - }, std::move(results), [] (auto results, auto host_and_version) { + }, std::move(results), [this] (auto results, auto host_and_version) { auto version = host_and_version.second ? host_and_version.second->to_sstring() : UNREACHABLE; - results.try_emplace(version).first->second.emplace_back(fmt::to_string(host_and_version.first)); + results.try_emplace(version).first->second.emplace_back(fmt::to_string(_address_map.get(host_and_version.first))); return results; }).then([this] (auto results) { // we're done: the results map is ready to return to the client. the rest is just debug logging: From da9b7b26262340875e67d533e3cd3e3417ea44c4 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 15 Jan 2025 11:55:26 +0200 Subject: [PATCH 23/57] storage_service: drop ip based topology::get_datacenter() usage We want to drop ips from the topology eventually. --- service/storage_service.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/service/storage_service.cc b/service/storage_service.cc index 8a446813d3..74c30d6d5c 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -2735,7 +2735,7 @@ future<> storage_service::maybe_reconnect_to_preferred_ip(inet_address ep, inet_ } const auto& topo = get_token_metadata().get_topology(); - if (topo.get_datacenter() == topo.get_datacenter(ep) && _messaging.local().get_preferred_ip(ep) != local_ip) { + if (topo.get_datacenter() == topo.get_datacenter(_gossiper.get_host_id(ep)) && _messaging.local().get_preferred_ip(ep) != local_ip) { slogger.debug("Initiated reconnect to an Internal IP {} for the {}", local_ip, ep); co_await _messaging.invoke_on_all([ep, local_ip] (auto& local_ms) { local_ms.cache_preferred_ip(ep, local_ip); From 1b6e1456e59ba75d499f45dddf31dc0fc1f5ee60 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 18 Dec 2024 18:07:49 +0200 Subject: [PATCH 24/57] messaging_service: drop the usage of ip based token_metadata APIs We want to drop ips from token_metadata so move to use host id based counterparts. Messaging service gets a function that maps from ips to id when is starts listening. --- main.cc | 13 ++++++++++++- message/messaging_service.cc | 21 +++++++++++++-------- message/messaging_service.hh | 6 ++++-- test/lib/cql_test_env.cc | 2 +- test/manual/message.cc | 2 +- 5 files changed, 31 insertions(+), 13 deletions(-) diff --git a/main.cc b/main.cc index 8ccb6d846c..700cb6d83f 100644 --- a/main.cc +++ b/main.cc @@ -2118,7 +2118,18 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl group0_service.setup_group0_if_exist(sys_ks.local(), ss.local(), qp.local(), mm.local()).get(); with_scheduling_group(maintenance_scheduling_group, [&] { - return messaging.invoke_on_all(&netw::messaging_service::start_listen, std::ref(token_metadata)); + return messaging.invoke_on_all([&] (auto& ms) { + return ms.start_listen(token_metadata.local(), [&gossiper] (gms::inet_address ip) { + if (ip == gossiper.local().get_broadcast_address()) { + return gossiper.local().my_host_id(); + } + try { + return gossiper.local().get_host_id(ip); + } catch (...) { + return locator::host_id{}; + } + }); + }); }).get(); const auto generation_number = gms::generation_type(sys_ks.local().increment_and_get_generation().get()); diff --git a/message/messaging_service.cc b/message/messaging_service.cc index 22b6e35eab..8dc7ce6e9e 100644 --- a/message/messaging_service.cc +++ b/message/messaging_service.cc @@ -299,8 +299,9 @@ future<> messaging_service::start() { return make_ready_future<>(); } -future<> messaging_service::start_listen(locator::shared_token_metadata& stm) { +future<> messaging_service::start_listen(locator::shared_token_metadata& stm, std::function address_to_host_id_mapper) { _token_metadata = &stm; + _address_to_host_id_mapper = std::move(address_to_host_id_mapper); do_start_listen(); return make_ready_future<>(); } @@ -308,20 +309,21 @@ future<> messaging_service::start_listen(locator::shared_token_metadata& stm) { bool messaging_service::topology_known_for(inet_address addr) const { // The token metadata pointer is nullptr before // the service is start_listen()-ed and after it's being shutdown()-ed. + const locator::node* node; return _token_metadata - && _token_metadata->get()->get_topology().has_endpoint(addr); + && (node = _token_metadata->get()->get_topology().find_node(_address_to_host_id_mapper(addr))) && !node->is_none(); } // Precondition: `topology_known_for(addr)`. bool messaging_service::is_same_dc(inet_address addr) const { const auto& topo = _token_metadata->get()->get_topology(); - return topo.get_datacenter(addr) == topo.get_datacenter(); + return topo.get_datacenter(_address_to_host_id_mapper(addr)) == topo.get_datacenter(); } // Precondition: `topology_known_for(addr)`. bool messaging_service::is_same_rack(inet_address addr) const { const auto& topo = _token_metadata->get()->get_topology(); - return topo.get_rack(addr) == topo.get_rack(); + return topo.get_rack(_address_to_host_id_mapper(addr)) == topo.get_rack(); } // The socket metrics domain defines the way RPC metrics are grouped @@ -334,12 +336,15 @@ bool messaging_service::is_same_rack(inet_address addr) const { // that the isolation cookie suits very well, because these cookies // are different for different indices and are more informative than // plain numbers -sstring messaging_service::client_metrics_domain(unsigned idx, inet_address addr) const { +sstring messaging_service::client_metrics_domain(unsigned idx, inet_address addr, std::optional id) const { sstring ret = _scheduling_info_for_connection_index[idx].isolation_cookie; + if (!id) { + id = _address_to_host_id_mapper(addr); + } if (_token_metadata) { const auto& topo = _token_metadata->get()->get_topology(); - if (topo.has_endpoint(addr)) { - ret += ":" + topo.get_datacenter(addr); + if (topo.has_node(*id)) { + ret += ":" + topo.get_datacenter(*id); } } return ret; @@ -1106,7 +1111,7 @@ shared_ptr messaging_service::ge opts.tcp_nodelay = must_tcp_nodelay; opts.reuseaddr = true; opts.isolation_cookie = _scheduling_info_for_connection_index[idx].isolation_cookie; - opts.metrics_domain = client_metrics_domain(idx, id.addr); // not just `addr` as the latter may be internal IP + opts.metrics_domain = client_metrics_domain(idx, id.addr, host_id); // not just `addr` as the latter may be internal IP SCYLLA_ASSERT(!must_encrypt || _credentials); diff --git a/message/messaging_service.hh b/message/messaging_service.hh index 873aa83c90..083caee6ea 100644 --- a/message/messaging_service.hh +++ b/message/messaging_service.hh @@ -337,6 +337,8 @@ private: private: config _cfg; locator::shared_token_metadata* _token_metadata = nullptr; + // a function that maps from ip to host id if known (returns default constructable host_id if there is no mapping) + std::function _address_to_host_id_mapper; // map: Node broadcast address -> Node internal IP, and the reversed mapping, for communication within the same data center std::unordered_map _preferred_ip_cache, _preferred_to_endpoint; std::unique_ptr _rpc; @@ -378,7 +380,7 @@ public: ~messaging_service(); future<> start(); - future<> start_listen(locator::shared_token_metadata& stm); + future<> start_listen(locator::shared_token_metadata& stm, std::function address_to_host_id_mapper); uint16_t port() const noexcept { return _cfg.port; } @@ -455,7 +457,7 @@ private: bool is_host_banned(locator::host_id); - sstring client_metrics_domain(unsigned idx, inet_address addr) const; + sstring client_metrics_domain(unsigned idx, inet_address addr, std::optional id) const; public: // Return rpc::protocol::client for a shard which is a ip + cpuid pair. diff --git a/test/lib/cql_test_env.cc b/test/lib/cql_test_env.cc index eae4ffc455..82f5971340 100644 --- a/test/lib/cql_test_env.cc +++ b/test/lib/cql_test_env.cc @@ -755,7 +755,7 @@ private: // Once the seastar issue is fixed, we can just keep the tmp socket aliva across // the listen invoke below. tmp = {}; - _ms.invoke_on_all(&netw::messaging_service::start_listen, std::ref(_token_metadata)).get(); + _ms.invoke_on_all(&netw::messaging_service::start_listen, std::ref(_token_metadata), [host_id] (gms::inet_address ip) {return host_id; }).get(); } } catch (std::system_error& e) { // if we still hit a used port (quick other process), just shut down ms and try again. diff --git a/test/manual/message.cc b/test/manual/message.cc index 7e153edd3e..d32a9891a8 100644 --- a/test/manual/message.cc +++ b/test/manual/message.cc @@ -224,7 +224,7 @@ int main(int ac, char ** av) { auto deinit_testers = deferred_action([&testers] { testers.invoke_on_all(&tester::deinit_handler).get(); }); - messaging.invoke_on_all(&netw::messaging_service::start_listen, std::ref(token_metadata)).get(); + messaging.invoke_on_all(&netw::messaging_service::start_listen, std::ref(token_metadata), [] (gms::inet_address ip){ return locator::host_id{}; }).get(); if (config.contains("server")) { auto ip = config["server"].as(); auto cpuid = config["cpuid"].as(); From 315db647dd82b27809091fc0a6587ccff8265865 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 19 Dec 2024 13:03:38 +0200 Subject: [PATCH 25/57] consistency_level: drop templates since the same types of ranges are used by all the callers --- db/consistency_level.cc | 19 ++++++------------- db/consistency_level.hh | 10 ++-------- 2 files changed, 8 insertions(+), 21 deletions(-) diff --git a/db/consistency_level.cc b/db/consistency_level.cc index 1ecefa837e..7bdaaaa046 100644 --- a/db/consistency_level.cc +++ b/db/consistency_level.cc @@ -113,11 +113,10 @@ bool is_datacenter_local(consistency_level l) { return l == consistency_level::LOCAL_ONE || l == consistency_level::LOCAL_QUORUM; } -template > std::unordered_map count_per_dc_endpoints( const locator::effective_replication_map& erm, - const Range& live_endpoints, - const PendingRange& pending_endpoints = std::array()) { + const host_id_vector_replica_set& live_endpoints, + const host_id_vector_topology_change& pending_endpoints = host_id_vector_topology_change()) { using namespace locator; auto& rs = erm.get_replication_strategy(); @@ -148,12 +147,11 @@ std::unordered_map count_per_dc_endpoints( return dc_endpoints; } -template bool assure_sufficient_live_nodes_each_quorum( consistency_level cl, const locator::effective_replication_map& erm, - const Range& live_endpoints, - const PendingRange& pending_endpoints) { + const host_id_vector_replica_set& live_endpoints, + const host_id_vector_topology_change& pending_endpoints) { using namespace locator; auto& rs = erm.get_replication_strategy(); @@ -175,12 +173,11 @@ bool assure_sufficient_live_nodes_each_quorum( return false; } -template void assure_sufficient_live_nodes( consistency_level cl, const locator::effective_replication_map& erm, - const Range& live_endpoints, - const PendingRange& pending_endpoints) { + const host_id_vector_replica_set& live_endpoints, + const host_id_vector_topology_change& pending_endpoints) { size_t need = block_for(erm, cl); auto adjust_live_for_error = [] (size_t live, size_t pending) { @@ -228,10 +225,6 @@ void assure_sufficient_live_nodes( } } -template void assure_sufficient_live_nodes(consistency_level, const locator::effective_replication_map&, const inet_address_vector_replica_set&, const std::array&); -template void assure_sufficient_live_nodes(db::consistency_level, const locator::effective_replication_map&, const inet_address_vector_replica_set&, const utils::small_vector&); -template void assure_sufficient_live_nodes(db::consistency_level, const locator::effective_replication_map&, const host_id_vector_replica_set&, const host_id_vector_topology_change&); - host_id_vector_replica_set filter_for_query(consistency_level cl, const locator::effective_replication_map& erm, diff --git a/db/consistency_level.hh b/db/consistency_level.hh index 787702b127..f9619a97c4 100644 --- a/db/consistency_level.hh +++ b/db/consistency_level.hh @@ -61,15 +61,9 @@ is_sufficient_live_nodes(consistency_level cl, const locator::effective_replication_map& erm, const host_id_vector_replica_set& live_endpoints); -template> void assure_sufficient_live_nodes( consistency_level cl, const locator::effective_replication_map& erm, - const Range& live_endpoints, - const PendingRange& pending_endpoints = std::array()); - -extern template void assure_sufficient_live_nodes(consistency_level, const locator::effective_replication_map&, const inet_address_vector_replica_set&, const std::array&); -extern template void assure_sufficient_live_nodes(db::consistency_level, const locator::effective_replication_map&, const inet_address_vector_replica_set&, const utils::small_vector&); -extern template void assure_sufficient_live_nodes(db::consistency_level, const locator::effective_replication_map&, const host_id_vector_replica_set&, const host_id_vector_topology_change&); - + const host_id_vector_replica_set& live_endpoints, + const host_id_vector_topology_change& pending_endpoints = host_id_vector_topology_change()); } From 25eb98ecbcd763f76035152c3acda55dc82cb7a8 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 19 Dec 2024 14:46:19 +0200 Subject: [PATCH 26/57] locator: topology: drop no longer used ip based overloads --- locator/topology.hh | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/locator/topology.hh b/locator/topology.hh index 95b5c5eeed..2dec983987 100644 --- a/locator/topology.hh +++ b/locator/topology.hh @@ -323,11 +323,6 @@ public: const sstring& get_datacenter(host_id id) const { return get_location(id).dc; } - // Get datacenter of a node identified by endpoint - // The specified node must exist. - const sstring& get_datacenter(inet_address ep) const { - return get_location(ep).dc; - } // Get rack of this node const sstring& get_rack() const noexcept { @@ -338,11 +333,6 @@ public: const sstring& get_rack(host_id id) const { return get_location(id).rack; } - // Get rack of a node identified by endpoint - // The specified node must exist. - const sstring& get_rack(inet_address ep) const { - return get_location(ep).rack; - } auto get_local_dc_filter() const noexcept { return [ this, local_dc = get_datacenter() ] (auto ep) { From 8433947932cd1397730e0d0ef09168c8b9f97ce1 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 19 Dec 2024 14:52:49 +0200 Subject: [PATCH 27/57] locator: topology: remove get_location overload that works on ip and its last users --- db/virtual_tables.cc | 7 +++++-- locator/topology.cc | 18 ------------------ locator/topology.hh | 3 --- test/boost/locator_topology_test.cc | 12 +++--------- 4 files changed, 8 insertions(+), 32 deletions(-) diff --git a/db/virtual_tables.cc b/db/virtual_tables.cc index b9b3e0d5f0..95cb7523da 100644 --- a/db/virtual_tables.cc +++ b/db/virtual_tables.cc @@ -92,8 +92,11 @@ public: } set_cell(cr, "host_id", hostid.uuid()); - sstring dc = tm.get_topology().get_location(endpoint).dc; - set_cell(cr, "dc", dc); + if (tm.get_topology().has_node(hostid)) { + // Not all entries in gossiper are present in the topology + sstring dc = tm.get_topology().get_location(hostid).dc; + set_cell(cr, "dc", dc); + } if (ownership.contains(endpoint)) { set_cell(cr, "owns", ownership[endpoint]); diff --git a/locator/topology.cc b/locator/topology.cc index 179b1ae87f..12f3ae6c23 100644 --- a/locator/topology.cc +++ b/locator/topology.cc @@ -555,24 +555,6 @@ bool topology::has_endpoint(inet_address ep) const return has_node(ep); } -const endpoint_dc_rack& topology::get_location(const inet_address& ep) const { - if (auto node = find_node(ep)) { - return node->dc_rack(); - } - // We should do the following check after lookup in nodes. - // In tests, there may be no config for local node, so fall back to get_location() - // only if no mapping is found. Otherwise, get_location() will return empty location - // from config or random node, neither of which is correct. - if (ep == _cfg.this_endpoint) { - return get_location(); - } - // FIXME -- this shouldn't happen. After topology is stable and is - // correctly populated with endpoints, this should be replaced with - // on_internal_error() - tlogger.warn("Requested location for node {} not in topology. backtrace {}", ep, lazy_backtrace()); - return endpoint_dc_rack::default_location; -} - void topology::sort_by_proximity(locator::host_id address, host_id_vector_replica_set& addresses) const { if (can_sort_by_proximity()) { do_sort_by_proximity(address, addresses); diff --git a/locator/topology.hh b/locator/topology.hh index 2dec983987..6dfe80869b 100644 --- a/locator/topology.hh +++ b/locator/topology.hh @@ -310,9 +310,6 @@ public: const endpoint_dc_rack& get_location(host_id id) const { return find_node(id)->dc_rack(); } - // Get dc/rack location of a node identified by endpoint - // The specified node must exist. - const endpoint_dc_rack& get_location(const inet_address& ep) const; // Get datacenter of this node const sstring& get_datacenter() const noexcept { diff --git a/test/boost/locator_topology_test.cc b/test/boost/locator_topology_test.cc index c41ded826c..99e815e847 100644 --- a/test/boost/locator_topology_test.cc +++ b/test/boost/locator_topology_test.cc @@ -122,7 +122,7 @@ SEASTAR_THREAD_TEST_CASE(test_update_node) { auto node = const_cast(topo.this_node()); topo.update_node(*node, std::nullopt, ep1, std::nullopt, std::nullopt); - + BOOST_REQUIRE_EQUAL(topo.find_node(id1), node); BOOST_REQUIRE_THROW(topo.update_node(*node, host_id::create_null_id(), std::nullopt, std::nullopt, std::nullopt), std::runtime_error); @@ -138,31 +138,27 @@ SEASTAR_THREAD_TEST_CASE(test_update_node) { topo.update_node(*node, std::nullopt, std::nullopt, dc_rack1, std::nullopt); BOOST_REQUIRE(topo.get_location(id1) == dc_rack1); - BOOST_REQUIRE(topo.get_location(ep1) == dc_rack1); auto dc_rack2 = endpoint_dc_rack{"DC2", "RACK2"}; topo.update_node(*node, std::nullopt, std::nullopt, dc_rack2, std::nullopt); BOOST_REQUIRE(topo.get_location(id1) == dc_rack2); - BOOST_REQUIRE(topo.get_location(ep1) == dc_rack2); BOOST_REQUIRE_NE(node->get_state(), locator::node::state::being_decommissioned); topo.update_node(*node, std::nullopt, std::nullopt, std::nullopt, locator::node::state::being_decommissioned); - + BOOST_REQUIRE_EQUAL(node->get_state(), locator::node::state::being_decommissioned); auto dc_rack3 = endpoint_dc_rack{"DC3", "RACK3"}; // Note: engage state option, but keep node::state value the same // to reproduce #13502 topo.update_node(*node, std::nullopt, ep3, dc_rack3, locator::node::state::being_decommissioned); - + BOOST_REQUIRE_EQUAL(topo.find_node(id1), node); BOOST_REQUIRE_EQUAL(topo.find_node(ep1), nullptr); BOOST_REQUIRE_EQUAL(topo.find_node(ep2), nullptr); BOOST_REQUIRE_EQUAL(topo.find_node(ep3), node); BOOST_REQUIRE(topo.get_location(id1) == dc_rack3); - BOOST_REQUIRE(topo.get_location(ep2) == endpoint_dc_rack::default_location); - BOOST_REQUIRE(topo.get_location(ep3) == dc_rack3); BOOST_REQUIRE_EQUAL(node->get_state(), locator::node::state::being_decommissioned); // In state::left the node will remain indexed only by its host_id @@ -172,8 +168,6 @@ SEASTAR_THREAD_TEST_CASE(test_update_node) { BOOST_REQUIRE_EQUAL(topo.find_node(ep2), nullptr); BOOST_REQUIRE_EQUAL(topo.find_node(ep3), nullptr); BOOST_REQUIRE(topo.get_location(id1) == dc_rack3); - BOOST_REQUIRE(topo.get_location(ep2) == endpoint_dc_rack::default_location); - BOOST_REQUIRE(topo.get_location(ep3) == endpoint_dc_rack::default_location); BOOST_REQUIRE_EQUAL(node->get_state(), locator::node::state::left); } From 2ea8df2cf5b948bee21fe0025c0ae77f4e9b180c Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 19 Dec 2024 15:31:30 +0200 Subject: [PATCH 28/57] storage_proxy: drop is_alive that works on ip since it is not used any more --- service/storage_proxy.cc | 22 +++++++--------------- service/storage_proxy.hh | 3 +-- 2 files changed, 8 insertions(+), 17 deletions(-) diff --git a/service/storage_proxy.cc b/service/storage_proxy.cc index 86a7958bcc..ecb059bef9 100644 --- a/service/storage_proxy.cc +++ b/service/storage_proxy.cc @@ -186,10 +186,6 @@ locator::host_id storage_proxy::my_host_id(const locator::effective_replication_ return erm.get_topology().my_host_id(); } -bool storage_proxy::is_me(gms::inet_address addr) const noexcept { - return local_db().get_token_metadata().get_topology().is_me(addr); -} - bool storage_proxy::is_me(const locator::effective_replication_map& erm, locator::host_id id) const noexcept { return erm.get_topology().is_me(id); } @@ -3392,7 +3388,7 @@ storage_proxy::create_write_response_handler_helper(schema_ptr s, const dht::tok live_endpoints.reserve(all.size()); dead_endpoints.reserve(all.size()); std::partition_copy(all.begin(), all.end(), std::back_inserter(live_endpoints), - std::back_inserter(dead_endpoints), std::bind_front(&storage_proxy::is_alive_id, this, std::cref(*erm))); + std::back_inserter(dead_endpoints), std::bind_front(&storage_proxy::is_alive, this, std::cref(*erm))); db::per_partition_rate_limit::info rate_limit_info; if (allow_limit && _db.local().can_apply_per_partition_rate_limit(*s, db::operation_type::write)) { @@ -3769,7 +3765,7 @@ storage_proxy::get_paxos_participants(const sstring& ks_name, const locator::eff auto all_as_spans = std::array{std::span(natural_endpoints), std::span(pending_endpoints)}; std::ranges::copy(all_as_spans | std::views::join | - std::views::filter(std::bind_front(&storage_proxy::is_alive_id, this, std::cref(erm))), std::back_inserter(live_endpoints)); + std::views::filter(std::bind_front(&storage_proxy::is_alive, this, std::cref(erm))), std::back_inserter(live_endpoints)); if (live_endpoints.size() < required_participants) { throw exceptions::unavailable_exception(cl_for_paxos, required_participants, live_endpoints.size()); @@ -4019,7 +4015,7 @@ storage_proxy::mutate_atomically_result(std::vector mutations, db::con std::ranges::to>()); } auto local_rack = topology.get_rack(); - auto chosen_endpoints = endpoint_filter(std::bind_front(&storage_proxy::is_alive_id, &_p, std::cref(*_ermp)), local_addr, + auto chosen_endpoints = endpoint_filter(std::bind_front(&storage_proxy::is_alive, &_p, std::cref(*_ermp)), local_addr, local_rack, local_token_owners); if (chosen_endpoints.empty()) { @@ -4173,7 +4169,7 @@ future<> storage_proxy::send_to_endpoint( std::array{std::span(pending_endpoints), std::span(target.begin(), target.end())} | std::views::join, std::inserter(targets, targets.begin()), std::back_inserter(dead_endpoints), - std::bind_front(&storage_proxy::is_alive_id, this, std::cref(*erm))); + std::bind_front(&storage_proxy::is_alive, this, std::cref(*erm))); slogger.trace("Creating write handler with live: {}; dead: {}", targets, dead_endpoints); db::assure_sufficient_live_nodes(cl, *erm, targets, pending_endpoints); return create_write_response_handler( @@ -6600,7 +6596,7 @@ future storage_proxy::cas(schema_ptr schema, shared_ptr reque host_id_vector_replica_set storage_proxy::get_live_endpoints(const locator::effective_replication_map& erm, const dht::token& token) const { host_id_vector_replica_set eps = erm.get_natural_replicas(token); - auto itend = std::ranges::remove_if(eps, std::not_fn(std::bind_front(&storage_proxy::is_alive_id, this, std::cref(erm)))).begin(); + auto itend = std::ranges::remove_if(eps, std::not_fn(std::bind_front(&storage_proxy::is_alive, this, std::cref(erm)))).begin(); eps.erase(itend, eps.end()); return eps; } @@ -6625,7 +6621,7 @@ void storage_proxy::sort_endpoints_by_proximity(const locator::effective_replica host_id_vector_replica_set storage_proxy::get_endpoints_for_reading(const sstring& ks_name, const locator::effective_replication_map& erm, const dht::token& token) const { auto endpoints = erm.get_replicas_for_reading(token); validate_read_replicas(erm, endpoints); - auto it = std::ranges::remove_if(endpoints, std::not_fn(std::bind_front(&storage_proxy::is_alive_id, this, std::cref(erm)))).begin(); + auto it = std::ranges::remove_if(endpoints, std::not_fn(std::bind_front(&storage_proxy::is_alive, this, std::cref(erm)))).begin(); endpoints.erase(it, endpoints.end()); sort_endpoints_by_proximity(erm, endpoints); return endpoints; @@ -6662,11 +6658,7 @@ storage_proxy::filter_replicas_for_read( return filter_replicas_for_read(cl, erm, live_endpoints, preferred_endpoints, db::read_repair_decision::NONE, nullptr, cf); } -bool storage_proxy::is_alive(const gms::inet_address& ep) const { - return _remote ? _remote->is_alive(ep) : is_me(ep); -} - -bool storage_proxy::is_alive_id(const locator::effective_replication_map& erm, const locator::host_id& ep) const { +bool storage_proxy::is_alive(const locator::effective_replication_map& erm, const locator::host_id& ep) const { return is_me(erm, ep) || (_remote ? _remote->is_alive(ep) : false); } diff --git a/service/storage_proxy.hh b/service/storage_proxy.hh index 6c1b0812df..a307a4ffb0 100644 --- a/service/storage_proxy.hh +++ b/service/storage_proxy.hh @@ -362,8 +362,7 @@ private: host_id_vector_replica_set filter_replicas_for_read(db::consistency_level, const locator::effective_replication_map&, host_id_vector_replica_set live_endpoints, const host_id_vector_replica_set& preferred_endpoints, db::read_repair_decision, std::optional* extra, replica::column_family*) const; // As above with read_repair_decision=NONE, extra=nullptr. host_id_vector_replica_set filter_replicas_for_read(db::consistency_level, const locator::effective_replication_map&, const host_id_vector_replica_set& live_endpoints, const host_id_vector_replica_set& preferred_endpoints, replica::column_family*) const; - bool is_alive(const gms::inet_address&) const; - bool is_alive_id(const locator::effective_replication_map& erm, const locator::host_id&) const; + bool is_alive(const locator::effective_replication_map& erm, const locator::host_id&) const; result<::shared_ptr> get_read_executor(lw_shared_ptr cmd, locator::effective_replication_map_ptr ermp, schema_ptr schema, From 8a0fea5fef19755211769bc8910b3e6eaa51cfd5 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 19 Dec 2024 15:32:30 +0200 Subject: [PATCH 29/57] locator: topology: drop is_me ip overload along with remaning users --- db/system_keyspace.cc | 2 +- locator/topology.hh | 4 ---- service/qos/service_level_controller.cc | 2 +- service/storage_service.hh | 2 +- 4 files changed, 3 insertions(+), 7 deletions(-) diff --git a/db/system_keyspace.cc b/db/system_keyspace.cc index 419e201d76..01edce0b7e 100644 --- a/db/system_keyspace.cc +++ b/db/system_keyspace.cc @@ -2080,7 +2080,7 @@ future<> system_keyspace::update_peer_info(gms::inet_address ep, locator::host_i if (!hid) { on_internal_error(slogger, format("update_peer_info called with empty host_id, ep {}", ep)); } - if (_db.get_token_metadata().get_topology().is_me(ep)) { + if (_db.get_token_metadata().get_topology().is_me(hid)) { on_internal_error(slogger, format("update_peer_info called for this node: {}", ep)); } diff --git a/locator/topology.hh b/locator/topology.hh index 6dfe80869b..01941b63e4 100644 --- a/locator/topology.hh +++ b/locator/topology.hh @@ -393,10 +393,6 @@ public: return id == my_host_id(); } - bool is_me(const inet_address& addr) const noexcept { - return addr == my_address(); - } - private: using random_engine_type = std::mt19937_64; diff --git a/service/qos/service_level_controller.cc b/service/qos/service_level_controller.cc index 9c1ebeb6c6..bdcbaac2d5 100644 --- a/service/qos/service_level_controller.cc +++ b/service/qos/service_level_controller.cc @@ -894,7 +894,7 @@ future<> service_level_controller::do_remove_service_level(sstring name, bool re void service_level_controller::on_join_cluster(const gms::inet_address& endpoint) { } void service_level_controller::on_leave_cluster(const gms::inet_address& endpoint, const locator::host_id& hid) { - if (this_shard_id() == global_controller && _token_metadata.get()->get_topology().is_me(endpoint)) { + if (this_shard_id() == global_controller && _token_metadata.get()->get_topology().is_me(hid)) { _global_controller_db->dist_data_update_aborter.request_abort(); _global_controller_db->group0_aborter.request_abort(); } diff --git a/service/storage_service.hh b/service/storage_service.hh index 40da8c869c..b9e012d1c1 100644 --- a/service/storage_service.hh +++ b/service/storage_service.hh @@ -327,7 +327,7 @@ private: return get_token_metadata_ptr()->get_topology().my_host_id(); } bool is_me(inet_address addr) const noexcept { - return get_token_metadata_ptr()->get_topology().is_me(addr); + return addr == get_broadcast_address(); } bool is_me(locator::host_id id) const noexcept { return get_token_metadata_ptr()->get_topology().is_me(id); From 415e8de36e879c556480bf32585a0a16a2ff281f Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 23 Dec 2024 13:43:43 +0200 Subject: [PATCH 30/57] locator: topology: change get_datacenter_endpoints and get_datacenter_racks to return host ids and amend users --- alternator/server.cc | 5 +++-- locator/topology.cc | 4 ++-- locator/topology.hh | 8 ++++---- test/boost/locator_topology_test.cc | 18 +++++++++--------- 4 files changed, 18 insertions(+), 17 deletions(-) diff --git a/alternator/server.cc b/alternator/server.cc index f20f52786c..8a9a7d6884 100644 --- a/alternator/server.cc +++ b/alternator/server.cc @@ -217,7 +217,7 @@ protected: // If the DC does not exist, we return an empty list - not an error. sstring query_dc = req->get_query_param("dc"); sstring local_dc = query_dc.empty() ? topology.get_datacenter() : query_dc; - std::unordered_set local_dc_nodes; + std::unordered_set local_dc_nodes; const auto& endpoints = topology.get_datacenter_endpoints(); auto dc_it = endpoints.find(local_dc); if (dc_it != endpoints.end()) { @@ -227,7 +227,8 @@ protected: // DC, unless a single rack is selected by the "rack" query option. // If the rack does not exist, we return an empty list - not an error. sstring query_rack = req->get_query_param("rack"); - for (auto& ip : local_dc_nodes) { + for (auto& id : local_dc_nodes) { + auto ip = _gossiper.get_address_map().get(id); if (!query_rack.empty()) { auto rack = _gossiper.get_application_state_value(ip, gms::application_state::RACK); if (rack != query_rack) { diff --git a/locator/topology.cc b/locator/topology.cc index 12f3ae6c23..6c1417ca6e 100644 --- a/locator/topology.cc +++ b/locator/topology.cc @@ -394,7 +394,7 @@ void topology::index_node(const node& node) { if (!node.left() && !node.is_none()) { const auto& dc = node.dc_rack().dc; const auto& rack = node.dc_rack().rack; - const auto& endpoint = node.endpoint(); + const auto& endpoint = node.host_id(); _dc_nodes[dc].emplace(std::cref(node)); _dc_rack_nodes[dc][rack].emplace(std::cref(node)); _dc_endpoints[dc].insert(endpoint); @@ -416,7 +416,7 @@ void topology::unindex_node(const node& node) { bool found = _dc_nodes.at(dc).erase(std::cref(node)); if (found) { if (auto dit = _dc_endpoints.find(dc); dit != _dc_endpoints.end()) { - const auto& ep = node.endpoint(); + const auto& ep = node.host_id(); auto& eps = dit->second; eps.erase(ep); if (eps.empty()) { diff --git a/locator/topology.hh b/locator/topology.hh index 01941b63e4..8163c0e706 100644 --- a/locator/topology.hh +++ b/locator/topology.hh @@ -271,7 +271,7 @@ public: bool has_endpoint(inet_address) const; const std::unordered_map>& + std::unordered_set>& get_datacenter_endpoints() const { return _dc_endpoints; } @@ -292,7 +292,7 @@ public: const std::unordered_map>>& + std::unordered_set>>& get_datacenter_racks() const { return _dc_racks; } @@ -424,13 +424,13 @@ private: /** multi-map: DC -> endpoints in that DC */ std::unordered_map> + std::unordered_set> _dc_endpoints; /** map: DC -> (multi-map: rack -> endpoints in that rack) */ std::unordered_map>> + std::unordered_set>> _dc_racks; bool _sort_by_proximity = true; diff --git a/test/boost/locator_topology_test.cc b/test/boost/locator_topology_test.cc index 99e815e847..ccb321a11d 100644 --- a/test/boost/locator_topology_test.cc +++ b/test/boost/locator_topology_test.cc @@ -207,8 +207,8 @@ SEASTAR_THREAD_TEST_CASE(test_add_or_update_by_host_id) { } SEASTAR_THREAD_TEST_CASE(test_remove_endpoint) { - using dc_endpoints_t = std::unordered_map>; - using dc_racks_t = std::unordered_map>>; + using dc_endpoints_t = std::unordered_map>; + using dc_racks_t = std::unordered_map>>; using dcs_t = std::unordered_set; const auto id1 = host_id::create_random_id(); @@ -235,19 +235,19 @@ SEASTAR_THREAD_TEST_CASE(test_remove_endpoint) { topo.add_or_update_endpoint(id1, ep1, dc_rack1, node::state::normal); topo.add_node(id2, ep2, dc_rack2, node::state::normal); - BOOST_REQUIRE_EQUAL(topo.get_datacenter_endpoints(), (dc_endpoints_t{{"dc1", {ep1, ep2}}})); - BOOST_REQUIRE_EQUAL(topo.get_datacenter_racks(), (dc_racks_t{{"dc1", {{"rack1", {ep1}}, {"rack2", {ep2}}}}})); + BOOST_REQUIRE_EQUAL(topo.get_datacenter_endpoints(), (dc_endpoints_t{{"dc1", {id1, id2}}})); + BOOST_REQUIRE_EQUAL(topo.get_datacenter_racks(), (dc_racks_t{{"dc1", {{"rack1", {id1}}, {"rack2", {id2}}}}})); BOOST_REQUIRE_EQUAL(topo.get_datacenters(), (dcs_t{"dc1"})); topo.remove_endpoint(id2); - BOOST_REQUIRE_EQUAL(topo.get_datacenter_endpoints(), (dc_endpoints_t{{"dc1", {ep1}}})); - BOOST_REQUIRE_EQUAL(topo.get_datacenter_racks(), (dc_racks_t{{"dc1", {{"rack1", {ep1}}}}})); + BOOST_REQUIRE_EQUAL(topo.get_datacenter_endpoints(), (dc_endpoints_t{{"dc1", {id1}}})); + BOOST_REQUIRE_EQUAL(topo.get_datacenter_racks(), (dc_racks_t{{"dc1", {{"rack1", {id1}}}}})); BOOST_REQUIRE_EQUAL(topo.get_datacenters(), (dcs_t{"dc1"})); // Local endpoint cannot be removed topo.remove_endpoint(id1); - BOOST_REQUIRE_EQUAL(topo.get_datacenter_endpoints(), (dc_endpoints_t{{"dc1", {ep1}}})); - BOOST_REQUIRE_EQUAL(topo.get_datacenter_racks(), (dc_racks_t{{"dc1", {{"rack1", {ep1}}}}})); + BOOST_REQUIRE_EQUAL(topo.get_datacenter_endpoints(), (dc_endpoints_t{{"dc1", {id1}}})); + BOOST_REQUIRE_EQUAL(topo.get_datacenter_racks(), (dc_racks_t{{"dc1", {{"rack1", {id1}}}}})); BOOST_REQUIRE_EQUAL(topo.get_datacenters(), (dcs_t{"dc1"})); } @@ -411,7 +411,7 @@ SEASTAR_THREAD_TEST_CASE(test_left_node_is_kept_outside_dc) { } // left nodes are not members. - BOOST_REQUIRE(!topo.get_datacenter_endpoints().at(dc_rack1.dc).contains(ep3)); + BOOST_REQUIRE(!topo.get_datacenter_endpoints().at(dc_rack1.dc).contains(id3)); BOOST_REQUIRE(topo.get_datacenter(id3) == dc_rack1.dc); BOOST_REQUIRE(topo.get_rack(id3) == dc_rack1.rack); From c7d08fe1fec2eac557903e470a8bc757e9806fd3 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 25 Dec 2024 17:17:03 +0200 Subject: [PATCH 31/57] storage_service: change get_dc_rack_for() to work on host ids --- service/storage_service.cc | 20 ++++++++++---------- service/storage_service.hh | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/service/storage_service.cc b/service/storage_service.cc index 74c30d6d5c..a44ed865bc 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -2334,7 +2334,7 @@ future<> storage_service::handle_state_bootstrap(inet_address endpoint, gms::per } tmptr->remove_endpoint(host_id); } - tmptr->update_topology(host_id, get_dc_rack_for(endpoint), locator::node::state::bootstrapping); + tmptr->update_topology(host_id, get_dc_rack_for(host_id), locator::node::state::bootstrapping); tmptr->add_bootstrap_tokens(tokens, host_id); tmptr->update_host_id(host_id, endpoint); @@ -2544,7 +2544,7 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit do_notify_joined = true; } - const auto dc_rack = get_dc_rack_for(endpoint); + const auto dc_rack = get_dc_rack_for(host_id); tmptr->update_topology(host_id, dc_rack, locator::node::state::normal); co_await tmptr->update_normal_tokens(owned_tokens, host_id); if (replaced_id) { @@ -2661,7 +2661,7 @@ future<> storage_service::on_alive(gms::inet_address endpoint, gms::endpoint_sta } else { auto tmlock = co_await get_token_metadata_lock(); auto tmptr = co_await get_mutable_token_metadata_ptr(); - const auto dc_rack = get_dc_rack_for(endpoint); + const auto dc_rack = get_dc_rack_for(host_id); tmptr->update_host_id(host_id, endpoint); tmptr->update_topology(host_id, dc_rack); co_await replicate_to_all_cores(std::move(tmptr)); @@ -2881,7 +2881,7 @@ std::optional storage_service::get_dc_rack_for(const }; } -std::optional storage_service::get_dc_rack_for(inet_address endpoint) { +std::optional storage_service::get_dc_rack_for(locator::host_id endpoint) { auto eps = _gossiper.get_endpoint_state_ptr(endpoint); if (!eps) { return std::nullopt; @@ -3410,12 +3410,12 @@ storage_service::prepare_replacement_info(std::unordered_set } } - auto dc_rack = get_dc_rack_for(replace_address).value_or(locator::endpoint_dc_rack::default_location); - if (!replace_host_id) { replace_host_id = _gossiper.get_host_id(replace_address); } + auto dc_rack = get_dc_rack_for(replace_host_id).value_or(locator::endpoint_dc_rack::default_location); + auto ri = replacement_info { .tokens = std::move(tokens), .dc_rack = std::move(dc_rack), @@ -4568,7 +4568,7 @@ future storage_service::node_ops_cmd_handler(gms::inet_ad // excluded from normal_endpoints (maybe_remove_node_being_replaced function). // In handle_state_normal we'll remap the IP to the new host_id. tmptr->update_topology(existing_node_id, std::nullopt, locator::node::state::being_replaced); - tmptr->update_topology(replacing_node_id, get_dc_rack_for(replacing_node), locator::node::state::replacing); + tmptr->update_topology(replacing_node_id, get_dc_rack_for(replacing_node_id), locator::node::state::replacing); tmptr->update_host_id(replacing_node_id, replacing_node); tmptr->add_replacing_endpoint(existing_node_id, replacing_node_id); } @@ -4586,7 +4586,7 @@ future storage_service::node_ops_cmd_handler(gms::inet_ad req.ops_uuid, replacing_node, replacing_node_id, existing_node, existing_node_id, coordinator, *coordinator_host_id); tmptr->del_replacing_endpoint(existing_node_id); - const auto dc_rack = get_dc_rack_for(replacing_node); + const auto dc_rack = get_dc_rack_for(replacing_node_id); tmptr->update_topology(existing_node_id, dc_rack, locator::node::state::normal); tmptr->remove_endpoint(replacing_node_id); } @@ -4632,7 +4632,7 @@ future storage_service::node_ops_cmd_handler(gms::inet_ad auto& endpoint = x.first; auto tokens = std::unordered_set(x.second.begin(), x.second.end()); const auto host_id = *coordinator_host_id; - const auto dc_rack = get_dc_rack_for(endpoint); + const auto dc_rack = get_dc_rack_for(host_id); slogger.info("bootstrap[{}]: Added node={}/{} as bootstrap, coordinator={}/{}", req.ops_uuid, endpoint, host_id, coordinator, *coordinator_host_id); tmptr->update_host_id(host_id, endpoint); @@ -5368,7 +5368,7 @@ future<> storage_service::update_topology_change_info(mutable_token_metadata_ptr return std::nullopt; } - return get_dc_rack_for(tm.get_endpoint_for_host_id(host_id)); + return get_dc_rack_for(host_id); }); co_await tmptr->update_topology_change_info(get_dc_rack_by_host_id); } catch (...) { diff --git a/service/storage_service.hh b/service/storage_service.hh index b9e012d1c1..9700235bc8 100644 --- a/service/storage_service.hh +++ b/service/storage_service.hh @@ -530,7 +530,7 @@ private: std::unordered_set get_tokens_for(inet_address endpoint); std::optional get_dc_rack_for(const gms::endpoint_state& ep_state); - std::optional get_dc_rack_for(inet_address endpoint); + std::optional get_dc_rack_for(locator::host_id endpoint); private: // Should be serialized under token_metadata_lock. future<> replicate_to_all_cores(mutable_token_metadata_ptr tmptr) noexcept; From ae8dc595e1df2375c9037a33ca2a414f202033f9 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 25 Dec 2024 17:24:20 +0200 Subject: [PATCH 32/57] hints: move id to ip translation into store_hint() function Also use gossiper to translate instead of token_metadata since we want to get rid of ip base APIs there. --- db/hints/manager.cc | 3 ++- db/hints/manager.hh | 2 +- service/storage_proxy.cc | 6 ++---- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/db/hints/manager.cc b/db/hints/manager.cc index 7cdb515352..015d492b78 100644 --- a/db/hints/manager.cc +++ b/db/hints/manager.cc @@ -430,9 +430,10 @@ bool manager::have_ep_manager(const std::variant(ep)); } -bool manager::store_hint(endpoint_id host_id, gms::inet_address ip, schema_ptr s, lw_shared_ptr fm, +bool manager::store_hint(endpoint_id host_id, schema_ptr s, lw_shared_ptr fm, tracing::trace_state_ptr tr_state) noexcept { + auto ip = _gossiper_anchor->get_address_map().get(host_id); if (utils::get_local_injector().enter("reject_incoming_hints")) { manager_logger.debug("Rejecting a hint to {} / {} due to an error injection", host_id, ip); ++_stats.dropped; diff --git a/db/hints/manager.hh b/db/hints/manager.hh index afe7e96648..7f075714f0 100644 --- a/db/hints/manager.hh +++ b/db/hints/manager.hh @@ -171,7 +171,7 @@ public: void register_metrics(const sstring& group_name); future<> start(shared_ptr gossiper_ptr); future<> stop(); - bool store_hint(endpoint_id host_id, gms::inet_address ip, schema_ptr s, lw_shared_ptr fm, + bool store_hint(endpoint_id host_id, schema_ptr s, lw_shared_ptr fm, tracing::trace_state_ptr tr_state) noexcept; /// \brief Changes the host_filter currently used, stopping and starting endpoint_managers relevant to the new host_filter. diff --git a/service/storage_proxy.cc b/service/storage_proxy.cc index ecb059bef9..43b4da3806 100644 --- a/service/storage_proxy.cc +++ b/service/storage_proxy.cc @@ -1244,8 +1244,7 @@ public: tracing::trace_state_ptr tr_state) override { auto m = _mutations[hid]; if (m) { - const auto ep = ermptr->get_token_metadata().get_endpoint_for_host_id(hid); - return hm.store_hint(hid, ep, _schema, std::move(m), tr_state); + return hm.store_hint(hid, _schema, std::move(m), tr_state); } else { return false; } @@ -1303,8 +1302,7 @@ public: } virtual bool store_hint(db::hints::manager& hm, locator::host_id hid, locator::effective_replication_map_ptr ermptr, tracing::trace_state_ptr tr_state) override { - const auto ep = ermptr->get_token_metadata().get_endpoint_for_host_id(hid); - return hm.store_hint(hid, ep, _schema, _mutation, tr_state); + return hm.store_hint(hid, _schema, _mutation, tr_state); } virtual future<> apply_locally(storage_proxy& sp, storage_proxy::clock_type::time_point timeout, tracing::trace_state_ptr tr_state, db::per_partition_rate_limit::info rate_limit_info, From 593308a05189f13a797b7e4ae0daec65afbf2577 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 25 Dec 2024 17:24:38 +0200 Subject: [PATCH 33/57] node_ops, cdc: drop remaining token_metadata::get_endpoint_for_host_id() usage Use address map to translate id to ip instead. We want to drop ips from token_metadata. --- cdc/generation.cc | 2 +- node_ops/node_ops_ctl.cc | 2 +- service/storage_service.cc | 29 +++++++++++------------------ 3 files changed, 13 insertions(+), 20 deletions(-) diff --git a/cdc/generation.cc b/cdc/generation.cc index 8ace3cea7e..22645a7c27 100644 --- a/cdc/generation.cc +++ b/cdc/generation.cc @@ -402,7 +402,7 @@ future generation_service::legacy_make_new_generation(const throw std::runtime_error( format("Can't find endpoint for token {}", end)); } - const auto ep = tmptr->get_endpoint_for_host_id(*endpoint); + const auto ep = _gossiper.get_address_map().get(*endpoint); auto sc = get_shard_count(ep, _gossiper); return {sc > 0 ? sc : 1, get_sharding_ignore_msb(ep, _gossiper)}; } diff --git a/node_ops/node_ops_ctl.cc b/node_ops/node_ops_ctl.cc index 1eaaf2e333..64a1ab3618 100644 --- a/node_ops/node_ops_ctl.cc +++ b/node_ops/node_ops_ctl.cc @@ -143,7 +143,7 @@ future<> node_ops_ctl::abort_on_error(node_ops_cmd cmd, std::exception_ptr ex) n future<> node_ops_ctl::send_to_all(node_ops_cmd cmd) { req.cmd = cmd; req.ignore_nodes = ignore_nodes | - std::views::transform([&] (locator::host_id id) { return tmptr->get_endpoint_for_host_id(id); }) | + std::views::transform([&] (locator::host_id id) { return ss.gossiper().get_address_map().get(id); }) | std::ranges::to(); sstring op_desc = ::format("{}", cmd); nlogger.info("{}[{}]: Started {}", desc, uuid(), req); diff --git a/service/storage_service.cc b/service/storage_service.cc index a44ed865bc..da07059ccf 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -356,16 +356,6 @@ static future<> set_gossip_tokens(gms::gossiper& g, ); } -static std::unordered_map get_token_to_endpoint(const locator::token_metadata& tm) { - const auto& map = tm.get_token_to_endpoint(); - std::unordered_map result; - result.reserve(map.size()); - for (const auto [t, id]: map) { - result.insert({t, tm.get_endpoint_for_host_id(id)}); - } - return result; -} - /* * The helper waits for two things * 1) for schema agreement @@ -1964,7 +1954,7 @@ future<> storage_service::join_topology(sharded for (auto token : bootstrap_tokens) { auto existing = tmptr->get_endpoint(token); if (existing) { - auto eps = _gossiper.get_endpoint_state_ptr(tmptr->get_endpoint_for_host_id(*existing)); + auto eps = _gossiper.get_endpoint_state_ptr(*existing); if (eps && eps->get_update_timestamp() > gms::gossiper::clk::now() - delay) { throw std::runtime_error("Cannot replace a live node..."); } @@ -2468,7 +2458,10 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit // token_to_endpoint_map is used to track the current token owners for the purpose of removing replaced endpoints. // when any token is replaced by a new owner, we track the existing owner in `candidates_for_removal` // and eventually, if any candidate for removal ends up owning no tokens, it is removed from token_metadata. - std::unordered_map token_to_endpoint_map = get_token_to_endpoint(get_token_metadata()); + std::unordered_map token_to_endpoint_map = get_token_metadata().get_token_to_endpoint() | + std::views::transform([this] (auto& e) { + return std::make_pair(e.first, _address_map.get(e.second)); + }) | std::ranges::to(); std::unordered_set candidates_for_removal; // Here we convert endpoint tokens from gossiper to owned_tokens, which will be assigned as a new @@ -2587,7 +2580,7 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit const auto& tm = get_token_metadata(); auto ver = tm.get_ring_version(); for (auto& x : tm.get_token_to_endpoint()) { - slogger.debug("handle_state_normal: token_metadata.ring_version={}, token={} -> endpoint={}/{}", ver, x.first, tm.get_endpoint_for_host_id(x.second), x.second); + slogger.debug("handle_state_normal: token_metadata.ring_version={}, token={} -> endpoint={}/{}", ver, x.first, _address_map.get(x.second), x.second); } } _normal_state_handled_on_boot.insert(endpoint); @@ -3472,7 +3465,7 @@ storage_service::prepare_replacement_info(std::unordered_set } future> storage_service::get_ownership() { - return run_with_no_api_lock([] (storage_service& ss) { + return run_with_no_api_lock([this] (storage_service& ss) { const auto& tm = ss.get_token_metadata(); auto token_map = dht::token::describe_ownership(tm.sorted_tokens()); // describeOwnership returns tokens in an unspecified order, let's re-order them @@ -3480,7 +3473,7 @@ future> storage_service::get_ownership() { for (auto entry : token_map) { locator::host_id id = tm.get_endpoint(entry.first).value(); auto token_ownership = entry.second; - ownership[tm.get_endpoint_for_host_id(id)] += token_ownership; + ownership[_address_map.get(id)] += token_ownership; } return ownership; }); @@ -5299,10 +5292,10 @@ std::map storage_service::get_token_to_endpoint_map() { const auto& tm = get_token_metadata(); std::map result; for (const auto [t, id]: tm.get_token_to_endpoint()) { - result.insert({t, tm.get_endpoint_for_host_id(id)}); + result.insert({t, _address_map.get(id)}); } for (const auto [t, id]: tm.get_bootstrap_tokens()) { - result.insert({t, tm.get_endpoint_for_host_id(id)}); + result.insert({t, _address_map.get(id)}); } return result; } @@ -5312,7 +5305,7 @@ future> storage_service::get_tablet_to_endpoint_ma const auto& tmap = tm.tablets().get_tablet_map(table); std::map result; for (std::optional tid = tmap.first_tablet(); tid; tid = tmap.next_tablet(*tid)) { - result.emplace(tmap.get_last_token(*tid), tm.get_endpoint_for_host_id(tmap.get_primary_replica(*tid).host)); + result.emplace(tmap.get_last_token(*tid), _address_map.get(tmap.get_primary_replica(*tid).host)); co_await coroutine::maybe_yield(); } co_return result; From 7556e3d0453efb69e6f406fe5313727672618d12 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 1 Jan 2025 15:59:19 +0200 Subject: [PATCH 34/57] topology coordinator: remove gossiper entry only if host id matches provided one Currently the entry is removed only if ip is not used by any normal or transitioning node. This is done to not remove a wrong entry that just happen to use the same ip, but the same can be achieved by checking host id in the entry. --- service/storage_service.cc | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/service/storage_service.cc b/service/storage_service.cc index da07059ccf..e456822588 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -428,20 +428,6 @@ future storage_service::sync_raft_t } }; - auto get_used_ips = [&, used_ips = std::optional>{}]() mutable - -> const std::unordered_set& - { - if (!used_ips) { - used_ips.emplace(); - for (const auto& [sid, rs]: boost::range::join(t.normal_nodes, t.transition_nodes)) { - if (const auto used_ip = am.find(locator::host_id{sid.uuid()})) { - used_ips->insert(*used_ip); - } - } - } - return *used_ips; - }; - using host_id_to_ip_map_t = std::unordered_map; auto get_host_id_to_ip_map = [&, map = std::optional{}]() mutable -> future { if (!map.has_value()) { @@ -464,7 +450,7 @@ future storage_service::sync_raft_t auto remove_ip = [&](inet_address ip, locator::host_id host_id, bool notify) -> future<> { sys_ks_futures.push_back(_sys_ks.local().remove_endpoint(ip)); - if (_gossiper.get_endpoint_state_ptr(ip) && !get_used_ips().contains(ip)) { + if (const auto ep = _gossiper.get_endpoint_state_ptr(ip); ep && ep->get_host_id() == host_id) { co_await _gossiper.force_remove_endpoint(ip, gms::null_permit_id); if (notify) { nodes_to_notify.left.push_back({ip, host_id}); From 8e55cc6c78fb371c09b0a7215d128a38767b7048 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 5 Jan 2025 10:56:10 +0200 Subject: [PATCH 35/57] storage_service: fix logging When logger outputs a range it already does join, so no other join is needed. --- service/storage_service.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/service/storage_service.cc b/service/storage_service.cc index e456822588..904f9ca56f 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -7457,7 +7457,7 @@ future<> storage_service::wait_for_normal_state_handled_on_boot() { static auto fmt_nodes_with_statuses = [this] (const auto& eps) { return eps | std::views::transform([this] (const auto& ep) { return ::format("({}, status={})", ep, _gossiper.get_gossip_status(ep)); - }) | std::views::join_with(','); + }); }; slogger.info("Started waiting for normal state handlers to finish"); From 70cc014307b10f76b13eff98f723c1dc8eca9b23 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 5 Jan 2025 12:03:45 +0200 Subject: [PATCH 36/57] storage_service: ip_address_updater: check peers table instead of token_metadata whether ip was changed As part of changing IP address peers table is updated. If it has a new address the update can be skipped. --- service/storage_service.cc | 11 ++++++++++- service/storage_service.hh | 3 +++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/service/storage_service.cc b/service/storage_service.cc index 904f9ca56f..e5957608b7 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -909,7 +909,8 @@ class storage_service::ip_address_updater: public gms::i_endpoint_state_change_s locator::host_id id(utils::UUID(app_state_ptr->value())); rslog.debug("ip_address_updater::on_endpoint_change({}) {} {}", ev, endpoint, id); - auto prev_ip = _ss.get_token_metadata().get_endpoint_for_host_id_if_known(id); + // If id maps to different ip in peers table it needs to be updated which is done by sync_raft_topology_nodes below + std::optional prev_ip = co_await _ss.get_ip_from_peers_table(id); if (prev_ip == endpoint) { co_return; } @@ -2647,6 +2648,14 @@ future<> storage_service::on_alive(gms::inet_address endpoint, gms::endpoint_sta } } +future> storage_service::get_ip_from_peers_table(locator::host_id id) { + auto peers = co_await _sys_ks.local().load_host_ids(); + if (auto it = std::ranges::find_if(peers, [&id] (const auto& e) { return e.second == id; }); it != peers.end()) { + co_return it->first; + } + co_return std::nullopt; +} + future<> storage_service::on_change(gms::inet_address endpoint, const gms::application_state_map& states_, gms::permit_id pid) { // copy the states map locally since the coroutine may yield auto states = states_; diff --git a/service/storage_service.hh b/service/storage_service.hh index 9700235bc8..79820afcb5 100644 --- a/service/storage_service.hh +++ b/service/storage_service.hh @@ -604,6 +604,9 @@ private: future> get_changed_ranges_for_leaving(locator::vnode_effective_replication_map_ptr erm, locator::host_id endpoint); future<> maybe_reconnect_to_preferred_ip(inet_address ep, inet_address local_ip); + + // Return ip of the peers table entry with given host id + future> get_ip_from_peers_table(locator::host_id id); public: sstring get_release_version(); From 7c4c485651c998284531cfbe8671b4bf6fa459ec Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 5 Jan 2025 13:34:43 +0200 Subject: [PATCH 37/57] host_id_or_endpoint: use gossiper to resolve ip to id and back mappings host_id_or_endpoint is a helper class that hold either id or ip and translate one into another on demand. Use gossiper to do a translation there instead of token_metadata since we want to drop ip based APIs from the later. --- db/hints/manager.cc | 8 ++++---- locator/token_metadata.cc | 14 +++++++------- locator/token_metadata.hh | 8 ++++++-- service/storage_service.cc | 2 +- 4 files changed, 18 insertions(+), 14 deletions(-) diff --git a/db/hints/manager.cc b/db/hints/manager.cc index 015d492b78..1f996d193f 100644 --- a/db/hints/manager.cc +++ b/db/hints/manager.cc @@ -594,9 +594,9 @@ future<> manager::change_host_filter(host_filter filter) { // been created by mistake and they're invalid. The same for pre-host-ID hinted handoff // -- hint directories representing host IDs are NOT valid. if (hid_or_ep.has_host_id() && _uses_host_id) { - return std::make_optional(pair_type{hid_or_ep.id(), hid_or_ep.resolve_endpoint(*tmptr)}); + return std::make_optional(pair_type{hid_or_ep.id(), hid_or_ep.resolve_endpoint(*_gossiper_anchor)}); } else if (hid_or_ep.has_endpoint() && !_uses_host_id) { - return std::make_optional(pair_type{hid_or_ep.resolve_id(*tmptr), hid_or_ep.endpoint()}); + return std::make_optional(pair_type{hid_or_ep.resolve_id(*_gossiper_anchor), hid_or_ep.endpoint()}); } else { return std::nullopt; } @@ -817,7 +817,7 @@ future<> manager::initialize_endpoint_managers() { const auto maybe_host_id = std::invoke([&] () -> std::optional { try { - return maybe_host_id_or_ep->resolve_id(*tmptr); + return maybe_host_id_or_ep->resolve_id(*_gossiper_anchor); } catch (...) { return std::nullopt; } @@ -869,7 +869,7 @@ future<> manager::migrate_ip_directories() { continue; } - const locator::host_id host_id = hid_or_ep.resolve_id(*tmptr); + const locator::host_id host_id = hid_or_ep.resolve_id(*_gossiper_anchor); dirs_to_rename.push_back({.current_name = std::move(directory), .new_name = host_id.to_sstring()}); } catch (...) { // We cannot map the IP to the corresponding host ID either because diff --git a/locator/token_metadata.cc b/locator/token_metadata.cc index 3c369e44ce..75603c58e6 100644 --- a/locator/token_metadata.cc +++ b/locator/token_metadata.cc @@ -22,7 +22,7 @@ #include #include "utils/assert.hh" #include "utils/stall_free.hh" - +#include "gms/gossiper.hh" namespace locator { static logging::logger tlogger("token_metadata"); @@ -1391,22 +1391,22 @@ host_id_or_endpoint::host_id_or_endpoint(const sstring& s, param_type restrict) } } -host_id host_id_or_endpoint::resolve_id(const token_metadata& tm) const { +host_id host_id_or_endpoint::resolve_id(const gms::gossiper& g) const { if (has_host_id()) { return id(); } - auto opt_id = tm.get_host_id_if_known(endpoint()); - if (!opt_id) { + try { + return g.get_host_id(endpoint()); + } catch (...) { throw std::runtime_error(format("Host inet address {} not found in the cluster", endpoint())); } - return *opt_id; } -gms::inet_address host_id_or_endpoint::resolve_endpoint(const token_metadata& tm) const { +gms::inet_address host_id_or_endpoint::resolve_endpoint(const gms::gossiper& g) const { if (has_endpoint()) { return endpoint(); } - auto endpoint_opt = tm.get_endpoint_for_host_id_if_known(id()); + auto endpoint_opt = g.get_address_map().find(id()); if (!endpoint_opt) { throw std::runtime_error(format("Host ID {} not found in the cluster", id())); } diff --git a/locator/token_metadata.hh b/locator/token_metadata.hh index 541c91f086..b842076124 100644 --- a/locator/token_metadata.hh +++ b/locator/token_metadata.hh @@ -37,6 +37,10 @@ namespace replica { class keyspace; } +namespace gms { +class gossiper; +} + namespace locator { class abstract_replication_strategy; @@ -75,8 +79,8 @@ struct host_id_or_endpoint { // Map the host_id to endpoint or vice verse, using the token_metadata. // Throws runtime error if failed to resolve. - host_id resolve_id(const token_metadata&) const; - gms::inet_address resolve_endpoint(const token_metadata&) const; + host_id resolve_id(const gms::gossiper&) const; + gms::inet_address resolve_endpoint(const gms::gossiper&) const; }; using host_id_or_endpoint_list = std::vector; diff --git a/service/storage_service.cc b/service/storage_service.cc index e5957608b7..f611727e20 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -4189,7 +4189,7 @@ future<> storage_service::removenode(locator::host_id host_id, locator::host_id_ } for (auto& hoep : ignore_nodes_params) { - ctl.ignore_nodes.insert(hoep.resolve_id(*tmptr)); + ctl.ignore_nodes.insert(hoep.resolve_id(ss._gossiper)); } if (!removed_from_token_ring) { From fcfd0050234e5da11259e54bcd99fb0f52776ed7 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 5 Jan 2025 13:48:24 +0200 Subject: [PATCH 38/57] token_metadata: drop no longer used functions --- locator/token_metadata.cc | 32 -------------------------------- locator/token_metadata.hh | 7 ------- 2 files changed, 39 deletions(-) diff --git a/locator/token_metadata.cc b/locator/token_metadata.cc index 75603c58e6..3320e37ed1 100644 --- a/locator/token_metadata.cc +++ b/locator/token_metadata.cc @@ -153,12 +153,6 @@ public: /** Return the unique host ID for an end-point. */ host_id get_host_id(inet_address endpoint) const; - /// Return the unique host ID for an end-point or nullopt if not found. - std::optional get_host_id_if_known(inet_address endpoint) const; - - /** Return the end-point for a unique host ID or nullopt if not found.*/ - std::optional get_endpoint_for_host_id_if_known(host_id) const; - /** Return the end-point for a unique host ID.*/ inet_address get_endpoint_for_host_id(host_id) const; @@ -543,22 +537,6 @@ host_id token_metadata_impl::get_host_id(inet_address endpoint) const { } } -std::optional token_metadata_impl::get_host_id_if_known(inet_address endpoint) const { - if (const auto* node = _topology.find_node(endpoint)) [[likely]] { - return node->host_id(); - } else { - return std::nullopt; - } -} - -std::optional token_metadata_impl::get_endpoint_for_host_id_if_known(host_id host_id) const { - if (const auto* node = _topology.find_node(host_id)) [[likely]] { - return node->endpoint(); - } else { - return std::nullopt; - } -} - inet_address token_metadata_impl::get_endpoint_for_host_id(host_id host_id) const { if (const auto* node = _topology.find_node(host_id)) [[likely]] { return node->endpoint(); @@ -1042,16 +1020,6 @@ token_metadata::get_host_id(inet_address endpoint) const { return _impl->get_host_id(endpoint); } -std::optional -token_metadata::get_host_id_if_known(inet_address endpoint) const { - return _impl->get_host_id_if_known(endpoint); -} - -std::optional -token_metadata::get_endpoint_for_host_id_if_known(host_id host_id) const { - return _impl->get_endpoint_for_host_id_if_known(host_id); -} - token_metadata::inet_address token_metadata::get_endpoint_for_host_id(host_id host_id) const { return _impl->get_endpoint_for_host_id(host_id); diff --git a/locator/token_metadata.hh b/locator/token_metadata.hh index b842076124..4ac6a2143b 100644 --- a/locator/token_metadata.hh +++ b/locator/token_metadata.hh @@ -239,13 +239,6 @@ public: /** Return the unique host ID for an end-point. */ host_id get_host_id(inet_address endpoint) const; - /// Return the unique host ID for an end-point or nullopt if not found. - std::optional get_host_id_if_known(inet_address endpoint) const; - - /** Return the end-point for a unique host ID or nullopt if not found. */ - std::optional get_endpoint_for_host_id_if_known(locator::host_id host_id) const; - - /** Return the end-point for a unique host ID */ inet_address get_endpoint_for_host_id(locator::host_id host_id) const; /** @return a copy of the endpoint-to-id map for read-only operations */ From 9197b88e48e583dcb77b191168b80cccb69ed9ae Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 8 Jan 2025 14:03:48 +0200 Subject: [PATCH 39/57] storage_service: drop loops from node ops replace_prepare handling since there can be only one replacing node The call already throw an error if there are more than one. Throw is there are zero as well and drop the loops. --- service/storage_service.cc | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/service/storage_service.cc b/service/storage_service.cc index f611727e20..880faa173a 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -4534,13 +4534,17 @@ future storage_service::node_ops_cmd_handler(gms::inet_ad slogger.warn("{}", msg); throw std::runtime_error(msg); } + if (req.replace_nodes.size() == 0) { + auto msg = ::format("replace[{}]: Replacing node was not specified", req.ops_uuid); + slogger.warn("{}", msg); + throw std::runtime_error(msg); + } if (!coordinator_host_id) { throw std::runtime_error("Coordinator host_id not found"); } - mutate_token_metadata([coordinator, coordinator_host_id, &req, this] (mutable_token_metadata_ptr tmptr) mutable { - for (auto& x: req.replace_nodes) { - auto existing_node = x.first; - auto replacing_node = x.second; + auto existing_node = req.replace_nodes.begin()->first; + auto replacing_node = req.replace_nodes.begin()->second; + mutate_token_metadata([coordinator, coordinator_host_id, existing_node, replacing_node, &req, this] (mutable_token_metadata_ptr tmptr) mutable { const auto existing_node_id = tmptr->get_host_id(existing_node); const auto replacing_node_id = *coordinator_host_id; slogger.info("replace[{}]: Added replacing_node={}/{} to replace existing_node={}/{}, coordinator={}/{}", @@ -4559,15 +4563,11 @@ future storage_service::node_ops_cmd_handler(gms::inet_ad tmptr->update_topology(replacing_node_id, get_dc_rack_for(replacing_node_id), locator::node::state::replacing); tmptr->update_host_id(replacing_node_id, replacing_node); tmptr->add_replacing_endpoint(existing_node_id, replacing_node_id); - } return make_ready_future<>(); }).get(); auto ignore_nodes = std::move(req.ignore_nodes); - node_ops_insert(ops_uuid, coordinator, std::move(ignore_nodes), [this, coordinator, coordinator_host_id, req = std::move(req)] () mutable { - return mutate_token_metadata([this, coordinator, coordinator_host_id, req = std::move(req)] (mutable_token_metadata_ptr tmptr) mutable { - for (auto& x: req.replace_nodes) { - auto existing_node = x.first; - auto replacing_node = x.second; + node_ops_insert(ops_uuid, coordinator, std::move(ignore_nodes), [this, coordinator, coordinator_host_id, existing_node, replacing_node, req = std::move(req)] () mutable { + return mutate_token_metadata([this, coordinator, coordinator_host_id, existing_node, replacing_node, req = std::move(req)] (mutable_token_metadata_ptr tmptr) mutable { const auto existing_node_id = tmptr->get_host_id(existing_node); const auto replacing_node_id = *coordinator_host_id; slogger.info("replace[{}]: Removed replacing_node={}/{} to replace existing_node={}/{}, coordinator={}/{}", @@ -4577,7 +4577,6 @@ future storage_service::node_ops_cmd_handler(gms::inet_ad const auto dc_rack = get_dc_rack_for(replacing_node_id); tmptr->update_topology(existing_node_id, dc_rack, locator::node::state::normal); tmptr->remove_endpoint(replacing_node_id); - } return update_topology_change_info(tmptr, ::format("replace {}", req.replace_nodes)); }); }); From 0db6136fa50c94c13aca0dba7b07e8ac0442d79c Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 8 Jan 2025 14:04:35 +0200 Subject: [PATCH 40/57] storage_service: fix indentation after the last patch --- service/storage_service.cc | 50 +++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/service/storage_service.cc b/service/storage_service.cc index 880faa173a..626433cd9f 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -4545,38 +4545,38 @@ future storage_service::node_ops_cmd_handler(gms::inet_ad auto existing_node = req.replace_nodes.begin()->first; auto replacing_node = req.replace_nodes.begin()->second; mutate_token_metadata([coordinator, coordinator_host_id, existing_node, replacing_node, &req, this] (mutable_token_metadata_ptr tmptr) mutable { - const auto existing_node_id = tmptr->get_host_id(existing_node); - const auto replacing_node_id = *coordinator_host_id; - slogger.info("replace[{}]: Added replacing_node={}/{} to replace existing_node={}/{}, coordinator={}/{}", - req.ops_uuid, replacing_node, replacing_node_id, existing_node, existing_node_id, coordinator, *coordinator_host_id); + const auto existing_node_id = tmptr->get_host_id(existing_node); + const auto replacing_node_id = *coordinator_host_id; + slogger.info("replace[{}]: Added replacing_node={}/{} to replace existing_node={}/{}, coordinator={}/{}", + req.ops_uuid, replacing_node, replacing_node_id, existing_node, existing_node_id, coordinator, *coordinator_host_id); - // In case of replace-with-same-ip we need to map both host_id-s - // to the same IP. The locator::topology allows this specifically in case - // where one node is being_replaced and another is replacing, - // so here we adjust the state of the original node accordingly. - // The host_id -> IP map works as usual, and IP -> host_id will map - // IP to the being_replaced node - this is what is implied by the - // current code. The IP will be placed in pending_endpoints and - // excluded from normal_endpoints (maybe_remove_node_being_replaced function). - // In handle_state_normal we'll remap the IP to the new host_id. - tmptr->update_topology(existing_node_id, std::nullopt, locator::node::state::being_replaced); - tmptr->update_topology(replacing_node_id, get_dc_rack_for(replacing_node_id), locator::node::state::replacing); - tmptr->update_host_id(replacing_node_id, replacing_node); - tmptr->add_replacing_endpoint(existing_node_id, replacing_node_id); + // In case of replace-with-same-ip we need to map both host_id-s + // to the same IP. The locator::topology allows this specifically in case + // where one node is being_replaced and another is replacing, + // so here we adjust the state of the original node accordingly. + // The host_id -> IP map works as usual, and IP -> host_id will map + // IP to the being_replaced node - this is what is implied by the + // current code. The IP will be placed in pending_endpoints and + // excluded from normal_endpoints (maybe_remove_node_being_replaced function). + // In handle_state_normal we'll remap the IP to the new host_id. + tmptr->update_topology(existing_node_id, std::nullopt, locator::node::state::being_replaced); + tmptr->update_topology(replacing_node_id, get_dc_rack_for(replacing_node_id), locator::node::state::replacing); + tmptr->update_host_id(replacing_node_id, replacing_node); + tmptr->add_replacing_endpoint(existing_node_id, replacing_node_id); return make_ready_future<>(); }).get(); auto ignore_nodes = std::move(req.ignore_nodes); node_ops_insert(ops_uuid, coordinator, std::move(ignore_nodes), [this, coordinator, coordinator_host_id, existing_node, replacing_node, req = std::move(req)] () mutable { return mutate_token_metadata([this, coordinator, coordinator_host_id, existing_node, replacing_node, req = std::move(req)] (mutable_token_metadata_ptr tmptr) mutable { - const auto existing_node_id = tmptr->get_host_id(existing_node); - const auto replacing_node_id = *coordinator_host_id; - slogger.info("replace[{}]: Removed replacing_node={}/{} to replace existing_node={}/{}, coordinator={}/{}", - req.ops_uuid, replacing_node, replacing_node_id, existing_node, existing_node_id, coordinator, *coordinator_host_id); + const auto existing_node_id = tmptr->get_host_id(existing_node); + const auto replacing_node_id = *coordinator_host_id; + slogger.info("replace[{}]: Removed replacing_node={}/{} to replace existing_node={}/{}, coordinator={}/{}", + req.ops_uuid, replacing_node, replacing_node_id, existing_node, existing_node_id, coordinator, *coordinator_host_id); - tmptr->del_replacing_endpoint(existing_node_id); - const auto dc_rack = get_dc_rack_for(replacing_node_id); - tmptr->update_topology(existing_node_id, dc_rack, locator::node::state::normal); - tmptr->remove_endpoint(replacing_node_id); + tmptr->del_replacing_endpoint(existing_node_id); + const auto dc_rack = get_dc_rack_for(replacing_node_id); + tmptr->update_topology(existing_node_id, dc_rack, locator::node::state::normal); + tmptr->remove_endpoint(replacing_node_id); return update_topology_change_info(tmptr, ::format("replace {}", req.replace_nodes)); }); }); From a7a7cdcf429d3d78ee4ee01755f3282570f26639 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 5 Jan 2025 14:05:51 +0200 Subject: [PATCH 41/57] storage_service: use gossiper to map ip to id in node_ops operations Replace operation is special though. In case of replacing with the same IP the gossiper will not have the mapping, and node_ops RPC unfortunately does not send host id of a replaced node. For replace we consult peers table instead to find the old owner of the IP. A node that is replacing (the coordinator of the replace) will not have it though, but luckily it is not needed since it updates metadata during join_topology() anyway. The only thing that is missing there is add_replacing_endpoint() call which the patch adds. --- service/storage_service.cc | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/service/storage_service.cc b/service/storage_service.cc index 626433cd9f..7e331106bb 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -1552,6 +1552,7 @@ future<> storage_service::join_topology(sharded tmptr->update_topology(ri->host_id, std::move(ri->dc_rack), locator::node::state::being_replaced); co_await tmptr->update_normal_tokens(bootstrap_tokens, ri->host_id); tmptr->update_host_id(ri->host_id, *replace_address); + tmptr->add_replacing_endpoint(ri->host_id, tmptr->get_my_id()); replaced_host_id = ri->host_id; @@ -4417,7 +4418,7 @@ future storage_service::node_ops_cmd_handler(gms::inet_ad mutate_token_metadata([coordinator, &req, this] (mutable_token_metadata_ptr tmptr) mutable { for (auto& node : req.leaving_nodes) { slogger.info("removenode[{}]: Added node={} as leaving node, coordinator={}", req.ops_uuid, node, coordinator); - tmptr->add_leaving_endpoint(tmptr->get_host_id(node)); + tmptr->add_leaving_endpoint(_gossiper.get_host_id(node)); } return update_topology_change_info(tmptr, ::format("removenode {}", req.leaving_nodes)); }).get(); @@ -4425,7 +4426,7 @@ future storage_service::node_ops_cmd_handler(gms::inet_ad return mutate_token_metadata([this, coordinator, req = std::move(req)] (mutable_token_metadata_ptr tmptr) mutable { for (auto& node : req.leaving_nodes) { slogger.info("removenode[{}]: Removed node={} as leaving node, coordinator={}", req.ops_uuid, node, coordinator); - tmptr->del_leaving_endpoint(tmptr->get_host_id(node)); + tmptr->del_leaving_endpoint(_gossiper.get_host_id(node)); } return update_topology_change_info(tmptr, ::format("removenode {}", req.leaving_nodes)); }); @@ -4451,7 +4452,7 @@ future storage_service::node_ops_cmd_handler(gms::inet_ad }).get(); } else { slogger.info("removenode[{}]: Started to sync data for removing node={} using stream, coordinator={}", req.ops_uuid, node, coordinator); - removenode_with_stream(get_token_metadata().get_host_id(node), topo_guard, as).get(); + removenode_with_stream(_gossiper.get_host_id(node), topo_guard, as).get(); } } } else if (req.cmd == node_ops_cmd::removenode_abort) { @@ -4467,7 +4468,7 @@ future storage_service::node_ops_cmd_handler(gms::inet_ad mutate_token_metadata([coordinator, &req, this] (mutable_token_metadata_ptr tmptr) mutable { for (auto& node : req.leaving_nodes) { slogger.info("decommission[{}]: Added node={} as leaving node, coordinator={}", req.ops_uuid, node, coordinator); - tmptr->add_leaving_endpoint(tmptr->get_host_id(node)); + tmptr->add_leaving_endpoint(_gossiper.get_host_id(node)); } return update_topology_change_info(tmptr, ::format("decommission {}", req.leaving_nodes)); }).get(); @@ -4544,8 +4545,13 @@ future storage_service::node_ops_cmd_handler(gms::inet_ad } auto existing_node = req.replace_nodes.begin()->first; auto replacing_node = req.replace_nodes.begin()->second; - mutate_token_metadata([coordinator, coordinator_host_id, existing_node, replacing_node, &req, this] (mutable_token_metadata_ptr tmptr) mutable { - const auto existing_node_id = tmptr->get_host_id(existing_node); + auto existing_node_id = _sys_ks.local().load_host_ids().get()[existing_node]; + mutate_token_metadata([coordinator, coordinator_host_id, existing_node, replacing_node, existing_node_id, &req, this] (mutable_token_metadata_ptr tmptr) mutable { + if (is_me(*coordinator_host_id)) { + // A coordinor already updated token metadata in join_topology() + return make_ready_future<>(); + } + const auto replacing_node_id = *coordinator_host_id; slogger.info("replace[{}]: Added replacing_node={}/{} to replace existing_node={}/{}, coordinator={}/{}", req.ops_uuid, replacing_node, replacing_node_id, existing_node, existing_node_id, coordinator, *coordinator_host_id); @@ -4566,9 +4572,12 @@ future storage_service::node_ops_cmd_handler(gms::inet_ad return make_ready_future<>(); }).get(); auto ignore_nodes = std::move(req.ignore_nodes); - node_ops_insert(ops_uuid, coordinator, std::move(ignore_nodes), [this, coordinator, coordinator_host_id, existing_node, replacing_node, req = std::move(req)] () mutable { - return mutate_token_metadata([this, coordinator, coordinator_host_id, existing_node, replacing_node, req = std::move(req)] (mutable_token_metadata_ptr tmptr) mutable { - const auto existing_node_id = tmptr->get_host_id(existing_node); + node_ops_insert(ops_uuid, coordinator, std::move(ignore_nodes), [this, coordinator, coordinator_host_id, existing_node, replacing_node, existing_node_id, req = std::move(req)] () mutable { + return mutate_token_metadata([this, coordinator, coordinator_host_id, existing_node, replacing_node, existing_node_id, req = std::move(req)] (mutable_token_metadata_ptr tmptr) mutable { + if (is_me(*coordinator_host_id)) { + // No need to cancel replace operation on a node replacing node since it will be killed anyway + return make_ready_future<>(); + } const auto replacing_node_id = *coordinator_host_id; slogger.info("replace[{}]: Removed replacing_node={}/{} to replace existing_node={}/{}, coordinator={}/{}", req.ops_uuid, replacing_node, replacing_node_id, existing_node, existing_node_id, coordinator, *coordinator_host_id); From 0ec9f7de64d47ba5bcaee563914ac5913963fb97 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 5 Jan 2025 15:04:16 +0200 Subject: [PATCH 42/57] gossiper: drop get_unreachable_token_owners functions It is used by truncate code only and even there it only check if the returned set is not empty. Check for dead token owners in the truncation code directly. --- gms/gossiper.cc | 11 ----------- gms/gossiper.hh | 5 ----- service/storage_proxy.cc | 4 +++- 3 files changed, 3 insertions(+), 17 deletions(-) diff --git a/gms/gossiper.cc b/gms/gossiper.cc index a8ec00dc81..0521080f8c 100644 --- a/gms/gossiper.cc +++ b/gms/gossiper.cc @@ -1201,17 +1201,6 @@ std::set gossiper::get_live_token_owners() const { return token_owners; } -std::set gossiper::get_unreachable_token_owners() const { - std::set token_owners; - auto normal_token_owners = get_token_metadata_ptr()->get_normal_token_owners_ips(); - for (auto& node: normal_token_owners) { - if (!is_alive(node)) { - token_owners.insert(node); - } - } - return token_owners; -} - std::set gossiper::get_unreachable_nodes() const { std::set unreachable_nodes; auto nodes = get_token_metadata_ptr()->get_topology().get_all_host_ids(); diff --git a/gms/gossiper.hh b/gms/gossiper.hh index 1b647370ff..f3d5dd631c 100644 --- a/gms/gossiper.hh +++ b/gms/gossiper.hh @@ -306,11 +306,6 @@ public: std::set get_unreachable_members() const; std::set get_unreachable_host_ids() const; - /** - * @return a list of unreachable token owners - */ - std::set get_unreachable_token_owners() const; - /** * @return a list of unreachable nodes */ diff --git a/service/storage_proxy.cc b/service/storage_proxy.cc index 43b4da3806..31d0411370 100644 --- a/service/storage_proxy.cc +++ b/service/storage_proxy.cc @@ -471,7 +471,9 @@ public: } future<> send_truncate_blocking(sstring keyspace, sstring cfname, std::chrono::milliseconds timeout_in_ms) { - if (!_gossiper.get_unreachable_token_owners().empty()) { + auto s = _sp.local_db().find_schema(keyspace, cfname); + auto erm_ptr = s->table().get_effective_replication_map(); + if (!std::ranges::all_of(erm_ptr->get_token_metadata().get_normal_token_owners(), std::bind_front(&storage_proxy::is_alive, &_sp, std::cref(*erm_ptr)))) { slogger.info("Cannot perform truncate, some hosts are down"); // Since the truncate operation is so aggressive and is typically only // invoked by an admin, for simplicity we require that all nodes are up From 3068e38baabf0e1545d1ff4d04fbe6f13102ae0e Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 5 Jan 2025 15:23:00 +0200 Subject: [PATCH 43/57] locator: network_topology_strategy: use host_id based function to check number of endpoints in dcs --- locator/network_topology_strategy.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/locator/network_topology_strategy.cc b/locator/network_topology_strategy.cc index e59581a32e..776778b81c 100644 --- a/locator/network_topology_strategy.cc +++ b/locator/network_topology_strategy.cc @@ -235,7 +235,7 @@ public: } static void check_enough_endpoints(const token_metadata& tm, const std::unordered_map& dc_rf) { - auto dc_endpoints = tm.get_datacenter_token_owners_ips(); + auto dc_endpoints = tm.get_datacenter_token_owners(); auto endpoints_in = [&dc_endpoints](sstring dc) { auto i = dc_endpoints.find(dc); return i != dc_endpoints.end() ? i->second.size() : size_t(0); @@ -309,7 +309,7 @@ effective_replication_map_ptr network_topology_strategy::make_replication_map(ta static unsigned calculate_initial_tablets_from_topology(const schema& s, token_metadata_ptr tm, const std::unordered_map& rf) { unsigned initial_tablets = std::numeric_limits::min(); - for (const auto& dc : tm->get_datacenter_token_owners_ips()) { + for (const auto& dc : tm->get_datacenter_token_owners()) { unsigned shards_in_dc = 0; unsigned rf_in_dc = 1; From 97f95f1dbdaea51714371235493172326363f309 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 5 Jan 2025 15:51:11 +0200 Subject: [PATCH 44/57] locator: token_metadata: remove unused ip based functions --- locator/token_metadata.cc | 77 ------------------------------- locator/token_metadata.hh | 11 +---- test/boost/token_metadata_test.cc | 1 - 3 files changed, 2 insertions(+), 87 deletions(-) diff --git a/locator/token_metadata.cc b/locator/token_metadata.cc index 3320e37ed1..27b8e99128 100644 --- a/locator/token_metadata.cc +++ b/locator/token_metadata.cc @@ -150,12 +150,6 @@ public: */ void update_host_id(const host_id& host_id, inet_address endpoint); - /** Return the unique host ID for an end-point. */ - host_id get_host_id(inet_address endpoint) const; - - /** Return the end-point for a unique host ID.*/ - inet_address get_endpoint_for_host_id(host_id) const; - /** @return a copy of host id set for read-only operations */ std::unordered_set get_host_ids() const; @@ -254,11 +248,8 @@ public: * Bootstrapping tokens are not taken into account. */ size_t count_normal_token_owners() const; - std::unordered_map> get_datacenter_token_owners_ips() const; std::unordered_map> get_datacenter_token_owners() const; - std::unordered_map>> - get_datacenter_racks_token_owners_ips() const; std::unordered_map>> get_datacenter_racks_token_owners() const; @@ -529,22 +520,6 @@ void token_metadata_impl::update_host_id(const host_id& host_id, inet_address en _topology.add_or_update_endpoint(host_id, endpoint); } -host_id token_metadata_impl::get_host_id(inet_address endpoint) const { - if (const auto* node = _topology.find_node(endpoint)) [[likely]] { - return node->host_id(); - } else { - on_internal_error(tlogger, format("host_id for endpoint {} is not found", endpoint)); - } -} - -inet_address token_metadata_impl::get_endpoint_for_host_id(host_id host_id) const { - if (const auto* node = _topology.find_node(host_id)) [[likely]] { - return node->endpoint(); - } else { - on_internal_error(tlogger, format("endpoint for host_id {} is not found", host_id)); - } -} - std::unordered_set token_metadata_impl::get_host_ids() const { return _topology.get_nodes() | std::views::filter([&] (const node& n) { return !n.left() && !n.is_none(); }) | @@ -773,17 +748,6 @@ void token_metadata_impl::for_each_token_owner(std::function }); } -std::unordered_map> -token_metadata_impl::get_datacenter_token_owners_ips() const { - std::unordered_map> datacenter_token_owners; - _topology.for_each_node([&] (const node& n) { - if (is_normal_token_owner(n.host_id())) { - datacenter_token_owners[n.dc_rack().dc].insert(n.endpoint()); - } - }); - return datacenter_token_owners; -} - std::unordered_map> token_metadata_impl::get_datacenter_token_owners() const { std::unordered_map> datacenter_token_owners; @@ -795,18 +759,6 @@ token_metadata_impl::get_datacenter_token_owners() const { return datacenter_token_owners; } -std::unordered_map>> -token_metadata_impl::get_datacenter_racks_token_owners_ips() const { - std::unordered_map>> dc_racks_token_owners; - _topology.for_each_node([&] (const node& n) { - const auto& dc_rack = n.dc_rack(); - if (is_normal_token_owner(n.host_id())) { - dc_racks_token_owners[dc_rack.dc][dc_rack.rack].insert(n.endpoint()); - } - }); - return dc_racks_token_owners; -} - std::unordered_map>> token_metadata_impl::get_datacenter_racks_token_owners() const { std::unordered_map>> dc_racks_token_owners; @@ -1015,16 +967,6 @@ token_metadata::update_host_id(const host_id& host_id, inet_address endpoint) { _impl->update_host_id(host_id, endpoint); } -host_id -token_metadata::get_host_id(inet_address endpoint) const { - return _impl->get_host_id(endpoint); -} - -token_metadata::inet_address -token_metadata::get_endpoint_for_host_id(host_id host_id) const { - return _impl->get_endpoint_for_host_id(host_id); -} - std::unordered_set token_metadata::get_host_ids() const { return _impl->get_host_ids(); @@ -1147,16 +1089,6 @@ token_metadata::get_normal_token_owners() const { return _impl->get_normal_token_owners(); } -std::unordered_set token_metadata::get_normal_token_owners_ips() const { - const auto& host_ids = _impl->get_normal_token_owners(); - std::unordered_set result; - result.reserve(host_ids.size()); - for (const auto& id: host_ids) { - result.insert(_impl->get_endpoint_for_host_id(id)); - } - return result; -} - void token_metadata::for_each_token_owner(std::function func) const { return _impl->for_each_token_owner(func); } @@ -1166,19 +1098,10 @@ token_metadata::count_normal_token_owners() const { return _impl->count_normal_token_owners(); } -std::unordered_map> token_metadata::get_datacenter_token_owners_ips() const { - return _impl->get_datacenter_token_owners_ips(); -} - std::unordered_map> token_metadata::get_datacenter_token_owners() const { return _impl->get_datacenter_token_owners(); } -std::unordered_map>> -token_metadata::get_datacenter_racks_token_owners_ips() const { - return _impl->get_datacenter_racks_token_owners_ips(); -} - std::unordered_map>> token_metadata::get_datacenter_racks_token_owners() const { return _impl->get_datacenter_racks_token_owners(); diff --git a/locator/token_metadata.hh b/locator/token_metadata.hh index 4ac6a2143b..f2c27fcb14 100644 --- a/locator/token_metadata.hh +++ b/locator/token_metadata.hh @@ -239,8 +239,6 @@ public: /** Return the unique host ID for an end-point. */ host_id get_host_id(inet_address endpoint) const; - inet_address get_endpoint_for_host_id(locator::host_id host_id) const; - /** @return a copy of the endpoint-to-id map for read-only operations */ std::unordered_set get_host_ids() const; @@ -324,23 +322,18 @@ public: const std::unordered_set& get_normal_token_owners() const; - std::unordered_set get_normal_token_owners_ips() const; - void for_each_token_owner(std::function func) const; /* Returns the number of different endpoints that own tokens in the ring. * Bootstrapping tokens are not taken into account. */ size_t count_normal_token_owners() const; - // Returns the map: DC -> addresses of token owners in that DC. + // Returns the map: DC -> host_id of token owners in that DC. // If there are no token owners in a DC, it is not present in the result. - std::unordered_map> get_datacenter_token_owners_ips() const; std::unordered_map> get_datacenter_token_owners() const; - // Returns the map: DC -> (map: rack -> addresses of token owners in that rack). + // Returns the map: DC -> (map: rack -> host_id of token owners in that rack). // If there are no token owners in a DC/rack, it is not present in the result. - std::unordered_map>> - get_datacenter_racks_token_owners_ips() const; std::unordered_map>> get_datacenter_racks_token_owners() const; diff --git a/test/boost/token_metadata_test.cc b/test/boost/token_metadata_test.cc index fe9edeb132..96aa2f9b0f 100644 --- a/test/boost/token_metadata_test.cc +++ b/test/boost/token_metadata_test.cc @@ -300,7 +300,6 @@ SEASTAR_THREAD_TEST_CASE(test_replace_node_with_same_endpoint) { token_metadata->add_replacing_endpoint(e1_id1, e1_id2); auto erm = create_erm(token_metadata, {{"replication_factor", "2"}}); - BOOST_REQUIRE_EQUAL(token_metadata->get_host_id(e1), e1_id1); BOOST_REQUIRE_EQUAL(erm->get_pending_replicas(dht::token::from_int64(1)), host_id_vector_topology_change{e1_id2}); BOOST_REQUIRE_EQUAL(erm->get_natural_replicas(dht::token::from_int64(1)), From 122d58b4ad8741016eaaa00302fb9282d4980bc6 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 5 Jan 2025 16:21:31 +0200 Subject: [PATCH 45/57] api: view_build_statuses: do not use IP from the topology, but translate id to ip using address map instead --- api/api.cc | 4 ++-- api/api_init.hh | 2 +- api/storage_service.cc | 6 +++--- api/storage_service.hh | 2 +- db/view/view.cc | 4 ++-- db/view/view_builder.hh | 2 +- main.cc | 2 +- 7 files changed, 11 insertions(+), 11 deletions(-) diff --git a/api/api.cc b/api/api.cc index 670750e684..e2880dad6e 100644 --- a/api/api.cc +++ b/api/api.cc @@ -153,8 +153,8 @@ future<> unset_server_sstables_loader(http_context& ctx) { return ctx.http_server.set_routes([&ctx] (routes& r) { unset_sstables_loader(ctx, r); }); } -future<> set_server_view_builder(http_context& ctx, sharded& vb) { - return ctx.http_server.set_routes([&ctx, &vb] (routes& r) { set_view_builder(ctx, r, vb); }); +future<> set_server_view_builder(http_context& ctx, sharded& vb, sharded& g) { + return ctx.http_server.set_routes([&ctx, &vb, &g] (routes& r) { set_view_builder(ctx, r, vb, g); }); } future<> unset_server_view_builder(http_context& ctx) { diff --git a/api/api_init.hh b/api/api_init.hh index 205090c1cb..b09948a8da 100644 --- a/api/api_init.hh +++ b/api/api_init.hh @@ -100,7 +100,7 @@ future<> set_server_storage_service(http_context& ctx, sharded unset_server_storage_service(http_context& ctx); future<> set_server_sstables_loader(http_context& ctx, sharded& sst_loader); future<> unset_server_sstables_loader(http_context& ctx); -future<> set_server_view_builder(http_context& ctx, sharded& vb); +future<> set_server_view_builder(http_context& ctx, sharded& vb, sharded& g); future<> unset_server_view_builder(http_context& ctx); future<> set_server_repair(http_context& ctx, sharded& repair, sharded& am); future<> unset_server_repair(http_context& ctx); diff --git a/api/storage_service.cc b/api/storage_service.cc index 6e5cdbfbbf..b91d099027 100644 --- a/api/storage_service.cc +++ b/api/storage_service.cc @@ -533,11 +533,11 @@ void unset_sstables_loader(http_context& ctx, routes& r) { ss::start_restore.unset(r); } -void set_view_builder(http_context& ctx, routes& r, sharded& vb) { - ss::view_build_statuses.set(r, [&ctx, &vb] (std::unique_ptr req) { +void set_view_builder(http_context& ctx, routes& r, sharded& vb, sharded& g) { + ss::view_build_statuses.set(r, [&ctx, &vb, &g] (std::unique_ptr req) { auto keyspace = validate_keyspace(ctx, req); auto view = req->get_path_param("view"); - return vb.local().view_build_statuses(std::move(keyspace), std::move(view)).then([] (std::unordered_map status) { + return vb.local().view_build_statuses(std::move(keyspace), std::move(view), g.local()).then([] (std::unordered_map status) { std::vector res; return make_ready_future(map_to_key_value(std::move(status), res)); }); diff --git a/api/storage_service.hh b/api/storage_service.hh index 339bf8f37a..b26795a21b 100644 --- a/api/storage_service.hh +++ b/api/storage_service.hh @@ -75,7 +75,7 @@ void set_storage_service(http_context& ctx, httpd::routes& r, sharded& sst_loader); void unset_sstables_loader(http_context& ctx, httpd::routes& r); -void set_view_builder(http_context& ctx, httpd::routes& r, sharded& vb); +void set_view_builder(http_context& ctx, httpd::routes& r, sharded& vb, sharded& g); void unset_view_builder(http_context& ctx, httpd::routes& r); void set_repair(http_context& ctx, httpd::routes& r, sharded& repair, sharded& am); void unset_repair(http_context& ctx, httpd::routes& r); diff --git a/db/view/view.cc b/db/view/view.cc index 7f73336234..74b72dd1c2 100644 --- a/db/view/view.cc +++ b/db/view/view.cc @@ -2434,14 +2434,14 @@ future> view_builder::view_status( } future> -view_builder::view_build_statuses(sstring keyspace, sstring view_name) const { +view_builder::view_build_statuses(sstring keyspace, sstring view_name, const gms::gossiper& gossiper) const { std::unordered_map status = co_await view_status(std::move(keyspace), std::move(view_name)); std::unordered_map status_map; const auto& topo = _db.get_token_metadata().get_topology(); topo.for_each_node([&] (const locator::node& node) { auto it = status.find(node.host_id()); auto s = it != status.end() ? std::move(it->second) : "UNKNOWN"; - status_map.emplace(fmt::to_string(node.endpoint()), std::move(s)); + status_map.emplace(fmt::to_string(gossiper.get_address_map().get(node.host_id())), std::move(s)); }); co_return status_map; } diff --git a/db/view/view_builder.hh b/db/view/view_builder.hh index 89dfddc070..6b26a6eaa4 100644 --- a/db/view/view_builder.hh +++ b/db/view/view_builder.hh @@ -235,7 +235,7 @@ public: // For tests future<> wait_until_built(const sstring& ks_name, const sstring& view_name); - future> view_build_statuses(sstring keyspace, sstring view_name) const; + future> view_build_statuses(sstring keyspace, sstring view_name, const gms::gossiper& g) const; // Can only be called on shard-0 future<> mark_existing_views_as_built(); diff --git a/main.cc b/main.cc index 700cb6d83f..36c2694f73 100644 --- a/main.cc +++ b/main.cc @@ -2289,7 +2289,7 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl view_builder.invoke_on_all(&db::view::view_builder::start, std::ref(mm), utils::cross_shard_barrier()).get(); } - api::set_server_view_builder(ctx, view_builder).get(); + api::set_server_view_builder(ctx, view_builder, gossiper).get(); auto stop_vb_api = defer_verbose_shutdown("view builder API", [&ctx] { api::unset_server_view_builder(ctx).get(); }); From 5cd3627baafb2051ee6528ed9fa1178fdf3909d9 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 5 Jan 2025 16:23:47 +0200 Subject: [PATCH 46/57] locator: drop unused function from tablet_effective_replication_map --- locator/tablets.cc | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/locator/tablets.cc b/locator/tablets.cc index 935a171f88..bdddae1ae7 100644 --- a/locator/tablets.cc +++ b/locator/tablets.cc @@ -765,19 +765,6 @@ class tablet_effective_replication_map : public effective_replication_map { tablet_sharder _sharder; mutable const tablet_map* _tmap = nullptr; private: - inet_address_vector_replica_set to_replica_set(const tablet_replica_set& replicas) const { - inet_address_vector_replica_set result; - result.reserve(replicas.size()); - auto& topo = _tmptr->get_topology(); - for (auto&& replica : replicas) { - auto* node = topo.find_node(replica.host); - if (node && !node->left()) { - result.emplace_back(node->endpoint()); - } - } - return result; - } - host_id_vector_replica_set to_host_set(const tablet_replica_set& replicas) const { host_id_vector_replica_set result; result.reserve(replicas.size()); From 83d15b8e32ffe911cf486e26d80212ce78d887e1 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 5 Jan 2025 16:34:15 +0200 Subject: [PATCH 47/57] cql3: report host id instead of ip in error during SELECT FROM MUTATION_FRAGMENTS query We want to drop ip from the topology::node. --- cql3/statements/select_statement.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cql3/statements/select_statement.cc b/cql3/statements/select_statement.cc index 25d40eaf53..662bdeaf70 100644 --- a/cql3/statements/select_statement.cc +++ b/cql3/statements/select_statement.cc @@ -1820,7 +1820,7 @@ mutation_fragments_select_statement::do_execute(query_processor& qp, service::qu throw exceptions::invalid_request_exception(seastar::format( "Moving between coordinators is not allowed in SELECT FROM MUTATION_FRAGMENTS() statements, last page's coordinator was {}{}", last_host, - last_node ? fmt::format("({})", last_node->endpoint()) : "")); + last_node ? fmt::format("({})", last_node->host_id()) : "")); } } } From 49fa1130ef591c9eebd21be2fbe4bab303ed321d Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 5 Jan 2025 16:35:13 +0200 Subject: [PATCH 48/57] topology coordinator: change connection dropping code to work on host ids Do not use ip from topology::node, but look it up in address map instead. We want to drop ip from the topology::node. --- service/storage_service.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/service/storage_service.cc b/service/storage_service.cc index 7e331106bb..8dc6a1783f 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -780,9 +780,9 @@ future<> storage_service::topology_state_load(state_change_hint hint) { { std::vector> futures; get_token_metadata_ptr()->get_topology().for_each_node([&](const locator::node& n) { - const auto ep = n.endpoint(); - if (ep != inet_address{} && !saved_tmpr->get_topology().has_endpoint(ep)) { - futures.push_back(remove_rpc_client_with_ignored_topology(ep, n.host_id())); + const auto ep = n.host_id(); + if (auto ip_opt = _address_map.find(ep); ip_opt && !saved_tmpr->get_topology().has_node(ep)) { + futures.push_back(remove_rpc_client_with_ignored_topology(*ip_opt, n.host_id())); } }); co_await when_all_succeed(futures.begin(), futures.end()).discard_result(); From 163099678e0bb464d519689df5c12ad8fd0a5649 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 5 Jan 2025 16:39:39 +0200 Subject: [PATCH 49/57] storage_proxy: translate id to ip using address map in tablets's describe_ring code instead of taking one from the topology We want to drop ip from the locator::node. --- service/storage_service.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/service/storage_service.cc b/service/storage_service.cc index 8dc6a1783f..c5b5fe3b93 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -5278,10 +5278,11 @@ storage_service::describe_ring_for_table(const sstring& keyspace_name, const sst for (auto& r : replicas) { dht::endpoint_details details; const auto& node = topology.get_node(r.host); + const auto ip = _address_map.get(r.host); details._datacenter = node.dc_rack().dc; details._rack = node.dc_rack().rack; - details._host = node.endpoint(); - tr._rpc_endpoints.push_back(_gossiper.get_rpc_address(node.endpoint())); + details._host = ip; + tr._rpc_endpoints.push_back(_gossiper.get_rpc_address(ip)); tr._endpoints.push_back(fmt::to_string(details._host)); tr._endpoint_details.push_back(std::move(details)); } From fb28ff5176cff527ca1948a6ba40f5d6d68cf002 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 5 Jan 2025 17:06:21 +0200 Subject: [PATCH 50/57] storage_service: check for outdated ip in on_change notification in the peers table The code checks that it does not run for an ip address that is no longer in use (after ip address change). To check that we can use peers table and see if the host id is mapped to the address. If yes, this is the latest address for this host id otherwise this is an outdated entry. --- service/storage_service.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/service/storage_service.cc b/service/storage_service.cc index c5b5fe3b93..de306755a5 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -2694,14 +2694,14 @@ future<> storage_service::on_change(gms::inet_address endpoint, const gms::appli const auto host_id = _gossiper.get_host_id(endpoint); const auto& tm = get_token_metadata(); const auto* node = tm.get_topology().find_node(host_id); - // The check node->endpoint() == endpoint is needed when a node changes + // The check peers[host_id] == endpoint is needed when a node changes // its IP - on_change can be called by the gossiper for old IP as part // of its removal, after handle_state_normal has already been called for // the new one. Without the check, the do_update_system_peers_table call // overwrites the IP back to its old value. // In essence, the code under the 'if' should fire if the given IP belongs // to a cluster member. - if (node && node->is_member() && node->endpoint() == endpoint) { + if (node && node->is_member() && (co_await get_ip_from_peers_table(host_id)) == endpoint) { if (!is_me(endpoint)) { slogger.debug("endpoint={}/{} on_change: updating system.peers table", endpoint, host_id); if (auto info = get_peer_info_for_update(endpoint, states)) { From db73758655ecd45dbcbc3f6c4cdc2ae3406af73b Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 5 Jan 2025 17:18:43 +0200 Subject: [PATCH 51/57] locator: topology: remove unused functions --- locator/topology.cc | 11 ----------- locator/topology.hh | 6 ------ 2 files changed, 17 deletions(-) diff --git a/locator/topology.cc b/locator/topology.cc index 6c1417ca6e..b4c9b971ca 100644 --- a/locator/topology.cc +++ b/locator/topology.cc @@ -544,17 +544,6 @@ bool topology::has_node(host_id id) const noexcept { return bool(node); } -bool topology::has_node(inet_address ep) const noexcept { - auto node = find_node(ep); - tlogger.trace("topology[{}]: has_node: endpoint={}: node={}", fmt::ptr(this), ep, node_printer(node)); - return bool(node); -} - -bool topology::has_endpoint(inet_address ep) const -{ - return has_node(ep); -} - void topology::sort_by_proximity(locator::host_id address, host_id_vector_replica_set& addresses) const { if (can_sort_by_proximity()) { do_sort_by_proximity(address, addresses); diff --git a/locator/topology.hh b/locator/topology.hh index 8163c0e706..27aaa69926 100644 --- a/locator/topology.hh +++ b/locator/topology.hh @@ -251,7 +251,6 @@ public: // Returns true if a node with given host_id is found bool has_node(host_id id) const noexcept; - bool has_node(inet_address id) const noexcept; /** * Stores current DC/rack assignment for ep @@ -265,11 +264,6 @@ public: bool remove_endpoint(locator::host_id ep); - /** - * Returns true iff contains given endpoint. - */ - bool has_endpoint(inet_address) const; - const std::unordered_map>& get_datacenter_endpoints() const { From d45ce6fa1240bb744acd9ab4c08315c7d1cbe063 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 5 Jan 2025 17:35:33 +0200 Subject: [PATCH 52/57] storage_proxy: translate ips to ids in forward array using gossiper We already use it to translate reply_to, so do it for consistency and to drop ip based API usage. --- service/storage_proxy.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/service/storage_proxy.cc b/service/storage_proxy.cc index 31d0411370..5cb726b54d 100644 --- a/service/storage_proxy.cc +++ b/service/storage_proxy.cc @@ -105,13 +105,13 @@ namespace { template -utils::small_vector addr_vector_to_id(const locator::topology& topo, const utils::small_vector& set) { +utils::small_vector addr_vector_to_id(const gms::gossiper& g, const utils::small_vector& set) { return set | std::views::transform([&] (gms::inet_address ip) { - auto* node = topo.find_node(ip); - if (!node) { + try { + return g.get_host_id(ip); + } catch (...) { on_internal_error(slogger, fmt::format("addr_vector_to_id cannot map {} to host id", ip)); } - return node->host_id(); }) | std::ranges::to>(); } @@ -587,7 +587,7 @@ private: } auto reply_to_host_id = reply_to_id ? *reply_to_id : _gossiper.get_host_id(reply_to); - auto forward_host_id = forward_id ? std::move(*forward_id) : addr_vector_to_id(_sp._shared_token_metadata.get()->get_topology(), forward); + auto forward_host_id = forward_id ? std::move(*forward_id) : addr_vector_to_id(_gossiper, forward); if (reply_to_id) { _gossiper.get_mutable_address_map().opt_add_entry(reply_to_host_id, reply_to); From 12da203cae18807b266e760cdcaba515ec538b65 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 5 Jan 2025 17:36:36 +0200 Subject: [PATCH 53/57] storage_service: use host_id to look for a node in on_alive handler --- service/storage_service.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/service/storage_service.cc b/service/storage_service.cc index de306755a5..c65fb5f265 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -2633,7 +2633,7 @@ future<> storage_service::on_alive(gms::inet_address endpoint, gms::endpoint_sta const auto& tm = get_token_metadata(); const auto host_id = state->get_host_id(); slogger.debug("endpoint={}/{} on_alive: permit_id={}", endpoint, host_id, pid); - const auto* node = tm.get_topology().find_node(endpoint); + const auto* node = tm.get_topology().find_node(host_id); if (node && node->is_member()) { co_await notify_up(endpoint); } else if (raft_topology_change_enabled()) { From f9df092fd1c4b8c54416b252f81f45146faf8451 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 5 Jan 2025 18:31:44 +0200 Subject: [PATCH 54/57] repair: drop unneeded code There is a code that creates a map from id to ip and then creates a vector from the keys of the map. Create a vector directly instead. --- repair/repair.cc | 6 +++--- repair/repair.hh | 3 --- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/repair/repair.cc b/repair/repair.cc index 51c0aa8d7d..bd54cb6d92 100644 --- a/repair/repair.cc +++ b/repair/repair.cc @@ -2150,11 +2150,11 @@ future<> repair_service::do_rebuild_replace_with_repair(std::unordered_map(); + return n.host_id(); + }) | std::ranges::to(); rlogger.debug("{}: keyspace={}, range={}, natural_enpoints={}, neighbors={}", op, keyspace_name, r, natural_eps, neighbors); if (!neighbors.empty()) { - range_sources[r] = repair_neighbors(neighbors); + range_sources[r] = repair_neighbors(std::move(neighbors)); ++it; } else { // Skip the range with zero neighbors diff --git a/repair/repair.hh b/repair/repair.hh index a65c4acc92..942ecc435b 100644 --- a/repair/repair.hh +++ b/repair/repair.hh @@ -143,9 +143,6 @@ public: explicit repair_neighbors(std::vector a) : all(std::move(a)) { } - explicit repair_neighbors(const std::unordered_map& a) - : all(a | std::views::keys | std::ranges::to>()) { - } repair_neighbors(std::vector a, std::vector m) : all(std::move(a)) , mandatory(std::move(m)) { From 50fb22c8f9fda42f16f1e94fc6e9566cf38361f4 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 5 Jan 2025 18:33:00 +0200 Subject: [PATCH 55/57] locator: topology: drop indexing by ips Do not track id to ip mapping in the topology class any longer. There are no remaining users. --- locator/token_metadata.cc | 17 +--- locator/topology.cc | 97 +++----------------- locator/topology.hh | 27 +----- main.cc | 3 +- test/boost/locator_topology_test.cc | 97 ++++---------------- test/boost/network_topology_strategy_test.cc | 30 ++---- test/boost/tablets_test.cc | 6 +- test/lib/cql_test_env.cc | 3 +- test/perf/perf_sort_by_proximity.cc | 1 - 9 files changed, 52 insertions(+), 229 deletions(-) diff --git a/locator/token_metadata.cc b/locator/token_metadata.cc index 27b8e99128..519a664754 100644 --- a/locator/token_metadata.cc +++ b/locator/token_metadata.cc @@ -116,7 +116,7 @@ public: } void update_topology(host_id id, std::optional opt_dr, std::optional opt_st, std::optional shard_count = std::nullopt) { - _topology.add_or_update_endpoint(id, std::nullopt, std::move(opt_dr), std::move(opt_st), std::move(shard_count)); + _topology.add_or_update_endpoint(id, std::move(opt_dr), std::move(opt_st), std::move(shard_count)); } /** @@ -141,15 +141,6 @@ public: void debug_show() const; - /** - * Store an end-point to host ID mapping. Each ID must be unique, and - * cannot be changed after the fact. - * - * @param hostId - * @param endpoint - */ - void update_host_id(const host_id& host_id, inet_address endpoint); - /** @return a copy of host id set for read-only operations */ std::unordered_set get_host_ids() const; @@ -516,10 +507,6 @@ void token_metadata_impl::debug_show() const { reporter->arm_periodic(std::chrono::seconds(1)); } -void token_metadata_impl::update_host_id(const host_id& host_id, inet_address endpoint) { - _topology.add_or_update_endpoint(host_id, endpoint); -} - std::unordered_set token_metadata_impl::get_host_ids() const { return _topology.get_nodes() | std::views::filter([&] (const node& n) { return !n.left() && !n.is_none(); }) | @@ -964,7 +951,7 @@ token_metadata::debug_show() const { void token_metadata::update_host_id(const host_id& host_id, inet_address endpoint) { - _impl->update_host_id(host_id, endpoint); + // Do nothing for now. Remove later. } std::unordered_set diff --git a/locator/topology.cc b/locator/topology.cc index b4c9b971ca..caa4f1d2e0 100644 --- a/locator/topology.cc +++ b/locator/topology.cc @@ -55,10 +55,9 @@ thread_local const endpoint_dc_rack endpoint_dc_rack::default_location = { .rack = locator::production_snitch_base::default_rack, }; -node::node(const locator::topology* topology, locator::host_id id, inet_address endpoint, endpoint_dc_rack dc_rack, state state, shard_id shard_count, this_node is_this_node, node::idx_type idx) +node::node(const locator::topology* topology, locator::host_id id, endpoint_dc_rack dc_rack, state state, shard_id shard_count, this_node is_this_node, node::idx_type idx) : _topology(topology) , _host_id(id) - , _endpoint(endpoint) , _dc_rack(std::move(dc_rack)) , _state(state) , _shard_count(std::move(shard_count)) @@ -66,12 +65,12 @@ node::node(const locator::topology* topology, locator::host_id id, inet_address , _idx(idx) {} -node_holder node::make(const locator::topology* topology, locator::host_id id, inet_address endpoint, endpoint_dc_rack dc_rack, state state, shard_id shard_count, node::this_node is_this_node, node::idx_type idx) { - return std::make_unique(topology, std::move(id), std::move(endpoint), std::move(dc_rack), std::move(state), shard_count, is_this_node, idx); +node_holder node::make(const locator::topology* topology, locator::host_id id, endpoint_dc_rack dc_rack, state state, shard_id shard_count, node::this_node is_this_node, node::idx_type idx) { + return std::make_unique(topology, std::move(id), std::move(dc_rack), std::move(state), shard_count, is_this_node, idx); } node_holder node::clone() const { - return make(nullptr, host_id(), endpoint(), dc_rack(), get_state(), get_shard_count(), is_this_node()); + return make(nullptr, host_id(), dc_rack(), get_state(), get_shard_count(), is_this_node()); } std::string node::to_string(node::state s) { @@ -95,7 +94,6 @@ future<> topology::clear_gently() noexcept { _datacenters.clear(); _dc_rack_nodes.clear(); _dc_nodes.clear(); - _nodes_by_endpoint.clear(); _nodes_by_host_id.clear(); co_await utils::clear_gently(_nodes); } @@ -116,7 +114,7 @@ topology::topology(config cfg) { tlogger.trace("topology[{}]: constructing using config: endpoint={} id={} dc={} rack={}", fmt::ptr(this), cfg.this_endpoint, cfg.this_host_id, cfg.local_dc_rack.dc, cfg.local_dc_rack.rack); - add_node(cfg.this_host_id, cfg.this_endpoint, cfg.local_dc_rack, node::state::none); + add_node(cfg.this_host_id, cfg.local_dc_rack, node::state::none); } topology::topology(topology&& o) noexcept @@ -125,7 +123,6 @@ topology::topology(topology&& o) noexcept , _this_node(std::exchange(o._this_node, nullptr)) , _nodes(std::move(o._nodes)) , _nodes_by_host_id(std::move(o._nodes_by_host_id)) - , _nodes_by_endpoint(std::move(o._nodes_by_endpoint)) , _dc_nodes(std::move(o._dc_nodes)) , _dc_rack_nodes(std::move(o._dc_rack_nodes)) , _dc_endpoints(std::move(o._dc_endpoints)) @@ -171,7 +168,7 @@ void topology::set_host_id_cfg(host_id this_host_id) { tlogger.trace("topology[{}]: set host id to {}", fmt::ptr(this), this_host_id); _cfg.this_host_id = this_host_id; - add_or_update_endpoint(this_host_id, _cfg.this_endpoint); + add_or_update_endpoint(this_host_id); } future topology::clone_gently() const { @@ -188,21 +185,15 @@ future topology::clone_gently() const { co_return ret; } -const node& topology::add_node(host_id id, const inet_address& ep, const endpoint_dc_rack& dr, node::state state, shard_id shard_count) { +const node& topology::add_node(host_id id, const endpoint_dc_rack& dr, node::state state, shard_id shard_count) { if (dr.dc.empty() || dr.rack.empty()) { on_internal_error(tlogger, "Node must have valid dc and rack"); } - return add_node(node::make(this, id, ep, dr, state, shard_count)); + return add_node(node::make(this, id, dr, state, shard_count)); } bool topology::is_configured_this_node(const node& n) const { - if (_cfg.this_host_id && n.host_id()) { // Selection by host_id - return _cfg.this_host_id == n.host_id(); - } - if (_cfg.this_endpoint != inet_address()) { // Selection by endpoint - return _cfg.this_endpoint == n.endpoint(); - } - return false; // No selection; + return _cfg.this_host_id == n.host_id(); } const node& topology::add_node(node_holder nptr) { @@ -245,10 +236,9 @@ const node& topology::add_node(node_holder nptr) { return *node; } -void topology::update_node(node& node, std::optional opt_id, std::optional opt_ep, std::optional opt_dr, std::optional opt_st, std::optional opt_shard_count) { - tlogger.debug("topology[{}]: update_node: {}: to: host_id={} endpoint={} dc={} rack={} state={} shard_count={}, at {}", fmt::ptr(this), node_printer(&node), +void topology::update_node(node& node, std::optional opt_id, std::optional opt_dr, std::optional opt_st, std::optional opt_shard_count) { + tlogger.debug("topology[{}]: update_node: {}: to: host_id={} dc={} rack={} state={} shard_count={}, at {}", fmt::ptr(this), node_printer(&node), opt_id ? format("{}", *opt_id) : "unchanged", - opt_ep ? format("{}", *opt_ep) : "unchanged", opt_dr ? format("{}", opt_dr->dc) : "unchanged", opt_dr ? format("{}", opt_dr->rack) : "unchanged", opt_st ? format("{}", *opt_st) : "unchanged", @@ -272,16 +262,6 @@ void topology::update_node(node& node, std::optional opt_id, std::optio opt_id.reset(); } } - if (opt_ep) { - if (*opt_ep != node.endpoint()) { - if (*opt_ep == inet_address{}) { - on_internal_error(tlogger, seastar::format("Updating node endpoint to null is disallowed: {}: new endpoint={}", node_printer(&node), *opt_ep)); - } - changed = true; - } else { - opt_ep.reset(); - } - } if (opt_dr) { if (opt_dr->dc.empty() || opt_dr->dc == production_snitch_base::default_dc) { opt_dr->dc = node.dc_rack().dc; @@ -312,9 +292,6 @@ void topology::update_node(node& node, std::optional opt_id, std::optio if (opt_id) { node._host_id = *opt_id; } - if (opt_ep) { - node._endpoint = *opt_ep; - } if (opt_dr) { node._dc_rack = std::move(*opt_dr); } @@ -360,32 +337,6 @@ void topology::index_node(const node& node) { if (!inserted_host_id) { on_internal_error(tlogger, seastar::format("topology[{}]: {}: node already exists", fmt::ptr(this), node_printer(&node))); } - if (node.endpoint() != inet_address{}) { - auto eit = _nodes_by_endpoint.find(node.endpoint()); - if (eit != _nodes_by_endpoint.end()) { - if (eit->second.get().get_state() == node::state::none && eit->second.get().is_this_node()) { - // eit->second is default entry created for local node and it is replaced by existing node with the same ip - // it means this node is going to replace the existing node with the same ip, but it does not know it yet - // map ip to the old node - _nodes_by_endpoint.erase(node.endpoint()); - } else if (eit->second.get().get_state() == node::state::replacing && node.get_state() == node::state::being_replaced) { - // replace-with-same-ip, map ip to the old node - _nodes_by_endpoint.erase(node.endpoint()); - } else if (eit->second.get().get_state() == node::state::being_replaced && node.get_state() == node::state::replacing) { - // replace-with-same-ip, map ip to the old node, do nothing if it's already the case - } else if (eit->second.get().is_leaving() || eit->second.get().left()) { - _nodes_by_endpoint.erase(node.endpoint()); - } else if (!node.is_leaving() && !node.left()) { - if (node.host_id()) { - _nodes_by_host_id.erase(node.host_id()); - } - on_internal_error(tlogger, seastar::format("topology[{}]: {}: node endpoint already mapped to {}", fmt::ptr(this), node_printer(&node), node_printer(&eit->second.get()))); - } - } - if (!node.left() && !node.is_none()) { - _nodes_by_endpoint.try_emplace(node.endpoint(), std::cref(node)); - } - } // We keep location of left nodes because they may still appear in tablet replica sets // and algorithms expect to know which dc they belonged to. View replica pairing needs stable @@ -442,10 +393,6 @@ void topology::unindex_node(const node& node) { if (host_it != _nodes_by_host_id.end() && host_it->second == node) { _nodes_by_host_id.erase(host_it); } - auto ep_it = _nodes_by_endpoint.find(node.endpoint()); - if (ep_it != _nodes_by_endpoint.end() && ep_it->second.get() == node) { - _nodes_by_endpoint.erase(ep_it); - } if (_this_node == &node) { _this_node = nullptr; } @@ -484,16 +431,6 @@ node* topology::find_node(host_id id) noexcept { return make_mutable(const_cast(this)->find_node(id)); } -// Finds a node by its endpoint -// Returns nullptr if not found -const node* topology::find_node(const inet_address& ep) const noexcept { - auto it = _nodes_by_endpoint.find(ep); - if (it != _nodes_by_endpoint.end()) { - return &it->second.get(); - } - return nullptr; -} - // Finds a node by its index // Returns nullptr if not found const node* topology::find_node(node::idx_type idx) const noexcept { @@ -503,23 +440,19 @@ const node* topology::find_node(node::idx_type idx) const noexcept { return _nodes.at(idx).get(); } -const node& topology::add_or_update_endpoint(host_id id, std::optional opt_ep, std::optional opt_dr, std::optional opt_st, std::optional shard_count) +const node& topology::add_or_update_endpoint(host_id id, std::optional opt_dr, std::optional opt_st, std::optional shard_count) { - tlogger.trace("topology[{}]: add_or_update_endpoint: host_id={} ep={} dc={} rack={} state={} shards={}, at {}", fmt::ptr(this), - id, opt_ep, opt_dr.value_or(endpoint_dc_rack{}).dc, opt_dr.value_or(endpoint_dc_rack{}).rack, opt_st.value_or(node::state::none), shard_count, + tlogger.trace("topology[{}]: add_or_update_endpoint: host_id={} dc={} rack={} state={} shards={}, at {}", fmt::ptr(this), + id, opt_dr.value_or(endpoint_dc_rack{}).dc, opt_dr.value_or(endpoint_dc_rack{}).rack, opt_st.value_or(node::state::none), shard_count, lazy_backtrace()); auto* n = find_node(id); if (n) { - update_node(*n, std::nullopt, opt_ep, std::move(opt_dr), std::move(opt_st), std::move(shard_count)); - return *n; - } else if (opt_ep && (n = make_mutable(find_node(*opt_ep)))) { - update_node(*n, id, std::nullopt, std::move(opt_dr), std::move(opt_st), std::move(shard_count)); + update_node(*n, std::nullopt, std::move(opt_dr), std::move(opt_st), std::move(shard_count)); return *n; } return add_node(id, - opt_ep.value_or(inet_address{}), opt_dr.value_or(endpoint_dc_rack::default_location), opt_st.value_or(node::state::none), shard_count.value_or(0)); diff --git a/locator/topology.hh b/locator/topology.hh index 27aaa69926..d1c141b506 100644 --- a/locator/topology.hh +++ b/locator/topology.hh @@ -59,7 +59,6 @@ public: private: const locator::topology* _topology; locator::host_id _host_id; - inet_address _endpoint; endpoint_dc_rack _dc_rack; state _state; shard_id _shard_count = 0; @@ -72,7 +71,6 @@ private: public: node(const locator::topology* topology, locator::host_id id, - inet_address endpoint, endpoint_dc_rack dc_rack, state state, shard_id shard_count = 0, @@ -94,10 +92,6 @@ public: return _host_id; } - const inet_address& endpoint() const noexcept { - return _endpoint; - } - const endpoint_dc_rack& dc_rack() const noexcept { return _dc_rack; } @@ -164,7 +158,6 @@ public: private: static node_holder make(const locator::topology* topology, locator::host_id id, - inet_address endpoint, endpoint_dc_rack dc_rack, state state, shard_id shard_count = 0, @@ -211,7 +204,7 @@ public: } // Adds a node with given host_id, endpoint, and DC/rack. - const node& add_node(host_id id, const inet_address& ep, const endpoint_dc_rack& dr, node::state state, + const node& add_node(host_id id, const endpoint_dc_rack& dr, node::state state, shard_id shard_count = 0); // Optionally updates node's current host_id, endpoint, or DC/rack. @@ -219,7 +212,6 @@ public: // or a peer node host_id may be updated when the node is replaced with another node using the same ip address. void update_node(node& node, std::optional opt_id, - std::optional opt_ep, std::optional opt_dr, std::optional opt_st, std::optional opt_shard_count = std::nullopt); @@ -241,10 +233,6 @@ public: return *n; }; - // Looks up a node by its inet_address. - // Returns a pointer to the node if found, or nullptr otherwise. - const node* find_node(const inet_address& ep) const noexcept; - // Finds a node by its index // Returns a pointer to the node if found, or nullptr otherwise. const node* find_node(node::idx_type idx) const noexcept; @@ -257,8 +245,7 @@ public: * * Adds or updates a node with given endpoint */ - const node& add_or_update_endpoint(host_id id, std::optional opt_ep, - std::optional opt_dr = std::nullopt, + const node& add_or_update_endpoint(host_id id, std::optional opt_dr = std::nullopt, std::optional opt_st = std::nullopt, std::optional shard_count = std::nullopt); @@ -411,7 +398,6 @@ private: const node* _this_node = nullptr; std::vector _nodes; std::unordered_map> _nodes_by_host_id; - std::unordered_map> _nodes_by_endpoint; std::unordered_map>> _dc_nodes; std::unordered_map>>> _dc_rack_nodes; @@ -434,10 +420,6 @@ private: void calculate_datacenters(); - const std::unordered_map>& get_nodes_by_endpoint() const noexcept { - return _nodes_by_endpoint; - }; - mutable random_engine_type _random_engine; friend class token_metadata_impl; @@ -491,12 +473,11 @@ struct fmt::formatter : fmt::formatter { template auto format(const locator::node& node, FormatContext& ctx) const { if (!verbose) { - return fmt::format_to(ctx.out(), "{}/{}", node.host_id(), node.endpoint()); + return fmt::format_to(ctx.out(), "{}", node.host_id()); } else { - return fmt::format_to(ctx.out(), " idx={} host_id={} endpoint={} dc={} rack={} state={} shards={} this_node={}", + return fmt::format_to(ctx.out(), " idx={} host_id={} dc={} rack={} state={} shards={} this_node={}", node.idx(), node.host_id(), - node.endpoint(), node.dc_rack().dc, node.dc_rack().rack, locator::node::to_string(node.get_state()), diff --git a/main.cc b/main.cc index 36c2694f73..4e6b682151 100644 --- a/main.cc +++ b/main.cc @@ -1465,12 +1465,11 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl const auto listen_address = utils::resolve(cfg->listen_address, family).get(); const auto host_id = initialize_local_info_thread(sys_ks, snitch, listen_address, *cfg, broadcast_addr, broadcast_rpc_addr); - shared_token_metadata::mutate_on_all_shards(token_metadata, [host_id, endpoint = broadcast_addr] (locator::token_metadata& tm) { + shared_token_metadata::mutate_on_all_shards(token_metadata, [host_id] (locator::token_metadata& tm) { // Makes local host id available in topology cfg as soon as possible. // Raft topology discard the endpoint-to-id map, so the local id can // still be found in the config. tm.get_topology().set_host_id_cfg(host_id); - tm.get_topology().add_or_update_endpoint(host_id, endpoint); return make_ready_future<>(); }).get(); diff --git a/test/boost/locator_topology_test.cc b/test/boost/locator_topology_test.cc index ccb321a11d..f42dd74356 100644 --- a/test/boost/locator_topology_test.cc +++ b/test/boost/locator_topology_test.cc @@ -32,9 +32,7 @@ SEASTAR_THREAD_TEST_CASE(test_add_node) { auto id1 = host_id::create_random_id(); auto ep1 = gms::inet_address("127.0.0.1"); auto id2 = host_id::create_random_id(); - auto ep2 = gms::inet_address("127.0.0.2"); auto id3 = host_id::create_random_id(); - auto ep3 = gms::inet_address("127.0.0.3"); topology::config cfg = { .this_endpoint = ep1, @@ -51,16 +49,13 @@ SEASTAR_THREAD_TEST_CASE(test_add_node) { std::unordered_set> nodes; - nodes.insert(std::cref(topo.add_node(id2, ep2, endpoint_dc_rack::default_location, node::state::normal))); - nodes.insert(std::cref(topo.add_or_update_endpoint(id1, ep1, endpoint_dc_rack::default_location, node::state::normal))); + nodes.insert(std::cref(topo.add_node(id2, endpoint_dc_rack::default_location, node::state::normal))); + nodes.insert(std::cref(topo.add_or_update_endpoint(id1, endpoint_dc_rack::default_location, node::state::normal))); - BOOST_REQUIRE_THROW(topo.add_node(id1, ep2, endpoint_dc_rack::default_location, node::state::normal), std::runtime_error); - BOOST_REQUIRE_THROW(topo.add_node(id2, ep1, endpoint_dc_rack::default_location, node::state::normal), std::runtime_error); - BOOST_REQUIRE_THROW(topo.add_node(id2, ep2, endpoint_dc_rack::default_location, node::state::normal), std::runtime_error); - BOOST_REQUIRE_THROW(topo.add_node(id2, ep3, endpoint_dc_rack::default_location, node::state::normal), std::runtime_error); - BOOST_REQUIRE_THROW(topo.add_node(id3, ep3, endpoint_dc_rack{}, node::state::normal), std::runtime_error); + BOOST_REQUIRE_THROW(topo.add_node(id2, endpoint_dc_rack::default_location, node::state::normal), std::runtime_error); + BOOST_REQUIRE_THROW(topo.add_node(id3, endpoint_dc_rack{}, node::state::normal), std::runtime_error); - nodes.insert(std::cref(topo.add_node(id3, ep3, endpoint_dc_rack::default_location, node::state::normal))); + nodes.insert(std::cref(topo.add_node(id3, endpoint_dc_rack::default_location, node::state::normal))); topo.for_each_node([&] (const locator::node& node) { BOOST_REQUIRE(nodes.erase(std::cref(node))); @@ -82,7 +77,7 @@ SEASTAR_THREAD_TEST_CASE(test_moving) { auto topo = topology(cfg); - topo.add_or_update_endpoint(id1, ep1, endpoint_dc_rack::default_location, node::state::normal); + topo.add_or_update_endpoint(id1, endpoint_dc_rack::default_location, node::state::normal); BOOST_REQUIRE(topo.this_node()->topology() == &topo); @@ -101,8 +96,6 @@ SEASTAR_THREAD_TEST_CASE(test_update_node) { auto id1 = host_id::create_random_id(); auto ep1 = gms::inet_address("127.0.0.1"); auto id2 = host_id::create_random_id(); - auto ep2 = gms::inet_address("127.0.0.2"); - auto ep3 = gms::inet_address("127.0.0.3"); topology::config cfg = { .this_endpoint = ep1, @@ -117,93 +110,42 @@ SEASTAR_THREAD_TEST_CASE(test_update_node) { set_abort_on_internal_error(true); }); - topo.add_or_update_endpoint(id1, std::nullopt, endpoint_dc_rack::default_location, node::state::normal); + topo.add_or_update_endpoint(id1, endpoint_dc_rack::default_location, node::state::normal); auto node = const_cast(topo.this_node()); - topo.update_node(*node, std::nullopt, ep1, std::nullopt, std::nullopt); + topo.update_node(*node, std::nullopt, std::nullopt, std::nullopt); BOOST_REQUIRE_EQUAL(topo.find_node(id1), node); - BOOST_REQUIRE_THROW(topo.update_node(*node, host_id::create_null_id(), std::nullopt, std::nullopt, std::nullopt), std::runtime_error); - BOOST_REQUIRE_THROW(topo.update_node(*node, id2, std::nullopt, std::nullopt, std::nullopt), std::runtime_error); + BOOST_REQUIRE_THROW(topo.update_node(*node, host_id::create_null_id(), std::nullopt, std::nullopt), std::runtime_error); + BOOST_REQUIRE_THROW(topo.update_node(*node, id2, std::nullopt, std::nullopt), std::runtime_error); BOOST_REQUIRE_EQUAL(topo.find_node(id1), node); BOOST_REQUIRE_EQUAL(topo.find_node(id2), nullptr); - topo.update_node(*node, std::nullopt, ep2, std::nullopt, std::nullopt); - BOOST_REQUIRE_EQUAL(topo.find_node(ep1), nullptr); - BOOST_REQUIRE_EQUAL(topo.find_node(ep2), node); - auto dc_rack1 = endpoint_dc_rack{"DC1", "RACK1"}; - topo.update_node(*node, std::nullopt, std::nullopt, dc_rack1, std::nullopt); + topo.update_node(*node, std::nullopt, dc_rack1, std::nullopt); BOOST_REQUIRE(topo.get_location(id1) == dc_rack1); auto dc_rack2 = endpoint_dc_rack{"DC2", "RACK2"}; - topo.update_node(*node, std::nullopt, std::nullopt, dc_rack2, std::nullopt); + topo.update_node(*node, std::nullopt, dc_rack2, std::nullopt); BOOST_REQUIRE(topo.get_location(id1) == dc_rack2); BOOST_REQUIRE_NE(node->get_state(), locator::node::state::being_decommissioned); - topo.update_node(*node, std::nullopt, std::nullopt, std::nullopt, locator::node::state::being_decommissioned); + topo.update_node(*node, std::nullopt, std::nullopt, locator::node::state::being_decommissioned); BOOST_REQUIRE_EQUAL(node->get_state(), locator::node::state::being_decommissioned); auto dc_rack3 = endpoint_dc_rack{"DC3", "RACK3"}; // Note: engage state option, but keep node::state value the same // to reproduce #13502 - topo.update_node(*node, std::nullopt, ep3, dc_rack3, locator::node::state::being_decommissioned); + topo.update_node(*node, std::nullopt, dc_rack3, locator::node::state::being_decommissioned); BOOST_REQUIRE_EQUAL(topo.find_node(id1), node); - BOOST_REQUIRE_EQUAL(topo.find_node(ep1), nullptr); - BOOST_REQUIRE_EQUAL(topo.find_node(ep2), nullptr); - BOOST_REQUIRE_EQUAL(topo.find_node(ep3), node); BOOST_REQUIRE(topo.get_location(id1) == dc_rack3); BOOST_REQUIRE_EQUAL(node->get_state(), locator::node::state::being_decommissioned); - - // In state::left the node will remain indexed only by its host_id - topo.update_node(*node, std::nullopt, std::nullopt, std::nullopt, locator::node::state::left); - BOOST_REQUIRE_EQUAL(topo.find_node(id1), node); - BOOST_REQUIRE_EQUAL(topo.find_node(ep1), nullptr); - BOOST_REQUIRE_EQUAL(topo.find_node(ep2), nullptr); - BOOST_REQUIRE_EQUAL(topo.find_node(ep3), nullptr); - BOOST_REQUIRE(topo.get_location(id1) == dc_rack3); - BOOST_REQUIRE_EQUAL(node->get_state(), locator::node::state::left); -} - -SEASTAR_THREAD_TEST_CASE(test_add_or_update_by_host_id) { - auto id1 = host_id::create_random_id(); - auto id2 = host_id::create_random_id(); - auto ep1 = gms::inet_address("127.0.0.1"); - - // In this test we check that add_or_update_endpoint searches by host_id first. - // We create two nodes, one matches by id, another - by ip, - // and SCYLLA_ASSERT that add_or_update_endpoint updates the first. - // We need to make the second node 'being_decommissioned', so that - // it gets removed from ip index and we don't get the non-unique IP error. - - auto topo = topology({ - .this_host_id = id1, - .local_dc_rack = endpoint_dc_rack::default_location, - }); - - topo.add_or_update_endpoint(id1, gms::inet_address{}, endpoint_dc_rack::default_location, node::state::normal); - topo.add_node(id2, ep1, endpoint_dc_rack::default_location, node::state::being_decommissioned); - - topo.add_or_update_endpoint(id1, ep1, std::nullopt, node::state::bootstrapping); - - auto* n = topo.find_node(id1); - BOOST_REQUIRE_EQUAL(n->get_state(), node::state::bootstrapping); - BOOST_REQUIRE_EQUAL(n->host_id(), id1); - BOOST_REQUIRE_EQUAL(n->endpoint(), ep1); - - auto* n2 = topo.find_node(ep1); - BOOST_REQUIRE_EQUAL(n, n2); - - auto* n3 = topo.find_node(id2); - BOOST_REQUIRE_EQUAL(n3->get_state(), node::state::being_decommissioned); - BOOST_REQUIRE_EQUAL(n3->host_id(), id2); - BOOST_REQUIRE_EQUAL(n3->endpoint(), ep1); } SEASTAR_THREAD_TEST_CASE(test_remove_endpoint) { @@ -214,7 +156,6 @@ SEASTAR_THREAD_TEST_CASE(test_remove_endpoint) { const auto id1 = host_id::create_random_id(); const auto ep1 = gms::inet_address("127.0.0.1"); const auto id2 = host_id::create_random_id(); - const auto ep2 = gms::inet_address("127.0.0.2"); const auto dc_rack1 = endpoint_dc_rack { .dc = "dc1", .rack = "rack1" @@ -232,8 +173,8 @@ SEASTAR_THREAD_TEST_CASE(test_remove_endpoint) { auto topo = topology(cfg); - topo.add_or_update_endpoint(id1, ep1, dc_rack1, node::state::normal); - topo.add_node(id2, ep2, dc_rack2, node::state::normal); + topo.add_or_update_endpoint(id1, dc_rack1, node::state::normal); + topo.add_node(id2, dc_rack2, node::state::normal); BOOST_REQUIRE_EQUAL(topo.get_datacenter_endpoints(), (dc_endpoints_t{{"dc1", {id1, id2}}})); BOOST_REQUIRE_EQUAL(topo.get_datacenter_racks(), (dc_racks_t{{"dc1", {{"rack1", {id1}}, {"rack2", {id2}}}}})); @@ -374,9 +315,7 @@ SEASTAR_THREAD_TEST_CASE(test_left_node_is_kept_outside_dc) { auto id1 = host_id::create_random_id(); auto ep1 = gms::inet_address("127.0.0.1"); auto id2 = host_id::create_random_id(); - auto ep2 = gms::inet_address("127.0.0.2"); auto id3 = host_id::create_random_id(); - auto ep3 = gms::inet_address("127.0.0.3"); const auto dc_rack1 = endpoint_dc_rack { .dc = "dc1", @@ -397,8 +336,8 @@ SEASTAR_THREAD_TEST_CASE(test_left_node_is_kept_outside_dc) { std::unordered_set> nodes; - nodes.insert(std::cref(topo.add_node(id2, ep2, dc_rack1, node::state::normal))); - nodes.insert(std::cref(topo.add_node(id3, ep3, dc_rack1, node::state::left))); + nodes.insert(std::cref(topo.add_node(id2, dc_rack1, node::state::normal))); + nodes.insert(std::cref(topo.add_node(id3, dc_rack1, node::state::left))); topo.for_each_node([&] (const locator::node& node) { BOOST_REQUIRE(node.host_id() != id3); diff --git a/test/boost/network_topology_strategy_test.cc b/test/boost/network_topology_strategy_test.cc index a4369457c6..a3a7333d9c 100644 --- a/test/boost/network_topology_strategy_test.cc +++ b/test/boost/network_topology_strategy_test.cc @@ -304,7 +304,7 @@ void simple_test() { for (const auto& [ring_point, endpoint, id] : ring_points) { std::unordered_set tokens; tokens.insert(token{tests::d2t(ring_point / ring_points.size())}); - topo.add_node(id, endpoint, make_endpoint_dc_rack(endpoint), locator::node::state::normal); + topo.add_node(id, make_endpoint_dc_rack(endpoint), locator::node::state::normal); co_await tm.update_normal_tokens(std::move(tokens), id); } }).get(); @@ -412,7 +412,7 @@ void heavy_origin_test() { stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> { auto& topo = tm.get_topology(); for (const auto& [ring_point, endpoint, id] : ring_points) { - topo.add_node(id, endpoint, make_endpoint_dc_rack(endpoint), locator::node::state::normal); + topo.add_node(id, make_endpoint_dc_rack(endpoint), locator::node::state::normal); co_await tm.update_normal_tokens(tokens[endpoint], id); } }).get(); @@ -484,7 +484,7 @@ SEASTAR_THREAD_TEST_CASE(NetworkTopologyStrategy_tablets_test) { for (const auto& [ring_point, endpoint, id] : ring_points) { std::unordered_set tokens; tokens.insert(token{tests::d2t(ring_point / ring_points.size())}); - topo.add_node(id, endpoint, make_endpoint_dc_rack(endpoint), locator::node::state::normal, shard_count); + topo.add_node(id, make_endpoint_dc_rack(endpoint), locator::node::state::normal, shard_count); tm.update_host_id(id, endpoint); co_await tm.update_normal_tokens(std::move(tokens), id); } @@ -576,7 +576,7 @@ static void test_random_balancing(sharded& snitch, gms::inet_address for (const auto& [ring_point, endpoint, id] : ring_points) { std::unordered_set tokens; tokens.insert(token{tests::d2t(ring_point / ring_points.size())}); - topo.add_node(id, endpoint, make_endpoint_dc_rack(endpoint), locator::node::state::normal, shard_count); + topo.add_node(id, make_endpoint_dc_rack(endpoint), locator::node::state::normal, shard_count); tm.update_host_id(id, endpoint); co_await tm.update_normal_tokens(std::move(tokens), id); } @@ -865,12 +865,11 @@ void generate_topology(topology& topo, const std::unordered_map out = std::fill_n(out, rf, std::cref(dc)); } - unsigned i = 0; for (auto& node : nodes) { const sstring& dc = dcs[udist(0, dcs.size() - 1)(e1)]; auto rc = racks_per_dc.at(dc); auto r = udist(0, rc)(e1); - topo.add_or_update_endpoint(node, inet_address((127u << 24) | ++i), endpoint_dc_rack{dc, to_sstring(r)}, locator::node::state::normal); + topo.add_or_update_endpoint(node, endpoint_dc_rack{dc, to_sstring(r)}, locator::node::state::normal); } } @@ -1145,21 +1144,15 @@ SEASTAR_THREAD_TEST_CASE(test_topology_tracks_local_node) { const node* n1 = stm.get()->get_topology().find_node(host1); BOOST_REQUIRE(n1); - n1 = stm.get()->get_topology().find_node(ip1); - BOOST_REQUIRE(n1); BOOST_REQUIRE(bool(n1->is_this_node())); BOOST_REQUIRE_EQUAL(n1->host_id(), host1); - BOOST_REQUIRE_EQUAL(n1->endpoint(), ip1); BOOST_REQUIRE(n1->dc_rack() == ip1_dc_rack); BOOST_REQUIRE(stm.get()->get_topology().get_location() == ip1_dc_rack); const node* n2 = stm.get()->get_topology().find_node(host2); BOOST_REQUIRE(n2); - n2 = stm.get()->get_topology().find_node(ip2); - BOOST_REQUIRE(n2); BOOST_REQUIRE(!bool(n2->is_this_node())); BOOST_REQUIRE_EQUAL(n2->host_id(), host2); - BOOST_REQUIRE_EQUAL(n2->endpoint(), ip2); BOOST_REQUIRE(n2->dc_rack() == endpoint_dc_rack::default_location); // Local node cannot be removed @@ -1172,8 +1165,6 @@ SEASTAR_THREAD_TEST_CASE(test_topology_tracks_local_node) { n1 = stm.get()->get_topology().find_node(host1); BOOST_REQUIRE(n1); - n1 = stm.get()->get_topology().find_node(ip1); - BOOST_REQUIRE(n1); // Removing node with no local node @@ -1184,22 +1175,19 @@ SEASTAR_THREAD_TEST_CASE(test_topology_tracks_local_node) { n2 = stm.get()->get_topology().find_node(host2); BOOST_REQUIRE(!n2); - n2 = stm.get()->get_topology().find_node(ip2); - BOOST_REQUIRE(!n2); // Repopulate after clear_gently() stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> { co_await tm.clear_gently(); - tm.update_host_id(host2, ip2); - tm.update_host_id(host1, ip1); // this_node added last on purpose + tm.update_topology(host2, std::nullopt, std::nullopt); + tm.update_topology(host1, std::nullopt, std::nullopt); // this_node added last on purpose }).get(); n1 = stm.get()->get_topology().find_node(host1); BOOST_REQUIRE(n1); BOOST_REQUIRE(bool(n1->is_this_node())); BOOST_REQUIRE_EQUAL(n1->host_id(), host1); - BOOST_REQUIRE_EQUAL(n1->endpoint(), ip1); BOOST_REQUIRE(n1->dc_rack() == ip1_dc_rack); BOOST_REQUIRE(stm.get()->get_topology().get_location() == ip1_dc_rack); @@ -1207,21 +1195,19 @@ SEASTAR_THREAD_TEST_CASE(test_topology_tracks_local_node) { BOOST_REQUIRE(n2); BOOST_REQUIRE(!bool(n2->is_this_node())); BOOST_REQUIRE_EQUAL(n2->host_id(), host2); - BOOST_REQUIRE_EQUAL(n2->endpoint(), ip2); BOOST_REQUIRE(n2->dc_rack() == endpoint_dc_rack::default_location); // get_location() should pick up endpoint_dc_rack from node info stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> { co_await tm.clear_gently(); - tm.get_topology().add_or_update_endpoint(host1, ip1, ip1_dc_rack_v2, node::state::being_decommissioned); + tm.get_topology().add_or_update_endpoint(host1, ip1_dc_rack_v2, node::state::being_decommissioned); }).get(); n1 = stm.get()->get_topology().find_node(host1); BOOST_REQUIRE(n1); BOOST_REQUIRE(bool(n1->is_this_node())); BOOST_REQUIRE_EQUAL(n1->host_id(), host1); - BOOST_REQUIRE_EQUAL(n1->endpoint(), ip1); BOOST_REQUIRE(n1->dc_rack() == ip1_dc_rack_v2); BOOST_REQUIRE(stm.get()->get_topology().get_location() == ip1_dc_rack_v2); } diff --git a/test/boost/tablets_test.cc b/test/boost/tablets_test.cc index 3e4c241810..1e8b62f8b5 100644 --- a/test/boost/tablets_test.cc +++ b/test/boost/tablets_test.cc @@ -1020,7 +1020,7 @@ SEASTAR_TEST_CASE(test_sharder) { auto table1 = table_id(utils::UUID_gen::get_time_UUID()); token_metadata tokm(token_metadata::config{ .topo_cfg{ .this_host_id = h1, .local_dc_rack = locator::endpoint_dc_rack::default_location } }); - tokm.get_topology().add_or_update_endpoint(h1, tokm.get_topology().my_address()); + tokm.get_topology().add_or_update_endpoint(h1); std::vector tablet_ids; { @@ -1235,7 +1235,7 @@ SEASTAR_TEST_CASE(test_intranode_sharding) { auto table1 = table_id(utils::UUID_gen::get_time_UUID()); token_metadata tokm(token_metadata::config{ .topo_cfg{ .this_host_id = h1, .local_dc_rack = locator::endpoint_dc_rack::default_location } }); - tokm.get_topology().add_or_update_endpoint(h1, tokm.get_topology().my_address()); + tokm.get_topology().add_or_update_endpoint(h1); auto leaving_replica = tablet_replica{h1, 5}; auto pending_replica = tablet_replica{h1, 7}; @@ -3340,7 +3340,7 @@ static void execute_tablet_for_new_rf_test(calculate_tablet_replicas_for_new_rf_ for (const auto& [ring_point, endpoint, id] : test_config.ring_points) { std::unordered_set tokens; tokens.insert(dht::token{tests::d2t(ring_point / test_config.ring_points.size())}); - topo.add_or_update_endpoint(id, endpoint, make_endpoint_dc_rack(endpoint), locator::node::state::normal, 1); + topo.add_or_update_endpoint(id, make_endpoint_dc_rack(endpoint), locator::node::state::normal, 1); tm.update_host_id(id, endpoint); co_await tm.update_normal_tokens(std::move(tokens), id); } diff --git a/test/lib/cql_test_env.cc b/test/lib/cql_test_env.cc index 82f5971340..07674fcfc4 100644 --- a/test/lib/cql_test_env.cc +++ b/test/lib/cql_test_env.cc @@ -698,11 +698,10 @@ private: host_id = linfo.host_id; _sys_ks.local().save_local_info(std::move(linfo), _snitch.local()->get_location(), my_address, my_address).get(); } - locator::shared_token_metadata::mutate_on_all_shards(_token_metadata, [hostid = host_id, &cfg_in] (locator::token_metadata& tm) { + locator::shared_token_metadata::mutate_on_all_shards(_token_metadata, [hostid = host_id] (locator::token_metadata& tm) { auto& topo = tm.get_topology(); topo.set_host_id_cfg(hostid); topo.add_or_update_endpoint(hostid, - cfg_in.broadcast_address, std::nullopt, locator::node::state::normal, smp::count); diff --git a/test/perf/perf_sort_by_proximity.cc b/test/perf/perf_sort_by_proximity.cc index 4d86bcea1d..438bf19ebd 100644 --- a/test/perf/perf_sort_by_proximity.cc +++ b/test/perf/perf_sort_by_proximity.cc @@ -53,7 +53,6 @@ struct sort_by_proximity_topology { auto id = locator::host_id{utils::UUID(0, i)}; nodes[dc][rack].emplace_back(id); topology.add_or_update_endpoint(id, - gms::inet_address((127u << 24) | i), locator::endpoint_dc_rack{format("dc{}", dc), format("rack{}", rack)}, locator::node::state::normal); } From 1e4b2f25dc7dca2f4f769db6cd8bbf91bb767d96 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Tue, 7 Jan 2025 15:50:20 +0200 Subject: [PATCH 56/57] locator: token_metadata: drop update_host_id() function that does nothing now --- locator/token_metadata.cc | 5 -- locator/token_metadata.hh | 9 --- service/storage_service.cc | 26 ++------ test/boost/locator_topology_test.cc | 3 - test/boost/network_topology_strategy_test.cc | 6 -- test/boost/tablets_test.cc | 62 +------------------- test/boost/token_metadata_test.cc | 37 ------------ test/perf/tablet_load_balancing.cc | 1 - 8 files changed, 9 insertions(+), 140 deletions(-) diff --git a/locator/token_metadata.cc b/locator/token_metadata.cc index 519a664754..e48c740db3 100644 --- a/locator/token_metadata.cc +++ b/locator/token_metadata.cc @@ -949,11 +949,6 @@ token_metadata::debug_show() const { _impl->debug_show(); } -void -token_metadata::update_host_id(const host_id& host_id, inet_address endpoint) { - // Do nothing for now. Remove later. -} - std::unordered_set token_metadata::get_host_ids() const { return _impl->get_host_ids(); diff --git a/locator/token_metadata.hh b/locator/token_metadata.hh index f2c27fcb14..988734eef2 100644 --- a/locator/token_metadata.hh +++ b/locator/token_metadata.hh @@ -227,15 +227,6 @@ public: const topology& get_topology() const; void debug_show() const; - /** - * Store an end-point to host ID mapping. Each ID must be unique, and - * cannot be changed after the fact. - * - * @param hostId - * @param endpoint - */ - void update_host_id(const locator::host_id& host_id, inet_address endpoint); - /** Return the unique host ID for an end-point. */ host_id get_host_id(inet_address endpoint) const; diff --git a/service/storage_service.cc b/service/storage_service.cc index c65fb5f265..fb65b690a4 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -420,12 +420,9 @@ future storage_service::sync_raft_t const auto& am =_address_map; const auto& t = _topology_state_machine._topology; - auto update_topology = [&] (locator::host_id id, std::optional ip, const replica_state& rs) { + auto update_topology = [&] (locator::host_id id, const replica_state& rs) { tmptr->update_topology(id, locator::endpoint_dc_rack{rs.datacenter, rs.rack}, to_topology_node_state(rs.state), rs.shard_count); - if (ip) { - tmptr->update_host_id(id, *ip); - } }; using host_id_to_ip_map_t = std::unordered_map; @@ -466,7 +463,7 @@ future storage_service::sync_raft_t } if (t.left_nodes_rs.find(id) != t.left_nodes_rs.end()) { - update_topology(host_id, std::nullopt, t.left_nodes_rs.at(id)); + update_topology(host_id, t.left_nodes_rs.at(id)); } // However if we do that, we need to also implement unbanning a node and do it if `removenode` is aborted. @@ -524,7 +521,7 @@ future storage_service::sync_raft_t co_await remove_ip(it->second, host_id, false); } } - update_topology(host_id, ip, rs); + update_topology(host_id, rs); co_await tmptr->update_normal_tokens(rs.ring.value().tokens, host_id); }; @@ -551,7 +548,7 @@ future storage_service::sync_raft_t // Save ip -> id mapping in peers table because we need it on restart, but do not save tokens until owned sys_ks_futures.push_back(_sys_ks.local().update_peer_info(*ip, host_id, {})); } - update_topology(host_id, ip, rs); + update_topology(host_id, rs); if (_topology_state_machine._topology.normal_nodes.empty()) { // This is the first node in the cluster. Insert the tokens as normal to the token ring early // so we can perform writes to regular 'distributed' tables during the bootstrap procedure @@ -579,7 +576,7 @@ future storage_service::sync_raft_t co_await process_normal_node(id, rs); break; } - update_topology(host_id, ip, rs); + update_topology(host_id, rs); co_await tmptr->update_normal_tokens(rs.ring.value().tokens, host_id); tmptr->add_leaving_endpoint(host_id); co_await update_topology_change_info(tmptr, ::format("{} {}/{}", rs.state, id, ip)); @@ -592,7 +589,7 @@ future storage_service::sync_raft_t tmptr->update_topology(replaced_host_id, std::nullopt, locator::node::state::being_replaced); tmptr->add_replacing_endpoint(replaced_host_id, host_id); if (rs.ring.has_value()) { - update_topology(host_id, ip, rs); + update_topology(host_id, rs); co_await update_topology_change_info(tmptr, ::format("replacing {}/{} by {}/{}", replaced_id, existing_ip.value_or(gms::inet_address{}), id, ip)); } } @@ -1551,7 +1548,6 @@ future<> storage_service::join_topology(sharded tmptr->update_topology(tmptr->get_my_id(), std::nullopt, locator::node::state::replacing); tmptr->update_topology(ri->host_id, std::move(ri->dc_rack), locator::node::state::being_replaced); co_await tmptr->update_normal_tokens(bootstrap_tokens, ri->host_id); - tmptr->update_host_id(ri->host_id, *replace_address); tmptr->add_replacing_endpoint(ri->host_id, tmptr->get_my_id()); replaced_host_id = ri->host_id; @@ -1561,7 +1557,6 @@ future<> storage_service::join_topology(sharded // therefore we need to "inject" their state here after we // learn about them in the shadow round initiated in `prepare_replacement_info`. for (const auto& [host_id, st] : ri->ignore_nodes) { - tmptr->update_host_id(host_id, st.endpoint); if (st.opt_dc_rack) { tmptr->update_topology(host_id, st.opt_dc_rack, locator::node::state::normal); } @@ -2314,7 +2309,6 @@ future<> storage_service::handle_state_bootstrap(inet_address endpoint, gms::per } tmptr->update_topology(host_id, get_dc_rack_for(host_id), locator::node::state::bootstrapping); tmptr->add_bootstrap_tokens(tokens, host_id); - tmptr->update_host_id(host_id, endpoint); co_await update_topology_change_info(tmptr, ::format("handle_state_bootstrap {}", endpoint)); co_await replicate_to_all_cores(std::move(tmptr)); @@ -2382,8 +2376,6 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit slogger.warn("Host ID collision for {} between {} and {}; {} is the new owner", host_id, *existing, endpoint, endpoint); do_remove_node(*existing); - slogger.info("Set host_id={} to be owned by node={}, existing={}", host_id, endpoint, *existing); - tmptr->update_host_id(host_id, endpoint); } else { // The new IP has smaller generation than the existing one, // we are going to remove it, so we add it to the endpoints_to_remove. @@ -2434,8 +2426,6 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit _normal_state_handled_on_boot.insert(endpoint); co_return; } - slogger.info("Set host_id={} to be owned by node={}", host_id, endpoint); - tmptr->update_host_id(host_id, endpoint); } // Tokens owned by the handled endpoint. @@ -2643,7 +2633,6 @@ future<> storage_service::on_alive(gms::inet_address endpoint, gms::endpoint_sta auto tmlock = co_await get_token_metadata_lock(); auto tmptr = co_await get_mutable_token_metadata_ptr(); const auto dc_rack = get_dc_rack_for(host_id); - tmptr->update_host_id(host_id, endpoint); tmptr->update_topology(host_id, dc_rack); co_await replicate_to_all_cores(std::move(tmptr)); } @@ -3047,7 +3036,6 @@ future<> storage_service::join_cluster(sharded& slogger.debug("Loaded tokens: endpoint={}/{} dc={} rack={} tokens={}", host_id, st.endpoint, dc_rack.dc, dc_rack.rack, st.tokens); tmptr->update_topology(host_id, dc_rack, locator::node::state::normal); co_await tmptr->update_normal_tokens(st.tokens, host_id); - tmptr->update_host_id(host_id, st.endpoint); // gossiping hasn't started yet // so no need to lock the endpoint co_await _gossiper.add_saved_endpoint(host_id, st, gms::null_permit_id); @@ -4567,7 +4555,6 @@ future storage_service::node_ops_cmd_handler(gms::inet_ad // In handle_state_normal we'll remap the IP to the new host_id. tmptr->update_topology(existing_node_id, std::nullopt, locator::node::state::being_replaced); tmptr->update_topology(replacing_node_id, get_dc_rack_for(replacing_node_id), locator::node::state::replacing); - tmptr->update_host_id(replacing_node_id, replacing_node); tmptr->add_replacing_endpoint(existing_node_id, replacing_node_id); return make_ready_future<>(); }).get(); @@ -4631,7 +4618,6 @@ future storage_service::node_ops_cmd_handler(gms::inet_ad const auto dc_rack = get_dc_rack_for(host_id); slogger.info("bootstrap[{}]: Added node={}/{} as bootstrap, coordinator={}/{}", req.ops_uuid, endpoint, host_id, coordinator, *coordinator_host_id); - tmptr->update_host_id(host_id, endpoint); tmptr->update_topology(host_id, dc_rack, locator::node::state::bootstrapping); tmptr->add_bootstrap_tokens(tokens, host_id); } diff --git a/test/boost/locator_topology_test.cc b/test/boost/locator_topology_test.cc index f42dd74356..1256a33a49 100644 --- a/test/boost/locator_topology_test.cc +++ b/test/boost/locator_topology_test.cc @@ -215,9 +215,6 @@ SEASTAR_THREAD_TEST_CASE(test_load_sketch) { }); stm.mutate_token_metadata([&] (token_metadata& tm) { - tm.update_host_id(host1, ip1); - tm.update_host_id(host2, ip2); - tm.update_host_id(host3, ip3); tm.update_topology(host1, locator::endpoint_dc_rack::default_location, node::state::normal, node1_shard_count); tm.update_topology(host2, locator::endpoint_dc_rack::default_location, node::state::normal, node2_shard_count); tm.update_topology(host3, locator::endpoint_dc_rack::default_location, node::state::normal, node3_shard_count); diff --git a/test/boost/network_topology_strategy_test.cc b/test/boost/network_topology_strategy_test.cc index a3a7333d9c..312f79a954 100644 --- a/test/boost/network_topology_strategy_test.cc +++ b/test/boost/network_topology_strategy_test.cc @@ -485,7 +485,6 @@ SEASTAR_THREAD_TEST_CASE(NetworkTopologyStrategy_tablets_test) { std::unordered_set tokens; tokens.insert(token{tests::d2t(ring_point / ring_points.size())}); topo.add_node(id, make_endpoint_dc_rack(endpoint), locator::node::state::normal, shard_count); - tm.update_host_id(id, endpoint); co_await tm.update_normal_tokens(std::move(tokens), id); } }).get(); @@ -577,7 +576,6 @@ static void test_random_balancing(sharded& snitch, gms::inet_address std::unordered_set tokens; tokens.insert(token{tests::d2t(ring_point / ring_points.size())}); topo.add_node(id, make_endpoint_dc_rack(endpoint), locator::node::state::normal, shard_count); - tm.update_host_id(id, endpoint); co_await tm.update_normal_tokens(std::move(tokens), id); } }).get(); @@ -1115,7 +1113,6 @@ SEASTAR_THREAD_TEST_CASE(test_topology_tracks_local_node) { auto host1 = host_id(utils::make_random_uuid()); auto host2 = host_id(utils::make_random_uuid()); - auto host3 = host_id(utils::make_random_uuid()); auto ip1_dc_rack = endpoint_dc_rack{ "dc1", "rack_ip1" }; auto ip1_dc_rack_v2 = endpoint_dc_rack{ "dc1", "rack_ip1_v2" }; @@ -1134,8 +1131,6 @@ SEASTAR_THREAD_TEST_CASE(test_topology_tracks_local_node) { BOOST_REQUIRE(stm.get()->get_topology().get_location() == ip1_dc_rack); stm.mutate_token_metadata([&] (token_metadata& tm) { - tm.update_host_id(host2, ip2); - tm.update_host_id(host1, ip1); // this_node added last on purpose // Need to move to non left or none state in order to be indexed by ip tm.update_topology(host1, {}, locator::node::state::normal); tm.update_topology(host2, {}, locator::node::state::normal); @@ -1159,7 +1154,6 @@ SEASTAR_THREAD_TEST_CASE(test_topology_tracks_local_node) { stm.mutate_token_metadata([&] (token_metadata& tm) { tm.remove_endpoint(host1); - tm.update_host_id(host3, ip3); return make_ready_future<>(); }).get(); diff --git a/test/boost/tablets_test.cc b/test/boost/tablets_test.cc index 1e8b62f8b5..13bd5a52d9 100644 --- a/test/boost/tablets_test.cc +++ b/test/boost/tablets_test.cc @@ -773,9 +773,6 @@ SEASTAR_TEST_CASE(test_get_shard) { tablet_id tid1(0); stm.mutate_token_metadata([&] (token_metadata& tm) { - tm.update_host_id(h1, ip1); - tm.update_host_id(h2, ip2); - tm.update_host_id(h3, ip3); tm.update_topology(h1, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count); tm.update_topology(h2, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count); tm.update_topology(h3, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count); @@ -1515,9 +1512,6 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_with_empty_node) { }); stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> { - tm.update_host_id(host1, ip1); - tm.update_host_id(host2, ip2); - tm.update_host_id(host3, ip3); tm.update_topology(host1, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count); tm.update_topology(host2, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count); tm.update_topology(host3, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count); @@ -1616,9 +1610,6 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_with_skiplist) { }); stm.mutate_token_metadata([&] (token_metadata& tm) { - tm.update_host_id(host1, ip1); - tm.update_host_id(host2, ip2); - tm.update_host_id(host3, ip3); tm.update_topology(host1, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count); tm.update_topology(host2, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count); tm.update_topology(host3, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count); @@ -1707,9 +1698,6 @@ SEASTAR_THREAD_TEST_CASE(test_decommission_rf_met) { stm.mutate_token_metadata([&](token_metadata& tm) -> future<> { const unsigned shard_count = 2; - tm.update_host_id(host1, ip1); - tm.update_host_id(host2, ip2); - tm.update_host_id(host3, ip3); tm.update_topology(host1, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count); tm.update_topology(host2, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count); tm.update_topology(host3, locator::endpoint_dc_rack::default_location, node::state::being_decommissioned, @@ -1807,10 +1795,6 @@ SEASTAR_THREAD_TEST_CASE(test_table_creation_during_decommission) { const unsigned shard_count = 1; stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> { - tm.update_host_id(host1, ip1); - tm.update_host_id(host2, ip2); - tm.update_host_id(host3, ip3); - tm.update_host_id(host4, ip4); tm.update_topology(host1, dcrack, node::state::normal, shard_count); tm.update_topology(host2, dcrack, node::state::normal, shard_count); tm.update_topology(host3, dcrack, node::state::being_decommissioned, shard_count); @@ -1877,10 +1861,6 @@ SEASTAR_THREAD_TEST_CASE(test_decommission_two_racks) { stm.mutate_token_metadata([&](token_metadata& tm) -> future<> { const unsigned shard_count = 1; - tm.update_host_id(host1, ip1); - tm.update_host_id(host2, ip2); - tm.update_host_id(host3, ip3); - tm.update_host_id(host4, ip4); tm.update_topology(host1, racks[0], node::state::normal, shard_count); tm.update_topology(host2, racks[1], node::state::normal, shard_count); tm.update_topology(host3, racks[0], node::state::normal, shard_count); @@ -1984,10 +1964,6 @@ SEASTAR_THREAD_TEST_CASE(test_decommission_rack_load_failure) { stm.mutate_token_metadata([&](token_metadata& tm) -> future<> { const unsigned shard_count = 1; - tm.update_host_id(host1, ip1); - tm.update_host_id(host2, ip2); - tm.update_host_id(host3, ip3); - tm.update_host_id(host4, ip4); tm.update_topology(host1, racks[0], node::state::normal, shard_count); tm.update_topology(host2, racks[0], node::state::normal, shard_count); tm.update_topology(host3, racks[0], node::state::normal, shard_count); @@ -2063,9 +2039,6 @@ SEASTAR_THREAD_TEST_CASE(test_decommission_rf_not_met) { stm.mutate_token_metadata([&](token_metadata& tm) -> future<> { const unsigned shard_count = 2; - tm.update_host_id(host1, ip1); - tm.update_host_id(host2, ip2); - tm.update_host_id(host3, ip3); tm.update_topology(host1, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count); tm.update_topology(host2, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count); tm.update_topology(host3, locator::endpoint_dc_rack::default_location, node::state::being_decommissioned, @@ -2122,9 +2095,6 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_works_with_in_progress_transitions) }); stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> { - tm.update_host_id(host1, ip1); - tm.update_host_id(host2, ip2); - tm.update_host_id(host3, ip3); tm.update_topology(host1, locator::endpoint_dc_rack::default_location, node::state::normal, 1); tm.update_topology(host2, locator::endpoint_dc_rack::default_location, node::state::normal, 1); tm.update_topology(host3, locator::endpoint_dc_rack::default_location, node::state::normal, 2); @@ -2196,9 +2166,6 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancer_shuffle_mode) { }); stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> { - tm.update_host_id(host1, ip1); - tm.update_host_id(host2, ip2); - tm.update_host_id(host3, ip3); tm.update_topology(host1, locator::endpoint_dc_rack::default_location, node::state::normal, 1); tm.update_topology(host2, locator::endpoint_dc_rack::default_location, node::state::normal, 1); tm.update_topology(host3, locator::endpoint_dc_rack::default_location, node::state::normal, 2); @@ -2263,10 +2230,6 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_with_two_empty_nodes) { }); stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> { - tm.update_host_id(host1, ip1); - tm.update_host_id(host2, ip2); - tm.update_host_id(host3, ip3); - tm.update_host_id(host4, ip4); tm.update_topology(host1, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count); tm.update_topology(host2, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count); tm.update_topology(host3, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count); @@ -2328,9 +2291,6 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_with_asymmetric_node_capacity) { }); stm.mutate_token_metadata([&](token_metadata& tm) -> future<> { - tm.update_host_id(host1, ip1); - tm.update_host_id(host2, ip2); - tm.update_host_id(host3, ip3); tm.update_topology(host1, locator::endpoint_dc_rack::default_location, node::state::being_decommissioned, 8); tm.update_topology(host2, locator::endpoint_dc_rack::default_location, node::state::normal, 1); tm.update_topology(host3, locator::endpoint_dc_rack::default_location, node::state::normal, 7); @@ -2395,8 +2355,6 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancer_disabling) { // host1 is loaded and host2 is empty, resulting in an imbalance. // host1's shard 0 is loaded and shard 1 is empty, resulting in intra-node imbalance. stm.mutate_token_metadata([&] (auto& tm) -> future<> { - tm.update_host_id(host1, ip1); - tm.update_host_id(host2, ip2); tm.update_topology(host1, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count); tm.update_topology(host2, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count); co_await tm.update_normal_tokens(std::unordered_set{token(tests::d2t(1. / 2))}, host1); @@ -2487,8 +2445,6 @@ SEASTAR_THREAD_TEST_CASE(test_drained_node_is_not_balanced_internally) { }); stm.mutate_token_metadata([&] (locator::token_metadata& tm) -> future<> { - tm.update_host_id(host1, ip1); - tm.update_host_id(host2, ip2); tm.update_topology(host1, locator::endpoint_dc_rack::default_location, locator::node::state::being_removed, shard_count); tm.update_topology(host2, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count); co_await tm.update_normal_tokens(std::unordered_set{token(tests::d2t(1. / 2))}, host1); @@ -2537,7 +2493,6 @@ SEASTAR_THREAD_TEST_CASE(test_plan_fails_when_removing_last_replica) { }); stm.mutate_token_metadata([&] (locator::token_metadata& tm) -> future<> { - tm.update_host_id(host1, ip1); tm.update_topology(host1, locator::endpoint_dc_rack::default_location, locator::node::state::being_removed, shard_count); co_await tm.update_normal_tokens(std::unordered_set{token(tests::d2t(1. / 2))}, host1); @@ -2589,9 +2544,6 @@ SEASTAR_THREAD_TEST_CASE(test_skiplist_is_ignored_when_draining) { }); stm.mutate_token_metadata([&] (locator::token_metadata& tm) -> future<> { - tm.update_host_id(host1, ip1); - tm.update_host_id(host2, ip2); - tm.update_host_id(host3, ip3); tm.update_topology(host1, locator::endpoint_dc_rack::default_location, locator::node::state::being_removed, shard_count); tm.update_topology(host2, locator::endpoint_dc_rack::default_location, locator::node::state::normal, shard_count); tm.update_topology(host3, locator::endpoint_dc_rack::default_location, locator::node::state::normal, shard_count); @@ -2699,10 +2651,8 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_with_random_load) { int i = 0; for (auto h : hosts) { - auto ip = inet_address(format("192.168.0.{}", ++i)); auto shard_count = 2; - tm.update_host_id(h, ip); - auto rack = racks[i % racks.size()]; + auto rack = racks[++i % racks.size()]; tm.update_topology(h, rack, node::state::normal, shard_count); if (h != hosts[0]) { // Leave the first host empty by making it invisible to allocation algorithm. @@ -2871,13 +2821,11 @@ static void do_test_load_balancing_merge_colocation(cql_test_env& e, const int n int i = 0; for (auto h : hosts) { - auto ip = inet_address(format("192.168.0.{}", ++i)); - tm.update_host_id(h, ip); - auto rack = racks[i % racks.size()]; + auto rack = racks[++i % racks.size()]; hosts_by_rack[rack.rack].push_back(h); tm.update_topology(h, rack, node::state::normal, shard_count); co_await tm.update_normal_tokens(std::unordered_set{token(tests::d2t(float(i) / hosts.size()))}, h); - testlog.debug("adding host {}, ip {}, rack {}, token {}", h, ip, rack.rack, token(tests::d2t(1. / hosts.size()))); + testlog.debug("adding host {}, rack {}, token {}", h, rack.rack, token(tests::d2t(1. / hosts.size()))); } tablet_map tmap(initial_tablets); @@ -3056,8 +3004,6 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_resize_requests) { }); stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> { - tm.update_host_id(host1, ip1); - tm.update_host_id(host2, ip2); tm.update_topology(host1, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count); tm.update_topology(host2, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count); co_await tm.update_normal_tokens(std::unordered_set{token(tests::d2t(1. / 2))}, host1); @@ -3341,7 +3287,6 @@ static void execute_tablet_for_new_rf_test(calculate_tablet_replicas_for_new_rf_ std::unordered_set tokens; tokens.insert(dht::token{tests::d2t(ring_point / test_config.ring_points.size())}); topo.add_or_update_endpoint(id, make_endpoint_dc_rack(endpoint), locator::node::state::normal, 1); - tm.update_host_id(id, endpoint); co_await tm.update_normal_tokens(std::move(tokens), id); } }).get(); @@ -3362,7 +3307,6 @@ static void execute_tablet_for_new_rf_test(calculate_tablet_replicas_for_new_rf_ stm.mutate_token_metadata([&] (token_metadata& tm) { for (size_t i = 0; i < test_config.ring_points.size(); ++i) { auto& [ring_point, endpoint, id] = test_config.ring_points[i]; - tm.update_host_id(id, endpoint); tm.update_topology(id, make_endpoint_dc_rack(endpoint), node::state::normal, nodes_shard_count[i]); } return make_ready_future<>(); diff --git a/test/boost/token_metadata_test.cc b/test/boost/token_metadata_test.cc index 96aa2f9b0f..ba92fe80e8 100644 --- a/test/boost/token_metadata_test.cc +++ b/test/boost/token_metadata_test.cc @@ -50,16 +50,12 @@ namespace { } SEASTAR_THREAD_TEST_CASE(test_pending_and_read_endpoints_for_everywhere_strategy) { - const auto e1 = inet_address("192.168.0.1"); - const auto e2 = inet_address("192.168.0.2"); const auto e1_id = gen_id(1); const auto e2_id = gen_id(2); const auto t1 = dht::token::from_int64(10); const auto t2 = dht::token::from_int64(20); auto token_metadata = create_token_metadata(e1_id); - token_metadata->update_host_id(e1_id, e1); - token_metadata->update_host_id(e2_id, e2); token_metadata->update_topology(e1_id, get_dc_rack(e1_id), node::state::normal); token_metadata->update_topology(e2_id, get_dc_rack(e2_id), node::state::normal); token_metadata->update_normal_tokens({t1}, e1_id).get(); @@ -74,16 +70,12 @@ SEASTAR_THREAD_TEST_CASE(test_pending_and_read_endpoints_for_everywhere_strategy } SEASTAR_THREAD_TEST_CASE(test_pending_endpoints_for_bootstrap_second_node) { - const auto e1 = inet_address("192.168.0.1"); const auto t1 = dht::token::from_int64(1); - const auto e2 = inet_address("192.168.0.2"); const auto t2 = dht::token::from_int64(100); const auto e1_id = gen_id(1); const auto e2_id = gen_id(2); auto token_metadata = create_token_metadata(e1_id); - token_metadata->update_host_id(e1_id, e1); - token_metadata->update_host_id(e2_id, e2); token_metadata->update_topology(e1_id, get_dc_rack(e1_id), node::state::normal); token_metadata->update_topology(e2_id, get_dc_rack(e2_id), node::state::normal); token_metadata->update_normal_tokens({t1}, e1_id).get(); @@ -107,17 +99,11 @@ SEASTAR_THREAD_TEST_CASE(test_pending_endpoints_for_bootstrap_with_replicas) { const auto t10 = dht::token::from_int64(10); const auto t100 = dht::token::from_int64(100); const auto t1000 = dht::token::from_int64(1000); - const auto e1 = inet_address("192.168.0.1"); - const auto e2 = inet_address("192.168.0.2"); - const auto e3 = inet_address("192.168.0.3"); const auto e1_id = gen_id(1); const auto e2_id = gen_id(2); const auto e3_id = gen_id(3); auto token_metadata = create_token_metadata(e1_id); - token_metadata->update_host_id(e1_id, e1); - token_metadata->update_host_id(e2_id, e2); - token_metadata->update_host_id(e3_id, e3); token_metadata->update_topology(e1_id, get_dc_rack(e1_id), node::state::normal); token_metadata->update_topology(e2_id, get_dc_rack(e2_id), node::state::normal); token_metadata->update_topology(e3_id, get_dc_rack(e3_id), node::state::normal); @@ -143,17 +129,11 @@ SEASTAR_THREAD_TEST_CASE(test_pending_endpoints_for_leave_with_replicas) { const auto t10 = dht::token::from_int64(10); const auto t100 = dht::token::from_int64(100); const auto t1000 = dht::token::from_int64(1000); - const auto e1 = inet_address("192.168.0.1"); - const auto e2 = inet_address("192.168.0.2"); - const auto e3 = inet_address("192.168.0.3"); const auto e1_id = gen_id(1); const auto e2_id = gen_id(2); const auto e3_id = gen_id(3); auto token_metadata = create_token_metadata(e1_id); - token_metadata->update_host_id(e1_id, e1); - token_metadata->update_host_id(e2_id, e2); - token_metadata->update_host_id(e3_id, e3); token_metadata->update_topology(e1_id, get_dc_rack(e1_id), node::state::normal); token_metadata->update_topology(e2_id, get_dc_rack(e2_id), node::state::normal); token_metadata->update_topology(e3_id, get_dc_rack(e3_id), node::state::normal); @@ -180,20 +160,12 @@ SEASTAR_THREAD_TEST_CASE(test_pending_endpoints_for_replace_with_replicas) { const auto t10 = dht::token::from_int64(10); const auto t100 = dht::token::from_int64(100); const auto t1000 = dht::token::from_int64(1000); - const auto e1 = inet_address("192.168.0.1"); - const auto e2 = inet_address("192.168.0.2"); - const auto e3 = inet_address("192.168.0.3"); - const auto e4 = inet_address("192.168.0.4"); const auto e1_id = gen_id(1); const auto e2_id = gen_id(2); const auto e3_id = gen_id(3); const auto e4_id = gen_id(4); auto token_metadata = create_token_metadata(e1_id); - token_metadata->update_host_id(e1_id, e1); - token_metadata->update_host_id(e2_id, e2); - token_metadata->update_host_id(e3_id, e3); - token_metadata->update_host_id(e4_id, e4); token_metadata->update_topology(e1_id, get_dc_rack(e1_id), node::state::normal); token_metadata->update_topology(e2_id, get_dc_rack(e2_id), node::state::normal); token_metadata->update_topology(e3_id, get_dc_rack(e3_id), node::state::normal); @@ -225,17 +197,11 @@ SEASTAR_THREAD_TEST_CASE(test_endpoints_for_reading_when_bootstrap_with_replicas const auto t10 = dht::token::from_int64(10); const auto t100 = dht::token::from_int64(100); const auto t1000 = dht::token::from_int64(1000); - const auto e1 = inet_address("192.168.0.1"); - const auto e2 = inet_address("192.168.0.2"); - const auto e3 = inet_address("192.168.0.3"); const auto e1_id = gen_id(1); const auto e2_id = gen_id(2); const auto e3_id = gen_id(3); auto token_metadata = create_token_metadata(e1_id); - token_metadata->update_host_id(e1_id, e1); - token_metadata->update_host_id(e2_id, e2); - token_metadata->update_host_id(e3_id, e3); token_metadata->update_topology(e1_id, get_dc_rack(e1_id), node::state::normal); token_metadata->update_topology(e2_id, get_dc_rack(e2_id), node::state::normal); token_metadata->update_topology(e3_id, get_dc_rack(e3_id), node::state::normal); @@ -285,17 +251,14 @@ SEASTAR_THREAD_TEST_CASE(test_endpoints_for_reading_when_bootstrap_with_replicas SEASTAR_THREAD_TEST_CASE(test_replace_node_with_same_endpoint) { const auto t1 = dht::token::from_int64(1); - const auto e1 = inet_address("192.168.0.1"); const auto e1_id1 = gen_id(1); const auto e1_id2 = gen_id(2); auto token_metadata = create_token_metadata(e1_id2); - token_metadata->update_host_id(e1_id1, e1); token_metadata->update_topology(e1_id1, get_dc_rack(e1_id1), node::state::being_replaced); token_metadata->update_normal_tokens({t1}, e1_id1).get(); token_metadata->update_topology(e1_id2, get_dc_rack(e1_id2), node::state::replacing); - token_metadata->update_host_id(e1_id2, e1); token_metadata->add_replacing_endpoint(e1_id1, e1_id2); diff --git a/test/perf/tablet_load_balancing.cc b/test/perf/tablet_load_balancing.cc index 9a9e5178cb..c0625b0f31 100644 --- a/test/perf/tablet_load_balancing.cc +++ b/test/perf/tablet_load_balancing.cc @@ -243,7 +243,6 @@ future test_load_balancing_with_many_tables(params p, bool tablet_aware }; auto add_host_to_topology = [&] (token_metadata& tm, int i) -> future<> { - tm.update_host_id(hosts[i], ips[i]); tm.update_topology(hosts[i], rack1, node::state::normal, shard_count); co_await tm.update_normal_tokens(std::unordered_set{token(tests::d2t(float(i) / hosts.size()))}, hosts[i]); }; From a40e8104426969d4a9e543015b8237ac97ddfbd0 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 8 Jan 2025 11:37:26 +0200 Subject: [PATCH 57/57] hint manager: do not translate ip to id in case hint manager is stopped already Since we do not stop storage proxy on shutdown this code can be called during shutdown when address map is no longer usable. --- db/hints/manager.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/db/hints/manager.cc b/db/hints/manager.cc index 1f996d193f..77ca698b2a 100644 --- a/db/hints/manager.cc +++ b/db/hints/manager.cc @@ -433,9 +433,8 @@ bool manager::have_ep_manager(const std::variant fm, tracing::trace_state_ptr tr_state) noexcept { - auto ip = _gossiper_anchor->get_address_map().get(host_id); if (utils::get_local_injector().enter("reject_incoming_hints")) { - manager_logger.debug("Rejecting a hint to {} / {} due to an error injection", host_id, ip); + manager_logger.debug("Rejecting a hint to {} due to an error injection", host_id); ++_stats.dropped; return false; } @@ -446,6 +445,8 @@ bool manager::store_hint(endpoint_id host_id, schema_ptr s, lw_shared_ptrget_address_map().get(host_id); + try { manager_logger.trace("Going to store a hint to {}", host_id); tracing::trace(tr_state, "Going to store a hint to {}", host_id);