Merge 'drop ip addresses from token metadata' from Gleb

Now that all topology related code uses host ids there is not point to
maintain ip to id (and back) mappings in the token metadata. After the
patch the mapping will be maintained in the gossiper only. The rest of
the system will use host ids and in rare cases where translation is
needed (mostly for UX compatibility reasons) the translation will be
done using gossiper.

Fixes: scylladb/scylla#21777

* 'gleb/drop-ip-from-tm-v3' of github.com:scylladb/scylla-dev: (57 commits)
  hint manager: do not translate ip to id in case hint manager is stopped already
  locator: token_metadata: drop update_host_id() function that does nothing now
  locator: topology: drop indexing by ips
  repair: drop unneeded code
  storage_service: use host_id to look for a node in on_alive handler
  storage_proxy: translate ips to ids in forward array using gossiper
  locator: topology: remove unused functions
  storage_service: check for outdated ip in on_change notification in the peers table
  storage_proxy: translate id to ip using address map in tablets's describe_ring code instead of taking one from the topology
  topology coordinator: change connection dropping code to work on host ids
  cql3: report host id instead of ip in error during SELECT FROM MUTATION_FRAGMENTS query
  locator: drop unused function from tablet_effective_replication_map
  api: view_build_statuses: do not use IP from the topology, but translate id to ip using address map instead
  locator: token_metadata: remove unused ip based functions
  locator: network_topology_strategy: use host_id based function to check number of endpoints in dcs
  gossiper: drop get_unreachable_token_owners functions
  storage_service: use gossiper to map ip to id in node_ops operations
  storage_service: fix indentation after the last patch
  storage_service: drop loops from node ops replace_prepare handling since there can be only one replacing node
  token_metadata: drop no longer used functions
  ...
This commit is contained in:
Kamil Braun
2025-01-17 11:00:52 +01:00
57 changed files with 534 additions and 1085 deletions

View File

@@ -217,7 +217,7 @@ protected:
// If the DC does not exist, we return an empty list - not an error.
sstring query_dc = req->get_query_param("dc");
sstring local_dc = query_dc.empty() ? topology.get_datacenter() : query_dc;
std::unordered_set<gms::inet_address> local_dc_nodes;
std::unordered_set<locator::host_id> local_dc_nodes;
const auto& endpoints = topology.get_datacenter_endpoints();
auto dc_it = endpoints.find(local_dc);
if (dc_it != endpoints.end()) {
@@ -227,7 +227,8 @@ protected:
// DC, unless a single rack is selected by the "rack" query option.
// If the rack does not exist, we return an empty list - not an error.
sstring query_rack = req->get_query_param("rack");
for (auto& ip : local_dc_nodes) {
for (auto& id : local_dc_nodes) {
auto ip = _gossiper.get_address_map().get(id);
if (!query_rack.empty()) {
auto rack = _gossiper.get_application_state_value(ip, gms::application_state::RACK);
if (rack != query_rack) {

View File

@@ -153,8 +153,8 @@ future<> unset_server_sstables_loader(http_context& ctx) {
return ctx.http_server.set_routes([&ctx] (routes& r) { unset_sstables_loader(ctx, r); });
}
future<> set_server_view_builder(http_context& ctx, sharded<db::view::view_builder>& vb) {
return ctx.http_server.set_routes([&ctx, &vb] (routes& r) { set_view_builder(ctx, r, vb); });
future<> set_server_view_builder(http_context& ctx, sharded<db::view::view_builder>& vb, sharded<gms::gossiper>& g) {
return ctx.http_server.set_routes([&ctx, &vb, &g] (routes& r) { set_view_builder(ctx, r, vb, g); });
}
future<> unset_server_view_builder(http_context& ctx) {
@@ -188,8 +188,8 @@ future<> unset_server_snapshot(http_context& ctx) {
return ctx.http_server.set_routes([&ctx] (routes& r) { unset_snapshot(ctx, r); });
}
future<> set_server_token_metadata(http_context& ctx, sharded<locator::shared_token_metadata>& tm) {
return ctx.http_server.set_routes([&ctx, &tm] (routes& r) { set_token_metadata(ctx, r, tm); });
future<> set_server_token_metadata(http_context& ctx, sharded<locator::shared_token_metadata>& tm, sharded<gms::gossiper>& g) {
return ctx.http_server.set_routes([&ctx, &tm, &g] (routes& r) { set_token_metadata(ctx, r, tm, g); });
}
future<> unset_server_token_metadata(http_context& ctx) {
@@ -273,10 +273,10 @@ future<> unset_server_cache(http_context& ctx) {
return ctx.http_server.set_routes([&ctx] (routes& r) { unset_cache_service(ctx, r); });
}
future<> set_hinted_handoff(http_context& ctx, sharded<service::storage_proxy>& proxy) {
future<> set_hinted_handoff(http_context& ctx, sharded<service::storage_proxy>& proxy, sharded<gms::gossiper>& g) {
return register_api(ctx, "hinted_handoff",
"The hinted handoff API", [&proxy] (http_context& ctx, routes& r) {
set_hinted_handoff(ctx, r, proxy);
"The hinted handoff API", [&proxy, &g] (http_context& ctx, routes& r) {
set_hinted_handoff(ctx, r, proxy, g);
});
}

View File

@@ -100,7 +100,7 @@ future<> set_server_storage_service(http_context& ctx, sharded<service::storage_
future<> unset_server_storage_service(http_context& ctx);
future<> set_server_sstables_loader(http_context& ctx, sharded<sstables_loader>& sst_loader);
future<> unset_server_sstables_loader(http_context& ctx);
future<> set_server_view_builder(http_context& ctx, sharded<db::view::view_builder>& vb);
future<> set_server_view_builder(http_context& ctx, sharded<db::view::view_builder>& vb, sharded<gms::gossiper>& g);
future<> unset_server_view_builder(http_context& ctx);
future<> set_server_repair(http_context& ctx, sharded<repair_service>& repair, sharded<gms::gossip_address_map>& am);
future<> unset_server_repair(http_context& ctx);
@@ -112,7 +112,7 @@ future<> set_server_authorization_cache(http_context& ctx, sharded<auth::service
future<> unset_server_authorization_cache(http_context& ctx);
future<> set_server_snapshot(http_context& ctx, sharded<db::snapshot_ctl>& snap_ctl);
future<> unset_server_snapshot(http_context& ctx);
future<> set_server_token_metadata(http_context& ctx, sharded<locator::shared_token_metadata>& tm);
future<> set_server_token_metadata(http_context& ctx, sharded<locator::shared_token_metadata>& tm, sharded<gms::gossiper>& g);
future<> unset_server_token_metadata(http_context& ctx);
future<> set_server_gossip(http_context& ctx, sharded<gms::gossiper>& g);
future<> unset_server_gossip(http_context& ctx);
@@ -124,7 +124,7 @@ future<> set_server_storage_proxy(http_context& ctx, sharded<service::storage_pr
future<> unset_server_storage_proxy(http_context& ctx);
future<> set_server_stream_manager(http_context& ctx, sharded<streaming::stream_manager>& sm);
future<> unset_server_stream_manager(http_context& ctx);
future<> set_hinted_handoff(http_context& ctx, sharded<service::storage_proxy>& p);
future<> set_hinted_handoff(http_context& ctx, sharded<service::storage_proxy>& p, sharded<gms::gossiper>& g);
future<> unset_hinted_handoff(http_context& ctx);
future<> set_server_cache(http_context& ctx);
future<> unset_server_cache(http_context& ctx);

View File

@@ -14,6 +14,7 @@
#include "gms/inet_address.hh"
#include "service/storage_proxy.hh"
#include "gms/gossiper.hh"
namespace api {
@@ -21,18 +22,18 @@ using namespace json;
using namespace seastar::httpd;
namespace hh = httpd::hinted_handoff_json;
void set_hinted_handoff(http_context& ctx, routes& r, sharded<service::storage_proxy>& proxy) {
hh::create_hints_sync_point.set(r, [&proxy] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
auto parse_hosts_list = [] (sstring arg) {
void set_hinted_handoff(http_context& ctx, routes& r, sharded<service::storage_proxy>& proxy, sharded<gms::gossiper>& g) {
hh::create_hints_sync_point.set(r, [&proxy, &g] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
auto parse_hosts_list = [&g] (sstring arg) {
std::vector<sstring> hosts_str = split(arg, ",");
std::vector<gms::inet_address> hosts;
std::vector<locator::host_id> hosts;
hosts.reserve(hosts_str.size());
for (const auto& host_str : hosts_str) {
try {
gms::inet_address host;
host = gms::inet_address(host_str);
hosts.push_back(host);
hosts.push_back(g.local().get_host_id(host));
} catch (std::exception& e) {
throw httpd::bad_param_exception(format("Failed to parse host address {}: {}", host_str, e.what()));
}
@@ -41,7 +42,7 @@ void set_hinted_handoff(http_context& ctx, routes& r, sharded<service::storage_p
return hosts;
};
std::vector<gms::inet_address> target_hosts = parse_hosts_list(req->get_query_param("target_hosts"));
std::vector<locator::host_id> target_hosts = parse_hosts_list(req->get_query_param("target_hosts"));
return proxy.local().create_hint_sync_point(std::move(target_hosts)).then([] (db::hints::sync_point sync_point) {
return json::json_return_type(sync_point.encode());
});

View File

@@ -10,12 +10,13 @@
#include <seastar/core/sharded.hh>
#include "api/api_init.hh"
#include "gms/gossiper.hh"
namespace service { class storage_proxy; }
namespace api {
void set_hinted_handoff(http_context& ctx, httpd::routes& r, sharded<service::storage_proxy>& p);
void set_hinted_handoff(http_context& ctx, httpd::routes& r, sharded<service::storage_proxy>& p, sharded<gms::gossiper>& g);
void unset_hinted_handoff(http_context& ctx, httpd::routes& r);
}

View File

@@ -114,7 +114,7 @@ void set_messaging_service(http_context& ctx, routes& r, sharded<netw::messaging
}));
get_version.set(r, [&ms](const_req req) {
return ms.local().get_raw_version(gms::inet_address(req.get_query_param("addr")));
return ms.local().current_version;
});
get_dropped_messages_by_ver.set(r, [&ms](std::unique_ptr<request> req) {

View File

@@ -533,11 +533,11 @@ void unset_sstables_loader(http_context& ctx, routes& r) {
ss::start_restore.unset(r);
}
void set_view_builder(http_context& ctx, routes& r, sharded<db::view::view_builder>& vb) {
ss::view_build_statuses.set(r, [&ctx, &vb] (std::unique_ptr<http::request> req) {
void set_view_builder(http_context& ctx, routes& r, sharded<db::view::view_builder>& vb, sharded<gms::gossiper>& g) {
ss::view_build_statuses.set(r, [&ctx, &vb, &g] (std::unique_ptr<http::request> req) {
auto keyspace = validate_keyspace(ctx, req);
auto view = req->get_path_param("view");
return vb.local().view_build_statuses(std::move(keyspace), std::move(view)).then([] (std::unordered_map<sstring, sstring> status) {
return vb.local().view_build_statuses(std::move(keyspace), std::move(view), g.local()).then([] (std::unordered_map<sstring, sstring> status) {
std::vector<storage_service_json::mapper> res;
return make_ready_future<json::json_return_type>(map_to_key_value(std::move(status), res));
});

View File

@@ -75,7 +75,7 @@ void set_storage_service(http_context& ctx, httpd::routes& r, sharded<service::s
void unset_storage_service(http_context& ctx, httpd::routes& r);
void set_sstables_loader(http_context& ctx, httpd::routes& r, sharded<sstables_loader>& sst_loader);
void unset_sstables_loader(http_context& ctx, httpd::routes& r);
void set_view_builder(http_context& ctx, httpd::routes& r, sharded<db::view::view_builder>& vb);
void set_view_builder(http_context& ctx, httpd::routes& r, sharded<db::view::view_builder>& vb, sharded<gms::gossiper>& g);
void unset_view_builder(http_context& ctx, httpd::routes& r);
void set_repair(http_context& ctx, httpd::routes& r, sharded<repair_service>& repair, sharded<gms::gossip_address_map>& am);
void unset_repair(http_context& ctx, httpd::routes& r);

View File

@@ -10,6 +10,7 @@
#include "api/api-doc/storage_service.json.hh"
#include "api/api-doc/endpoint_snitch_info.json.hh"
#include "locator/token_metadata.hh"
#include "gms/gossiper.hh"
using namespace seastar::httpd;
@@ -18,7 +19,7 @@ namespace api {
namespace ss = httpd::storage_service_json;
using namespace json;
void set_token_metadata(http_context& ctx, routes& r, sharded<locator::shared_token_metadata>& tm) {
void set_token_metadata(http_context& ctx, routes& r, sharded<locator::shared_token_metadata>& tm, sharded<gms::gossiper>& g) {
ss::local_hostid.set(r, [&tm](std::unique_ptr<http::request> req) {
auto id = tm.local().get()->get_my_id();
if (!bool(id)) {
@@ -33,22 +34,25 @@ void set_token_metadata(http_context& ctx, routes& r, sharded<locator::shared_to
}));
});
ss::get_node_tokens.set(r, [&tm] (std::unique_ptr<http::request> req) {
ss::get_node_tokens.set(r, [&tm, &g] (std::unique_ptr<http::request> req) {
gms::inet_address addr(req->get_path_param("endpoint"));
auto& local_tm = *tm.local().get();
const auto host_id = local_tm.get_host_id_if_known(addr);
std::optional<locator::host_id> host_id;
try {
host_id = g.local().get_host_id(addr);
} catch (...) {}
return make_ready_future<json::json_return_type>(stream_range_as_array(host_id ? local_tm.get_tokens(*host_id): std::vector<dht::token>{}, [](const dht::token& i) {
return fmt::to_string(i);
}));
});
ss::get_leaving_nodes.set(r, [&tm](const_req req) {
ss::get_leaving_nodes.set(r, [&tm, &g](const_req req) {
const auto& local_tm = *tm.local().get();
const auto& leaving_host_ids = local_tm.get_leaving_endpoints();
std::unordered_set<gms::inet_address> eps;
eps.reserve(leaving_host_ids.size());
for (const auto host_id: leaving_host_ids) {
eps.insert(local_tm.get_endpoint_for_host_id(host_id));
eps.insert(g.local().get_address_map().get(host_id));
}
return container_to_vec(eps);
});
@@ -58,20 +62,23 @@ void set_token_metadata(http_context& ctx, routes& r, sharded<locator::shared_to
return container_to_vec(addr);
});
ss::get_joining_nodes.set(r, [&tm](const_req req) {
ss::get_joining_nodes.set(r, [&tm, &g](const_req req) {
const auto& local_tm = *tm.local().get();
const auto& points = local_tm.get_bootstrap_tokens();
std::unordered_set<gms::inet_address> eps;
eps.reserve(points.size());
for (const auto& [token, host_id]: points) {
eps.insert(local_tm.get_endpoint_for_host_id(host_id));
eps.insert(g.local().get_address_map().get(host_id));
}
return container_to_vec(eps);
});
ss::get_host_id_map.set(r, [&tm](const_req req) {
ss::get_host_id_map.set(r, [&tm, &g](const_req req) {
std::vector<ss::mapper> res;
return map_to_key_value(tm.local().get()->get_endpoint_to_host_id_map(), res);
auto map = tm.local().get()->get_host_ids() |
std::views::transform([&g] (locator::host_id id) { return std::make_pair(g.local().get_address_map().get(id), id); }) |
std::ranges::to<std::unordered_map>();
return map_to_key_value(std::move(map), res);
});
static auto host_or_broadcast = [&tm](const_req req) {
@@ -79,26 +86,34 @@ void set_token_metadata(http_context& ctx, routes& r, sharded<locator::shared_to
return host.empty() ? tm.local().get()->get_topology().my_address() : gms::inet_address(host);
};
httpd::endpoint_snitch_info_json::get_datacenter.set(r, [&tm](const_req req) {
httpd::endpoint_snitch_info_json::get_datacenter.set(r, [&tm, &g](const_req req) {
auto& topology = tm.local().get()->get_topology();
auto ep = host_or_broadcast(req);
if (!topology.has_endpoint(ep)) {
std::optional<locator::host_id> host_id;
try {
host_id = g.local().get_host_id(ep);
} catch (...) {}
if (!host_id || !topology.has_node(*host_id)) {
// Cannot return error here, nodetool status can race, request
// info about just-left node and not handle it nicely
return locator::endpoint_dc_rack::default_location.dc;
}
return topology.get_datacenter(ep);
return topology.get_datacenter(*host_id);
});
httpd::endpoint_snitch_info_json::get_rack.set(r, [&tm](const_req req) {
httpd::endpoint_snitch_info_json::get_rack.set(r, [&tm, &g](const_req req) {
auto& topology = tm.local().get()->get_topology();
auto ep = host_or_broadcast(req);
if (!topology.has_endpoint(ep)) {
std::optional<locator::host_id> host_id;
try {
host_id = g.local().get_host_id(ep);
} catch (...) {}
if (!host_id || !topology.has_node(*host_id)) {
// Cannot return error here, nodetool status can race, request
// info about just-left node and not handle it nicely
return locator::endpoint_dc_rack::default_location.rack;
}
return topology.get_rack(ep);
return topology.get_rack(*host_id);
});
}

View File

@@ -15,10 +15,11 @@ class routes;
}
namespace locator { class shared_token_metadata; }
namespace gms { class gossiper; }
namespace api {
struct http_context;
void set_token_metadata(http_context& ctx, seastar::httpd::routes& r, seastar::sharded<locator::shared_token_metadata>& tm);
void set_token_metadata(http_context& ctx, seastar::httpd::routes& r, seastar::sharded<locator::shared_token_metadata>& tm, seastar::sharded<gms::gossiper>& g);
void unset_token_metadata(http_context& ctx, seastar::httpd::routes& r);
}

View File

@@ -402,7 +402,7 @@ future<cdc::generation_id> generation_service::legacy_make_new_generation(const
throw std::runtime_error(
format("Can't find endpoint for token {}", end));
}
const auto ep = tmptr->get_endpoint_for_host_id(*endpoint);
const auto ep = _gossiper.get_address_map().get(*endpoint);
auto sc = get_shard_count(ep, _gossiper);
return {sc > 0 ? sc : 1, get_sharding_ignore_msb(ep, _gossiper)};
}

View File

@@ -1820,7 +1820,7 @@ mutation_fragments_select_statement::do_execute(query_processor& qp, service::qu
throw exceptions::invalid_request_exception(seastar::format(
"Moving between coordinators is not allowed in SELECT FROM MUTATION_FRAGMENTS() statements, last page's coordinator was {}{}",
last_host,
last_node ? fmt::format("({})", last_node->endpoint()) : ""));
last_node ? fmt::format("({})", last_node->host_id()) : ""));
}
}
}

View File

@@ -113,11 +113,10 @@ bool is_datacenter_local(consistency_level l) {
return l == consistency_level::LOCAL_ONE || l == consistency_level::LOCAL_QUORUM;
}
template <typename Range, typename PendingRange = std::array<gms::inet_address, 0>>
std::unordered_map<sstring, dc_node_count> count_per_dc_endpoints(
const locator::effective_replication_map& erm,
const Range& live_endpoints,
const PendingRange& pending_endpoints = std::array<gms::inet_address, 0>()) {
const host_id_vector_replica_set& live_endpoints,
const host_id_vector_topology_change& pending_endpoints = host_id_vector_topology_change()) {
using namespace locator;
auto& rs = erm.get_replication_strategy();
@@ -148,12 +147,11 @@ std::unordered_map<sstring, dc_node_count> count_per_dc_endpoints(
return dc_endpoints;
}
template<typename Range, typename PendingRange>
bool assure_sufficient_live_nodes_each_quorum(
consistency_level cl,
const locator::effective_replication_map& erm,
const Range& live_endpoints,
const PendingRange& pending_endpoints) {
const host_id_vector_replica_set& live_endpoints,
const host_id_vector_topology_change& pending_endpoints) {
using namespace locator;
auto& rs = erm.get_replication_strategy();
@@ -175,12 +173,11 @@ bool assure_sufficient_live_nodes_each_quorum(
return false;
}
template<typename Range, typename PendingRange>
void assure_sufficient_live_nodes(
consistency_level cl,
const locator::effective_replication_map& erm,
const Range& live_endpoints,
const PendingRange& pending_endpoints) {
const host_id_vector_replica_set& live_endpoints,
const host_id_vector_topology_change& pending_endpoints) {
size_t need = block_for(erm, cl);
auto adjust_live_for_error = [] (size_t live, size_t pending) {
@@ -228,10 +225,6 @@ void assure_sufficient_live_nodes(
}
}
template void assure_sufficient_live_nodes(consistency_level, const locator::effective_replication_map&, const inet_address_vector_replica_set&, const std::array<gms::inet_address, 0>&);
template void assure_sufficient_live_nodes(db::consistency_level, const locator::effective_replication_map&, const inet_address_vector_replica_set&, const utils::small_vector<gms::inet_address, 1ul>&);
template void assure_sufficient_live_nodes(db::consistency_level, const locator::effective_replication_map&, const host_id_vector_replica_set&, const host_id_vector_topology_change&);
host_id_vector_replica_set
filter_for_query(consistency_level cl,
const locator::effective_replication_map& erm,

View File

@@ -61,15 +61,9 @@ is_sufficient_live_nodes(consistency_level cl,
const locator::effective_replication_map& erm,
const host_id_vector_replica_set& live_endpoints);
template<typename Range, typename PendingRange = std::array<gms::inet_address, 0>>
void assure_sufficient_live_nodes(
consistency_level cl,
const locator::effective_replication_map& erm,
const Range& live_endpoints,
const PendingRange& pending_endpoints = std::array<gms::inet_address, 0>());
extern template void assure_sufficient_live_nodes(consistency_level, const locator::effective_replication_map&, const inet_address_vector_replica_set&, const std::array<gms::inet_address, 0>&);
extern template void assure_sufficient_live_nodes(db::consistency_level, const locator::effective_replication_map&, const inet_address_vector_replica_set&, const utils::small_vector<gms::inet_address, 1ul>&);
extern template void assure_sufficient_live_nodes(db::consistency_level, const locator::effective_replication_map&, const host_id_vector_replica_set&, const host_id_vector_topology_change&);
const host_id_vector_replica_set& live_endpoints,
const host_id_vector_topology_change& pending_endpoints = host_id_vector_topology_change());
}

View File

@@ -82,33 +82,16 @@ bool hint_sender::can_send() noexcept {
}
const auto tmptr = _shard_manager._proxy.get_token_metadata_ptr();
const auto maybe_ep = std::invoke([&] () noexcept -> std::optional<gms::inet_address> {
try {
return tmptr->get_endpoint_for_host_id_if_known(_ep_key);
} catch (...) {
return std::nullopt;
}
});
try {
// `hint_sender` can never target this node, so if the returned optional is empty,
// that must mean the current locator::token_metadata doesn't store the information
// about the target node.
if (maybe_ep && _gossiper.is_alive(*maybe_ep)) {
_state.remove(state::ep_state_left_the_ring);
return true;
} else {
if (!_state.contains(state::ep_state_left_the_ring)) {
_state.set_if<state::ep_state_left_the_ring>(!tmptr->is_normal_token_owner(_ep_key));
}
// If the node is not part of the ring, we will send hints to all new replicas.
// Note that if the optional -- `maybe_ep` -- is empty, that could mean that `_ep_key`
// is the locator::host_id of THIS node. However, that's impossible because instances
// of `hint_sender` are only created for OTHER nodes, so this logic is correct.
return _state.contains(state::ep_state_left_the_ring);
if (_gossiper.is_alive(_ep_key)) {
_state.remove(state::ep_state_left_the_ring);
return true;
} else {
if (!_state.contains(state::ep_state_left_the_ring)) {
_state.set_if<state::ep_state_left_the_ring>(!tmptr->is_normal_token_owner(_ep_key));
}
} catch (...) {
return false;
// If the node is not part of the ring, we will send hints to all new replicas.
return _state.contains(state::ep_state_left_the_ring);
}
}

View File

@@ -266,21 +266,14 @@ void manager::forbid_hints_for_eps_with_pending_hints() {
}
}
sync_point::shard_rps manager::calculate_current_sync_point(std::span<const gms::inet_address> target_eps) const {
sync_point::shard_rps manager::calculate_current_sync_point(std::span<const locator::host_id> target_eps) const {
sync_point::shard_rps rps;
const auto tmptr = _proxy.get_token_metadata_ptr();
for (auto addr : target_eps) {
const auto hid = tmptr->get_host_id_if_known(addr);
// Ignore the IPs that we cannot map.
if (!hid) {
continue;
}
auto it = _ep_managers.find(*hid);
auto it = _ep_managers.find(addr);
if (it != _ep_managers.end()) {
const hint_endpoint_manager& ep_man = it->second;
rps[*hid] = ep_man.last_written_replay_position();
rps[addr] = ep_man.last_written_replay_position();
}
}
@@ -342,10 +335,11 @@ future<> manager::wait_for_sync_point(abort_source& as, const sync_point::shard_
for (const auto& [addr, rp] : rps) {
if (std::holds_alternative<gms::inet_address>(addr)) {
const auto maybe_hid = tmptr->get_host_id_if_known(std::get<gms::inet_address>(addr));
// Ignore the IPs we cannot map.
if (maybe_hid) [[likely]] {
hid_rps.emplace(*maybe_hid, rp);
try {
const auto hid = _gossiper_anchor->get_host_id(std::get<gms::inet_address>(addr));
hid_rps.emplace(hid, rp);
} catch (...) {
// Ignore the IPs we cannot map.
}
} else {
hid_rps.emplace(std::get<locator::host_id>(addr), rp);
@@ -436,11 +430,11 @@ bool manager::have_ep_manager(const std::variant<locator::host_id, gms::inet_add
return _hint_directory_manager.has_mapping(std::get<gms::inet_address>(ep));
}
bool manager::store_hint(endpoint_id host_id, gms::inet_address ip, schema_ptr s, lw_shared_ptr<const frozen_mutation> fm,
bool manager::store_hint(endpoint_id host_id, schema_ptr s, lw_shared_ptr<const frozen_mutation> fm,
tracing::trace_state_ptr tr_state) noexcept
{
if (utils::get_local_injector().enter("reject_incoming_hints")) {
manager_logger.debug("Rejecting a hint to {} / {} due to an error injection", host_id, ip);
manager_logger.debug("Rejecting a hint to {} due to an error injection", host_id);
++_stats.dropped;
return false;
}
@@ -451,6 +445,8 @@ bool manager::store_hint(endpoint_id host_id, gms::inet_address ip, schema_ptr s
return false;
}
auto ip = _gossiper_anchor->get_address_map().get(host_id);
try {
manager_logger.trace("Going to store a hint to {}", host_id);
tracing::trace(tr_state, "Going to store a hint to {}", host_id);
@@ -599,9 +595,9 @@ future<> manager::change_host_filter(host_filter filter) {
// been created by mistake and they're invalid. The same for pre-host-ID hinted handoff
// -- hint directories representing host IDs are NOT valid.
if (hid_or_ep.has_host_id() && _uses_host_id) {
return std::make_optional(pair_type{hid_or_ep.id(), hid_or_ep.resolve_endpoint(*tmptr)});
return std::make_optional(pair_type{hid_or_ep.id(), hid_or_ep.resolve_endpoint(*_gossiper_anchor)});
} else if (hid_or_ep.has_endpoint() && !_uses_host_id) {
return std::make_optional(pair_type{hid_or_ep.resolve_id(*tmptr), hid_or_ep.endpoint()});
return std::make_optional(pair_type{hid_or_ep.resolve_id(*_gossiper_anchor), hid_or_ep.endpoint()});
} else {
return std::nullopt;
}
@@ -822,7 +818,7 @@ future<> manager::initialize_endpoint_managers() {
const auto maybe_host_id = std::invoke([&] () -> std::optional<locator::host_id> {
try {
return maybe_host_id_or_ep->resolve_id(*tmptr);
return maybe_host_id_or_ep->resolve_id(*_gossiper_anchor);
} catch (...) {
return std::nullopt;
}
@@ -874,7 +870,7 @@ future<> manager::migrate_ip_directories() {
continue;
}
const locator::host_id host_id = hid_or_ep.resolve_id(*tmptr);
const locator::host_id host_id = hid_or_ep.resolve_id(*_gossiper_anchor);
dirs_to_rename.push_back({.current_name = std::move(directory), .new_name = host_id.to_sstring()});
} catch (...) {
// We cannot map the IP to the corresponding host ID either because

View File

@@ -171,7 +171,7 @@ public:
void register_metrics(const sstring& group_name);
future<> start(shared_ptr<const gms::gossiper> gossiper_ptr);
future<> stop();
bool store_hint(endpoint_id host_id, gms::inet_address ip, schema_ptr s, lw_shared_ptr<const frozen_mutation> fm,
bool store_hint(endpoint_id host_id, schema_ptr s, lw_shared_ptr<const frozen_mutation> fm,
tracing::trace_state_ptr tr_state) noexcept;
/// \brief Changes the host_filter currently used, stopping and starting endpoint_managers relevant to the new host_filter.
@@ -278,7 +278,7 @@ public:
///
/// \param target_eps The list of endpoints the sync point should correspond to. When empty, the function assumes all endpoints.
/// \return Sync point corresponding to the specified endpoints.
sync_point::shard_rps calculate_current_sync_point(std::span<const gms::inet_address> target_eps) const;
sync_point::shard_rps calculate_current_sync_point(std::span<const locator::host_id> target_eps) const;
/// \brief Waits until hint replay reach replay positions described in `rps`.
future<> wait_for_sync_point(abort_source& as, const sync_point::shard_rps& rps);

View File

@@ -2080,7 +2080,7 @@ future<> system_keyspace::update_peer_info(gms::inet_address ep, locator::host_i
if (!hid) {
on_internal_error(slogger, format("update_peer_info called with empty host_id, ep {}", ep));
}
if (_db.get_token_metadata().get_topology().is_me(ep)) {
if (_db.get_token_metadata().get_topology().is_me(hid)) {
on_internal_error(slogger, format("update_peer_info called for this node: {}", ep));
}

View File

@@ -2433,14 +2433,14 @@ future<std::unordered_map<locator::host_id, sstring>> view_builder::view_status(
}
future<std::unordered_map<sstring, sstring>>
view_builder::view_build_statuses(sstring keyspace, sstring view_name) const {
view_builder::view_build_statuses(sstring keyspace, sstring view_name, const gms::gossiper& gossiper) const {
std::unordered_map<locator::host_id, sstring> status = co_await view_status(std::move(keyspace), std::move(view_name));
std::unordered_map<sstring, sstring> status_map;
const auto& topo = _db.get_token_metadata().get_topology();
topo.for_each_node([&] (const locator::node& node) {
auto it = status.find(node.host_id());
auto s = it != status.end() ? std::move(it->second) : "UNKNOWN";
status_map.emplace(fmt::to_string(node.endpoint()), std::move(s));
status_map.emplace(fmt::to_string(gossiper.get_address_map().get(node.host_id())), std::move(s));
});
co_return status_map;
}
@@ -2670,7 +2670,7 @@ future<> view_builder::migrate_to_v2(locator::token_metadata_ptr tmptr, db::syst
// In the v1 table we may have left over rows that belong to nodes that were removed
// and we didn't clean them, so do that now.
auto host_id = row.get_as<utils::UUID>("host_id");
if (!tmptr->get_endpoint_for_host_id_if_known(locator::host_id(host_id))) {
if (!tmptr->get_topology().find_node(locator::host_id(host_id))) {
vlogger.warn("Dropping a row from view_build_status: host {} does not exist", host_id);
continue;
}
@@ -3150,7 +3150,7 @@ future<bool> view_builder::check_view_build_ongoing(const locator::token_metadat
return view_status(ks_name, cf_name).then([&tm] (view_statuses_type&& view_statuses) {
return std::ranges::any_of(view_statuses, [&tm] (const view_statuses_type::value_type& view_status) {
// Only consider status of known hosts.
return view_status.second == "STARTED" && tm.get_endpoint_for_host_id_if_known(view_status.first);
return view_status.second == "STARTED" && tm.get_topology().find_node(view_status.first);
});
});
}

View File

@@ -235,7 +235,7 @@ public:
// For tests
future<> wait_until_built(const sstring& ks_name, const sstring& view_name);
future<std::unordered_map<sstring, sstring>> view_build_statuses(sstring keyspace, sstring view_name) const;
future<std::unordered_map<sstring, sstring>> view_build_statuses(sstring keyspace, sstring view_name, const gms::gossiper& g) const;
// Can only be called on shard-0
future<> mark_existing_views_as_built();

View File

@@ -75,7 +75,7 @@ public:
std::vector<frozen_mutation> muts;
muts.reserve(gossiper.num_endpoints());
gossiper.for_each_endpoint_state([&] (const gms::inet_address& endpoint, const gms::endpoint_state&) {
gossiper.for_each_endpoint_state([&] (const gms::inet_address& endpoint, const gms::endpoint_state& eps) {
static thread_local auto s = build_schema();
mutation m(s, partition_key::from_single_value(*s, data_value(endpoint).serialize_nonnull()));
row& cr = m.partition().clustered_row(*schema(), clustering_key::make_empty()).cells();
@@ -86,16 +86,15 @@ public:
}
set_cell(cr, "load", gossiper.get_application_state_value(endpoint, gms::application_state::LOAD));
auto hostid = tm.get_host_id_if_known(endpoint);
if (hostid) {
if (ss.raft_topology_change_enabled() && !gossiper.is_shutdown(endpoint)) {
set_cell(cr, "status", boost::to_upper_copy<std::string>(fmt::format("{}", ss.get_node_state(*hostid))));
}
set_cell(cr, "host_id", hostid->uuid());
auto hostid = eps.get_host_id();
if (ss.raft_topology_change_enabled() && !gossiper.is_shutdown(endpoint)) {
set_cell(cr, "status", boost::to_upper_copy<std::string>(fmt::format("{}", ss.get_node_state(hostid))));
}
set_cell(cr, "host_id", hostid.uuid());
if (hostid) {
sstring dc = tm.get_topology().get_location(endpoint).dc;
if (tm.get_topology().has_node(hostid)) {
// Not all entries in gossiper are present in the topology
sstring dc = tm.get_topology().get_location(hostid).dc;
set_cell(cr, "dc", dc);
}
@@ -103,7 +102,7 @@ public:
set_cell(cr, "owns", ownership[endpoint]);
}
set_cell(cr, "tokens", int32_t(hostid ? tm.get_tokens(*hostid).size() : 0));
set_cell(cr, "tokens", int32_t(tm.get_tokens(hostid).size()));
muts.push_back(freeze(std::move(m)));
});

View File

@@ -924,7 +924,7 @@ future<std::set<inet_address>> gossiper::get_live_members_synchronized() {
return container().invoke_on(0, [] (gms::gossiper& g) -> future<std::set<inet_address>> {
// Make sure the value we return is synchronized on all shards
auto lock = co_await g.lock_endpoint_update_semaphore();
co_return g.get_live_members();
co_return g.get_live_members_helper();
});
}
@@ -1175,7 +1175,7 @@ future<> gossiper::unregister_(shared_ptr<i_endpoint_state_change_subscriber> su
return _subscribers.remove(subscriber);
}
std::set<inet_address> gossiper::get_live_members() const {
std::set<inet_address> gossiper::get_live_members_helper() const {
std::set<inet_address> live_members(_live_endpoints.begin(), _live_endpoints.end());
auto myip = get_broadcast_address();
logger.debug("live_members before={}", live_members);
@@ -1186,6 +1186,10 @@ std::set<inet_address> gossiper::get_live_members() const {
return live_members;
}
std::set<locator::host_id> gossiper::get_live_members() const {
return get_live_members_helper() | std::views::transform([this] (inet_address ip) { return get_host_id(ip); }) | std::ranges::to<std::set>();
}
std::set<locator::host_id> gossiper::get_live_token_owners() const {
std::set<locator::host_id> token_owners;
auto normal_token_owners = get_token_metadata_ptr()->get_normal_token_owners();
@@ -1197,17 +1201,6 @@ std::set<locator::host_id> gossiper::get_live_token_owners() const {
return token_owners;
}
std::set<inet_address> gossiper::get_unreachable_token_owners() const {
std::set<inet_address> token_owners;
auto normal_token_owners = get_token_metadata_ptr()->get_normal_token_owners_ips();
for (auto& node: normal_token_owners) {
if (!is_alive(node)) {
token_owners.insert(node);
}
}
return token_owners;
}
std::set<locator::host_id> gossiper::get_unreachable_nodes() const {
std::set<locator::host_id> unreachable_nodes;
auto nodes = get_token_metadata_ptr()->get_topology().get_all_host_ids();
@@ -1505,6 +1498,14 @@ endpoint_state_ptr gossiper::get_endpoint_state_ptr(inet_address ep) const noexc
}
}
endpoint_state_ptr gossiper::get_endpoint_state_ptr(locator::host_id id) const noexcept {
auto ip = _address_map.find(id);
if (!ip) {
return nullptr;
}
return get_endpoint_state_ptr(*ip);
}
void gossiper::update_timestamp(const endpoint_state_ptr& eps) noexcept {
const_cast<endpoint_state&>(*eps).update_timestamp();
}

View File

@@ -296,7 +296,7 @@ public:
*/
future<> unregister_(shared_ptr<i_endpoint_state_change_subscriber> subscriber);
std::set<inet_address> get_live_members() const;
std::set<locator::host_id> get_live_members() const;
std::set<locator::host_id> get_live_token_owners() const;
@@ -306,11 +306,6 @@ public:
std::set<inet_address> get_unreachable_members() const;
std::set<locator::host_id> get_unreachable_host_ids() const;
/**
* @return a list of unreachable token owners
*/
std::set<inet_address> get_unreachable_token_owners() const;
/**
* @return a list of unreachable nodes
*/
@@ -435,6 +430,7 @@ public:
// The endpoint_state is immutable (except for its update_timestamp), guaranteed not to change while
// the endpoint_state_ptr is held.
endpoint_state_ptr get_endpoint_state_ptr(inet_address ep) const noexcept;
endpoint_state_ptr get_endpoint_state_ptr(locator::host_id ep) const noexcept;
// Return this node's endpoint_state_ptr
endpoint_state_ptr get_this_endpoint_state_ptr() const noexcept {
@@ -527,6 +523,7 @@ public:
future<> wait_alive(std::vector<gms::inet_address> nodes, std::chrono::milliseconds timeout);
future<> wait_alive(std::vector<locator::host_id> nodes, std::chrono::milliseconds timeout);
future<> wait_alive(noncopyable_function<std::vector<locator::host_id>()> get_nodes, std::chrono::milliseconds timeout);
std::set<inet_address> get_live_members_helper() const;
// Wait for `n` live nodes to show up in gossip (including ourself).
future<> wait_for_live_nodes_to_show_up(size_t n);

View File

@@ -56,19 +56,6 @@ auto& ring_mapping::operator*() const {
return std::as_const(_impl->map);
}
template <typename ResultSet, typename SourceSet>
static ResultSet resolve_endpoints(const SourceSet& host_ids, const token_metadata& tm) {
ResultSet result{};
result.reserve(host_ids.size());
for (const auto& host_id: host_ids) {
// Empty host_id is used as a marker for local address.
// The reason for this hack is that we need local_strategy to
// work before the local host_id is loaded from the system.local table.
result.push_back(host_id ? tm.get_endpoint_for_host_id(host_id) : tm.get_topology().my_address());
}
return result;
}
logging::logger rslogger("replication_strategy");
abstract_replication_strategy::abstract_replication_strategy(
@@ -507,20 +494,10 @@ host_id_vector_replica_set vnode_effective_replication_map::do_get_replicas(cons
return it->second;
}
inet_address_vector_replica_set vnode_effective_replication_map::do_get_natural_endpoints(const token& tok,
bool is_vnode) const
{
return resolve_endpoints<inet_address_vector_replica_set>(do_get_replicas(tok, is_vnode), *_tmptr);
}
host_id_vector_replica_set vnode_effective_replication_map::get_replicas(const token& tok) const {
return do_get_replicas(tok, false);
}
inet_address_vector_replica_set vnode_effective_replication_map::get_natural_endpoints(const token& search_token) const {
return do_get_natural_endpoints(search_token, false);
}
host_id_vector_replica_set vnode_effective_replication_map::get_natural_replicas(const token& search_token) const {
return get_replicas(search_token);
}

View File

@@ -231,9 +231,6 @@ public:
/// new replica.
///
/// The returned addresses are present in the topology object associated with this instance.
virtual inet_address_vector_replica_set get_natural_endpoints(const token& search_token) const = 0;
/// Same as above but returns host ids instead of addresses
virtual host_id_vector_replica_set get_natural_replicas(const token& search_token) const = 0;
/// Same as above but returns host ids instead of addresses
@@ -333,7 +330,6 @@ private:
friend class abstract_replication_strategy;
friend class effective_replication_map_factory;
public: // effective_replication_map
inet_address_vector_replica_set get_natural_endpoints(const token& search_token) const override;
host_id_vector_replica_set get_natural_replicas(const token& search_token) const override;
host_id_vector_topology_change get_pending_replicas(const token& search_token) const override;
host_id_vector_replica_set get_replicas_for_reading(const token& token) const override;
@@ -397,7 +393,6 @@ public:
private:
future<dht::token_range_vector> do_get_ranges(noncopyable_function<stop_iteration(bool& add_range, const host_id& natural_endpoint)> consider_range_for_endpoint) const;
inet_address_vector_replica_set do_get_natural_endpoints(const token& tok, bool is_vnode) const;
host_id_vector_replica_set do_get_replicas(const token& tok, bool is_vnode) const;
stop_iteration for_each_natural_endpoint_until(const token& vnode_tok, const noncopyable_function<stop_iteration(const host_id&)>& func) const;

View File

@@ -235,7 +235,7 @@ public:
}
static void check_enough_endpoints(const token_metadata& tm, const std::unordered_map<sstring, size_t>& dc_rf) {
auto dc_endpoints = tm.get_datacenter_token_owners_ips();
auto dc_endpoints = tm.get_datacenter_token_owners();
auto endpoints_in = [&dc_endpoints](sstring dc) {
auto i = dc_endpoints.find(dc);
return i != dc_endpoints.end() ? i->second.size() : size_t(0);
@@ -309,7 +309,7 @@ effective_replication_map_ptr network_topology_strategy::make_replication_map(ta
static unsigned calculate_initial_tablets_from_topology(const schema& s, token_metadata_ptr tm, const std::unordered_map<sstring, size_t>& rf) {
unsigned initial_tablets = std::numeric_limits<unsigned>::min();
for (const auto& dc : tm->get_datacenter_token_owners_ips()) {
for (const auto& dc : tm->get_datacenter_token_owners()) {
unsigned shards_in_dc = 0;
unsigned rf_in_dc = 1;

View File

@@ -765,19 +765,6 @@ class tablet_effective_replication_map : public effective_replication_map {
tablet_sharder _sharder;
mutable const tablet_map* _tmap = nullptr;
private:
inet_address_vector_replica_set to_replica_set(const tablet_replica_set& replicas) const {
inet_address_vector_replica_set result;
result.reserve(replicas.size());
auto& topo = _tmptr->get_topology();
for (auto&& replica : replicas) {
auto* node = topo.find_node(replica.host);
if (node && !node->left()) {
result.emplace_back(node->endpoint());
}
}
return result;
}
host_id_vector_replica_set to_host_set(const tablet_replica_set& replicas) const {
host_id_vector_replica_set result;
result.reserve(replicas.size());
@@ -879,10 +866,6 @@ public:
return to_host_set(get_replicas_for_write(search_token));
}
virtual inet_address_vector_replica_set get_natural_endpoints(const token& search_token) const override {
return to_replica_set(get_replicas_for_write(search_token));
}
virtual host_id_vector_replica_set get_natural_replicas(const token& search_token) const override {
return to_host_set(get_replicas_for_write(search_token));
}

View File

@@ -22,7 +22,7 @@
#include <seastar/core/smp.hh>
#include "utils/assert.hh"
#include "utils/stall_free.hh"
#include "gms/gossiper.hh"
namespace locator {
static logging::logger tlogger("token_metadata");
@@ -116,7 +116,7 @@ public:
}
void update_topology(host_id id, std::optional<endpoint_dc_rack> opt_dr, std::optional<node::state> opt_st, std::optional<shard_id> shard_count = std::nullopt) {
_topology.add_or_update_endpoint(id, std::nullopt, std::move(opt_dr), std::move(opt_st), std::move(shard_count));
_topology.add_or_update_endpoint(id, std::move(opt_dr), std::move(opt_st), std::move(shard_count));
}
/**
@@ -141,29 +141,8 @@ public:
void debug_show() const;
/**
* Store an end-point to host ID mapping. Each ID must be unique, and
* cannot be changed after the fact.
*
* @param hostId
* @param endpoint
*/
void update_host_id(const host_id& host_id, inet_address endpoint);
/** Return the unique host ID for an end-point. */
host_id get_host_id(inet_address endpoint) const;
/// Return the unique host ID for an end-point or nullopt if not found.
std::optional<host_id> get_host_id_if_known(inet_address endpoint) const;
/** Return the end-point for a unique host ID or nullopt if not found.*/
std::optional<inet_address> get_endpoint_for_host_id_if_known(host_id) const;
/** Return the end-point for a unique host ID.*/
inet_address get_endpoint_for_host_id(host_id) const;
/** @return a copy of the endpoint-to-id map for read-only operations */
std::unordered_map<inet_address, host_id> get_endpoint_to_host_id_map() const;
/** @return a copy of host id set for read-only operations */
std::unordered_set<host_id> get_host_ids() const;
void add_bootstrap_token(token t, host_id endpoint);
@@ -260,11 +239,8 @@ public:
* Bootstrapping tokens are not taken into account. */
size_t count_normal_token_owners() const;
std::unordered_map<sstring, std::unordered_set<inet_address>> get_datacenter_token_owners_ips() const;
std::unordered_map<sstring, std::unordered_set<host_id>> get_datacenter_token_owners() const;
std::unordered_map<sstring, std::unordered_map<sstring, std::unordered_set<inet_address>>>
get_datacenter_racks_token_owners_ips() const;
std::unordered_map<sstring, std::unordered_map<sstring, std::unordered_set<host_id>>>
get_datacenter_racks_token_owners() const;
@@ -531,57 +507,11 @@ void token_metadata_impl::debug_show() const {
reporter->arm_periodic(std::chrono::seconds(1));
}
void token_metadata_impl::update_host_id(const host_id& host_id, inet_address endpoint) {
_topology.add_or_update_endpoint(host_id, endpoint);
}
host_id token_metadata_impl::get_host_id(inet_address endpoint) const {
if (const auto* node = _topology.find_node(endpoint)) [[likely]] {
return node->host_id();
} else {
on_internal_error(tlogger, format("host_id for endpoint {} is not found", endpoint));
}
}
std::optional<host_id> token_metadata_impl::get_host_id_if_known(inet_address endpoint) const {
if (const auto* node = _topology.find_node(endpoint)) [[likely]] {
return node->host_id();
} else {
return std::nullopt;
}
}
std::optional<inet_address> token_metadata_impl::get_endpoint_for_host_id_if_known(host_id host_id) const {
if (const auto* node = _topology.find_node(host_id)) [[likely]] {
return node->endpoint();
} else {
return std::nullopt;
}
}
inet_address token_metadata_impl::get_endpoint_for_host_id(host_id host_id) const {
if (const auto* node = _topology.find_node(host_id)) [[likely]] {
return node->endpoint();
} else {
on_internal_error(tlogger, format("endpoint for host_id {} is not found", host_id));
}
}
std::unordered_map<inet_address, host_id> token_metadata_impl::get_endpoint_to_host_id_map() const {
const auto& nodes = _topology.get_nodes_by_endpoint();
std::unordered_map<inet_address, host_id> map;
map.reserve(nodes.size());
for (const auto& [endpoint, node] : nodes) {
if (node.get().left() || node.get().is_none()) {
continue;
}
if (const auto& host_id = node.get().host_id()) {
map[endpoint] = host_id;
} else {
tlogger.info("get_endpoint_to_host_id_map: endpoint {} has null host_id: state={}", endpoint, node.get().get_state());
}
}
return map;
std::unordered_set<host_id> token_metadata_impl::get_host_ids() const {
return _topology.get_nodes() |
std::views::filter([&] (const node& n) { return !n.left() && !n.is_none(); }) |
std::views::transform([] (const node& n) { return n.host_id(); }) |
std::ranges::to<std::unordered_set>();
}
bool token_metadata_impl::is_normal_token_owner(host_id endpoint) const {
@@ -805,17 +735,6 @@ void token_metadata_impl::for_each_token_owner(std::function<void(const node&)>
});
}
std::unordered_map<sstring, std::unordered_set<inet_address>>
token_metadata_impl::get_datacenter_token_owners_ips() const {
std::unordered_map<sstring, std::unordered_set<inet_address>> datacenter_token_owners;
_topology.for_each_node([&] (const node& n) {
if (is_normal_token_owner(n.host_id())) {
datacenter_token_owners[n.dc_rack().dc].insert(n.endpoint());
}
});
return datacenter_token_owners;
}
std::unordered_map<sstring, std::unordered_set<host_id>>
token_metadata_impl::get_datacenter_token_owners() const {
std::unordered_map<sstring, std::unordered_set<host_id>> datacenter_token_owners;
@@ -827,18 +746,6 @@ token_metadata_impl::get_datacenter_token_owners() const {
return datacenter_token_owners;
}
std::unordered_map<sstring, std::unordered_map<sstring, std::unordered_set<inet_address>>>
token_metadata_impl::get_datacenter_racks_token_owners_ips() const {
std::unordered_map<sstring, std::unordered_map<sstring, std::unordered_set<inet_address>>> dc_racks_token_owners;
_topology.for_each_node([&] (const node& n) {
const auto& dc_rack = n.dc_rack();
if (is_normal_token_owner(n.host_id())) {
dc_racks_token_owners[dc_rack.dc][dc_rack.rack].insert(n.endpoint());
}
});
return dc_racks_token_owners;
}
std::unordered_map<sstring, std::unordered_map<sstring, std::unordered_set<host_id>>>
token_metadata_impl::get_datacenter_racks_token_owners() const {
std::unordered_map<sstring, std::unordered_map<sstring, std::unordered_set<host_id>>> dc_racks_token_owners;
@@ -1042,34 +949,9 @@ token_metadata::debug_show() const {
_impl->debug_show();
}
void
token_metadata::update_host_id(const host_id& host_id, inet_address endpoint) {
_impl->update_host_id(host_id, endpoint);
}
host_id
token_metadata::get_host_id(inet_address endpoint) const {
return _impl->get_host_id(endpoint);
}
std::optional<host_id>
token_metadata::get_host_id_if_known(inet_address endpoint) const {
return _impl->get_host_id_if_known(endpoint);
}
std::optional<token_metadata::inet_address>
token_metadata::get_endpoint_for_host_id_if_known(host_id host_id) const {
return _impl->get_endpoint_for_host_id_if_known(host_id);
}
token_metadata::inet_address
token_metadata::get_endpoint_for_host_id(host_id host_id) const {
return _impl->get_endpoint_for_host_id(host_id);
}
std::unordered_map<inet_address, host_id>
token_metadata::get_endpoint_to_host_id_map() const {
return _impl->get_endpoint_to_host_id_map();
std::unordered_set<host_id>
token_metadata::get_host_ids() const {
return _impl->get_host_ids();
}
void
@@ -1189,16 +1071,6 @@ token_metadata::get_normal_token_owners() const {
return _impl->get_normal_token_owners();
}
std::unordered_set<gms::inet_address> token_metadata::get_normal_token_owners_ips() const {
const auto& host_ids = _impl->get_normal_token_owners();
std::unordered_set<gms::inet_address> result;
result.reserve(host_ids.size());
for (const auto& id: host_ids) {
result.insert(_impl->get_endpoint_for_host_id(id));
}
return result;
}
void token_metadata::for_each_token_owner(std::function<void(const node&)> func) const {
return _impl->for_each_token_owner(func);
}
@@ -1208,19 +1080,10 @@ token_metadata::count_normal_token_owners() const {
return _impl->count_normal_token_owners();
}
std::unordered_map<sstring, std::unordered_set<inet_address>> token_metadata::get_datacenter_token_owners_ips() const {
return _impl->get_datacenter_token_owners_ips();
}
std::unordered_map<sstring, std::unordered_set<host_id>> token_metadata::get_datacenter_token_owners() const {
return _impl->get_datacenter_token_owners();
}
std::unordered_map<sstring, std::unordered_map<sstring, std::unordered_set<inet_address>>>
token_metadata::get_datacenter_racks_token_owners_ips() const {
return _impl->get_datacenter_racks_token_owners_ips();
}
std::unordered_map<sstring, std::unordered_map<sstring, std::unordered_set<host_id>>>
token_metadata::get_datacenter_racks_token_owners() const {
return _impl->get_datacenter_racks_token_owners();
@@ -1401,22 +1264,22 @@ host_id_or_endpoint::host_id_or_endpoint(const sstring& s, param_type restrict)
}
}
host_id host_id_or_endpoint::resolve_id(const token_metadata& tm) const {
host_id host_id_or_endpoint::resolve_id(const gms::gossiper& g) const {
if (has_host_id()) {
return id();
}
auto opt_id = tm.get_host_id_if_known(endpoint());
if (!opt_id) {
try {
return g.get_host_id(endpoint());
} catch (...) {
throw std::runtime_error(format("Host inet address {} not found in the cluster", endpoint()));
}
return *opt_id;
}
gms::inet_address host_id_or_endpoint::resolve_endpoint(const token_metadata& tm) const {
gms::inet_address host_id_or_endpoint::resolve_endpoint(const gms::gossiper& g) const {
if (has_endpoint()) {
return endpoint();
}
auto endpoint_opt = tm.get_endpoint_for_host_id_if_known(id());
auto endpoint_opt = g.get_address_map().find(id());
if (!endpoint_opt) {
throw std::runtime_error(format("Host ID {} not found in the cluster", id()));
}

View File

@@ -37,6 +37,10 @@ namespace replica {
class keyspace;
}
namespace gms {
class gossiper;
}
namespace locator {
class abstract_replication_strategy;
@@ -75,8 +79,8 @@ struct host_id_or_endpoint {
// Map the host_id to endpoint or vice verse, using the token_metadata.
// Throws runtime error if failed to resolve.
host_id resolve_id(const token_metadata&) const;
gms::inet_address resolve_endpoint(const token_metadata&) const;
host_id resolve_id(const gms::gossiper&) const;
gms::inet_address resolve_endpoint(const gms::gossiper&) const;
};
using host_id_or_endpoint_list = std::vector<host_id_or_endpoint>;
@@ -223,29 +227,11 @@ public:
const topology& get_topology() const;
void debug_show() const;
/**
* Store an end-point to host ID mapping. Each ID must be unique, and
* cannot be changed after the fact.
*
* @param hostId
* @param endpoint
*/
void update_host_id(const locator::host_id& host_id, inet_address endpoint);
/** Return the unique host ID for an end-point. */
host_id get_host_id(inet_address endpoint) const;
/// Return the unique host ID for an end-point or nullopt if not found.
std::optional<host_id> get_host_id_if_known(inet_address endpoint) const;
/** Return the end-point for a unique host ID or nullopt if not found. */
std::optional<inet_address> get_endpoint_for_host_id_if_known(locator::host_id host_id) const;
/** Return the end-point for a unique host ID */
inet_address get_endpoint_for_host_id(locator::host_id host_id) const;
/** @return a copy of the endpoint-to-id map for read-only operations */
std::unordered_map<inet_address, host_id> get_endpoint_to_host_id_map() const;
std::unordered_set<host_id> get_host_ids() const;
/// Returns host_id of the local node.
host_id get_my_id() const;
@@ -327,23 +313,18 @@ public:
const std::unordered_set<host_id>& get_normal_token_owners() const;
std::unordered_set<gms::inet_address> get_normal_token_owners_ips() const;
void for_each_token_owner(std::function<void(const node&)> func) const;
/* Returns the number of different endpoints that own tokens in the ring.
* Bootstrapping tokens are not taken into account. */
size_t count_normal_token_owners() const;
// Returns the map: DC -> addresses of token owners in that DC.
// Returns the map: DC -> host_id of token owners in that DC.
// If there are no token owners in a DC, it is not present in the result.
std::unordered_map<sstring, std::unordered_set<inet_address>> get_datacenter_token_owners_ips() const;
std::unordered_map<sstring, std::unordered_set<host_id>> get_datacenter_token_owners() const;
// Returns the map: DC -> (map: rack -> addresses of token owners in that rack).
// Returns the map: DC -> (map: rack -> host_id of token owners in that rack).
// If there are no token owners in a DC/rack, it is not present in the result.
std::unordered_map<sstring, std::unordered_map<sstring, std::unordered_set<inet_address>>>
get_datacenter_racks_token_owners_ips() const;
std::unordered_map<sstring, std::unordered_map<sstring, std::unordered_set<host_id>>>
get_datacenter_racks_token_owners() const;

View File

@@ -55,10 +55,9 @@ thread_local const endpoint_dc_rack endpoint_dc_rack::default_location = {
.rack = locator::production_snitch_base::default_rack,
};
node::node(const locator::topology* topology, locator::host_id id, inet_address endpoint, endpoint_dc_rack dc_rack, state state, shard_id shard_count, this_node is_this_node, node::idx_type idx)
node::node(const locator::topology* topology, locator::host_id id, endpoint_dc_rack dc_rack, state state, shard_id shard_count, this_node is_this_node, node::idx_type idx)
: _topology(topology)
, _host_id(id)
, _endpoint(endpoint)
, _dc_rack(std::move(dc_rack))
, _state(state)
, _shard_count(std::move(shard_count))
@@ -66,12 +65,12 @@ node::node(const locator::topology* topology, locator::host_id id, inet_address
, _idx(idx)
{}
node_holder node::make(const locator::topology* topology, locator::host_id id, inet_address endpoint, endpoint_dc_rack dc_rack, state state, shard_id shard_count, node::this_node is_this_node, node::idx_type idx) {
return std::make_unique<node>(topology, std::move(id), std::move(endpoint), std::move(dc_rack), std::move(state), shard_count, is_this_node, idx);
node_holder node::make(const locator::topology* topology, locator::host_id id, endpoint_dc_rack dc_rack, state state, shard_id shard_count, node::this_node is_this_node, node::idx_type idx) {
return std::make_unique<node>(topology, std::move(id), std::move(dc_rack), std::move(state), shard_count, is_this_node, idx);
}
node_holder node::clone() const {
return make(nullptr, host_id(), endpoint(), dc_rack(), get_state(), get_shard_count(), is_this_node());
return make(nullptr, host_id(), dc_rack(), get_state(), get_shard_count(), is_this_node());
}
std::string node::to_string(node::state s) {
@@ -95,7 +94,6 @@ future<> topology::clear_gently() noexcept {
_datacenters.clear();
_dc_rack_nodes.clear();
_dc_nodes.clear();
_nodes_by_endpoint.clear();
_nodes_by_host_id.clear();
co_await utils::clear_gently(_nodes);
}
@@ -116,7 +114,7 @@ topology::topology(config cfg)
{
tlogger.trace("topology[{}]: constructing using config: endpoint={} id={} dc={} rack={}", fmt::ptr(this),
cfg.this_endpoint, cfg.this_host_id, cfg.local_dc_rack.dc, cfg.local_dc_rack.rack);
add_node(cfg.this_host_id, cfg.this_endpoint, cfg.local_dc_rack, node::state::none);
add_node(cfg.this_host_id, cfg.local_dc_rack, node::state::none);
}
topology::topology(topology&& o) noexcept
@@ -125,7 +123,6 @@ topology::topology(topology&& o) noexcept
, _this_node(std::exchange(o._this_node, nullptr))
, _nodes(std::move(o._nodes))
, _nodes_by_host_id(std::move(o._nodes_by_host_id))
, _nodes_by_endpoint(std::move(o._nodes_by_endpoint))
, _dc_nodes(std::move(o._dc_nodes))
, _dc_rack_nodes(std::move(o._dc_rack_nodes))
, _dc_endpoints(std::move(o._dc_endpoints))
@@ -171,7 +168,7 @@ void topology::set_host_id_cfg(host_id this_host_id) {
tlogger.trace("topology[{}]: set host id to {}", fmt::ptr(this), this_host_id);
_cfg.this_host_id = this_host_id;
add_or_update_endpoint(this_host_id, _cfg.this_endpoint);
add_or_update_endpoint(this_host_id);
}
future<topology> topology::clone_gently() const {
@@ -188,21 +185,15 @@ future<topology> topology::clone_gently() const {
co_return ret;
}
const node& topology::add_node(host_id id, const inet_address& ep, const endpoint_dc_rack& dr, node::state state, shard_id shard_count) {
const node& topology::add_node(host_id id, const endpoint_dc_rack& dr, node::state state, shard_id shard_count) {
if (dr.dc.empty() || dr.rack.empty()) {
on_internal_error(tlogger, "Node must have valid dc and rack");
}
return add_node(node::make(this, id, ep, dr, state, shard_count));
return add_node(node::make(this, id, dr, state, shard_count));
}
bool topology::is_configured_this_node(const node& n) const {
if (_cfg.this_host_id && n.host_id()) { // Selection by host_id
return _cfg.this_host_id == n.host_id();
}
if (_cfg.this_endpoint != inet_address()) { // Selection by endpoint
return _cfg.this_endpoint == n.endpoint();
}
return false; // No selection;
return _cfg.this_host_id == n.host_id();
}
const node& topology::add_node(node_holder nptr) {
@@ -245,10 +236,9 @@ const node& topology::add_node(node_holder nptr) {
return *node;
}
void topology::update_node(node& node, std::optional<host_id> opt_id, std::optional<inet_address> opt_ep, std::optional<endpoint_dc_rack> opt_dr, std::optional<node::state> opt_st, std::optional<shard_id> opt_shard_count) {
tlogger.debug("topology[{}]: update_node: {}: to: host_id={} endpoint={} dc={} rack={} state={} shard_count={}, at {}", fmt::ptr(this), node_printer(&node),
void topology::update_node(node& node, std::optional<host_id> opt_id, std::optional<endpoint_dc_rack> opt_dr, std::optional<node::state> opt_st, std::optional<shard_id> opt_shard_count) {
tlogger.debug("topology[{}]: update_node: {}: to: host_id={} dc={} rack={} state={} shard_count={}, at {}", fmt::ptr(this), node_printer(&node),
opt_id ? format("{}", *opt_id) : "unchanged",
opt_ep ? format("{}", *opt_ep) : "unchanged",
opt_dr ? format("{}", opt_dr->dc) : "unchanged",
opt_dr ? format("{}", opt_dr->rack) : "unchanged",
opt_st ? format("{}", *opt_st) : "unchanged",
@@ -272,16 +262,6 @@ void topology::update_node(node& node, std::optional<host_id> opt_id, std::optio
opt_id.reset();
}
}
if (opt_ep) {
if (*opt_ep != node.endpoint()) {
if (*opt_ep == inet_address{}) {
on_internal_error(tlogger, seastar::format("Updating node endpoint to null is disallowed: {}: new endpoint={}", node_printer(&node), *opt_ep));
}
changed = true;
} else {
opt_ep.reset();
}
}
if (opt_dr) {
if (opt_dr->dc.empty() || opt_dr->dc == production_snitch_base::default_dc) {
opt_dr->dc = node.dc_rack().dc;
@@ -312,9 +292,6 @@ void topology::update_node(node& node, std::optional<host_id> opt_id, std::optio
if (opt_id) {
node._host_id = *opt_id;
}
if (opt_ep) {
node._endpoint = *opt_ep;
}
if (opt_dr) {
node._dc_rack = std::move(*opt_dr);
}
@@ -360,32 +337,6 @@ void topology::index_node(const node& node) {
if (!inserted_host_id) {
on_internal_error(tlogger, seastar::format("topology[{}]: {}: node already exists", fmt::ptr(this), node_printer(&node)));
}
if (node.endpoint() != inet_address{}) {
auto eit = _nodes_by_endpoint.find(node.endpoint());
if (eit != _nodes_by_endpoint.end()) {
if (eit->second.get().get_state() == node::state::none && eit->second.get().is_this_node()) {
// eit->second is default entry created for local node and it is replaced by existing node with the same ip
// it means this node is going to replace the existing node with the same ip, but it does not know it yet
// map ip to the old node
_nodes_by_endpoint.erase(node.endpoint());
} else if (eit->second.get().get_state() == node::state::replacing && node.get_state() == node::state::being_replaced) {
// replace-with-same-ip, map ip to the old node
_nodes_by_endpoint.erase(node.endpoint());
} else if (eit->second.get().get_state() == node::state::being_replaced && node.get_state() == node::state::replacing) {
// replace-with-same-ip, map ip to the old node, do nothing if it's already the case
} else if (eit->second.get().is_leaving() || eit->second.get().left()) {
_nodes_by_endpoint.erase(node.endpoint());
} else if (!node.is_leaving() && !node.left()) {
if (node.host_id()) {
_nodes_by_host_id.erase(node.host_id());
}
on_internal_error(tlogger, seastar::format("topology[{}]: {}: node endpoint already mapped to {}", fmt::ptr(this), node_printer(&node), node_printer(&eit->second.get())));
}
}
if (!node.left() && !node.is_none()) {
_nodes_by_endpoint.try_emplace(node.endpoint(), std::cref(node));
}
}
// We keep location of left nodes because they may still appear in tablet replica sets
// and algorithms expect to know which dc they belonged to. View replica pairing needs stable
@@ -394,7 +345,7 @@ void topology::index_node(const node& node) {
if (!node.left() && !node.is_none()) {
const auto& dc = node.dc_rack().dc;
const auto& rack = node.dc_rack().rack;
const auto& endpoint = node.endpoint();
const auto& endpoint = node.host_id();
_dc_nodes[dc].emplace(std::cref(node));
_dc_rack_nodes[dc][rack].emplace(std::cref(node));
_dc_endpoints[dc].insert(endpoint);
@@ -416,7 +367,7 @@ void topology::unindex_node(const node& node) {
bool found = _dc_nodes.at(dc).erase(std::cref(node));
if (found) {
if (auto dit = _dc_endpoints.find(dc); dit != _dc_endpoints.end()) {
const auto& ep = node.endpoint();
const auto& ep = node.host_id();
auto& eps = dit->second;
eps.erase(ep);
if (eps.empty()) {
@@ -442,10 +393,6 @@ void topology::unindex_node(const node& node) {
if (host_it != _nodes_by_host_id.end() && host_it->second == node) {
_nodes_by_host_id.erase(host_it);
}
auto ep_it = _nodes_by_endpoint.find(node.endpoint());
if (ep_it != _nodes_by_endpoint.end() && ep_it->second.get() == node) {
_nodes_by_endpoint.erase(ep_it);
}
if (_this_node == &node) {
_this_node = nullptr;
}
@@ -484,16 +431,6 @@ node* topology::find_node(host_id id) noexcept {
return make_mutable(const_cast<const topology*>(this)->find_node(id));
}
// Finds a node by its endpoint
// Returns nullptr if not found
const node* topology::find_node(const inet_address& ep) const noexcept {
auto it = _nodes_by_endpoint.find(ep);
if (it != _nodes_by_endpoint.end()) {
return &it->second.get();
}
return nullptr;
}
// Finds a node by its index
// Returns nullptr if not found
const node* topology::find_node(node::idx_type idx) const noexcept {
@@ -503,23 +440,19 @@ const node* topology::find_node(node::idx_type idx) const noexcept {
return _nodes.at(idx).get();
}
const node& topology::add_or_update_endpoint(host_id id, std::optional<inet_address> opt_ep, std::optional<endpoint_dc_rack> opt_dr, std::optional<node::state> opt_st, std::optional<shard_id> shard_count)
const node& topology::add_or_update_endpoint(host_id id, std::optional<endpoint_dc_rack> opt_dr, std::optional<node::state> opt_st, std::optional<shard_id> shard_count)
{
tlogger.trace("topology[{}]: add_or_update_endpoint: host_id={} ep={} dc={} rack={} state={} shards={}, at {}", fmt::ptr(this),
id, opt_ep, opt_dr.value_or(endpoint_dc_rack{}).dc, opt_dr.value_or(endpoint_dc_rack{}).rack, opt_st.value_or(node::state::none), shard_count,
tlogger.trace("topology[{}]: add_or_update_endpoint: host_id={} dc={} rack={} state={} shards={}, at {}", fmt::ptr(this),
id, opt_dr.value_or(endpoint_dc_rack{}).dc, opt_dr.value_or(endpoint_dc_rack{}).rack, opt_st.value_or(node::state::none), shard_count,
lazy_backtrace());
auto* n = find_node(id);
if (n) {
update_node(*n, std::nullopt, opt_ep, std::move(opt_dr), std::move(opt_st), std::move(shard_count));
return *n;
} else if (opt_ep && (n = make_mutable(find_node(*opt_ep)))) {
update_node(*n, id, std::nullopt, std::move(opt_dr), std::move(opt_st), std::move(shard_count));
update_node(*n, std::nullopt, std::move(opt_dr), std::move(opt_st), std::move(shard_count));
return *n;
}
return add_node(id,
opt_ep.value_or(inet_address{}),
opt_dr.value_or(endpoint_dc_rack::default_location),
opt_st.value_or(node::state::none),
shard_count.value_or(0));
@@ -544,35 +477,6 @@ bool topology::has_node(host_id id) const noexcept {
return bool(node);
}
bool topology::has_node(inet_address ep) const noexcept {
auto node = find_node(ep);
tlogger.trace("topology[{}]: has_node: endpoint={}: node={}", fmt::ptr(this), ep, node_printer(node));
return bool(node);
}
bool topology::has_endpoint(inet_address ep) const
{
return has_node(ep);
}
const endpoint_dc_rack& topology::get_location(const inet_address& ep) const {
if (auto node = find_node(ep)) {
return node->dc_rack();
}
// We should do the following check after lookup in nodes.
// In tests, there may be no config for local node, so fall back to get_location()
// only if no mapping is found. Otherwise, get_location() will return empty location
// from config or random node, neither of which is correct.
if (ep == _cfg.this_endpoint) {
return get_location();
}
// FIXME -- this shouldn't happen. After topology is stable and is
// correctly populated with endpoints, this should be replaced with
// on_internal_error()
tlogger.warn("Requested location for node {} not in topology. backtrace {}", ep, lazy_backtrace());
return endpoint_dc_rack::default_location;
}
void topology::sort_by_proximity(locator::host_id address, host_id_vector_replica_set& addresses) const {
if (can_sort_by_proximity()) {
do_sort_by_proximity(address, addresses);

View File

@@ -59,7 +59,6 @@ public:
private:
const locator::topology* _topology;
locator::host_id _host_id;
inet_address _endpoint;
endpoint_dc_rack _dc_rack;
state _state;
shard_id _shard_count = 0;
@@ -72,7 +71,6 @@ private:
public:
node(const locator::topology* topology,
locator::host_id id,
inet_address endpoint,
endpoint_dc_rack dc_rack,
state state,
shard_id shard_count = 0,
@@ -94,10 +92,6 @@ public:
return _host_id;
}
const inet_address& endpoint() const noexcept {
return _endpoint;
}
const endpoint_dc_rack& dc_rack() const noexcept {
return _dc_rack;
}
@@ -164,7 +158,6 @@ public:
private:
static node_holder make(const locator::topology* topology,
locator::host_id id,
inet_address endpoint,
endpoint_dc_rack dc_rack,
state state,
shard_id shard_count = 0,
@@ -211,7 +204,7 @@ public:
}
// Adds a node with given host_id, endpoint, and DC/rack.
const node& add_node(host_id id, const inet_address& ep, const endpoint_dc_rack& dr, node::state state,
const node& add_node(host_id id, const endpoint_dc_rack& dr, node::state state,
shard_id shard_count = 0);
// Optionally updates node's current host_id, endpoint, or DC/rack.
@@ -219,7 +212,6 @@ public:
// or a peer node host_id may be updated when the node is replaced with another node using the same ip address.
void update_node(node& node,
std::optional<host_id> opt_id,
std::optional<inet_address> opt_ep,
std::optional<endpoint_dc_rack> opt_dr,
std::optional<node::state> opt_st,
std::optional<shard_id> opt_shard_count = std::nullopt);
@@ -241,37 +233,26 @@ public:
return *n;
};
// Looks up a node by its inet_address.
// Returns a pointer to the node if found, or nullptr otherwise.
const node* find_node(const inet_address& ep) const noexcept;
// Finds a node by its index
// Returns a pointer to the node if found, or nullptr otherwise.
const node* find_node(node::idx_type idx) const noexcept;
// Returns true if a node with given host_id is found
bool has_node(host_id id) const noexcept;
bool has_node(inet_address id) const noexcept;
/**
* Stores current DC/rack assignment for ep
*
* Adds or updates a node with given endpoint
*/
const node& add_or_update_endpoint(host_id id, std::optional<inet_address> opt_ep,
std::optional<endpoint_dc_rack> opt_dr = std::nullopt,
const node& add_or_update_endpoint(host_id id, std::optional<endpoint_dc_rack> opt_dr = std::nullopt,
std::optional<node::state> opt_st = std::nullopt,
std::optional<shard_id> shard_count = std::nullopt);
bool remove_endpoint(locator::host_id ep);
/**
* Returns true iff contains given endpoint.
*/
bool has_endpoint(inet_address) const;
const std::unordered_map<sstring,
std::unordered_set<inet_address>>&
std::unordered_set<host_id>>&
get_datacenter_endpoints() const {
return _dc_endpoints;
}
@@ -292,7 +273,7 @@ public:
const std::unordered_map<sstring,
std::unordered_map<sstring,
std::unordered_set<inet_address>>>&
std::unordered_set<host_id>>>&
get_datacenter_racks() const {
return _dc_racks;
}
@@ -310,9 +291,6 @@ public:
const endpoint_dc_rack& get_location(host_id id) const {
return find_node(id)->dc_rack();
}
// Get dc/rack location of a node identified by endpoint
// The specified node must exist.
const endpoint_dc_rack& get_location(const inet_address& ep) const;
// Get datacenter of this node
const sstring& get_datacenter() const noexcept {
@@ -323,11 +301,6 @@ public:
const sstring& get_datacenter(host_id id) const {
return get_location(id).dc;
}
// Get datacenter of a node identified by endpoint
// The specified node must exist.
const sstring& get_datacenter(inet_address ep) const {
return get_location(ep).dc;
}
// Get rack of this node
const sstring& get_rack() const noexcept {
@@ -338,11 +311,6 @@ public:
const sstring& get_rack(host_id id) const {
return get_location(id).rack;
}
// Get rack of a node identified by endpoint
// The specified node must exist.
const sstring& get_rack(inet_address ep) const {
return get_location(ep).rack;
}
auto get_local_dc_filter() const noexcept {
return [ this, local_dc = get_datacenter() ] (auto ep) {
@@ -406,10 +374,6 @@ public:
return id == my_host_id();
}
bool is_me(const inet_address& addr) const noexcept {
return addr == my_address();
}
private:
using random_engine_type = std::mt19937_64;
@@ -434,20 +398,19 @@ private:
const node* _this_node = nullptr;
std::vector<node_holder> _nodes;
std::unordered_map<host_id, std::reference_wrapper<const node>> _nodes_by_host_id;
std::unordered_map<inet_address, std::reference_wrapper<const node>> _nodes_by_endpoint;
std::unordered_map<sstring, std::unordered_set<std::reference_wrapper<const node>>> _dc_nodes;
std::unordered_map<sstring, std::unordered_map<sstring, std::unordered_set<std::reference_wrapper<const node>>>> _dc_rack_nodes;
/** multi-map: DC -> endpoints in that DC */
std::unordered_map<sstring,
std::unordered_set<inet_address>>
std::unordered_set<host_id>>
_dc_endpoints;
/** map: DC -> (multi-map: rack -> endpoints in that rack) */
std::unordered_map<sstring,
std::unordered_map<sstring,
std::unordered_set<inet_address>>>
std::unordered_set<host_id>>>
_dc_racks;
bool _sort_by_proximity = true;
@@ -457,10 +420,6 @@ private:
void calculate_datacenters();
const std::unordered_map<inet_address, std::reference_wrapper<const node>>& get_nodes_by_endpoint() const noexcept {
return _nodes_by_endpoint;
};
mutable random_engine_type _random_engine;
friend class token_metadata_impl;
@@ -514,12 +473,11 @@ struct fmt::formatter<locator::node> : fmt::formatter<string_view> {
template <typename FormatContext>
auto format(const locator::node& node, FormatContext& ctx) const {
if (!verbose) {
return fmt::format_to(ctx.out(), "{}/{}", node.host_id(), node.endpoint());
return fmt::format_to(ctx.out(), "{}", node.host_id());
} else {
return fmt::format_to(ctx.out(), " idx={} host_id={} endpoint={} dc={} rack={} state={} shards={} this_node={}",
return fmt::format_to(ctx.out(), " idx={} host_id={} dc={} rack={} state={} shards={} this_node={}",
node.idx(),
node.host_id(),
node.endpoint(),
node.dc_rack().dc,
node.dc_rack().rack,
locator::node::to_string(node.get_state()),

View File

@@ -12,14 +12,14 @@
namespace locator {
static future<std::unordered_map<dht::token_range, inet_address_vector_replica_set>>
static future<std::unordered_map<dht::token_range, host_id_vector_replica_set>>
construct_range_to_endpoint_map(
locator::effective_replication_map_ptr erm,
const dht::token_range_vector& ranges) {
std::unordered_map<dht::token_range, inet_address_vector_replica_set> res;
std::unordered_map<dht::token_range, host_id_vector_replica_set> res;
res.reserve(ranges.size());
for (auto r : ranges) {
res[r] = erm->get_natural_endpoints(
res[r] = erm->get_natural_replicas(
r.end() ? r.end()->value() : dht::maximum_token());
co_await coroutine::maybe_yield();
}
@@ -47,7 +47,7 @@ get_all_ranges(const std::vector<token>& sorted_tokens) {
}
// Caller is responsible to hold token_metadata valid until the returned future is resolved
static future<std::unordered_map<dht::token_range, inet_address_vector_replica_set>>
future<std::unordered_map<dht::token_range, host_id_vector_replica_set>>
get_range_to_address_map(locator::effective_replication_map_ptr erm,
const std::vector<token>& sorted_tokens) {
co_return co_await construct_range_to_endpoint_map(erm, co_await get_all_ranges(sorted_tokens));
@@ -67,12 +67,12 @@ get_tokens_in_local_dc(const locator::token_metadata& tm) {
co_return filtered_tokens;
}
static future<std::unordered_map<dht::token_range, inet_address_vector_replica_set>>
static future<std::unordered_map<dht::token_range, host_id_vector_replica_set>>
get_range_to_address_map_in_local_dc(
locator::effective_replication_map_ptr erm) {
auto tmptr = erm->get_token_metadata_ptr();
auto orig_map = co_await get_range_to_address_map(erm, co_await get_tokens_in_local_dc(*tmptr));
std::unordered_map<dht::token_range, inet_address_vector_replica_set> filtered_map;
std::unordered_map<dht::token_range, host_id_vector_replica_set> filtered_map;
filtered_map.reserve(orig_map.size());
auto local_dc_filter = tmptr->get_topology().get_local_dc_filter();
for (auto entry : orig_map) {
@@ -90,7 +90,7 @@ get_range_to_address_map_in_local_dc(
// return get_range_to_address_map(db.find_keyspace(keyspace).get_vnode_effective_replication_map());
// }
static future<std::unordered_map<dht::token_range, inet_address_vector_replica_set>>
future<std::unordered_map<dht::token_range, host_id_vector_replica_set>>
get_range_to_address_map(locator::effective_replication_map_ptr erm) {
return get_range_to_address_map(erm, erm->get_token_metadata_ptr()->sorted_tokens());
}
@@ -100,7 +100,7 @@ describe_ring(const replica::database& db, const gms::gossiper& gossiper, const
std::vector<dht::token_range_endpoints> ranges;
auto erm = db.find_keyspace(keyspace).get_vnode_effective_replication_map();
std::unordered_map<dht::token_range, inet_address_vector_replica_set> range_to_address_map = co_await (
std::unordered_map<dht::token_range, host_id_vector_replica_set> range_to_address_map = co_await (
include_only_local_dc
? get_range_to_address_map_in_local_dc(erm)
: get_range_to_address_map(erm)
@@ -119,10 +119,10 @@ describe_ring(const replica::database& db, const gms::gossiper& gossiper, const
}
for (auto endpoint : addresses) {
dht::endpoint_details details;
details._host = endpoint;
details._host = gossiper.get_address_map().get(endpoint);
details._datacenter = topology.get_datacenter(endpoint);
details._rack = topology.get_rack(endpoint);
tr._rpc_endpoints.push_back(gossiper.get_rpc_address(endpoint));
tr._rpc_endpoints.push_back(gossiper.get_rpc_address(details._host));
tr._endpoints.push_back(fmt::to_string(details._host));
tr._endpoint_details.push_back(details);
}

View File

@@ -9,6 +9,9 @@
#pragma once
#include "dht/token_range_endpoints.hh"
#include "dht/i_partitioner_fwd.hh"
#include "inet_address_vectors.hh"
#include "locator/abstract_replication_strategy.hh"
namespace replica {
class database;
@@ -20,4 +23,6 @@ namespace gms {
namespace locator {
future<std::vector<dht::token_range_endpoints>> describe_ring(const replica::database& db, const gms::gossiper& gossiper, const sstring& keyspace, bool include_only_local_dc = false);
future<std::unordered_map<dht::token_range, host_id_vector_replica_set>> get_range_to_address_map(
locator::effective_replication_map_ptr erm, const std::vector<token>& sorted_tokens);
}

22
main.cc
View File

@@ -1103,7 +1103,7 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
// token_metadata.stop().get();
//});
api::set_server_token_metadata(ctx, token_metadata).get();
api::set_server_token_metadata(ctx, token_metadata, gossiper).get();
auto stop_tokens_api = defer_verbose_shutdown("token metadata API", [&ctx] {
api::unset_server_token_metadata(ctx).get();
});
@@ -1475,12 +1475,11 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
const auto listen_address = utils::resolve(cfg->listen_address, family).get();
const auto host_id = initialize_local_info_thread(sys_ks, snitch, listen_address, *cfg, broadcast_addr, broadcast_rpc_addr);
shared_token_metadata::mutate_on_all_shards(token_metadata, [host_id, endpoint = broadcast_addr] (locator::token_metadata& tm) {
shared_token_metadata::mutate_on_all_shards(token_metadata, [host_id] (locator::token_metadata& tm) {
// Makes local host id available in topology cfg as soon as possible.
// Raft topology discard the endpoint-to-id map, so the local id can
// still be found in the config.
tm.get_topology().set_host_id_cfg(host_id);
tm.get_topology().add_or_update_endpoint(host_id, endpoint);
return make_ready_future<>();
}).get();
@@ -2128,7 +2127,18 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
group0_service.setup_group0_if_exist(sys_ks.local(), ss.local(), qp.local(), mm.local()).get();
with_scheduling_group(maintenance_scheduling_group, [&] {
return messaging.invoke_on_all(&netw::messaging_service::start_listen, std::ref(token_metadata));
return messaging.invoke_on_all([&] (auto& ms) {
return ms.start_listen(token_metadata.local(), [&gossiper] (gms::inet_address ip) {
if (ip == gossiper.local().get_broadcast_address()) {
return gossiper.local().my_host_id();
}
try {
return gossiper.local().get_host_id(ip);
} catch (...) {
return locator::host_id{};
}
});
});
}).get();
const auto generation_number = gms::generation_type(sys_ks.local().increment_and_get_generation().get());
@@ -2272,7 +2282,7 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
supervisor::notify("allow replaying hints");
proxy.invoke_on_all(&service::storage_proxy::allow_replaying_hints).get();
api::set_hinted_handoff(ctx, proxy).get();
api::set_hinted_handoff(ctx, proxy, gossiper).get();
auto stop_hinted_handoff_api = defer_verbose_shutdown("hinted handoff API", [&ctx] {
api::unset_hinted_handoff(ctx).get();
});
@@ -2291,7 +2301,7 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
view_builder.invoke_on_all(&db::view::view_builder::drain).get();
});
api::set_server_view_builder(ctx, view_builder).get();
api::set_server_view_builder(ctx, view_builder, gossiper).get();
auto stop_vb_api = defer_verbose_shutdown("view builder API", [&ctx] {
api::unset_server_view_builder(ctx).get();
});

View File

@@ -257,16 +257,6 @@ const uint64_t* messaging_service::get_dropped_messages() const {
return _dropped_messages;
}
int32_t messaging_service::get_raw_version(const gms::inet_address& endpoint) const {
// FIXME: messaging service versioning
return current_version;
}
bool messaging_service::knows_version(const gms::inet_address& endpoint) const {
// FIXME: messaging service versioning
return true;
}
future<> messaging_service::unregister_handler(messaging_verb verb) {
return _rpc->unregister_handler(verb);
}
@@ -309,8 +299,9 @@ future<> messaging_service::start() {
return make_ready_future<>();
}
future<> messaging_service::start_listen(locator::shared_token_metadata& stm) {
future<> messaging_service::start_listen(locator::shared_token_metadata& stm, std::function<locator::host_id(gms::inet_address)> address_to_host_id_mapper) {
_token_metadata = &stm;
_address_to_host_id_mapper = std::move(address_to_host_id_mapper);
do_start_listen();
return make_ready_future<>();
}
@@ -318,20 +309,21 @@ future<> messaging_service::start_listen(locator::shared_token_metadata& stm) {
bool messaging_service::topology_known_for(inet_address addr) const {
// The token metadata pointer is nullptr before
// the service is start_listen()-ed and after it's being shutdown()-ed.
const locator::node* node;
return _token_metadata
&& _token_metadata->get()->get_topology().has_endpoint(addr);
&& (node = _token_metadata->get()->get_topology().find_node(_address_to_host_id_mapper(addr))) && !node->is_none();
}
// Precondition: `topology_known_for(addr)`.
bool messaging_service::is_same_dc(inet_address addr) const {
const auto& topo = _token_metadata->get()->get_topology();
return topo.get_datacenter(addr) == topo.get_datacenter();
return topo.get_datacenter(_address_to_host_id_mapper(addr)) == topo.get_datacenter();
}
// Precondition: `topology_known_for(addr)`.
bool messaging_service::is_same_rack(inet_address addr) const {
const auto& topo = _token_metadata->get()->get_topology();
return topo.get_rack(addr) == topo.get_rack();
return topo.get_rack(_address_to_host_id_mapper(addr)) == topo.get_rack();
}
// The socket metrics domain defines the way RPC metrics are grouped
@@ -344,12 +336,15 @@ bool messaging_service::is_same_rack(inet_address addr) const {
// that the isolation cookie suits very well, because these cookies
// are different for different indices and are more informative than
// plain numbers
sstring messaging_service::client_metrics_domain(unsigned idx, inet_address addr) const {
sstring messaging_service::client_metrics_domain(unsigned idx, inet_address addr, std::optional<locator::host_id> id) const {
sstring ret = _scheduling_info_for_connection_index[idx].isolation_cookie;
if (!id) {
id = _address_to_host_id_mapper(addr);
}
if (_token_metadata) {
const auto& topo = _token_metadata->get()->get_topology();
if (topo.has_endpoint(addr)) {
ret += ":" + topo.get_datacenter(addr);
if (topo.has_node(*id)) {
ret += ":" + topo.get_datacenter(*id);
}
}
return ret;
@@ -1119,7 +1114,7 @@ shared_ptr<messaging_service::rpc_protocol_client_wrapper> messaging_service::ge
opts.tcp_nodelay = must_tcp_nodelay;
opts.reuseaddr = true;
opts.isolation_cookie = _scheduling_info_for_connection_index[idx].isolation_cookie;
opts.metrics_domain = client_metrics_domain(idx, id.addr); // not just `addr` as the latter may be internal IP
opts.metrics_domain = client_metrics_domain(idx, id.addr, host_id); // not just `addr` as the latter may be internal IP
SCYLLA_ASSERT(!must_encrypt || _credentials);

View File

@@ -272,10 +272,6 @@ public:
const uint64_t* get_dropped_messages() const;
int32_t get_raw_version(const gms::inet_address& endpoint) const;
bool knows_version(const gms::inet_address& endpoint) const;
enum class encrypt_what {
none,
rack,
@@ -341,6 +337,8 @@ private:
private:
config _cfg;
locator::shared_token_metadata* _token_metadata = nullptr;
// a function that maps from ip to host id if known (returns default constructable host_id if there is no mapping)
std::function<locator::host_id(gms::inet_address)> _address_to_host_id_mapper;
// map: Node broadcast address -> Node internal IP, and the reversed mapping, for communication within the same data center
std::unordered_map<gms::inet_address, gms::inet_address> _preferred_ip_cache, _preferred_to_endpoint;
std::unique_ptr<rpc_protocol_wrapper> _rpc;
@@ -382,7 +380,7 @@ public:
~messaging_service();
future<> start();
future<> start_listen(locator::shared_token_metadata& stm);
future<> start_listen(locator::shared_token_metadata& stm, std::function<locator::host_id(gms::inet_address)> address_to_host_id_mapper);
uint16_t port() const noexcept {
return _cfg.port;
}
@@ -459,7 +457,7 @@ private:
bool is_host_banned(locator::host_id);
sstring client_metrics_domain(unsigned idx, inet_address addr) const;
sstring client_metrics_domain(unsigned idx, inet_address addr, std::optional<locator::host_id> id) const;
public:
// Return rpc::protocol::client for a shard which is a ip + cpuid pair.

View File

@@ -143,7 +143,7 @@ future<> node_ops_ctl::abort_on_error(node_ops_cmd cmd, std::exception_ptr ex) n
future<> node_ops_ctl::send_to_all(node_ops_cmd cmd) {
req.cmd = cmd;
req.ignore_nodes = ignore_nodes |
std::views::transform([&] (locator::host_id id) { return tmptr->get_endpoint_for_host_id(id); }) |
std::views::transform([&] (locator::host_id id) { return ss.gossiper().get_address_map().get(id); }) |
std::ranges::to<std::list>();
sstring op_desc = ::format("{}", cmd);
nlogger.info("{}[{}]: Started {}", desc, uuid(), req);

View File

@@ -226,6 +226,7 @@ repair_neighbors::repair_neighbors(std::vector<locator::host_id> nodes, std::vec
// Return all of the neighbors with whom we share the provided range.
static std::vector<locator::host_id> get_neighbors(
const gms::gossiper& gossiper,
const locator::effective_replication_map& erm,
const sstring& ksname, query::range<dht::token> range,
const std::vector<sstring>& data_centers,
@@ -282,17 +283,19 @@ static std::vector<locator::host_id> get_neighbors(
} catch(...) {
throw std::runtime_error(format("Unknown host specified: {}", host));
}
auto endpoint = erm.get_token_metadata().get_host_id_if_known(ip);
if (endpoint) {
try {
auto endpoint = gossiper.get_host_id(ip);
if (endpoint == my_address) {
found_me = true;
} else if (neighbor_set.contains(*endpoint)) {
ret.push_back(*endpoint);
} else if (neighbor_set.contains(endpoint)) {
ret.push_back(endpoint);
// If same host is listed twice, don't add it again later
neighbor_set.erase(*endpoint);
} else {
rlogger.warn("Provided host ip {} has no corresponding host id", ip);
neighbor_set.erase(endpoint);
}
} catch (...) {
rlogger.warn("Provided host ip {} has no corresponding host id", ip);
}
// Nodes which aren't neighbors for this range are ignored.
// This allows the user to give a list of "good" nodes, where
@@ -329,6 +332,7 @@ static std::vector<locator::host_id> get_neighbors(
}
static future<std::list<locator::host_id>> get_hosts_participating_in_repair(
const gms::gossiper& gossiper,
const locator::effective_replication_map& erm,
const sstring& ksname,
const dht::token_range_vector& ranges,
@@ -344,7 +348,7 @@ static future<std::list<locator::host_id>> get_hosts_participating_in_repair(
participating_hosts.insert(my_address);
co_await do_for_each(ranges, [&] (const dht::token_range& range) {
const auto nbs = get_neighbors(erm, ksname, range, data_centers, hosts, ignore_nodes);
const auto nbs = get_neighbors(gossiper, erm, ksname, range, data_centers, hosts, ignore_nodes);
for (const auto& nb : nbs) {
participating_hosts.insert(nb);
}
@@ -676,7 +680,7 @@ void repair::shard_repair_task_impl::check_in_abort_or_shutdown() {
repair_neighbors repair::shard_repair_task_impl::get_repair_neighbors(const dht::token_range& range) {
return neighbors.empty() ?
repair_neighbors(get_neighbors(*erm, _status.keyspace, range, data_centers, hosts, ignore_nodes, _small_table_optimization)) :
repair_neighbors(get_neighbors(gossiper, *erm, _status.keyspace, range, data_centers, hosts, ignore_nodes, _small_table_optimization)) :
neighbors[range];
}
@@ -1306,7 +1310,7 @@ future<int> repair_service::do_repair_start(gms::gossip_address_map& addr_map, s
}
auto ranges_parallelism = options.ranges_parallelism == -1 ? std::nullopt : std::optional<int>(options.ranges_parallelism);
auto task = co_await _repair_module->make_and_start_task<repair::user_requested_repair_task_impl>({}, id, std::move(keyspace), "", germs, std::move(cfs), std::move(ranges), std::move(options.hosts), std::move(options.data_centers), std::move(ignore_nodes), small_table_optimization, ranges_parallelism);
auto task = co_await _repair_module->make_and_start_task<repair::user_requested_repair_task_impl>({}, id, std::move(keyspace), "", germs, std::move(cfs), std::move(ranges), std::move(options.hosts), std::move(options.data_centers), std::move(ignore_nodes), small_table_optimization, ranges_parallelism, _gossiper.local());
co_return id.id;
}
@@ -1331,7 +1335,7 @@ future<> repair::user_requested_repair_task_impl::run() {
auto normal_nodes = germs->get().get_token_metadata().get_normal_token_owners();
participants = std::list<locator::host_id>(normal_nodes.begin(), normal_nodes.end());
} else {
participants = get_hosts_participating_in_repair(germs->get(), keyspace, ranges, data_centers, hosts, ignore_nodes).get();
participants = get_hosts_participating_in_repair(_gossiper, germs->get(), keyspace, ranges, data_centers, hosts, ignore_nodes).get();
}
auto [hints_batchlog_flushed, flush_time] = rs.flush_hints(id, keyspace, cfs, ignore_nodes).get();
@@ -2146,11 +2150,11 @@ future<> repair_service::do_rebuild_replace_with_repair(std::unordered_map<sstri
return sync_nodes.contains(node);
}) | std::views::transform([&topology] (const auto& node) {
const auto& n = topology.get_node(node);
return std::make_pair(n.host_id(), n.endpoint());
}) | std::ranges::to<std::unordered_map>();
return n.host_id();
}) | std::ranges::to<std::vector>();
rlogger.debug("{}: keyspace={}, range={}, natural_enpoints={}, neighbors={}", op, keyspace_name, r, natural_eps, neighbors);
if (!neighbors.empty()) {
range_sources[r] = repair_neighbors(neighbors);
range_sources[r] = repair_neighbors(std::move(neighbors));
++it;
} else {
// Skip the range with zero neighbors

View File

@@ -143,9 +143,6 @@ public:
explicit repair_neighbors(std::vector<locator::host_id> a)
: all(std::move(a)) {
}
explicit repair_neighbors(const std::unordered_map<locator::host_id, gms::inet_address>& a)
: all(a | std::views::keys | std::ranges::to<std::vector<locator::host_id>>()) {
}
repair_neighbors(std::vector<locator::host_id> a, std::vector<locator::host_id> m)
: all(std::move(a))
, mandatory(std::move(m)) {

View File

@@ -2317,7 +2317,7 @@ future<repair_flush_hints_batchlog_response> repair_service::repair_flush_hints_
auto flush_time = now;
if (cache_disabled || (now - _flush_hints_batchlog_time > cache_time)) {
// Empty targets meants all nodes
db::hints::sync_point sync_point = co_await _sp.local().create_hint_sync_point(std::vector<gms::inet_address>{});
db::hints::sync_point sync_point = co_await _sp.local().create_hint_sync_point(std::vector<locator::host_id>{});
lowres_clock::time_point deadline = lowres_clock::now() + req.hints_timeout;
try {
bool bm_throw = utils::get_local_injector().enter("repair_flush_hints_batchlog_handler_bm_uninitialized");

View File

@@ -49,8 +49,9 @@ private:
std::unordered_set<locator::host_id> _ignore_nodes;
bool _small_table_optimization;
std::optional<int> _ranges_parallelism;
gms::gossiper& _gossiper;
public:
user_requested_repair_task_impl(tasks::task_manager::module_ptr module, repair_uniq_id id, std::string keyspace, std::string entity, lw_shared_ptr<locator::global_vnode_effective_replication_map> germs, std::vector<sstring> cfs, dht::token_range_vector ranges, std::vector<sstring> hosts, std::vector<sstring> data_centers, std::unordered_set<locator::host_id> ignore_nodes, bool small_table_optimization, std::optional<int> ranges_parallelism) noexcept
user_requested_repair_task_impl(tasks::task_manager::module_ptr module, repair_uniq_id id, std::string keyspace, std::string entity, lw_shared_ptr<locator::global_vnode_effective_replication_map> germs, std::vector<sstring> cfs, dht::token_range_vector ranges, std::vector<sstring> hosts, std::vector<sstring> data_centers, std::unordered_set<locator::host_id> ignore_nodes, bool small_table_optimization, std::optional<int> ranges_parallelism, gms::gossiper& gossiper) noexcept
: repair_task_impl(module, id.uuid(), id.id, "keyspace", std::move(keyspace), "", std::move(entity), tasks::task_id::create_null_id(), streaming::stream_reason::repair)
, _germs(germs)
, _cfs(std::move(cfs))
@@ -60,6 +61,7 @@ public:
, _ignore_nodes(std::move(ignore_nodes))
, _small_table_optimization(small_table_optimization)
, _ranges_parallelism(ranges_parallelism)
, _gossiper(gossiper)
{}
virtual tasks::is_abortable is_abortable() const noexcept override {

View File

@@ -298,6 +298,15 @@ public:
return entry._addr;
}
// Same as find() above but expects mapping to exist
gms::inet_address get(locator::host_id id) const {
try {
return find(id).value();
} catch (std::bad_optional_access& err) {
on_internal_error(rslog, fmt::format("No ip address for {} when one is expected", id));
}
}
// Find an id with a given mapping.
//
// If a mapping is expiring, the last access timestamp is updated automatically.

View File

@@ -932,15 +932,11 @@ future<> migration_manager::announce_without_raft(std::vector<mutation> schema,
try {
using namespace std::placeholders;
auto all_live = _gossiper.get_live_members();
auto live_members = all_live | std::views::filter([this, my_address = _messaging.broadcast_address()] (const gms::inet_address& endpoint) {
auto live_members = all_live | std::views::filter([my_address = _gossiper.my_host_id()] (const locator::host_id& endpoint) {
// only push schema to nodes with known and equal versions
return endpoint != my_address &&
_messaging.knows_version(endpoint) &&
_messaging.get_raw_version(endpoint) == netw::messaging_service::current_version;
return endpoint != my_address;
});
// FIXME: gossiper should return host id set
auto live_host_ids = live_members | std::views::transform([&] (const gms::inet_address& ip) { return _gossiper.get_host_id(ip); });
co_await coroutine::parallel_for_each(live_host_ids,
co_await coroutine::parallel_for_each(live_members,
std::bind(std::mem_fn(&migration_manager::push_schema_mutation), this, std::placeholders::_1, schema));
} catch (...) {
mlogger.error("failed to announce migration to all nodes: {}", std::current_exception());

View File

@@ -894,7 +894,7 @@ future<> service_level_controller::do_remove_service_level(sstring name, bool re
void service_level_controller::on_join_cluster(const gms::inet_address& endpoint) { }
void service_level_controller::on_leave_cluster(const gms::inet_address& endpoint, const locator::host_id& hid) {
if (this_shard_id() == global_controller && _token_metadata.get()->get_topology().is_me(endpoint)) {
if (this_shard_id() == global_controller && _token_metadata.get()->get_topology().is_me(hid)) {
_global_controller_db->dist_data_update_aborter.request_abort();
_global_controller_db->group0_aborter.request_abort();
}

View File

@@ -105,13 +105,13 @@ namespace {
template<size_t N>
utils::small_vector<locator::host_id, N> addr_vector_to_id(const locator::topology& topo, const utils::small_vector<gms::inet_address, N>& set) {
utils::small_vector<locator::host_id, N> addr_vector_to_id(const gms::gossiper& g, const utils::small_vector<gms::inet_address, N>& set) {
return set | std::views::transform([&] (gms::inet_address ip) {
auto* node = topo.find_node(ip);
if (!node) {
try {
return g.get_host_id(ip);
} catch (...) {
on_internal_error(slogger, fmt::format("addr_vector_to_id cannot map {} to host id", ip));
}
return node->host_id();
}) | std::ranges::to<utils::small_vector<locator::host_id, N>>();
}
@@ -186,10 +186,6 @@ locator::host_id storage_proxy::my_host_id(const locator::effective_replication_
return erm.get_topology().my_host_id();
}
bool storage_proxy::is_me(gms::inet_address addr) const noexcept {
return local_db().get_token_metadata().get_topology().is_me(addr);
}
bool storage_proxy::is_me(const locator::effective_replication_map& erm, locator::host_id id) const noexcept {
return erm.get_topology().is_me(id);
}
@@ -475,7 +471,9 @@ public:
}
future<> send_truncate_blocking(sstring keyspace, sstring cfname, std::chrono::milliseconds timeout_in_ms) {
if (!_gossiper.get_unreachable_token_owners().empty()) {
auto s = _sp.local_db().find_schema(keyspace, cfname);
auto erm_ptr = s->table().get_effective_replication_map();
if (!std::ranges::all_of(erm_ptr->get_token_metadata().get_normal_token_owners(), std::bind_front(&storage_proxy::is_alive, &_sp, std::cref(*erm_ptr)))) {
slogger.info("Cannot perform truncate, some hosts are down");
// Since the truncate operation is so aggressive and is typically only
// invoked by an admin, for simplicity we require that all nodes are up
@@ -589,7 +587,7 @@ private:
}
auto reply_to_host_id = reply_to_id ? *reply_to_id : _gossiper.get_host_id(reply_to);
auto forward_host_id = forward_id ? std::move(*forward_id) : addr_vector_to_id(_sp._shared_token_metadata.get()->get_topology(), forward);
auto forward_host_id = forward_id ? std::move(*forward_id) : addr_vector_to_id(_gossiper, forward);
if (reply_to_id) {
_gossiper.get_mutable_address_map().opt_add_entry(reply_to_host_id, reply_to);
@@ -1058,7 +1056,9 @@ private:
void connection_dropped(gms::inet_address addr, std::optional<locator::host_id> id) {
slogger.debug("Drop hit rate info for {} because of disconnect", addr);
if (!id) {
id = _sp.get_token_metadata_ptr()->get_host_id_if_known(addr);
try {
id = _gossiper.get_host_id(addr);
} catch (...) {}
}
if (!id) {
return;
@@ -1246,8 +1246,7 @@ public:
tracing::trace_state_ptr tr_state) override {
auto m = _mutations[hid];
if (m) {
const auto ep = ermptr->get_token_metadata().get_endpoint_for_host_id(hid);
return hm.store_hint(hid, ep, _schema, std::move(m), tr_state);
return hm.store_hint(hid, _schema, std::move(m), tr_state);
} else {
return false;
}
@@ -1305,8 +1304,7 @@ public:
}
virtual bool store_hint(db::hints::manager& hm, locator::host_id hid, locator::effective_replication_map_ptr ermptr,
tracing::trace_state_ptr tr_state) override {
const auto ep = ermptr->get_token_metadata().get_endpoint_for_host_id(hid);
return hm.store_hint(hid, ep, _schema, _mutation, tr_state);
return hm.store_hint(hid, _schema, _mutation, tr_state);
}
virtual future<> apply_locally(storage_proxy& sp, storage_proxy::clock_type::time_point timeout,
tracing::trace_state_ptr tr_state, db::per_partition_rate_limit::info rate_limit_info,
@@ -3390,7 +3388,7 @@ storage_proxy::create_write_response_handler_helper(schema_ptr s, const dht::tok
live_endpoints.reserve(all.size());
dead_endpoints.reserve(all.size());
std::partition_copy(all.begin(), all.end(), std::back_inserter(live_endpoints),
std::back_inserter(dead_endpoints), std::bind_front(&storage_proxy::is_alive_id, this, std::cref(*erm)));
std::back_inserter(dead_endpoints), std::bind_front(&storage_proxy::is_alive, this, std::cref(*erm)));
db::per_partition_rate_limit::info rate_limit_info;
if (allow_limit && _db.local().can_apply_per_partition_rate_limit(*s, db::operation_type::write)) {
@@ -3767,7 +3765,7 @@ storage_proxy::get_paxos_participants(const sstring& ks_name, const locator::eff
auto all_as_spans = std::array{std::span(natural_endpoints), std::span(pending_endpoints)};
std::ranges::copy(all_as_spans | std::views::join |
std::views::filter(std::bind_front(&storage_proxy::is_alive_id, this, std::cref(erm))), std::back_inserter(live_endpoints));
std::views::filter(std::bind_front(&storage_proxy::is_alive, this, std::cref(erm))), std::back_inserter(live_endpoints));
if (live_endpoints.size() < required_participants) {
throw exceptions::unavailable_exception(cl_for_paxos, required_participants, live_endpoints.size());
@@ -4017,7 +4015,7 @@ storage_proxy::mutate_atomically_result(std::vector<mutation> mutations, db::con
std::ranges::to<std::unordered_set<locator::host_id>>());
}
auto local_rack = topology.get_rack();
auto chosen_endpoints = endpoint_filter(std::bind_front(&storage_proxy::is_alive_id, &_p, std::cref(*_ermp)), local_addr,
auto chosen_endpoints = endpoint_filter(std::bind_front(&storage_proxy::is_alive, &_p, std::cref(*_ermp)), local_addr,
local_rack, local_token_owners);
if (chosen_endpoints.empty()) {
@@ -4171,7 +4169,7 @@ future<> storage_proxy::send_to_endpoint(
std::array{std::span(pending_endpoints), std::span(target.begin(), target.end())} | std::views::join,
std::inserter(targets, targets.begin()),
std::back_inserter(dead_endpoints),
std::bind_front(&storage_proxy::is_alive_id, this, std::cref(*erm)));
std::bind_front(&storage_proxy::is_alive, this, std::cref(*erm)));
slogger.trace("Creating write handler with live: {}; dead: {}", targets, dead_endpoints);
db::assure_sufficient_live_nodes(cl, *erm, targets, pending_endpoints);
return create_write_response_handler(
@@ -6598,7 +6596,7 @@ future<bool> storage_proxy::cas(schema_ptr schema, shared_ptr<cas_request> reque
host_id_vector_replica_set storage_proxy::get_live_endpoints(const locator::effective_replication_map& erm, const dht::token& token) const {
host_id_vector_replica_set eps = erm.get_natural_replicas(token);
auto itend = std::ranges::remove_if(eps, std::not_fn(std::bind_front(&storage_proxy::is_alive_id, this, std::cref(erm)))).begin();
auto itend = std::ranges::remove_if(eps, std::not_fn(std::bind_front(&storage_proxy::is_alive, this, std::cref(erm)))).begin();
eps.erase(itend, eps.end());
return eps;
}
@@ -6623,7 +6621,7 @@ void storage_proxy::sort_endpoints_by_proximity(const locator::effective_replica
host_id_vector_replica_set storage_proxy::get_endpoints_for_reading(const sstring& ks_name, const locator::effective_replication_map& erm, const dht::token& token) const {
auto endpoints = erm.get_replicas_for_reading(token);
validate_read_replicas(erm, endpoints);
auto it = std::ranges::remove_if(endpoints, std::not_fn(std::bind_front(&storage_proxy::is_alive_id, this, std::cref(erm)))).begin();
auto it = std::ranges::remove_if(endpoints, std::not_fn(std::bind_front(&storage_proxy::is_alive, this, std::cref(erm)))).begin();
endpoints.erase(it, endpoints.end());
sort_endpoints_by_proximity(erm, endpoints);
return endpoints;
@@ -6660,11 +6658,7 @@ storage_proxy::filter_replicas_for_read(
return filter_replicas_for_read(cl, erm, live_endpoints, preferred_endpoints, db::read_repair_decision::NONE, nullptr, cf);
}
bool storage_proxy::is_alive(const gms::inet_address& ep) const {
return _remote ? _remote->is_alive(ep) : is_me(ep);
}
bool storage_proxy::is_alive_id(const locator::effective_replication_map& erm, const locator::host_id& ep) const {
bool storage_proxy::is_alive(const locator::effective_replication_map& erm, const locator::host_id& ep) const {
return is_me(erm, ep) || (_remote ? _remote->is_alive(ep) : false);
}
@@ -6795,7 +6789,7 @@ const db::hints::host_filter& storage_proxy::get_hints_host_filter() const {
return _hints_manager.get_host_filter();
}
future<db::hints::sync_point> storage_proxy::create_hint_sync_point(std::vector<gms::inet_address> target_hosts) const {
future<db::hints::sync_point> storage_proxy::create_hint_sync_point(std::vector<locator::host_id> target_hosts) const {
db::hints::sync_point spoint;
spoint.regular_per_shard_rps.resize(smp::count);
spoint.mv_per_shard_rps.resize(smp::count);

View File

@@ -362,8 +362,7 @@ private:
host_id_vector_replica_set filter_replicas_for_read(db::consistency_level, const locator::effective_replication_map&, host_id_vector_replica_set live_endpoints, const host_id_vector_replica_set& preferred_endpoints, db::read_repair_decision, std::optional<locator::host_id>* extra, replica::column_family*) const;
// As above with read_repair_decision=NONE, extra=nullptr.
host_id_vector_replica_set filter_replicas_for_read(db::consistency_level, const locator::effective_replication_map&, const host_id_vector_replica_set& live_endpoints, const host_id_vector_replica_set& preferred_endpoints, replica::column_family*) const;
bool is_alive(const gms::inet_address&) const;
bool is_alive_id(const locator::effective_replication_map& erm, const locator::host_id&) const;
bool is_alive(const locator::effective_replication_map& erm, const locator::host_id&) const;
result<::shared_ptr<abstract_read_executor>> get_read_executor(lw_shared_ptr<query::read_command> cmd,
locator::effective_replication_map_ptr ermp,
schema_ptr schema,
@@ -710,7 +709,7 @@ public:
future<> change_hints_host_filter(db::hints::host_filter new_filter);
const db::hints::host_filter& get_hints_host_filter() const;
future<db::hints::sync_point> create_hint_sync_point(std::vector<gms::inet_address> target_hosts) const;
future<db::hints::sync_point> create_hint_sync_point(std::vector<locator::host_id> target_hosts) const;
future<> wait_for_hint_sync_point(const db::hints::sync_point spoint, clock_type::time_point deadline);
const stats& get_stats() const {

View File

@@ -13,6 +13,7 @@
#include "compaction/task_manager_module.hh"
#include "gc_clock.hh"
#include "raft/raft.hh"
#include <ranges>
#include <seastar/core/sleep.hh>
#include "service/qos/raft_service_level_distributed_data_accessor.hh"
#include "service/qos/service_level_controller.hh"
@@ -355,16 +356,6 @@ static future<> set_gossip_tokens(gms::gossiper& g,
);
}
static std::unordered_map<token, gms::inet_address> get_token_to_endpoint(const locator::token_metadata& tm) {
const auto& map = tm.get_token_to_endpoint();
std::unordered_map<token, gms::inet_address> result;
result.reserve(map.size());
for (const auto [t, id]: map) {
result.insert({t, tm.get_endpoint_for_host_id(id)});
}
return result;
}
/*
* The helper waits for two things
* 1) for schema agreement
@@ -429,26 +420,9 @@ future<storage_service::nodes_to_notify_after_sync> storage_service::sync_raft_t
const auto& am =_address_map;
const auto& t = _topology_state_machine._topology;
auto update_topology = [&] (locator::host_id id, std::optional<inet_address> ip, const replica_state& rs) {
auto update_topology = [&] (locator::host_id id, const replica_state& rs) {
tmptr->update_topology(id, locator::endpoint_dc_rack{rs.datacenter, rs.rack},
to_topology_node_state(rs.state), rs.shard_count);
if (ip) {
tmptr->update_host_id(id, *ip);
}
};
auto get_used_ips = [&, used_ips = std::optional<std::unordered_set<inet_address>>{}]() mutable
-> const std::unordered_set<inet_address>&
{
if (!used_ips) {
used_ips.emplace();
for (const auto& [sid, rs]: boost::range::join(t.normal_nodes, t.transition_nodes)) {
if (const auto used_ip = am.find(locator::host_id{sid.uuid()})) {
used_ips->insert(*used_ip);
}
}
}
return *used_ips;
};
using host_id_to_ip_map_t = std::unordered_map<locator::host_id, gms::inet_address>;
@@ -473,7 +447,7 @@ future<storage_service::nodes_to_notify_after_sync> storage_service::sync_raft_t
auto remove_ip = [&](inet_address ip, locator::host_id host_id, bool notify) -> future<> {
sys_ks_futures.push_back(_sys_ks.local().remove_endpoint(ip));
if (_gossiper.get_endpoint_state_ptr(ip) && !get_used_ips().contains(ip)) {
if (const auto ep = _gossiper.get_endpoint_state_ptr(ip); ep && ep->get_host_id() == host_id) {
co_await _gossiper.force_remove_endpoint(ip, gms::null_permit_id);
if (notify) {
nodes_to_notify.left.push_back({ip, host_id});
@@ -489,7 +463,7 @@ future<storage_service::nodes_to_notify_after_sync> storage_service::sync_raft_t
}
if (t.left_nodes_rs.find(id) != t.left_nodes_rs.end()) {
update_topology(host_id, std::nullopt, t.left_nodes_rs.at(id));
update_topology(host_id, t.left_nodes_rs.at(id));
}
// However if we do that, we need to also implement unbanning a node and do it if `removenode` is aborted.
@@ -547,7 +521,7 @@ future<storage_service::nodes_to_notify_after_sync> storage_service::sync_raft_t
co_await remove_ip(it->second, host_id, false);
}
}
update_topology(host_id, ip, rs);
update_topology(host_id, rs);
co_await tmptr->update_normal_tokens(rs.ring.value().tokens, host_id);
};
@@ -574,7 +548,7 @@ future<storage_service::nodes_to_notify_after_sync> storage_service::sync_raft_t
// Save ip -> id mapping in peers table because we need it on restart, but do not save tokens until owned
sys_ks_futures.push_back(_sys_ks.local().update_peer_info(*ip, host_id, {}));
}
update_topology(host_id, ip, rs);
update_topology(host_id, rs);
if (_topology_state_machine._topology.normal_nodes.empty()) {
// This is the first node in the cluster. Insert the tokens as normal to the token ring early
// so we can perform writes to regular 'distributed' tables during the bootstrap procedure
@@ -602,7 +576,7 @@ future<storage_service::nodes_to_notify_after_sync> storage_service::sync_raft_t
co_await process_normal_node(id, rs);
break;
}
update_topology(host_id, ip, rs);
update_topology(host_id, rs);
co_await tmptr->update_normal_tokens(rs.ring.value().tokens, host_id);
tmptr->add_leaving_endpoint(host_id);
co_await update_topology_change_info(tmptr, ::format("{} {}/{}", rs.state, id, ip));
@@ -615,7 +589,7 @@ future<storage_service::nodes_to_notify_after_sync> storage_service::sync_raft_t
tmptr->update_topology(replaced_host_id, std::nullopt, locator::node::state::being_replaced);
tmptr->add_replacing_endpoint(replaced_host_id, host_id);
if (rs.ring.has_value()) {
update_topology(host_id, ip, rs);
update_topology(host_id, rs);
co_await update_topology_change_info(tmptr, ::format("replacing {}/{} by {}/{}", replaced_id, existing_ip.value_or(gms::inet_address{}), id, ip));
}
}
@@ -803,9 +777,9 @@ future<> storage_service::topology_state_load(state_change_hint hint) {
{
std::vector<future<>> futures;
get_token_metadata_ptr()->get_topology().for_each_node([&](const locator::node& n) {
const auto ep = n.endpoint();
if (ep != inet_address{} && !saved_tmpr->get_topology().has_endpoint(ep)) {
futures.push_back(remove_rpc_client_with_ignored_topology(ep, n.host_id()));
const auto ep = n.host_id();
if (auto ip_opt = _address_map.find(ep); ip_opt && !saved_tmpr->get_topology().has_node(ep)) {
futures.push_back(remove_rpc_client_with_ignored_topology(*ip_opt, n.host_id()));
}
});
co_await when_all_succeed(futures.begin(), futures.end()).discard_result();
@@ -932,7 +906,8 @@ class storage_service::ip_address_updater: public gms::i_endpoint_state_change_s
locator::host_id id(utils::UUID(app_state_ptr->value()));
rslog.debug("ip_address_updater::on_endpoint_change({}) {} {}", ev, endpoint, id);
auto prev_ip = _ss.get_token_metadata().get_endpoint_for_host_id_if_known(id);
// If id maps to different ip in peers table it needs to be updated which is done by sync_raft_topology_nodes below
std::optional<gms::inet_address> prev_ip = co_await _ss.get_ip_from_peers_table(id);
if (prev_ip == endpoint) {
co_return;
}
@@ -1573,7 +1548,7 @@ future<> storage_service::join_topology(sharded<db::system_distributed_keyspace>
tmptr->update_topology(tmptr->get_my_id(), std::nullopt, locator::node::state::replacing);
tmptr->update_topology(ri->host_id, std::move(ri->dc_rack), locator::node::state::being_replaced);
co_await tmptr->update_normal_tokens(bootstrap_tokens, ri->host_id);
tmptr->update_host_id(ri->host_id, *replace_address);
tmptr->add_replacing_endpoint(ri->host_id, tmptr->get_my_id());
replaced_host_id = ri->host_id;
@@ -1582,7 +1557,6 @@ future<> storage_service::join_topology(sharded<db::system_distributed_keyspace>
// therefore we need to "inject" their state here after we
// learn about them in the shadow round initiated in `prepare_replacement_info`.
for (const auto& [host_id, st] : ri->ignore_nodes) {
tmptr->update_host_id(host_id, st.endpoint);
if (st.opt_dc_rack) {
tmptr->update_topology(host_id, st.opt_dc_rack, locator::node::state::normal);
}
@@ -1963,7 +1937,7 @@ future<> storage_service::join_topology(sharded<db::system_distributed_keyspace>
for (auto token : bootstrap_tokens) {
auto existing = tmptr->get_endpoint(token);
if (existing) {
auto eps = _gossiper.get_endpoint_state_ptr(tmptr->get_endpoint_for_host_id(*existing));
auto eps = _gossiper.get_endpoint_state_ptr(*existing);
if (eps && eps->get_update_timestamp() > gms::gossiper::clk::now() - delay) {
throw std::runtime_error("Cannot replace a live node...");
}
@@ -2303,14 +2277,10 @@ future<> storage_service::bootstrap(std::unordered_set<token>& bootstrap_tokens,
future<std::unordered_map<dht::token_range, inet_address_vector_replica_set>>
storage_service::get_range_to_address_map(locator::effective_replication_map_ptr erm) const {
return get_range_to_address_map(erm, erm->get_token_metadata_ptr()->sorted_tokens());
}
// Caller is responsible to hold token_metadata valid until the returned future is resolved
future<std::unordered_map<dht::token_range, inet_address_vector_replica_set>>
storage_service::get_range_to_address_map(locator::effective_replication_map_ptr erm,
const std::vector<token>& sorted_tokens) const {
co_return co_await construct_range_to_endpoint_map(erm, co_await get_all_ranges(sorted_tokens));
co_return (co_await locator::get_range_to_address_map(erm, erm->get_token_metadata_ptr()->sorted_tokens())) |
std::views::transform([&] (auto tid) { return std::make_pair(tid.first,
tid.second | std::views::transform([&] (auto id) { return _address_map.get(id); }) | std::ranges::to<inet_address_vector_replica_set>()); }) |
std::ranges::to<std::unordered_map>();
}
future<> storage_service::handle_state_bootstrap(inet_address endpoint, gms::permit_id pid) {
@@ -2337,9 +2307,8 @@ future<> storage_service::handle_state_bootstrap(inet_address endpoint, gms::per
}
tmptr->remove_endpoint(host_id);
}
tmptr->update_topology(host_id, get_dc_rack_for(endpoint), locator::node::state::bootstrapping);
tmptr->update_topology(host_id, get_dc_rack_for(host_id), locator::node::state::bootstrapping);
tmptr->add_bootstrap_tokens(tokens, host_id);
tmptr->update_host_id(host_id, endpoint);
co_await update_topology_change_info(tmptr, ::format("handle_state_bootstrap {}", endpoint));
co_await replicate_to_all_cores(std::move(tmptr));
@@ -2373,11 +2342,26 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit
if (tmptr->is_normal_token_owner(host_id)) {
slogger.info("handle_state_normal: node {}/{} was already a normal token owner", endpoint, host_id);
}
auto existing = tmptr->get_endpoint_for_host_id_if_known(host_id);
// Old node in replace-with-same-IP scenario.
std::optional<locator::host_id> replaced_id;
auto ips = _gossiper.get_nodes_with_host_id(host_id);
std::optional<inet_address> existing;
if (tmptr->get_topology().find_node(host_id)) {
// If node is not in the topology there is no existsing address
// If there are two addresses for the same id the "other" one is existing
// If there is only one it is existing
if (ips.size() == 2) {
if (ips.erase(endpoint) == 0) {
on_internal_error(slogger, fmt::format("Gossiper has two ips {} for host id {} but none of them is {}", ips, endpoint, host_id));
}
}
existing = *ips.begin();
}
if (existing && *existing != endpoint) {
// This branch in taken when a node changes its IP address.
@@ -2392,8 +2376,6 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit
slogger.warn("Host ID collision for {} between {} and {}; {} is the new owner", host_id, *existing, endpoint, endpoint);
do_remove_node(*existing);
slogger.info("Set host_id={} to be owned by node={}, existing={}", host_id, endpoint, *existing);
tmptr->update_host_id(host_id, endpoint);
} else {
// The new IP has smaller generation than the existing one,
// we are going to remove it, so we add it to the endpoints_to_remove.
@@ -2423,11 +2405,12 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit
// We do this after update_normal_tokens, allowing for tokens to be properly
// migrated to the new host_id.
slogger.info("Host ID {} continues to be owned by {}", host_id, endpoint);
if (const auto old_host_id = tmptr->get_host_id_if_known(endpoint); old_host_id && *old_host_id != host_id) {
// Replace with same IP scenario
slogger.info("The IP {} previously owned host ID {}", endpoint, *old_host_id);
replaced_id = *old_host_id;
auto peers = co_await _sys_ks.local().load_host_ids();
if (peers.contains(endpoint) && peers[endpoint] != host_id) {
replaced_id = peers[endpoint];
slogger.info("The IP {} previously owned host ID {}", endpoint, *replaced_id);
} else {
slogger.info("Host ID {} continues to be owned by {}", host_id, endpoint);
}
} else {
// This branch is taken if this node wasn't involved in node_ops
@@ -2443,8 +2426,6 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit
_normal_state_handled_on_boot.insert(endpoint);
co_return;
}
slogger.info("Set host_id={} to be owned by node={}", host_id, endpoint);
tmptr->update_host_id(host_id, endpoint);
}
// Tokens owned by the handled endpoint.
@@ -2455,7 +2436,10 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit
// token_to_endpoint_map is used to track the current token owners for the purpose of removing replaced endpoints.
// when any token is replaced by a new owner, we track the existing owner in `candidates_for_removal`
// and eventually, if any candidate for removal ends up owning no tokens, it is removed from token_metadata.
std::unordered_map<token, inet_address> token_to_endpoint_map = get_token_to_endpoint(get_token_metadata());
std::unordered_map<token, inet_address> token_to_endpoint_map = get_token_metadata().get_token_to_endpoint() |
std::views::transform([this] (auto& e) {
return std::make_pair(e.first, _address_map.get(e.second));
}) | std::ranges::to<std::unordered_map>();
std::unordered_set<inet_address> candidates_for_removal;
// Here we convert endpoint tokens from gossiper to owned_tokens, which will be assigned as a new
@@ -2531,7 +2515,7 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit
do_notify_joined = true;
}
const auto dc_rack = get_dc_rack_for(endpoint);
const auto dc_rack = get_dc_rack_for(host_id);
tmptr->update_topology(host_id, dc_rack, locator::node::state::normal);
co_await tmptr->update_normal_tokens(owned_tokens, host_id);
if (replaced_id) {
@@ -2574,7 +2558,7 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit
const auto& tm = get_token_metadata();
auto ver = tm.get_ring_version();
for (auto& x : tm.get_token_to_endpoint()) {
slogger.debug("handle_state_normal: token_metadata.ring_version={}, token={} -> endpoint={}/{}", ver, x.first, tm.get_endpoint_for_host_id(x.second), x.second);
slogger.debug("handle_state_normal: token_metadata.ring_version={}, token={} -> endpoint={}/{}", ver, x.first, _address_map.get(x.second), x.second);
}
}
_normal_state_handled_on_boot.insert(endpoint);
@@ -2637,25 +2621,31 @@ future<> storage_service::on_join(gms::inet_address endpoint, gms::endpoint_stat
future<> storage_service::on_alive(gms::inet_address endpoint, gms::endpoint_state_ptr state, gms::permit_id pid) {
const auto& tm = get_token_metadata();
const auto tm_host_id_opt = tm.get_host_id_if_known(endpoint);
slogger.debug("endpoint={}/{} on_alive: permit_id={}", endpoint, tm_host_id_opt, pid);
const auto* node = tm.get_topology().find_node(endpoint);
const auto host_id = state->get_host_id();
slogger.debug("endpoint={}/{} on_alive: permit_id={}", endpoint, host_id, pid);
const auto* node = tm.get_topology().find_node(host_id);
if (node && node->is_member()) {
co_await notify_up(endpoint);
} else if (raft_topology_change_enabled()) {
slogger.debug("ignore on_alive since topology changes are using raft and "
"endpoint {}/{} is not a topology member", endpoint, tm_host_id_opt);
"endpoint {}/{} is not a topology member", endpoint, host_id);
} else {
auto tmlock = co_await get_token_metadata_lock();
auto tmptr = co_await get_mutable_token_metadata_ptr();
const auto dc_rack = get_dc_rack_for(endpoint);
const auto host_id = _gossiper.get_host_id(endpoint);
tmptr->update_host_id(host_id, endpoint);
const auto dc_rack = get_dc_rack_for(host_id);
tmptr->update_topology(host_id, dc_rack);
co_await replicate_to_all_cores(std::move(tmptr));
}
}
future<std::optional<gms::inet_address>> storage_service::get_ip_from_peers_table(locator::host_id id) {
auto peers = co_await _sys_ks.local().load_host_ids();
if (auto it = std::ranges::find_if(peers, [&id] (const auto& e) { return e.second == id; }); it != peers.end()) {
co_return it->first;
}
co_return std::nullopt;
}
future<> storage_service::on_change(gms::inet_address endpoint, const gms::application_state_map& states_, gms::permit_id pid) {
// copy the states map locally since the coroutine may yield
auto states = states_;
@@ -2693,14 +2683,14 @@ future<> storage_service::on_change(gms::inet_address endpoint, const gms::appli
const auto host_id = _gossiper.get_host_id(endpoint);
const auto& tm = get_token_metadata();
const auto* node = tm.get_topology().find_node(host_id);
// The check node->endpoint() == endpoint is needed when a node changes
// The check peers[host_id] == endpoint is needed when a node changes
// its IP - on_change can be called by the gossiper for old IP as part
// of its removal, after handle_state_normal has already been called for
// the new one. Without the check, the do_update_system_peers_table call
// overwrites the IP back to its old value.
// In essence, the code under the 'if' should fire if the given IP belongs
// to a cluster member.
if (node && node->is_member() && node->endpoint() == endpoint) {
if (node && node->is_member() && (co_await get_ip_from_peers_table(host_id)) == endpoint) {
if (!is_me(endpoint)) {
slogger.debug("endpoint={}/{} on_change: updating system.peers table", endpoint, host_id);
if (auto info = get_peer_info_for_update(endpoint, states)) {
@@ -2723,7 +2713,7 @@ future<> storage_service::maybe_reconnect_to_preferred_ip(inet_address ep, inet_
}
const auto& topo = get_token_metadata().get_topology();
if (topo.get_datacenter() == topo.get_datacenter(ep) && _messaging.local().get_preferred_ip(ep) != local_ip) {
if (topo.get_datacenter() == topo.get_datacenter(_gossiper.get_host_id(ep)) && _messaging.local().get_preferred_ip(ep) != local_ip) {
slogger.debug("Initiated reconnect to an Internal IP {} for the {}", local_ip, ep);
co_await _messaging.invoke_on_all([ep, local_ip] (auto& local_ms) {
local_ms.cache_preferred_ip(ep, local_ip);
@@ -2740,15 +2730,26 @@ future<> storage_service::on_remove(gms::inet_address endpoint, gms::permit_id p
co_return;
}
locator::host_id host_id;
try {
// It seems gossiper does not check for endpoint existence before calling the callback
// so the lookup may fail, but there is nothing to do in this case.
host_id = _gossiper.get_host_id(endpoint);
} catch (...) {
co_return;
}
// We should handle the case when the host id is mapped to a different address.
// This could happen when an address for the host id changes and the callback here is called
// with the old ip. We should just skip the remove in that case.
if (_address_map.get(host_id) != endpoint) {
co_return;
}
auto tmlock = co_await get_token_metadata_lock();
auto tmptr = co_await get_mutable_token_metadata_ptr();
// We should handle the case when we aren't able to find endpoint -> ip mapping in token_metadata.
// This could happen e.g. when the new endpoint has bigger generation in handle_state_normal - the code
// in handle_state_normal will remap host_id to the new IP and we won't find
// old IP here. We should just skip the remove in that case.
if (const auto host_id = tmptr->get_host_id_if_known(endpoint); host_id) {
tmptr->remove_endpoint(*host_id);
}
tmptr->remove_endpoint(host_id);
co_await update_topology_change_info(tmptr, ::format("on_remove {}", endpoint));
co_await replicate_to_all_cores(std::move(tmptr));
}
@@ -2858,7 +2859,7 @@ std::optional<locator::endpoint_dc_rack> storage_service::get_dc_rack_for(const
};
}
std::optional<locator::endpoint_dc_rack> storage_service::get_dc_rack_for(inet_address endpoint) {
std::optional<locator::endpoint_dc_rack> storage_service::get_dc_rack_for(locator::host_id endpoint) {
auto eps = _gossiper.get_endpoint_state_ptr(endpoint);
if (!eps) {
return std::nullopt;
@@ -3035,7 +3036,6 @@ future<> storage_service::join_cluster(sharded<db::system_distributed_keyspace>&
slogger.debug("Loaded tokens: endpoint={}/{} dc={} rack={} tokens={}", host_id, st.endpoint, dc_rack.dc, dc_rack.rack, st.tokens);
tmptr->update_topology(host_id, dc_rack, locator::node::state::normal);
co_await tmptr->update_normal_tokens(st.tokens, host_id);
tmptr->update_host_id(host_id, st.endpoint);
// gossiping hasn't started yet
// so no need to lock the endpoint
co_await _gossiper.add_saved_endpoint(host_id, st, gms::null_permit_id);
@@ -3387,12 +3387,12 @@ storage_service::prepare_replacement_info(std::unordered_set<gms::inet_address>
}
}
auto dc_rack = get_dc_rack_for(replace_address).value_or(locator::endpoint_dc_rack::default_location);
if (!replace_host_id) {
replace_host_id = _gossiper.get_host_id(replace_address);
}
auto dc_rack = get_dc_rack_for(replace_host_id).value_or(locator::endpoint_dc_rack::default_location);
auto ri = replacement_info {
.tokens = std::move(tokens),
.dc_rack = std::move(dc_rack),
@@ -3449,7 +3449,7 @@ storage_service::prepare_replacement_info(std::unordered_set<gms::inet_address>
}
future<std::map<gms::inet_address, float>> storage_service::get_ownership() {
return run_with_no_api_lock([] (storage_service& ss) {
return run_with_no_api_lock([this] (storage_service& ss) {
const auto& tm = ss.get_token_metadata();
auto token_map = dht::token::describe_ownership(tm.sorted_tokens());
// describeOwnership returns tokens in an unspecified order, let's re-order them
@@ -3457,7 +3457,7 @@ future<std::map<gms::inet_address, float>> storage_service::get_ownership() {
for (auto entry : token_map) {
locator::host_id id = tm.get_endpoint(entry.first).value();
auto token_ownership = entry.second;
ownership[tm.get_endpoint_for_host_id(id)] += token_ownership;
ownership[_address_map.get(id)] += token_ownership;
}
return ownership;
});
@@ -3586,17 +3586,17 @@ future<std::unordered_map<sstring, std::vector<sstring>>> storage_service::descr
std::unordered_map<sstring, std::vector<sstring>> results;
netw::messaging_service& ms = _messaging.local();
return map_reduce(std::move(live_hosts), [&ms, as = abort_source()] (auto host) mutable {
auto f0 = ser::migration_manager_rpc_verbs::send_schema_check(&ms, netw::msg_addr{ host, 0 }, as);
auto f0 = ser::migration_manager_rpc_verbs::send_schema_check(&ms, host, as);
return std::move(f0).then_wrapped([host] (auto f) {
if (f.failed()) {
f.ignore_ready_future();
return std::pair<gms::inet_address, std::optional<table_schema_version>>(host, std::nullopt);
return std::pair<locator::host_id, std::optional<table_schema_version>>(host, std::nullopt);
}
return std::pair<gms::inet_address, std::optional<table_schema_version>>(host, f.get());
return std::pair<locator::host_id, std::optional<table_schema_version>>(host, f.get());
});
}, std::move(results), [] (auto results, auto host_and_version) {
}, std::move(results), [this] (auto results, auto host_and_version) {
auto version = host_and_version.second ? host_and_version.second->to_sstring() : UNREACHABLE;
results.try_emplace(version).first->second.emplace_back(fmt::to_string(host_and_version.first));
results.try_emplace(version).first->second.emplace_back(fmt::to_string(_address_map.get(host_and_version.first)));
return results;
}).then([this] (auto results) {
// we're done: the results map is ready to return to the client. the rest is just debug logging:
@@ -4178,7 +4178,7 @@ future<> storage_service::removenode(locator::host_id host_id, locator::host_id_
}
for (auto& hoep : ignore_nodes_params) {
ctl.ignore_nodes.insert(hoep.resolve_id(*tmptr));
ctl.ignore_nodes.insert(hoep.resolve_id(ss._gossiper));
}
if (!removed_from_token_ring) {
@@ -4406,7 +4406,7 @@ future<node_ops_cmd_response> storage_service::node_ops_cmd_handler(gms::inet_ad
mutate_token_metadata([coordinator, &req, this] (mutable_token_metadata_ptr tmptr) mutable {
for (auto& node : req.leaving_nodes) {
slogger.info("removenode[{}]: Added node={} as leaving node, coordinator={}", req.ops_uuid, node, coordinator);
tmptr->add_leaving_endpoint(tmptr->get_host_id(node));
tmptr->add_leaving_endpoint(_gossiper.get_host_id(node));
}
return update_topology_change_info(tmptr, ::format("removenode {}", req.leaving_nodes));
}).get();
@@ -4414,7 +4414,7 @@ future<node_ops_cmd_response> storage_service::node_ops_cmd_handler(gms::inet_ad
return mutate_token_metadata([this, coordinator, req = std::move(req)] (mutable_token_metadata_ptr tmptr) mutable {
for (auto& node : req.leaving_nodes) {
slogger.info("removenode[{}]: Removed node={} as leaving node, coordinator={}", req.ops_uuid, node, coordinator);
tmptr->del_leaving_endpoint(tmptr->get_host_id(node));
tmptr->del_leaving_endpoint(_gossiper.get_host_id(node));
}
return update_topology_change_info(tmptr, ::format("removenode {}", req.leaving_nodes));
});
@@ -4440,7 +4440,7 @@ future<node_ops_cmd_response> storage_service::node_ops_cmd_handler(gms::inet_ad
}).get();
} else {
slogger.info("removenode[{}]: Started to sync data for removing node={} using stream, coordinator={}", req.ops_uuid, node, coordinator);
removenode_with_stream(get_token_metadata().get_host_id(node), topo_guard, as).get();
removenode_with_stream(_gossiper.get_host_id(node), topo_guard, as).get();
}
}
} else if (req.cmd == node_ops_cmd::removenode_abort) {
@@ -4456,7 +4456,7 @@ future<node_ops_cmd_response> storage_service::node_ops_cmd_handler(gms::inet_ad
mutate_token_metadata([coordinator, &req, this] (mutable_token_metadata_ptr tmptr) mutable {
for (auto& node : req.leaving_nodes) {
slogger.info("decommission[{}]: Added node={} as leaving node, coordinator={}", req.ops_uuid, node, coordinator);
tmptr->add_leaving_endpoint(tmptr->get_host_id(node));
tmptr->add_leaving_endpoint(_gossiper.get_host_id(node));
}
return update_topology_change_info(tmptr, ::format("decommission {}", req.leaving_nodes));
}).get();
@@ -4469,11 +4469,11 @@ future<node_ops_cmd_response> storage_service::node_ops_cmd_handler(gms::inet_ad
// and waits for ring_delay. It's possible the node being decommissioned might
// die after it has sent this notification. If this happens, the node would
// have already been removed from this token_metadata, so we wouldn't find it here.
const auto node_id = tmptr->get_host_id_if_known(node);
slogger.info("decommission[{}]: Removed node={} as leaving node, coordinator={}", req.ops_uuid, node, coordinator);
if (node_id) {
tmptr->del_leaving_endpoint(*node_id);
}
try {
const auto node_id = _gossiper.get_host_id(node);
slogger.info("decommission[{}]: Removed node={} as leaving node, coordinator={}", req.ops_uuid, node, coordinator);
tmptr->del_leaving_endpoint(node_id);
} catch (...) {}
}
return update_topology_change_info(tmptr, ::format("decommission {}", req.leaving_nodes));
});
@@ -4489,7 +4489,10 @@ future<node_ops_cmd_response> storage_service::node_ops_cmd_handler(gms::inet_ad
check_again = false;
for (auto& node : req.leaving_nodes) {
auto tmptr = get_token_metadata_ptr();
const auto host_id = tmptr->get_host_id_if_known(node);
std::optional<locator::host_id> host_id;
try {
host_id = _gossiper.get_host_id(node);
} catch(...) {};
if (host_id && tmptr->is_normal_token_owner(*host_id)) {
check_again = true;
if (std::chrono::steady_clock::now() > start_time + std::chrono::seconds(60)) {
@@ -4520,50 +4523,56 @@ future<node_ops_cmd_response> storage_service::node_ops_cmd_handler(gms::inet_ad
slogger.warn("{}", msg);
throw std::runtime_error(msg);
}
if (req.replace_nodes.size() == 0) {
auto msg = ::format("replace[{}]: Replacing node was not specified", req.ops_uuid);
slogger.warn("{}", msg);
throw std::runtime_error(msg);
}
if (!coordinator_host_id) {
throw std::runtime_error("Coordinator host_id not found");
}
mutate_token_metadata([coordinator, coordinator_host_id, &req, this] (mutable_token_metadata_ptr tmptr) mutable {
for (auto& x: req.replace_nodes) {
auto existing_node = x.first;
auto replacing_node = x.second;
const auto existing_node_id = tmptr->get_host_id(existing_node);
const auto replacing_node_id = *coordinator_host_id;
slogger.info("replace[{}]: Added replacing_node={}/{} to replace existing_node={}/{}, coordinator={}/{}",
req.ops_uuid, replacing_node, replacing_node_id, existing_node, existing_node_id, coordinator, *coordinator_host_id);
// In case of replace-with-same-ip we need to map both host_id-s
// to the same IP. The locator::topology allows this specifically in case
// where one node is being_replaced and another is replacing,
// so here we adjust the state of the original node accordingly.
// The host_id -> IP map works as usual, and IP -> host_id will map
// IP to the being_replaced node - this is what is implied by the
// current code. The IP will be placed in pending_endpoints and
// excluded from normal_endpoints (maybe_remove_node_being_replaced function).
// In handle_state_normal we'll remap the IP to the new host_id.
tmptr->update_topology(existing_node_id, std::nullopt, locator::node::state::being_replaced);
tmptr->update_topology(replacing_node_id, get_dc_rack_for(replacing_node), locator::node::state::replacing);
tmptr->update_host_id(replacing_node_id, replacing_node);
tmptr->add_replacing_endpoint(existing_node_id, replacing_node_id);
auto existing_node = req.replace_nodes.begin()->first;
auto replacing_node = req.replace_nodes.begin()->second;
auto existing_node_id = _sys_ks.local().load_host_ids().get()[existing_node];
mutate_token_metadata([coordinator, coordinator_host_id, existing_node, replacing_node, existing_node_id, &req, this] (mutable_token_metadata_ptr tmptr) mutable {
if (is_me(*coordinator_host_id)) {
// A coordinor already updated token metadata in join_topology()
return make_ready_future<>();
}
const auto replacing_node_id = *coordinator_host_id;
slogger.info("replace[{}]: Added replacing_node={}/{} to replace existing_node={}/{}, coordinator={}/{}",
req.ops_uuid, replacing_node, replacing_node_id, existing_node, existing_node_id, coordinator, *coordinator_host_id);
// In case of replace-with-same-ip we need to map both host_id-s
// to the same IP. The locator::topology allows this specifically in case
// where one node is being_replaced and another is replacing,
// so here we adjust the state of the original node accordingly.
// The host_id -> IP map works as usual, and IP -> host_id will map
// IP to the being_replaced node - this is what is implied by the
// current code. The IP will be placed in pending_endpoints and
// excluded from normal_endpoints (maybe_remove_node_being_replaced function).
// In handle_state_normal we'll remap the IP to the new host_id.
tmptr->update_topology(existing_node_id, std::nullopt, locator::node::state::being_replaced);
tmptr->update_topology(replacing_node_id, get_dc_rack_for(replacing_node_id), locator::node::state::replacing);
tmptr->add_replacing_endpoint(existing_node_id, replacing_node_id);
return make_ready_future<>();
}).get();
auto ignore_nodes = std::move(req.ignore_nodes);
node_ops_insert(ops_uuid, coordinator, std::move(ignore_nodes), [this, coordinator, coordinator_host_id, req = std::move(req)] () mutable {
return mutate_token_metadata([this, coordinator, coordinator_host_id, req = std::move(req)] (mutable_token_metadata_ptr tmptr) mutable {
for (auto& x: req.replace_nodes) {
auto existing_node = x.first;
auto replacing_node = x.second;
const auto existing_node_id = tmptr->get_host_id(existing_node);
const auto replacing_node_id = *coordinator_host_id;
slogger.info("replace[{}]: Removed replacing_node={}/{} to replace existing_node={}/{}, coordinator={}/{}",
req.ops_uuid, replacing_node, replacing_node_id, existing_node, existing_node_id, coordinator, *coordinator_host_id);
tmptr->del_replacing_endpoint(existing_node_id);
const auto dc_rack = get_dc_rack_for(replacing_node);
tmptr->update_topology(existing_node_id, dc_rack, locator::node::state::normal);
tmptr->remove_endpoint(replacing_node_id);
node_ops_insert(ops_uuid, coordinator, std::move(ignore_nodes), [this, coordinator, coordinator_host_id, existing_node, replacing_node, existing_node_id, req = std::move(req)] () mutable {
return mutate_token_metadata([this, coordinator, coordinator_host_id, existing_node, replacing_node, existing_node_id, req = std::move(req)] (mutable_token_metadata_ptr tmptr) mutable {
if (is_me(*coordinator_host_id)) {
// No need to cancel replace operation on a node replacing node since it will be killed anyway
return make_ready_future<>();
}
const auto replacing_node_id = *coordinator_host_id;
slogger.info("replace[{}]: Removed replacing_node={}/{} to replace existing_node={}/{}, coordinator={}/{}",
req.ops_uuid, replacing_node, replacing_node_id, existing_node, existing_node_id, coordinator, *coordinator_host_id);
tmptr->del_replacing_endpoint(existing_node_id);
const auto dc_rack = get_dc_rack_for(replacing_node_id);
tmptr->update_topology(existing_node_id, dc_rack, locator::node::state::normal);
tmptr->remove_endpoint(replacing_node_id);
return update_topology_change_info(tmptr, ::format("replace {}", req.replace_nodes));
});
});
@@ -4606,10 +4615,9 @@ future<node_ops_cmd_response> storage_service::node_ops_cmd_handler(gms::inet_ad
auto& endpoint = x.first;
auto tokens = std::unordered_set<dht::token>(x.second.begin(), x.second.end());
const auto host_id = *coordinator_host_id;
const auto dc_rack = get_dc_rack_for(endpoint);
const auto dc_rack = get_dc_rack_for(host_id);
slogger.info("bootstrap[{}]: Added node={}/{} as bootstrap, coordinator={}/{}",
req.ops_uuid, endpoint, host_id, coordinator, *coordinator_host_id);
tmptr->update_host_id(host_id, endpoint);
tmptr->update_topology(host_id, dc_rack, locator::node::state::bootstrapping);
tmptr->add_bootstrap_tokens(tokens, host_id);
}
@@ -5256,10 +5264,11 @@ storage_service::describe_ring_for_table(const sstring& keyspace_name, const sst
for (auto& r : replicas) {
dht::endpoint_details details;
const auto& node = topology.get_node(r.host);
const auto ip = _address_map.get(r.host);
details._datacenter = node.dc_rack().dc;
details._rack = node.dc_rack().rack;
details._host = node.endpoint();
tr._rpc_endpoints.push_back(_gossiper.get_rpc_address(node.endpoint()));
details._host = ip;
tr._rpc_endpoints.push_back(_gossiper.get_rpc_address(ip));
tr._endpoints.push_back(fmt::to_string(details._host));
tr._endpoint_details.push_back(std::move(details));
}
@@ -5269,29 +5278,14 @@ storage_service::describe_ring_for_table(const sstring& keyspace_name, const sst
co_return ranges;
}
future<std::unordered_map<dht::token_range, inet_address_vector_replica_set>>
storage_service::construct_range_to_endpoint_map(
locator::effective_replication_map_ptr erm,
const dht::token_range_vector& ranges) const {
std::unordered_map<dht::token_range, inet_address_vector_replica_set> res;
res.reserve(ranges.size());
for (auto r : ranges) {
res[r] = erm->get_natural_endpoints(
r.end() ? r.end()->value() : dht::maximum_token());
co_await coroutine::maybe_yield();
}
co_return res;
}
std::map<token, inet_address> storage_service::get_token_to_endpoint_map() {
const auto& tm = get_token_metadata();
std::map<token, inet_address> result;
for (const auto [t, id]: tm.get_token_to_endpoint()) {
result.insert({t, tm.get_endpoint_for_host_id(id)});
result.insert({t, _address_map.get(id)});
}
for (const auto [t, id]: tm.get_bootstrap_tokens()) {
result.insert({t, tm.get_endpoint_for_host_id(id)});
result.insert({t, _address_map.get(id)});
}
return result;
}
@@ -5301,7 +5295,7 @@ future<std::map<token, inet_address>> storage_service::get_tablet_to_endpoint_ma
const auto& tmap = tm.tablets().get_tablet_map(table);
std::map<token, inet_address> result;
for (std::optional<locator::tablet_id> tid = tmap.first_tablet(); tid; tid = tmap.next_tablet(*tid)) {
result.emplace(tmap.get_last_token(*tid), tm.get_endpoint_for_host_id(tmap.get_primary_replica(*tid).host));
result.emplace(tmap.get_last_token(*tid), _address_map.get(tmap.get_primary_replica(*tid).host));
co_await coroutine::maybe_yield();
}
co_return result;
@@ -5357,7 +5351,7 @@ future<> storage_service::update_topology_change_info(mutable_token_metadata_ptr
return std::nullopt;
}
return get_dc_rack_for(tm.get_endpoint_for_host_id(host_id));
return get_dc_rack_for(host_id);
});
co_await tmptr->update_topology_change_info(get_dc_rack_by_host_id);
} catch (...) {
@@ -7227,7 +7221,7 @@ future<> storage_service::force_remove_completion() {
auto leaving = tm.get_leaving_endpoints();
slogger.warn("Removal not confirmed, Leaving={}", leaving);
for (auto host_id : leaving) {
const auto endpoint = tm.get_endpoint_for_host_id_if_known(host_id);
const auto endpoint = ss._address_map.find(host_id);
if (!endpoint) {
slogger.warn("No endpoint is found for host_id {}", host_id);
continue;
@@ -7351,10 +7345,13 @@ storage_service::get_natural_endpoints(const sstring& keyspace,
partition_key pk = partition_key::from_nodetool_style_string(schema, key);
dht::token token = schema->get_partitioner().get_token(*schema, pk.view());
const auto& ks = _db.local().find_keyspace(keyspace);
host_id_vector_replica_set replicas;
if (ks.uses_tablets()) {
return table.get_effective_replication_map()->get_natural_endpoints(token);
replicas = table.get_effective_replication_map()->get_natural_replicas(token);
} else {
replicas = ks.get_vnode_effective_replication_map()->get_natural_replicas(token);
}
return ks.get_vnode_effective_replication_map()->get_natural_endpoints(token);
return replicas | std::views::transform([&] (locator::host_id id) { return _address_map.get(id); }) | std::ranges::to<inet_address_vector_replica_set>();
}
future<> endpoint_lifecycle_notifier::notify_down(gms::inet_address endpoint) {
@@ -7464,7 +7461,7 @@ future<> storage_service::wait_for_normal_state_handled_on_boot() {
static auto fmt_nodes_with_statuses = [this] (const auto& eps) {
return eps | std::views::transform([this] (const auto& ep) {
return ::format("({}, status={})", ep, _gossiper.get_gossip_status(ep));
}) | std::views::join_with(',');
});
};
slogger.info("Started waiting for normal state handlers to finish");

View File

@@ -327,7 +327,7 @@ private:
return get_token_metadata_ptr()->get_topology().my_host_id();
}
bool is_me(inet_address addr) const noexcept {
return get_token_metadata_ptr()->get_topology().is_me(addr);
return addr == get_broadcast_address();
}
bool is_me(locator::host_id id) const noexcept {
return get_token_metadata_ptr()->get_topology().is_me(id);
@@ -428,8 +428,6 @@ private:
public:
future<std::unordered_map<dht::token_range, inet_address_vector_replica_set>> get_range_to_address_map(locator::effective_replication_map_ptr erm) const;
future<std::unordered_map<dht::token_range, inet_address_vector_replica_set>> get_range_to_address_map(locator::effective_replication_map_ptr erm,
const std::vector<token>& sorted_tokens) const;
/**
* The same as {@code describeRing(String)} but converts TokenRange to the String for JMX compatibility
@@ -466,15 +464,6 @@ public:
*/
future<std::map<token, inet_address>> get_tablet_to_endpoint_map(table_id table);
/**
* Construct the range to endpoint mapping based on the true view
* of the world.
* @param ranges
* @return mapping of ranges to the replicas responsible for them.
*/
future<std::unordered_map<dht::token_range, inet_address_vector_replica_set>> construct_range_to_endpoint_map(
locator::effective_replication_map_ptr erm,
const dht::token_range_vector& ranges) const;
public:
virtual future<> on_join(gms::inet_address endpoint, gms::endpoint_state_ptr ep_state, gms::permit_id) override;
/*
@@ -541,7 +530,7 @@ private:
std::unordered_set<token> get_tokens_for(inet_address endpoint);
std::optional<locator::endpoint_dc_rack> get_dc_rack_for(const gms::endpoint_state& ep_state);
std::optional<locator::endpoint_dc_rack> get_dc_rack_for(inet_address endpoint);
std::optional<locator::endpoint_dc_rack> get_dc_rack_for(locator::host_id endpoint);
private:
// Should be serialized under token_metadata_lock.
future<> replicate_to_all_cores(mutable_token_metadata_ptr tmptr) noexcept;
@@ -615,6 +604,9 @@ private:
future<std::unordered_multimap<dht::token_range, locator::host_id>> get_changed_ranges_for_leaving(locator::vnode_effective_replication_map_ptr erm, locator::host_id endpoint);
future<> maybe_reconnect_to_preferred_ip(inet_address ep, inet_address local_ip);
// Return ip of the peers table entry with given host id
future<std::optional<gms::inet_address>> get_ip_from_peers_table(locator::host_id id);
public:
sstring get_release_version();

View File

@@ -3057,11 +3057,11 @@ future<> topology_coordinator::build_coordinator_state(group0_guard guard) {
rtlogger.info("building initial raft topology state and CDC generation");
guard = co_await start_operation();
auto get_application_state = [&] (locator::host_id host_id, gms::inet_address ep, const gms::application_state_map& epmap, gms::application_state app_state) -> sstring {
auto get_application_state = [&] (locator::host_id host_id, const gms::application_state_map& epmap, gms::application_state app_state) -> sstring {
const auto it = epmap.find(app_state);
if (it == epmap.end()) {
throw std::runtime_error(format("failed to build initial raft topology state from gossip for node {}/{}: application state {} is missing in gossip",
host_id, ep, app_state));
throw std::runtime_error(format("failed to build initial raft topology state from gossip for node {}: application state {} is missing in gossip",
host_id, app_state));
}
// it's versioned_value::value(), not std::optional::value() - it does not throw
return it->second.value();
@@ -3069,17 +3069,13 @@ future<> topology_coordinator::build_coordinator_state(group0_guard guard) {
// Create a new CDC generation
auto get_sharding_info_for_host_id = [&] (locator::host_id host_id) -> std::pair<size_t, uint8_t> {
const auto ep = tmptr->get_endpoint_for_host_id_if_known(host_id);
if (!ep) {
throw std::runtime_error(format("IP of node with ID {} is not known", host_id));
}
const auto eptr = _gossiper.get_endpoint_state_ptr(*ep);
const auto eptr = _gossiper.get_endpoint_state_ptr(host_id);
if (!eptr) {
throw std::runtime_error(format("no gossiper endpoint state for node {}/{}", host_id, *ep));
throw std::runtime_error(format("no gossiper endpoint state for node {}", host_id));
}
const auto& epmap = eptr->get_application_state_map();
const auto shard_count = std::stoi(get_application_state(host_id, *ep, epmap, gms::application_state::SHARD_COUNT));
const auto ignore_msb = std::stoi(get_application_state(host_id, *ep, epmap, gms::application_state::IGNORE_MSB_BITS));
const auto shard_count = std::stoi(get_application_state(host_id, epmap, gms::application_state::SHARD_COUNT));
const auto ignore_msb = std::stoi(get_application_state(host_id, epmap, gms::application_state::IGNORE_MSB_BITS));
return std::make_pair<size_t, uint8_t>(shard_count, ignore_msb);
};
auto [cdc_gen_uuid, guard_, mutation] = co_await prepare_and_broadcast_cdc_generation_data(tmptr, std::move(guard), std::nullopt, get_sharding_info_for_host_id);
@@ -3096,23 +3092,22 @@ future<> topology_coordinator::build_coordinator_state(group0_guard guard) {
}
const auto& host_id = node.get().host_id();
const auto& ep = node.get().endpoint();
const auto eptr = _gossiper.get_endpoint_state_ptr(ep);
const auto eptr = _gossiper.get_endpoint_state_ptr(host_id);
if (!eptr) {
throw std::runtime_error(format("failed to build initial raft topology state from gossip for node {}/{} as gossip contains no data for it", host_id, ep));
throw std::runtime_error(format("failed to build initial raft topology state from gossip for node {} as gossip contains no data for it", host_id));
}
const auto& epmap = eptr->get_application_state_map();
const auto datacenter = get_application_state(host_id, ep, epmap, gms::application_state::DC);
const auto rack = get_application_state(host_id, ep, epmap, gms::application_state::RACK);
const auto datacenter = get_application_state(host_id, epmap, gms::application_state::DC);
const auto rack = get_application_state(host_id, epmap, gms::application_state::RACK);
const auto tokens_v = tmptr->get_tokens(host_id);
const std::unordered_set<dht::token> tokens(tokens_v.begin(), tokens_v.end());
const auto release_version = get_application_state(host_id, ep, epmap, gms::application_state::RELEASE_VERSION);
const auto release_version = get_application_state(host_id, epmap, gms::application_state::RELEASE_VERSION);
const auto num_tokens = tokens.size();
const auto shard_count = get_application_state(host_id, ep, epmap, gms::application_state::SHARD_COUNT);
const auto ignore_msb = get_application_state(host_id, ep, epmap, gms::application_state::IGNORE_MSB_BITS);
const auto supported_features_s = get_application_state(host_id, ep, epmap, gms::application_state::SUPPORTED_FEATURES);
const auto shard_count = get_application_state(host_id, epmap, gms::application_state::SHARD_COUNT);
const auto ignore_msb = get_application_state(host_id, epmap, gms::application_state::IGNORE_MSB_BITS);
const auto supported_features_s = get_application_state(host_id, epmap, gms::application_state::SUPPORTED_FEATURES);
const auto supported_features = gms::feature_service::to_feature_set(supported_features_s);
if (enabled_features.empty()) {

View File

@@ -32,9 +32,7 @@ SEASTAR_THREAD_TEST_CASE(test_add_node) {
auto id1 = host_id::create_random_id();
auto ep1 = gms::inet_address("127.0.0.1");
auto id2 = host_id::create_random_id();
auto ep2 = gms::inet_address("127.0.0.2");
auto id3 = host_id::create_random_id();
auto ep3 = gms::inet_address("127.0.0.3");
topology::config cfg = {
.this_endpoint = ep1,
@@ -51,16 +49,13 @@ SEASTAR_THREAD_TEST_CASE(test_add_node) {
std::unordered_set<std::reference_wrapper<const locator::node>> nodes;
nodes.insert(std::cref(topo.add_node(id2, ep2, endpoint_dc_rack::default_location, node::state::normal)));
nodes.insert(std::cref(topo.add_or_update_endpoint(id1, ep1, endpoint_dc_rack::default_location, node::state::normal)));
nodes.insert(std::cref(topo.add_node(id2, endpoint_dc_rack::default_location, node::state::normal)));
nodes.insert(std::cref(topo.add_or_update_endpoint(id1, endpoint_dc_rack::default_location, node::state::normal)));
BOOST_REQUIRE_THROW(topo.add_node(id1, ep2, endpoint_dc_rack::default_location, node::state::normal), std::runtime_error);
BOOST_REQUIRE_THROW(topo.add_node(id2, ep1, endpoint_dc_rack::default_location, node::state::normal), std::runtime_error);
BOOST_REQUIRE_THROW(topo.add_node(id2, ep2, endpoint_dc_rack::default_location, node::state::normal), std::runtime_error);
BOOST_REQUIRE_THROW(topo.add_node(id2, ep3, endpoint_dc_rack::default_location, node::state::normal), std::runtime_error);
BOOST_REQUIRE_THROW(topo.add_node(id3, ep3, endpoint_dc_rack{}, node::state::normal), std::runtime_error);
BOOST_REQUIRE_THROW(topo.add_node(id2, endpoint_dc_rack::default_location, node::state::normal), std::runtime_error);
BOOST_REQUIRE_THROW(topo.add_node(id3, endpoint_dc_rack{}, node::state::normal), std::runtime_error);
nodes.insert(std::cref(topo.add_node(id3, ep3, endpoint_dc_rack::default_location, node::state::normal)));
nodes.insert(std::cref(topo.add_node(id3, endpoint_dc_rack::default_location, node::state::normal)));
topo.for_each_node([&] (const locator::node& node) {
BOOST_REQUIRE(nodes.erase(std::cref(node)));
@@ -82,7 +77,7 @@ SEASTAR_THREAD_TEST_CASE(test_moving) {
auto topo = topology(cfg);
topo.add_or_update_endpoint(id1, ep1, endpoint_dc_rack::default_location, node::state::normal);
topo.add_or_update_endpoint(id1, endpoint_dc_rack::default_location, node::state::normal);
BOOST_REQUIRE(topo.this_node()->topology() == &topo);
@@ -101,8 +96,6 @@ SEASTAR_THREAD_TEST_CASE(test_update_node) {
auto id1 = host_id::create_random_id();
auto ep1 = gms::inet_address("127.0.0.1");
auto id2 = host_id::create_random_id();
auto ep2 = gms::inet_address("127.0.0.2");
auto ep3 = gms::inet_address("127.0.0.3");
topology::config cfg = {
.this_endpoint = ep1,
@@ -117,110 +110,52 @@ SEASTAR_THREAD_TEST_CASE(test_update_node) {
set_abort_on_internal_error(true);
});
topo.add_or_update_endpoint(id1, std::nullopt, endpoint_dc_rack::default_location, node::state::normal);
topo.add_or_update_endpoint(id1, endpoint_dc_rack::default_location, node::state::normal);
auto node = const_cast<class node*>(topo.this_node());
topo.update_node(*node, std::nullopt, ep1, std::nullopt, std::nullopt);
topo.update_node(*node, std::nullopt, std::nullopt, std::nullopt);
BOOST_REQUIRE_EQUAL(topo.find_node(id1), node);
BOOST_REQUIRE_THROW(topo.update_node(*node, host_id::create_null_id(), std::nullopt, std::nullopt, std::nullopt), std::runtime_error);
BOOST_REQUIRE_THROW(topo.update_node(*node, id2, std::nullopt, std::nullopt, std::nullopt), std::runtime_error);
BOOST_REQUIRE_THROW(topo.update_node(*node, host_id::create_null_id(), std::nullopt, std::nullopt), std::runtime_error);
BOOST_REQUIRE_THROW(topo.update_node(*node, id2, std::nullopt, std::nullopt), std::runtime_error);
BOOST_REQUIRE_EQUAL(topo.find_node(id1), node);
BOOST_REQUIRE_EQUAL(topo.find_node(id2), nullptr);
topo.update_node(*node, std::nullopt, ep2, std::nullopt, std::nullopt);
BOOST_REQUIRE_EQUAL(topo.find_node(ep1), nullptr);
BOOST_REQUIRE_EQUAL(topo.find_node(ep2), node);
auto dc_rack1 = endpoint_dc_rack{"DC1", "RACK1"};
topo.update_node(*node, std::nullopt, std::nullopt, dc_rack1, std::nullopt);
topo.update_node(*node, std::nullopt, dc_rack1, std::nullopt);
BOOST_REQUIRE(topo.get_location(id1) == dc_rack1);
BOOST_REQUIRE(topo.get_location(ep1) == dc_rack1);
auto dc_rack2 = endpoint_dc_rack{"DC2", "RACK2"};
topo.update_node(*node, std::nullopt, std::nullopt, dc_rack2, std::nullopt);
topo.update_node(*node, std::nullopt, dc_rack2, std::nullopt);
BOOST_REQUIRE(topo.get_location(id1) == dc_rack2);
BOOST_REQUIRE(topo.get_location(ep1) == dc_rack2);
BOOST_REQUIRE_NE(node->get_state(), locator::node::state::being_decommissioned);
topo.update_node(*node, std::nullopt, std::nullopt, std::nullopt, locator::node::state::being_decommissioned);
topo.update_node(*node, std::nullopt, std::nullopt, locator::node::state::being_decommissioned);
BOOST_REQUIRE_EQUAL(node->get_state(), locator::node::state::being_decommissioned);
auto dc_rack3 = endpoint_dc_rack{"DC3", "RACK3"};
// Note: engage state option, but keep node::state value the same
// to reproduce #13502
topo.update_node(*node, std::nullopt, ep3, dc_rack3, locator::node::state::being_decommissioned);
topo.update_node(*node, std::nullopt, dc_rack3, locator::node::state::being_decommissioned);
BOOST_REQUIRE_EQUAL(topo.find_node(id1), node);
BOOST_REQUIRE_EQUAL(topo.find_node(ep1), nullptr);
BOOST_REQUIRE_EQUAL(topo.find_node(ep2), nullptr);
BOOST_REQUIRE_EQUAL(topo.find_node(ep3), node);
BOOST_REQUIRE(topo.get_location(id1) == dc_rack3);
BOOST_REQUIRE(topo.get_location(ep2) == endpoint_dc_rack::default_location);
BOOST_REQUIRE(topo.get_location(ep3) == dc_rack3);
BOOST_REQUIRE_EQUAL(node->get_state(), locator::node::state::being_decommissioned);
// In state::left the node will remain indexed only by its host_id
topo.update_node(*node, std::nullopt, std::nullopt, std::nullopt, locator::node::state::left);
BOOST_REQUIRE_EQUAL(topo.find_node(id1), node);
BOOST_REQUIRE_EQUAL(topo.find_node(ep1), nullptr);
BOOST_REQUIRE_EQUAL(topo.find_node(ep2), nullptr);
BOOST_REQUIRE_EQUAL(topo.find_node(ep3), nullptr);
BOOST_REQUIRE(topo.get_location(id1) == dc_rack3);
BOOST_REQUIRE(topo.get_location(ep2) == endpoint_dc_rack::default_location);
BOOST_REQUIRE(topo.get_location(ep3) == endpoint_dc_rack::default_location);
BOOST_REQUIRE_EQUAL(node->get_state(), locator::node::state::left);
}
SEASTAR_THREAD_TEST_CASE(test_add_or_update_by_host_id) {
auto id1 = host_id::create_random_id();
auto id2 = host_id::create_random_id();
auto ep1 = gms::inet_address("127.0.0.1");
// In this test we check that add_or_update_endpoint searches by host_id first.
// We create two nodes, one matches by id, another - by ip,
// and SCYLLA_ASSERT that add_or_update_endpoint updates the first.
// We need to make the second node 'being_decommissioned', so that
// it gets removed from ip index and we don't get the non-unique IP error.
auto topo = topology({
.this_host_id = id1,
.local_dc_rack = endpoint_dc_rack::default_location,
});
topo.add_or_update_endpoint(id1, gms::inet_address{}, endpoint_dc_rack::default_location, node::state::normal);
topo.add_node(id2, ep1, endpoint_dc_rack::default_location, node::state::being_decommissioned);
topo.add_or_update_endpoint(id1, ep1, std::nullopt, node::state::bootstrapping);
auto* n = topo.find_node(id1);
BOOST_REQUIRE_EQUAL(n->get_state(), node::state::bootstrapping);
BOOST_REQUIRE_EQUAL(n->host_id(), id1);
BOOST_REQUIRE_EQUAL(n->endpoint(), ep1);
auto* n2 = topo.find_node(ep1);
BOOST_REQUIRE_EQUAL(n, n2);
auto* n3 = topo.find_node(id2);
BOOST_REQUIRE_EQUAL(n3->get_state(), node::state::being_decommissioned);
BOOST_REQUIRE_EQUAL(n3->host_id(), id2);
BOOST_REQUIRE_EQUAL(n3->endpoint(), ep1);
}
SEASTAR_THREAD_TEST_CASE(test_remove_endpoint) {
using dc_endpoints_t = std::unordered_map<sstring, std::unordered_set<inet_address>>;
using dc_racks_t = std::unordered_map<sstring, std::unordered_map<sstring, std::unordered_set<inet_address>>>;
using dc_endpoints_t = std::unordered_map<sstring, std::unordered_set<locator::host_id>>;
using dc_racks_t = std::unordered_map<sstring, std::unordered_map<sstring, std::unordered_set<locator::host_id>>>;
using dcs_t = std::unordered_set<sstring>;
const auto id1 = host_id::create_random_id();
const auto ep1 = gms::inet_address("127.0.0.1");
const auto id2 = host_id::create_random_id();
const auto ep2 = gms::inet_address("127.0.0.2");
const auto dc_rack1 = endpoint_dc_rack {
.dc = "dc1",
.rack = "rack1"
@@ -238,22 +173,22 @@ SEASTAR_THREAD_TEST_CASE(test_remove_endpoint) {
auto topo = topology(cfg);
topo.add_or_update_endpoint(id1, ep1, dc_rack1, node::state::normal);
topo.add_node(id2, ep2, dc_rack2, node::state::normal);
topo.add_or_update_endpoint(id1, dc_rack1, node::state::normal);
topo.add_node(id2, dc_rack2, node::state::normal);
BOOST_REQUIRE_EQUAL(topo.get_datacenter_endpoints(), (dc_endpoints_t{{"dc1", {ep1, ep2}}}));
BOOST_REQUIRE_EQUAL(topo.get_datacenter_racks(), (dc_racks_t{{"dc1", {{"rack1", {ep1}}, {"rack2", {ep2}}}}}));
BOOST_REQUIRE_EQUAL(topo.get_datacenter_endpoints(), (dc_endpoints_t{{"dc1", {id1, id2}}}));
BOOST_REQUIRE_EQUAL(topo.get_datacenter_racks(), (dc_racks_t{{"dc1", {{"rack1", {id1}}, {"rack2", {id2}}}}}));
BOOST_REQUIRE_EQUAL(topo.get_datacenters(), (dcs_t{"dc1"}));
topo.remove_endpoint(id2);
BOOST_REQUIRE_EQUAL(topo.get_datacenter_endpoints(), (dc_endpoints_t{{"dc1", {ep1}}}));
BOOST_REQUIRE_EQUAL(topo.get_datacenter_racks(), (dc_racks_t{{"dc1", {{"rack1", {ep1}}}}}));
BOOST_REQUIRE_EQUAL(topo.get_datacenter_endpoints(), (dc_endpoints_t{{"dc1", {id1}}}));
BOOST_REQUIRE_EQUAL(topo.get_datacenter_racks(), (dc_racks_t{{"dc1", {{"rack1", {id1}}}}}));
BOOST_REQUIRE_EQUAL(topo.get_datacenters(), (dcs_t{"dc1"}));
// Local endpoint cannot be removed
topo.remove_endpoint(id1);
BOOST_REQUIRE_EQUAL(topo.get_datacenter_endpoints(), (dc_endpoints_t{{"dc1", {ep1}}}));
BOOST_REQUIRE_EQUAL(topo.get_datacenter_racks(), (dc_racks_t{{"dc1", {{"rack1", {ep1}}}}}));
BOOST_REQUIRE_EQUAL(topo.get_datacenter_endpoints(), (dc_endpoints_t{{"dc1", {id1}}}));
BOOST_REQUIRE_EQUAL(topo.get_datacenter_racks(), (dc_racks_t{{"dc1", {{"rack1", {id1}}}}}));
BOOST_REQUIRE_EQUAL(topo.get_datacenters(), (dcs_t{"dc1"}));
}
@@ -280,9 +215,6 @@ SEASTAR_THREAD_TEST_CASE(test_load_sketch) {
});
stm.mutate_token_metadata([&] (token_metadata& tm) {
tm.update_host_id(host1, ip1);
tm.update_host_id(host2, ip2);
tm.update_host_id(host3, ip3);
tm.update_topology(host1, locator::endpoint_dc_rack::default_location, node::state::normal, node1_shard_count);
tm.update_topology(host2, locator::endpoint_dc_rack::default_location, node::state::normal, node2_shard_count);
tm.update_topology(host3, locator::endpoint_dc_rack::default_location, node::state::normal, node3_shard_count);
@@ -380,9 +312,7 @@ SEASTAR_THREAD_TEST_CASE(test_left_node_is_kept_outside_dc) {
auto id1 = host_id::create_random_id();
auto ep1 = gms::inet_address("127.0.0.1");
auto id2 = host_id::create_random_id();
auto ep2 = gms::inet_address("127.0.0.2");
auto id3 = host_id::create_random_id();
auto ep3 = gms::inet_address("127.0.0.3");
const auto dc_rack1 = endpoint_dc_rack {
.dc = "dc1",
@@ -403,8 +333,8 @@ SEASTAR_THREAD_TEST_CASE(test_left_node_is_kept_outside_dc) {
std::unordered_set<std::reference_wrapper<const locator::node>> nodes;
nodes.insert(std::cref(topo.add_node(id2, ep2, dc_rack1, node::state::normal)));
nodes.insert(std::cref(topo.add_node(id3, ep3, dc_rack1, node::state::left)));
nodes.insert(std::cref(topo.add_node(id2, dc_rack1, node::state::normal)));
nodes.insert(std::cref(topo.add_node(id3, dc_rack1, node::state::left)));
topo.for_each_node([&] (const locator::node& node) {
BOOST_REQUIRE(node.host_id() != id3);
@@ -417,7 +347,7 @@ SEASTAR_THREAD_TEST_CASE(test_left_node_is_kept_outside_dc) {
}
// left nodes are not members.
BOOST_REQUIRE(!topo.get_datacenter_endpoints().at(dc_rack1.dc).contains(ep3));
BOOST_REQUIRE(!topo.get_datacenter_endpoints().at(dc_rack1.dc).contains(id3));
BOOST_REQUIRE(topo.get_datacenter(id3) == dc_rack1.dc);
BOOST_REQUIRE(topo.get_rack(id3) == dc_rack1.rack);

View File

@@ -50,7 +50,7 @@ struct ring_point {
host_id id = host_id::create_random_id();
};
void print_natural_endpoints(double point, const inet_address_vector_replica_set v) {
void print_natural_endpoints(double point, const host_id_vector_replica_set v) {
testlog.debug("Natural endpoints for a token {}:", point);
std::string str;
std::ostringstream strm(str);
@@ -104,11 +104,11 @@ void strategy_sanity_check(
void endpoints_check(
replication_strategy_ptr ars_ptr,
const token_metadata_ptr& tm,
const inet_address_vector_replica_set& endpoints,
const host_id_vector_replica_set& endpoints,
const locator::topology& topo,
bool strict_dc_rf = false) {
auto&& nodes_per_dc = tm->get_datacenter_token_owners_ips();
auto&& nodes_per_dc = tm->get_datacenter_token_owners();
const network_topology_strategy* nts_ptr =
dynamic_cast<const network_topology_strategy*>(ars_ptr.get());
@@ -123,7 +123,7 @@ void endpoints_check(
BOOST_CHECK_LE(total_rf, ars_ptr->get_replication_factor(*tm));
// Check the uniqueness
std::unordered_set<inet_address> ep_set(endpoints.begin(), endpoints.end());
std::unordered_set<locator::host_id> ep_set(endpoints.begin(), endpoints.end());
BOOST_CHECK_EQUAL(endpoints.size(), ep_set.size());
// Check the per-DC RF
@@ -166,7 +166,7 @@ void full_ring_check(const std::vector<ring_point>& ring_points,
for (auto& rp : ring_points) {
double cur_point1 = rp.point - 0.5;
token t1(tests::d2t(cur_point1 / ring_points.size()));
auto endpoints1 = erm->get_natural_endpoints(t1);
auto endpoints1 = erm->get_natural_replicas(t1);
endpoints_check(ars_ptr, tmptr, endpoints1, topo);
@@ -179,7 +179,7 @@ void full_ring_check(const std::vector<ring_point>& ring_points,
//
double cur_point2 = rp.point - 0.2;
token t2(tests::d2t(cur_point2 / ring_points.size()));
auto endpoints2 = erm->get_natural_endpoints(t2);
auto endpoints2 = erm->get_natural_replicas(t2);
endpoints_check(ars_ptr, tmptr, endpoints2, topo);
check_ranges_are_sorted(erm, rp.id).get();
@@ -193,17 +193,17 @@ void full_ring_check(const tablet_map& tmap,
auto& tm = *tmptr;
const auto& topo = tm.get_topology();
auto to_endpoint_set = [&] (const tablet_replica_set& replicas) {
inet_address_vector_replica_set result;
auto to_replica_set = [&] (const tablet_replica_set& replicas) {
host_id_vector_replica_set result;
result.reserve(replicas.size());
for (auto&& replica : replicas) {
result.emplace_back(tm.get_endpoint_for_host_id(replica.host));
result.emplace_back(replica.host);
}
return result;
};
for (tablet_id tb : tmap.tablet_ids()) {
endpoints_check(rs_ptr, tmptr, to_endpoint_set(tmap.get_tablet_info(tb).replicas), topo, true);
endpoints_check(rs_ptr, tmptr, to_replica_set(tmap.get_tablet_info(tb).replicas), topo, true);
}
}
@@ -304,7 +304,7 @@ void simple_test() {
for (const auto& [ring_point, endpoint, id] : ring_points) {
std::unordered_set<token> tokens;
tokens.insert(token{tests::d2t(ring_point / ring_points.size())});
topo.add_node(id, endpoint, make_endpoint_dc_rack(endpoint), locator::node::state::normal);
topo.add_node(id, make_endpoint_dc_rack(endpoint), locator::node::state::normal);
co_await tm.update_normal_tokens(std::move(tokens), id);
}
}).get();
@@ -412,7 +412,7 @@ void heavy_origin_test() {
stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> {
auto& topo = tm.get_topology();
for (const auto& [ring_point, endpoint, id] : ring_points) {
topo.add_node(id, endpoint, make_endpoint_dc_rack(endpoint), locator::node::state::normal);
topo.add_node(id, make_endpoint_dc_rack(endpoint), locator::node::state::normal);
co_await tm.update_normal_tokens(tokens[endpoint], id);
}
}).get();
@@ -484,8 +484,7 @@ SEASTAR_THREAD_TEST_CASE(NetworkTopologyStrategy_tablets_test) {
for (const auto& [ring_point, endpoint, id] : ring_points) {
std::unordered_set<token> tokens;
tokens.insert(token{tests::d2t(ring_point / ring_points.size())});
topo.add_node(id, endpoint, make_endpoint_dc_rack(endpoint), locator::node::state::normal, shard_count);
tm.update_host_id(id, endpoint);
topo.add_node(id, make_endpoint_dc_rack(endpoint), locator::node::state::normal, shard_count);
co_await tm.update_normal_tokens(std::move(tokens), id);
}
}).get();
@@ -576,8 +575,7 @@ static void test_random_balancing(sharded<snitch_ptr>& snitch, gms::inet_address
for (const auto& [ring_point, endpoint, id] : ring_points) {
std::unordered_set<token> tokens;
tokens.insert(token{tests::d2t(ring_point / ring_points.size())});
topo.add_node(id, endpoint, make_endpoint_dc_rack(endpoint), locator::node::state::normal, shard_count);
tm.update_host_id(id, endpoint);
topo.add_node(id, make_endpoint_dc_rack(endpoint), locator::node::state::normal, shard_count);
co_await tm.update_normal_tokens(std::move(tokens), id);
}
}).get();
@@ -672,7 +670,7 @@ static size_t get_replication_factor(const sstring& dc,
static bool has_sufficient_replicas(const sstring& dc,
const std::unordered_map<sstring, std::unordered_set<host_id>>& dc_replicas,
const std::unordered_map<sstring, std::unordered_set<inet_address>>& all_endpoints,
const std::unordered_map<sstring, std::unordered_set<host_id>>& all_endpoints,
const std::unordered_map<sstring, size_t>& datacenters) noexcept {
auto dc_replicas_it = dc_replicas.find(dc);
if (dc_replicas_it == dc_replicas.end()) {
@@ -690,7 +688,7 @@ static bool has_sufficient_replicas(const sstring& dc,
static bool has_sufficient_replicas(
const std::unordered_map<sstring, std::unordered_set<host_id>>& dc_replicas,
const std::unordered_map<sstring, std::unordered_set<inet_address>>& all_endpoints,
const std::unordered_map<sstring, std::unordered_set<host_id>>& all_endpoints,
const std::unordered_map<sstring, size_t>& datacenters) noexcept {
for (auto& dc : datacenters | std::views::keys) {
@@ -741,16 +739,16 @@ static locator::host_id_set calculate_natural_endpoints(
// the token-owning members of a DC
//
const std::unordered_map<sstring,
std::unordered_set<inet_address>>
all_endpoints = tm.get_datacenter_token_owners_ips();
std::unordered_set<locator::host_id>>
all_endpoints = tm.get_datacenter_token_owners();
//
// all racks (with non-token owners filtered out) in a DC so we can check
// when we have exhausted all racks in a DC
//
const std::unordered_map<sstring,
std::unordered_map<sstring,
std::unordered_set<inet_address>>>
racks = tm.get_datacenter_racks_token_owners_ips();
std::unordered_set<host_id>>>
racks = tm.get_datacenter_racks_token_owners();
// not aware of any cluster members
SCYLLA_ASSERT(!all_endpoints.empty() && !racks.empty());
@@ -865,12 +863,11 @@ void generate_topology(topology& topo, const std::unordered_map<sstring, size_t>
out = std::fill_n(out, rf, std::cref(dc));
}
unsigned i = 0;
for (auto& node : nodes) {
const sstring& dc = dcs[udist(0, dcs.size() - 1)(e1)];
auto rc = racks_per_dc.at(dc);
auto r = udist(0, rc)(e1);
topo.add_or_update_endpoint(node, inet_address((127u << 24) | ++i), endpoint_dc_rack{dc, to_sstring(r)}, locator::node::state::normal);
topo.add_or_update_endpoint(node, endpoint_dc_rack{dc, to_sstring(r)}, locator::node::state::normal);
}
}
@@ -1116,7 +1113,6 @@ SEASTAR_THREAD_TEST_CASE(test_topology_tracks_local_node) {
auto host1 = host_id(utils::make_random_uuid());
auto host2 = host_id(utils::make_random_uuid());
auto host3 = host_id(utils::make_random_uuid());
auto ip1_dc_rack = endpoint_dc_rack{ "dc1", "rack_ip1" };
auto ip1_dc_rack_v2 = endpoint_dc_rack{ "dc1", "rack_ip1_v2" };
@@ -1135,8 +1131,6 @@ SEASTAR_THREAD_TEST_CASE(test_topology_tracks_local_node) {
BOOST_REQUIRE(stm.get()->get_topology().get_location() == ip1_dc_rack);
stm.mutate_token_metadata([&] (token_metadata& tm) {
tm.update_host_id(host2, ip2);
tm.update_host_id(host1, ip1); // this_node added last on purpose
// Need to move to non left or none state in order to be indexed by ip
tm.update_topology(host1, {}, locator::node::state::normal);
tm.update_topology(host2, {}, locator::node::state::normal);
@@ -1145,35 +1139,26 @@ SEASTAR_THREAD_TEST_CASE(test_topology_tracks_local_node) {
const node* n1 = stm.get()->get_topology().find_node(host1);
BOOST_REQUIRE(n1);
n1 = stm.get()->get_topology().find_node(ip1);
BOOST_REQUIRE(n1);
BOOST_REQUIRE(bool(n1->is_this_node()));
BOOST_REQUIRE_EQUAL(n1->host_id(), host1);
BOOST_REQUIRE_EQUAL(n1->endpoint(), ip1);
BOOST_REQUIRE(n1->dc_rack() == ip1_dc_rack);
BOOST_REQUIRE(stm.get()->get_topology().get_location() == ip1_dc_rack);
const node* n2 = stm.get()->get_topology().find_node(host2);
BOOST_REQUIRE(n2);
n2 = stm.get()->get_topology().find_node(ip2);
BOOST_REQUIRE(n2);
BOOST_REQUIRE(!bool(n2->is_this_node()));
BOOST_REQUIRE_EQUAL(n2->host_id(), host2);
BOOST_REQUIRE_EQUAL(n2->endpoint(), ip2);
BOOST_REQUIRE(n2->dc_rack() == endpoint_dc_rack::default_location);
// Local node cannot be removed
stm.mutate_token_metadata([&] (token_metadata& tm) {
tm.remove_endpoint(host1);
tm.update_host_id(host3, ip3);
return make_ready_future<>();
}).get();
n1 = stm.get()->get_topology().find_node(host1);
BOOST_REQUIRE(n1);
n1 = stm.get()->get_topology().find_node(ip1);
BOOST_REQUIRE(n1);
// Removing node with no local node
@@ -1184,22 +1169,19 @@ SEASTAR_THREAD_TEST_CASE(test_topology_tracks_local_node) {
n2 = stm.get()->get_topology().find_node(host2);
BOOST_REQUIRE(!n2);
n2 = stm.get()->get_topology().find_node(ip2);
BOOST_REQUIRE(!n2);
// Repopulate after clear_gently()
stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> {
co_await tm.clear_gently();
tm.update_host_id(host2, ip2);
tm.update_host_id(host1, ip1); // this_node added last on purpose
tm.update_topology(host2, std::nullopt, std::nullopt);
tm.update_topology(host1, std::nullopt, std::nullopt); // this_node added last on purpose
}).get();
n1 = stm.get()->get_topology().find_node(host1);
BOOST_REQUIRE(n1);
BOOST_REQUIRE(bool(n1->is_this_node()));
BOOST_REQUIRE_EQUAL(n1->host_id(), host1);
BOOST_REQUIRE_EQUAL(n1->endpoint(), ip1);
BOOST_REQUIRE(n1->dc_rack() == ip1_dc_rack);
BOOST_REQUIRE(stm.get()->get_topology().get_location() == ip1_dc_rack);
@@ -1207,21 +1189,19 @@ SEASTAR_THREAD_TEST_CASE(test_topology_tracks_local_node) {
BOOST_REQUIRE(n2);
BOOST_REQUIRE(!bool(n2->is_this_node()));
BOOST_REQUIRE_EQUAL(n2->host_id(), host2);
BOOST_REQUIRE_EQUAL(n2->endpoint(), ip2);
BOOST_REQUIRE(n2->dc_rack() == endpoint_dc_rack::default_location);
// get_location() should pick up endpoint_dc_rack from node info
stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> {
co_await tm.clear_gently();
tm.get_topology().add_or_update_endpoint(host1, ip1, ip1_dc_rack_v2, node::state::being_decommissioned);
tm.get_topology().add_or_update_endpoint(host1, ip1_dc_rack_v2, node::state::being_decommissioned);
}).get();
n1 = stm.get()->get_topology().find_node(host1);
BOOST_REQUIRE(n1);
BOOST_REQUIRE(bool(n1->is_this_node()));
BOOST_REQUIRE_EQUAL(n1->host_id(), host1);
BOOST_REQUIRE_EQUAL(n1->endpoint(), ip1);
BOOST_REQUIRE(n1->dc_rack() == ip1_dc_rack_v2);
BOOST_REQUIRE(stm.get()->get_topology().get_location() == ip1_dc_rack_v2);
}

View File

@@ -773,9 +773,6 @@ SEASTAR_TEST_CASE(test_get_shard) {
tablet_id tid1(0);
stm.mutate_token_metadata([&] (token_metadata& tm) {
tm.update_host_id(h1, ip1);
tm.update_host_id(h2, ip2);
tm.update_host_id(h3, ip3);
tm.update_topology(h1, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count);
tm.update_topology(h2, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count);
tm.update_topology(h3, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count);
@@ -1020,7 +1017,7 @@ SEASTAR_TEST_CASE(test_sharder) {
auto table1 = table_id(utils::UUID_gen::get_time_UUID());
token_metadata tokm(token_metadata::config{ .topo_cfg{ .this_host_id = h1, .local_dc_rack = locator::endpoint_dc_rack::default_location } });
tokm.get_topology().add_or_update_endpoint(h1, tokm.get_topology().my_address());
tokm.get_topology().add_or_update_endpoint(h1);
std::vector<tablet_id> tablet_ids;
{
@@ -1235,7 +1232,7 @@ SEASTAR_TEST_CASE(test_intranode_sharding) {
auto table1 = table_id(utils::UUID_gen::get_time_UUID());
token_metadata tokm(token_metadata::config{ .topo_cfg{ .this_host_id = h1, .local_dc_rack = locator::endpoint_dc_rack::default_location } });
tokm.get_topology().add_or_update_endpoint(h1, tokm.get_topology().my_address());
tokm.get_topology().add_or_update_endpoint(h1);
auto leaving_replica = tablet_replica{h1, 5};
auto pending_replica = tablet_replica{h1, 7};
@@ -1515,9 +1512,6 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_with_empty_node) {
});
stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> {
tm.update_host_id(host1, ip1);
tm.update_host_id(host2, ip2);
tm.update_host_id(host3, ip3);
tm.update_topology(host1, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count);
tm.update_topology(host2, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count);
tm.update_topology(host3, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count);
@@ -1616,9 +1610,6 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_with_skiplist) {
});
stm.mutate_token_metadata([&] (token_metadata& tm) {
tm.update_host_id(host1, ip1);
tm.update_host_id(host2, ip2);
tm.update_host_id(host3, ip3);
tm.update_topology(host1, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count);
tm.update_topology(host2, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count);
tm.update_topology(host3, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count);
@@ -1707,9 +1698,6 @@ SEASTAR_THREAD_TEST_CASE(test_decommission_rf_met) {
stm.mutate_token_metadata([&](token_metadata& tm) -> future<> {
const unsigned shard_count = 2;
tm.update_host_id(host1, ip1);
tm.update_host_id(host2, ip2);
tm.update_host_id(host3, ip3);
tm.update_topology(host1, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count);
tm.update_topology(host2, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count);
tm.update_topology(host3, locator::endpoint_dc_rack::default_location, node::state::being_decommissioned,
@@ -1807,10 +1795,6 @@ SEASTAR_THREAD_TEST_CASE(test_table_creation_during_decommission) {
const unsigned shard_count = 1;
stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> {
tm.update_host_id(host1, ip1);
tm.update_host_id(host2, ip2);
tm.update_host_id(host3, ip3);
tm.update_host_id(host4, ip4);
tm.update_topology(host1, dcrack, node::state::normal, shard_count);
tm.update_topology(host2, dcrack, node::state::normal, shard_count);
tm.update_topology(host3, dcrack, node::state::being_decommissioned, shard_count);
@@ -1877,10 +1861,6 @@ SEASTAR_THREAD_TEST_CASE(test_decommission_two_racks) {
stm.mutate_token_metadata([&](token_metadata& tm) -> future<> {
const unsigned shard_count = 1;
tm.update_host_id(host1, ip1);
tm.update_host_id(host2, ip2);
tm.update_host_id(host3, ip3);
tm.update_host_id(host4, ip4);
tm.update_topology(host1, racks[0], node::state::normal, shard_count);
tm.update_topology(host2, racks[1], node::state::normal, shard_count);
tm.update_topology(host3, racks[0], node::state::normal, shard_count);
@@ -1984,10 +1964,6 @@ SEASTAR_THREAD_TEST_CASE(test_decommission_rack_load_failure) {
stm.mutate_token_metadata([&](token_metadata& tm) -> future<> {
const unsigned shard_count = 1;
tm.update_host_id(host1, ip1);
tm.update_host_id(host2, ip2);
tm.update_host_id(host3, ip3);
tm.update_host_id(host4, ip4);
tm.update_topology(host1, racks[0], node::state::normal, shard_count);
tm.update_topology(host2, racks[0], node::state::normal, shard_count);
tm.update_topology(host3, racks[0], node::state::normal, shard_count);
@@ -2063,9 +2039,6 @@ SEASTAR_THREAD_TEST_CASE(test_decommission_rf_not_met) {
stm.mutate_token_metadata([&](token_metadata& tm) -> future<> {
const unsigned shard_count = 2;
tm.update_host_id(host1, ip1);
tm.update_host_id(host2, ip2);
tm.update_host_id(host3, ip3);
tm.update_topology(host1, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count);
tm.update_topology(host2, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count);
tm.update_topology(host3, locator::endpoint_dc_rack::default_location, node::state::being_decommissioned,
@@ -2122,9 +2095,6 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_works_with_in_progress_transitions)
});
stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> {
tm.update_host_id(host1, ip1);
tm.update_host_id(host2, ip2);
tm.update_host_id(host3, ip3);
tm.update_topology(host1, locator::endpoint_dc_rack::default_location, node::state::normal, 1);
tm.update_topology(host2, locator::endpoint_dc_rack::default_location, node::state::normal, 1);
tm.update_topology(host3, locator::endpoint_dc_rack::default_location, node::state::normal, 2);
@@ -2196,9 +2166,6 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancer_shuffle_mode) {
});
stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> {
tm.update_host_id(host1, ip1);
tm.update_host_id(host2, ip2);
tm.update_host_id(host3, ip3);
tm.update_topology(host1, locator::endpoint_dc_rack::default_location, node::state::normal, 1);
tm.update_topology(host2, locator::endpoint_dc_rack::default_location, node::state::normal, 1);
tm.update_topology(host3, locator::endpoint_dc_rack::default_location, node::state::normal, 2);
@@ -2263,10 +2230,6 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_with_two_empty_nodes) {
});
stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> {
tm.update_host_id(host1, ip1);
tm.update_host_id(host2, ip2);
tm.update_host_id(host3, ip3);
tm.update_host_id(host4, ip4);
tm.update_topology(host1, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count);
tm.update_topology(host2, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count);
tm.update_topology(host3, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count);
@@ -2328,9 +2291,6 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_with_asymmetric_node_capacity) {
});
stm.mutate_token_metadata([&](token_metadata& tm) -> future<> {
tm.update_host_id(host1, ip1);
tm.update_host_id(host2, ip2);
tm.update_host_id(host3, ip3);
tm.update_topology(host1, locator::endpoint_dc_rack::default_location, node::state::being_decommissioned, 8);
tm.update_topology(host2, locator::endpoint_dc_rack::default_location, node::state::normal, 1);
tm.update_topology(host3, locator::endpoint_dc_rack::default_location, node::state::normal, 7);
@@ -2395,8 +2355,6 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancer_disabling) {
// host1 is loaded and host2 is empty, resulting in an imbalance.
// host1's shard 0 is loaded and shard 1 is empty, resulting in intra-node imbalance.
stm.mutate_token_metadata([&] (auto& tm) -> future<> {
tm.update_host_id(host1, ip1);
tm.update_host_id(host2, ip2);
tm.update_topology(host1, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count);
tm.update_topology(host2, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count);
co_await tm.update_normal_tokens(std::unordered_set{token(tests::d2t(1. / 2))}, host1);
@@ -2487,8 +2445,6 @@ SEASTAR_THREAD_TEST_CASE(test_drained_node_is_not_balanced_internally) {
});
stm.mutate_token_metadata([&] (locator::token_metadata& tm) -> future<> {
tm.update_host_id(host1, ip1);
tm.update_host_id(host2, ip2);
tm.update_topology(host1, locator::endpoint_dc_rack::default_location, locator::node::state::being_removed, shard_count);
tm.update_topology(host2, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count);
co_await tm.update_normal_tokens(std::unordered_set{token(tests::d2t(1. / 2))}, host1);
@@ -2537,7 +2493,6 @@ SEASTAR_THREAD_TEST_CASE(test_plan_fails_when_removing_last_replica) {
});
stm.mutate_token_metadata([&] (locator::token_metadata& tm) -> future<> {
tm.update_host_id(host1, ip1);
tm.update_topology(host1, locator::endpoint_dc_rack::default_location, locator::node::state::being_removed, shard_count);
co_await tm.update_normal_tokens(std::unordered_set{token(tests::d2t(1. / 2))}, host1);
@@ -2589,9 +2544,6 @@ SEASTAR_THREAD_TEST_CASE(test_skiplist_is_ignored_when_draining) {
});
stm.mutate_token_metadata([&] (locator::token_metadata& tm) -> future<> {
tm.update_host_id(host1, ip1);
tm.update_host_id(host2, ip2);
tm.update_host_id(host3, ip3);
tm.update_topology(host1, locator::endpoint_dc_rack::default_location, locator::node::state::being_removed, shard_count);
tm.update_topology(host2, locator::endpoint_dc_rack::default_location, locator::node::state::normal, shard_count);
tm.update_topology(host3, locator::endpoint_dc_rack::default_location, locator::node::state::normal, shard_count);
@@ -2699,10 +2651,8 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_with_random_load) {
int i = 0;
for (auto h : hosts) {
auto ip = inet_address(format("192.168.0.{}", ++i));
auto shard_count = 2;
tm.update_host_id(h, ip);
auto rack = racks[i % racks.size()];
auto rack = racks[++i % racks.size()];
tm.update_topology(h, rack, node::state::normal, shard_count);
if (h != hosts[0]) {
// Leave the first host empty by making it invisible to allocation algorithm.
@@ -2871,13 +2821,11 @@ static void do_test_load_balancing_merge_colocation(cql_test_env& e, const int n
int i = 0;
for (auto h : hosts) {
auto ip = inet_address(format("192.168.0.{}", ++i));
tm.update_host_id(h, ip);
auto rack = racks[i % racks.size()];
auto rack = racks[++i % racks.size()];
hosts_by_rack[rack.rack].push_back(h);
tm.update_topology(h, rack, node::state::normal, shard_count);
co_await tm.update_normal_tokens(std::unordered_set{token(tests::d2t(float(i) / hosts.size()))}, h);
testlog.debug("adding host {}, ip {}, rack {}, token {}", h, ip, rack.rack, token(tests::d2t(1. / hosts.size())));
testlog.debug("adding host {}, rack {}, token {}", h, rack.rack, token(tests::d2t(1. / hosts.size())));
}
tablet_map tmap(initial_tablets);
@@ -3056,8 +3004,6 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_resize_requests) {
});
stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> {
tm.update_host_id(host1, ip1);
tm.update_host_id(host2, ip2);
tm.update_topology(host1, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count);
tm.update_topology(host2, locator::endpoint_dc_rack::default_location, node::state::normal, shard_count);
co_await tm.update_normal_tokens(std::unordered_set{token(tests::d2t(1. / 2))}, host1);
@@ -3340,8 +3286,7 @@ static void execute_tablet_for_new_rf_test(calculate_tablet_replicas_for_new_rf_
for (const auto& [ring_point, endpoint, id] : test_config.ring_points) {
std::unordered_set<token> tokens;
tokens.insert(dht::token{tests::d2t(ring_point / test_config.ring_points.size())});
topo.add_or_update_endpoint(id, endpoint, make_endpoint_dc_rack(endpoint), locator::node::state::normal, 1);
tm.update_host_id(id, endpoint);
topo.add_or_update_endpoint(id, make_endpoint_dc_rack(endpoint), locator::node::state::normal, 1);
co_await tm.update_normal_tokens(std::move(tokens), id);
}
}).get();
@@ -3362,7 +3307,6 @@ static void execute_tablet_for_new_rf_test(calculate_tablet_replicas_for_new_rf_
stm.mutate_token_metadata([&] (token_metadata& tm) {
for (size_t i = 0; i < test_config.ring_points.size(); ++i) {
auto& [ring_point, endpoint, id] = test_config.ring_points[i];
tm.update_host_id(id, endpoint);
tm.update_topology(id, make_endpoint_dc_rack(endpoint), node::state::normal, nodes_shard_count[i]);
}
return make_ready_future<>();

View File

@@ -50,16 +50,12 @@ namespace {
}
SEASTAR_THREAD_TEST_CASE(test_pending_and_read_endpoints_for_everywhere_strategy) {
const auto e1 = inet_address("192.168.0.1");
const auto e2 = inet_address("192.168.0.2");
const auto e1_id = gen_id(1);
const auto e2_id = gen_id(2);
const auto t1 = dht::token::from_int64(10);
const auto t2 = dht::token::from_int64(20);
auto token_metadata = create_token_metadata(e1_id);
token_metadata->update_host_id(e1_id, e1);
token_metadata->update_host_id(e2_id, e2);
token_metadata->update_topology(e1_id, get_dc_rack(e1_id), node::state::normal);
token_metadata->update_topology(e2_id, get_dc_rack(e2_id), node::state::normal);
token_metadata->update_normal_tokens({t1}, e1_id).get();
@@ -74,16 +70,12 @@ SEASTAR_THREAD_TEST_CASE(test_pending_and_read_endpoints_for_everywhere_strategy
}
SEASTAR_THREAD_TEST_CASE(test_pending_endpoints_for_bootstrap_second_node) {
const auto e1 = inet_address("192.168.0.1");
const auto t1 = dht::token::from_int64(1);
const auto e2 = inet_address("192.168.0.2");
const auto t2 = dht::token::from_int64(100);
const auto e1_id = gen_id(1);
const auto e2_id = gen_id(2);
auto token_metadata = create_token_metadata(e1_id);
token_metadata->update_host_id(e1_id, e1);
token_metadata->update_host_id(e2_id, e2);
token_metadata->update_topology(e1_id, get_dc_rack(e1_id), node::state::normal);
token_metadata->update_topology(e2_id, get_dc_rack(e2_id), node::state::normal);
token_metadata->update_normal_tokens({t1}, e1_id).get();
@@ -107,17 +99,11 @@ SEASTAR_THREAD_TEST_CASE(test_pending_endpoints_for_bootstrap_with_replicas) {
const auto t10 = dht::token::from_int64(10);
const auto t100 = dht::token::from_int64(100);
const auto t1000 = dht::token::from_int64(1000);
const auto e1 = inet_address("192.168.0.1");
const auto e2 = inet_address("192.168.0.2");
const auto e3 = inet_address("192.168.0.3");
const auto e1_id = gen_id(1);
const auto e2_id = gen_id(2);
const auto e3_id = gen_id(3);
auto token_metadata = create_token_metadata(e1_id);
token_metadata->update_host_id(e1_id, e1);
token_metadata->update_host_id(e2_id, e2);
token_metadata->update_host_id(e3_id, e3);
token_metadata->update_topology(e1_id, get_dc_rack(e1_id), node::state::normal);
token_metadata->update_topology(e2_id, get_dc_rack(e2_id), node::state::normal);
token_metadata->update_topology(e3_id, get_dc_rack(e3_id), node::state::normal);
@@ -143,17 +129,11 @@ SEASTAR_THREAD_TEST_CASE(test_pending_endpoints_for_leave_with_replicas) {
const auto t10 = dht::token::from_int64(10);
const auto t100 = dht::token::from_int64(100);
const auto t1000 = dht::token::from_int64(1000);
const auto e1 = inet_address("192.168.0.1");
const auto e2 = inet_address("192.168.0.2");
const auto e3 = inet_address("192.168.0.3");
const auto e1_id = gen_id(1);
const auto e2_id = gen_id(2);
const auto e3_id = gen_id(3);
auto token_metadata = create_token_metadata(e1_id);
token_metadata->update_host_id(e1_id, e1);
token_metadata->update_host_id(e2_id, e2);
token_metadata->update_host_id(e3_id, e3);
token_metadata->update_topology(e1_id, get_dc_rack(e1_id), node::state::normal);
token_metadata->update_topology(e2_id, get_dc_rack(e2_id), node::state::normal);
token_metadata->update_topology(e3_id, get_dc_rack(e3_id), node::state::normal);
@@ -180,20 +160,12 @@ SEASTAR_THREAD_TEST_CASE(test_pending_endpoints_for_replace_with_replicas) {
const auto t10 = dht::token::from_int64(10);
const auto t100 = dht::token::from_int64(100);
const auto t1000 = dht::token::from_int64(1000);
const auto e1 = inet_address("192.168.0.1");
const auto e2 = inet_address("192.168.0.2");
const auto e3 = inet_address("192.168.0.3");
const auto e4 = inet_address("192.168.0.4");
const auto e1_id = gen_id(1);
const auto e2_id = gen_id(2);
const auto e3_id = gen_id(3);
const auto e4_id = gen_id(4);
auto token_metadata = create_token_metadata(e1_id);
token_metadata->update_host_id(e1_id, e1);
token_metadata->update_host_id(e2_id, e2);
token_metadata->update_host_id(e3_id, e3);
token_metadata->update_host_id(e4_id, e4);
token_metadata->update_topology(e1_id, get_dc_rack(e1_id), node::state::normal);
token_metadata->update_topology(e2_id, get_dc_rack(e2_id), node::state::normal);
token_metadata->update_topology(e3_id, get_dc_rack(e3_id), node::state::normal);
@@ -225,17 +197,11 @@ SEASTAR_THREAD_TEST_CASE(test_endpoints_for_reading_when_bootstrap_with_replicas
const auto t10 = dht::token::from_int64(10);
const auto t100 = dht::token::from_int64(100);
const auto t1000 = dht::token::from_int64(1000);
const auto e1 = inet_address("192.168.0.1");
const auto e2 = inet_address("192.168.0.2");
const auto e3 = inet_address("192.168.0.3");
const auto e1_id = gen_id(1);
const auto e2_id = gen_id(2);
const auto e3_id = gen_id(3);
auto token_metadata = create_token_metadata(e1_id);
token_metadata->update_host_id(e1_id, e1);
token_metadata->update_host_id(e2_id, e2);
token_metadata->update_host_id(e3_id, e3);
token_metadata->update_topology(e1_id, get_dc_rack(e1_id), node::state::normal);
token_metadata->update_topology(e2_id, get_dc_rack(e2_id), node::state::normal);
token_metadata->update_topology(e3_id, get_dc_rack(e3_id), node::state::normal);
@@ -285,22 +251,18 @@ SEASTAR_THREAD_TEST_CASE(test_endpoints_for_reading_when_bootstrap_with_replicas
SEASTAR_THREAD_TEST_CASE(test_replace_node_with_same_endpoint) {
const auto t1 = dht::token::from_int64(1);
const auto e1 = inet_address("192.168.0.1");
const auto e1_id1 = gen_id(1);
const auto e1_id2 = gen_id(2);
auto token_metadata = create_token_metadata(e1_id2);
token_metadata->update_host_id(e1_id1, e1);
token_metadata->update_topology(e1_id1, get_dc_rack(e1_id1), node::state::being_replaced);
token_metadata->update_normal_tokens({t1}, e1_id1).get();
token_metadata->update_topology(e1_id2, get_dc_rack(e1_id2), node::state::replacing);
token_metadata->update_host_id(e1_id2, e1);
token_metadata->add_replacing_endpoint(e1_id1, e1_id2);
auto erm = create_erm<simple_strategy>(token_metadata, {{"replication_factor", "2"}});
BOOST_REQUIRE_EQUAL(token_metadata->get_host_id(e1), e1_id1);
BOOST_REQUIRE_EQUAL(erm->get_pending_replicas(dht::token::from_int64(1)),
host_id_vector_topology_change{e1_id2});
BOOST_REQUIRE_EQUAL(erm->get_natural_replicas(dht::token::from_int64(1)),

View File

@@ -698,11 +698,10 @@ private:
host_id = linfo.host_id;
_sys_ks.local().save_local_info(std::move(linfo), _snitch.local()->get_location(), my_address, my_address).get();
}
locator::shared_token_metadata::mutate_on_all_shards(_token_metadata, [hostid = host_id, &cfg_in] (locator::token_metadata& tm) {
locator::shared_token_metadata::mutate_on_all_shards(_token_metadata, [hostid = host_id] (locator::token_metadata& tm) {
auto& topo = tm.get_topology();
topo.set_host_id_cfg(hostid);
topo.add_or_update_endpoint(hostid,
cfg_in.broadcast_address,
std::nullopt,
locator::node::state::normal,
smp::count);
@@ -755,7 +754,7 @@ private:
// Once the seastar issue is fixed, we can just keep the tmp socket aliva across
// the listen invoke below.
tmp = {};
_ms.invoke_on_all(&netw::messaging_service::start_listen, std::ref(_token_metadata)).get();
_ms.invoke_on_all(&netw::messaging_service::start_listen, std::ref(_token_metadata), [host_id] (gms::inet_address ip) {return host_id; }).get();
}
} catch (std::system_error& e) {
// if we still hit a used port (quick other process), just shut down ms and try again.

View File

@@ -224,7 +224,7 @@ int main(int ac, char ** av) {
auto deinit_testers = deferred_action([&testers] {
testers.invoke_on_all(&tester::deinit_handler).get();
});
messaging.invoke_on_all(&netw::messaging_service::start_listen, std::ref(token_metadata)).get();
messaging.invoke_on_all(&netw::messaging_service::start_listen, std::ref(token_metadata), [] (gms::inet_address ip){ return locator::host_id{}; }).get();
if (config.contains("server")) {
auto ip = config["server"].as<std::string>();
auto cpuid = config["cpuid"].as<uint32_t>();

View File

@@ -53,7 +53,6 @@ struct sort_by_proximity_topology {
auto id = locator::host_id{utils::UUID(0, i)};
nodes[dc][rack].emplace_back(id);
topology.add_or_update_endpoint(id,
gms::inet_address((127u << 24) | i),
locator::endpoint_dc_rack{format("dc{}", dc), format("rack{}", rack)},
locator::node::state::normal);
}

View File

@@ -243,7 +243,6 @@ future<results> test_load_balancing_with_many_tables(params p, bool tablet_aware
};
auto add_host_to_topology = [&] (token_metadata& tm, int i) -> future<> {
tm.update_host_id(hosts[i], ips[i]);
tm.update_topology(hosts[i], rack1, node::state::normal, shard_count);
co_await tm.update_normal_tokens(std::unordered_set{token(tests::d2t(float(i) / hosts.size()))}, hosts[i]);
};