From f21f23483c5bf5aeb0411c5682003d3ebdfda594 Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Wed, 6 Dec 2023 19:41:16 +0400 Subject: [PATCH 01/51] token_metadata: drop unused method get_endpoint_to_token_map_for_reading --- locator/token_metadata.cc | 15 --------------- locator/token_metadata.hh | 2 -- 2 files changed, 17 deletions(-) diff --git a/locator/token_metadata.cc b/locator/token_metadata.cc index 5d883e6f1f..a75a23524d 100644 --- a/locator/token_metadata.cc +++ b/locator/token_metadata.cc @@ -268,8 +268,6 @@ public: } public: - /** @return an endpoint to token multimap representation of tokenToEndpointMap (a copy) */ - std::multimap get_endpoint_to_token_map_for_reading() const; /** * @return a (stable copy, won't be modified) Token to Endpoint map for all the normal and bootstrapping nodes * in the cluster. @@ -846,14 +844,6 @@ std::map token_metadata_impl::get_normal_and_bootstrapping_ return ret; } -std::multimap token_metadata_impl::get_endpoint_to_token_map_for_reading() const { - std::multimap cloned; - for (const auto& x : _token_to_endpoint_map) { - cloned.emplace(x.second, x.first); - } - return cloned; -} - topology_change_info::topology_change_info(token_metadata target_token_metadata_, std::optional base_token_metadata_, std::vector all_tokens_, @@ -1154,11 +1144,6 @@ token_metadata::set_read_new(read_new_t read_new) { _impl->set_read_new(read_new); } -std::multimap -token_metadata::get_endpoint_to_token_map_for_reading() const { - return _impl->get_endpoint_to_token_map_for_reading(); -} - std::map token_metadata::get_normal_and_bootstrapping_token_to_endpoint_map() const { return _impl->get_normal_and_bootstrapping_token_to_endpoint_map(); diff --git a/locator/token_metadata.hh b/locator/token_metadata.hh index 66122cc34a..14e5da5a43 100644 --- a/locator/token_metadata.hh +++ b/locator/token_metadata.hh @@ -271,8 +271,6 @@ public: using read_new_t = bool_class; void set_read_new(read_new_t value); - /** @return an endpoint to token multimap 
representation of tokenToEndpointMap (a copy) */ - std::multimap get_endpoint_to_token_map_for_reading() const; /** * @return a (stable copy, won't be modified) Token to Endpoint map for all the normal and bootstrapping nodes * in the cluster. From 2f137776c3ccbff480798d2f3f56fabcb2ddc75c Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Thu, 23 Nov 2023 14:40:18 +0400 Subject: [PATCH 02/51] token_metadata: topology_change_info: change field types to token_metadata_ptr In subsequent commits we'll need the following api for token_metadata: token_metadata(token_metadata2_ptr); get_new() -> token_metadata2* where token_metadata2 is the new version of token_metadata, based on host_id. In other words: * token_metadata knows the new version of itself and returns a pointer to it through get_new() * token_metadata can be constructed based solely on the new version, without its own implementation. In this case the only method we can use on it is get_new. This allows to pass token_metadata2 to API's with token_metadata in method signature, if these APIs are known to only use the get_new method on the passed token_metadata. And back to topology_change_info - if we got it from the new token_metadata we want to be able to construct token_metadata from token_metadata2 contained in it, and this requires it to be a ptr, not value. 
--- locator/abstract_replication_strategy.cc | 2 +- locator/token_metadata.cc | 12 ++++++------ locator/token_metadata.hh | 8 ++++---- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/locator/abstract_replication_strategy.cc b/locator/abstract_replication_strategy.cc index 39621b48cb..fbdb579977 100644 --- a/locator/abstract_replication_strategy.cc +++ b/locator/abstract_replication_strategy.cc @@ -373,7 +373,7 @@ future calculate_effective_replicat const auto token = all_tokens[i]; auto current_endpoints = co_await rs->calculate_natural_endpoints(token, base_token_metadata); - auto target_endpoints = co_await rs->calculate_natural_endpoints(token, topology_changes->target_token_metadata); + auto target_endpoints = co_await rs->calculate_natural_endpoints(token, *topology_changes->target_token_metadata); auto add_mapping = [&](ring_mapping& target, std::unordered_set&& endpoints) { using interval = ring_mapping::interval_type; diff --git a/locator/token_metadata.cc b/locator/token_metadata.cc index a75a23524d..ef9ca6559f 100644 --- a/locator/token_metadata.cc +++ b/locator/token_metadata.cc @@ -796,8 +796,8 @@ future<> token_metadata_impl::update_topology_change_info(dc_rack_fn& get_dc_rac std::sort(begin(all_tokens), end(all_tokens)); auto prev_value = std::move(_topology_change_info); - _topology_change_info.emplace(token_metadata(std::move(target_token_metadata)), - base_token_metadata ? std::optional(token_metadata(std::move(base_token_metadata))): std::nullopt, + _topology_change_info.emplace(make_token_metadata_ptr(std::move(target_token_metadata)), + base_token_metadata ? 
make_token_metadata_ptr(std::move(base_token_metadata)): nullptr, std::move(all_tokens), _read_new); co_await utils::clear_gently(prev_value); @@ -844,10 +844,10 @@ std::map token_metadata_impl::get_normal_and_bootstrapping_ return ret; } -topology_change_info::topology_change_info(token_metadata target_token_metadata_, - std::optional base_token_metadata_, - std::vector all_tokens_, - token_metadata::read_new_t read_new_) +topology_change_info::topology_change_info(token_metadata_ptr target_token_metadata_, + token_metadata_ptr base_token_metadata_, + std::vector all_tokens_, + token_metadata::read_new_t read_new_) : target_token_metadata(std::move(target_token_metadata_)) , base_token_metadata(std::move(base_token_metadata_)) , all_tokens(std::move(all_tokens_)) diff --git a/locator/token_metadata.hh b/locator/token_metadata.hh index 14e5da5a43..aee774cf70 100644 --- a/locator/token_metadata.hh +++ b/locator/token_metadata.hh @@ -290,13 +290,13 @@ private: }; struct topology_change_info { - token_metadata target_token_metadata; - std::optional base_token_metadata; + token_metadata_ptr target_token_metadata; + token_metadata_ptr base_token_metadata; std::vector all_tokens; token_metadata::read_new_t read_new; - topology_change_info(token_metadata target_token_metadata_, - std::optional base_token_metadata_, + topology_change_info(token_metadata_ptr target_token_metadata_, + token_metadata_ptr base_token_metadata_, std::vector all_tokens_, token_metadata::read_new_t read_new_); future<> clear_gently(); From 5227b71363d5025c2635f8246285f4455f76e999 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Wed, 4 Oct 2023 15:50:49 +0200 Subject: [PATCH 03/51] locator/topology: add key_kind parameter For the host_id-based token_metadata we want host_id to be the main node key, meaning it should be used in add_or_update_endpoint to find the node to update. For the inet_address-based token_metadata version we want to retain the old behaviour during transition period. 
In this commit we introduce key_kind parameter and use key_kind::inet_address in all current topology usages. Later we'll use key_kind::host_id for the new token_metadata. In the last commits of the series, when the new token_metadata version is used everywhere, we will remove key_kind enum. --- locator/token_metadata.cc | 4 +-- locator/topology.cc | 51 +++++++++++++++++++++-------- locator/topology.hh | 11 +++++-- test/boost/locator_topology_test.cc | 40 +++++++++++++++++++--- 4 files changed, 84 insertions(+), 22 deletions(-) diff --git a/locator/token_metadata.cc b/locator/token_metadata.cc index ef9ca6559f..9889f90cc3 100644 --- a/locator/token_metadata.cc +++ b/locator/token_metadata.cc @@ -94,9 +94,9 @@ private: struct shallow_copy {}; public: token_metadata_impl(shallow_copy, const token_metadata_impl& o) noexcept - : _topology(topology::config{}) + : _topology(topology::config{}, topology::key_kind::inet_address) {} - token_metadata_impl(token_metadata::config cfg) noexcept : _topology(std::move(cfg.topo_cfg)) {}; + token_metadata_impl(token_metadata::config cfg) noexcept : _topology(std::move(cfg.topo_cfg), topology::key_kind::inet_address) {}; token_metadata_impl(const token_metadata_impl&) = delete; // it's too huge for direct copy, use clone_async() token_metadata_impl(token_metadata_impl&&) noexcept = default; const std::vector& sorted_tokens() const; diff --git a/locator/topology.cc b/locator/topology.cc index 39be2acc96..b23a6c238f 100644 --- a/locator/topology.cc +++ b/locator/topology.cc @@ -70,10 +70,11 @@ future<> topology::clear_gently() noexcept { co_await utils::clear_gently(_nodes); } -topology::topology(config cfg) +topology::topology(config cfg, key_kind k) : _shard(this_shard_id()) , _cfg(cfg) , _sort_by_proximity(!cfg.disable_proximity_sorting) + , _key_kind(k) { tlogger.trace("topology[{}]: constructing using config: endpoint={} dc={} rack={}", fmt::ptr(this), cfg.this_endpoint, cfg.local_dc_rack.dc, cfg.local_dc_rack.rack); @@ -92,6 
+93,7 @@ topology::topology(topology&& o) noexcept , _dc_racks(std::move(o._dc_racks)) , _sort_by_proximity(o._sort_by_proximity) , _datacenters(std::move(o._datacenters)) + , _key_kind(o._key_kind) { assert(_shard == this_shard_id()); tlogger.trace("topology[{}]: move from [{}]", fmt::ptr(this), fmt::ptr(&o)); @@ -112,7 +114,7 @@ topology& topology::operator=(topology&& o) noexcept { } future topology::clone_gently() const { - topology ret(_cfg); + topology ret(_cfg, _key_kind); tlogger.debug("topology[{}]: clone_gently to {} from shard {}", fmt::ptr(this), fmt::ptr(&ret), _shard); for (const auto& nptr : _nodes) { if (nptr) { @@ -437,24 +439,45 @@ const node* topology::find_node(node::idx_type idx) const noexcept { return _nodes.at(idx).get(); } -const node* topology::add_or_update_endpoint(inet_address ep, std::optional opt_id, std::optional opt_dr, std::optional opt_st, std::optional shard_count) +const node* topology::add_or_update_endpoint(std::optional opt_ep, std::optional opt_id, std::optional opt_dr, std::optional opt_st, std::optional shard_count) { if (tlogger.is_enabled(log_level::trace)) { tlogger.trace("topology[{}]: add_or_update_endpoint: ep={} host_id={} dc={} rack={} state={} shards={}, at {}", fmt::ptr(this), - ep, opt_id.value_or(host_id::create_null_id()), opt_dr.value_or(endpoint_dc_rack{}).dc, opt_dr.value_or(endpoint_dc_rack{}).rack, opt_st.value_or(node::state::none), shard_count, + opt_ep, opt_id.value_or(host_id::create_null_id()), opt_dr.value_or(endpoint_dc_rack{}).dc, opt_dr.value_or(endpoint_dc_rack{}).rack, opt_st.value_or(node::state::none), shard_count, current_backtrace()); } - auto n = find_node(ep); - if (n) { - return update_node(make_mutable(n), opt_id, std::nullopt, std::move(opt_dr), std::move(opt_st), std::move(shard_count)); - } else if (opt_id && (n = find_node(*opt_id))) { - return update_node(make_mutable(n), std::nullopt, ep, std::move(opt_dr), std::move(opt_st), std::move(shard_count)); - } else { - return 
add_node(opt_id.value_or(host_id::create_null_id()), ep, - opt_dr.value_or(endpoint_dc_rack::default_location), - opt_st.value_or(node::state::normal), - shard_count.value_or(0)); + + const node* n; + switch (_key_kind) { + case topology::key_kind::host_id: + if (!opt_id) { + on_internal_error(tlogger, format("topology: host_id is not set, ep={}", opt_ep)); + } + n = find_node(*opt_id); + if (n) { + return update_node(make_mutable(n), std::nullopt, opt_ep, std::move(opt_dr), std::move(opt_st), std::move(shard_count)); + } else if (opt_ep && (n = find_node(*opt_ep))) { + return update_node(make_mutable(n), opt_id, std::nullopt, std::move(opt_dr), std::move(opt_st), std::move(shard_count)); + } + break; + case topology::key_kind::inet_address: + if (!opt_ep) { + on_internal_error(tlogger, format("topology: endpoint is not set, id={}", opt_id)); + } + n = find_node(*opt_ep); + if (n) { + return update_node(make_mutable(n), opt_id, std::nullopt, std::move(opt_dr), std::move(opt_st), std::move(shard_count)); + } else if (opt_id && (n = find_node(*opt_id))) { + return update_node(make_mutable(n), std::nullopt, opt_ep, std::move(opt_dr), std::move(opt_st), std::move(shard_count)); + } + break; } + + return add_node(opt_id.value_or(host_id::create_null_id()), + opt_ep.value_or(inet_address{}), + opt_dr.value_or(endpoint_dc_rack::default_location), + opt_st.value_or(node::state::normal), + shard_count.value_or(0)); } bool topology::remove_endpoint(inet_address ep) diff --git a/locator/topology.hh b/locator/topology.hh index 7b35f0969e..5e87b66291 100644 --- a/locator/topology.hh +++ b/locator/topology.hh @@ -159,6 +159,11 @@ private: class topology { public: + enum class key_kind { + inet_address, + host_id, + }; + struct config { inet_address this_endpoint; inet_address this_cql_address; // corresponds to broadcast_rpc_address @@ -168,7 +173,7 @@ public: bool operator==(const config&) const = default; }; - topology(config cfg); + topology(config cfg, key_kind k); 
topology(topology&&) noexcept; topology& operator=(topology&&) noexcept; @@ -234,7 +239,7 @@ public: * * Adds or updates a node with given endpoint */ - const node* add_or_update_endpoint(inet_address ep, std::optional opt_id, + const node* add_or_update_endpoint(std::optional ep, std::optional opt_id, std::optional opt_dr, std::optional opt_st, std::optional shard_count = std::nullopt); @@ -409,6 +414,8 @@ private: // pre-calculated std::unordered_set _datacenters; + key_kind _key_kind; + void calculate_datacenters(); const std::unordered_map& get_nodes_by_endpoint() const noexcept { diff --git a/test/boost/locator_topology_test.cc b/test/boost/locator_topology_test.cc index 850bf545a3..5245e96825 100644 --- a/test/boost/locator_topology_test.cc +++ b/test/boost/locator_topology_test.cc @@ -36,7 +36,7 @@ SEASTAR_THREAD_TEST_CASE(test_add_node) { .local_dc_rack = endpoint_dc_rack::default_location, }; - auto topo = topology(cfg); + auto topo = topology(cfg, topology::key_kind::inet_address); set_abort_on_internal_error(false); auto reset_on_internal_abort = seastar::defer([] { @@ -73,7 +73,7 @@ SEASTAR_THREAD_TEST_CASE(test_moving) { .local_dc_rack = endpoint_dc_rack::default_location, }; - auto topo = topology(cfg); + auto topo = topology(cfg, topology::key_kind::inet_address); topo.add_node(id1, ep1, endpoint_dc_rack::default_location, node::state::normal); @@ -102,7 +102,7 @@ SEASTAR_THREAD_TEST_CASE(test_update_node) { .local_dc_rack = endpoint_dc_rack::default_location, }; - auto topo = topology(cfg); + auto topo = topology(cfg, topology::key_kind::inet_address); set_abort_on_internal_error(false); auto reset_on_internal_abort = seastar::defer([] { @@ -171,6 +171,38 @@ SEASTAR_THREAD_TEST_CASE(test_update_node) { BOOST_REQUIRE_EQUAL(node->get_state(), locator::node::state::left); } +SEASTAR_THREAD_TEST_CASE(test_add_or_update_by_host_id) { + auto id1 = host_id::create_random_id(); + auto id2 = host_id::create_random_id(); + auto ep1 = 
gms::inet_address("127.0.0.1"); + + // In this test we check that add_or_update_endpoint searches by host_id first. + // We create two nodes, one matches by id, another - by ip, + // and assert that add_or_update_endpoint updates the first. + // We need to make the second node 'being_decommissioned', so that + // it gets removed from ip index and we don't get the non-unique IP error. + + auto topo = topology({}, topology::key_kind::host_id); + //auto topo = topology({}); + topo.add_node(id1, gms::inet_address{}, endpoint_dc_rack::default_location, node::state::normal); + topo.add_node(id2, ep1, endpoint_dc_rack::default_location, node::state::being_decommissioned); + + topo.add_or_update_endpoint(ep1, id1, std::nullopt, node::state::bootstrapping); + + auto* n = topo.find_node(id1); + BOOST_REQUIRE_EQUAL(n->get_state(), node::state::bootstrapping); + BOOST_REQUIRE_EQUAL(n->host_id(), id1); + BOOST_REQUIRE_EQUAL(n->endpoint(), ep1); + + auto* n2 = topo.find_node(ep1); + BOOST_REQUIRE_EQUAL(n, n2); + + auto* n3 = topo.find_node(id2); + BOOST_REQUIRE_EQUAL(n3->get_state(), node::state::being_decommissioned); + BOOST_REQUIRE_EQUAL(n3->host_id(), id2); + BOOST_REQUIRE_EQUAL(n3->endpoint(), ep1); +} + SEASTAR_THREAD_TEST_CASE(test_remove_endpoint) { using dc_endpoints_t = std::unordered_map>; using dc_racks_t = std::unordered_map>>; @@ -194,7 +226,7 @@ SEASTAR_THREAD_TEST_CASE(test_remove_endpoint) { .local_dc_rack = dc_rack1 }; - auto topo = topology(cfg); + auto topo = topology(cfg, topology::key_kind::inet_address); topo.add_node(id1, ep1, dc_rack1, node::state::normal); topo.add_node(id2, ep2, dc_rack2, node::state::normal); From c9fbe3d3777bd89ee54b334fcbc7b4df993a0cfc Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Fri, 20 Oct 2023 18:40:31 +0400 Subject: [PATCH 04/51] locator: make dc_rack_fn a template In the next commits token_metadata will be made a template with NodeId=inet_address|host_id parameter. 
This parameter will be passed to dc_rack_fn function, so it also should be made a template. --- locator/token_metadata.cc | 6 +++--- locator/token_metadata.hh | 2 +- locator/types.hh | 3 ++- service/storage_service.cc | 2 +- test/boost/token_metadata_test.cc | 4 ++-- 5 files changed, 9 insertions(+), 8 deletions(-) diff --git a/locator/token_metadata.cc b/locator/token_metadata.cc index 9889f90cc3..d0996a1f6f 100644 --- a/locator/token_metadata.cc +++ b/locator/token_metadata.cc @@ -236,7 +236,7 @@ public: static range interval_to_range(boost::icl::interval::interval_type i); public: - future<> update_topology_change_info(dc_rack_fn& get_dc_rack); + future<> update_topology_change_info(dc_rack_fn& get_dc_rack); const std::optional& get_topology_change_info() const { return _topology_change_info; } @@ -723,7 +723,7 @@ token_metadata_impl::interval_to_range(boost::icl::interval::interval_typ return range({{i.lower(), start_inclusive}}, {{i.upper(), end_inclusive}}); } -future<> token_metadata_impl::update_topology_change_info(dc_rack_fn& get_dc_rack) { +future<> token_metadata_impl::update_topology_change_info(dc_rack_fn& get_dc_rack) { if (_bootstrap_tokens.empty() && _leaving_endpoints.empty() && _replacing_endpoints.empty()) { co_await utils::clear_gently(_topology_change_info); _topology_change_info.reset(); @@ -1115,7 +1115,7 @@ token_metadata::interval_to_range(boost::icl::interval::interval_type i) } future<> -token_metadata::update_topology_change_info(dc_rack_fn& get_dc_rack) { +token_metadata::update_topology_change_info(dc_rack_fn& get_dc_rack) { return _impl->update_topology_change_info(get_dc_rack); } diff --git a/locator/token_metadata.hh b/locator/token_metadata.hh index aee774cf70..841b68fd37 100644 --- a/locator/token_metadata.hh +++ b/locator/token_metadata.hh @@ -251,7 +251,7 @@ public: static boost::icl::interval::interval_type range_to_interval(range r); static range interval_to_range(boost::icl::interval::interval_type i); - future<> 
update_topology_change_info(dc_rack_fn& get_dc_rack); + future<> update_topology_change_info(dc_rack_fn& get_dc_rack); const std::optional& get_topology_change_info() const; diff --git a/locator/types.hh b/locator/types.hh index 4625062ead..96b491dafc 100644 --- a/locator/types.hh +++ b/locator/types.hh @@ -31,6 +31,7 @@ struct endpoint_dc_rack { bool operator==(const endpoint_dc_rack&) const = default; }; -using dc_rack_fn = seastar::noncopyable_function(inet_address)>; +template +using dc_rack_fn = seastar::noncopyable_function(NodeId)>; } // namespace locator diff --git a/service/storage_service.cc b/service/storage_service.cc index 302270a0bb..0147d37d15 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -6109,7 +6109,7 @@ future<> storage_service::update_topology_change_info(mutable_token_metadata_ptr assert(this_shard_id() == 0); try { - locator::dc_rack_fn get_dc_rack_from_gossiper([this] (inet_address ep) { return get_dc_rack_for(ep); }); + locator::dc_rack_fn get_dc_rack_from_gossiper([this] (inet_address ep) { return get_dc_rack_for(ep); }); co_await tmptr->update_topology_change_info(get_dc_rack_from_gossiper); } catch (...) 
{ auto ep = std::current_exception(); diff --git a/test/boost/token_metadata_test.cc b/test/boost/token_metadata_test.cc index 3dfe63155d..e4251bf248 100644 --- a/test/boost/token_metadata_test.cc +++ b/test/boost/token_metadata_test.cc @@ -17,7 +17,7 @@ using namespace locator; namespace { const auto ks_name = sstring("test-ks"); - endpoint_dc_rack get_dc_rack(inet_address) { + endpoint_dc_rack get_dc_rack(gms::inet_address) { return { .dc = "unk-dc", .rack = "unk-rack" @@ -36,7 +36,7 @@ namespace { template mutable_vnode_erm_ptr create_erm(mutable_token_metadata_ptr tmptr, replication_strategy_config_options opts = {}) { - dc_rack_fn get_dc_rack_fn = get_dc_rack; + dc_rack_fn get_dc_rack_fn = get_dc_rack; tmptr->update_topology_change_info(get_dc_rack_fn).get(); auto strategy = seastar::make_shared(std::move(opts)); return calculate_effective_replication_map(std::move(strategy), std::move(tmptr)).get0(); From 63f64f3303e52201bd96322dade765f72c4c01e4 Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Wed, 4 Oct 2023 10:39:57 +0400 Subject: [PATCH 05/51] token_metadata: make it a template with NodeId=inet_address/host_id NodeId is used in all internal token_metadata data structures, that previously used inet_address. We choose topology::key_kind based on the value of the template parameter. generic_token_metadata::update_topology overload with host_id parameter is added to make update_topology_change_info work, it now uses NodeId as a parameter type. topology::remove_endpoint(host_id) is added to make generic_token_metadata::remove_endpoint(NodeId) work. pending_endpoints_for and endpoints_for_reading are just removed - they are not used and not implemented. The declarations were left by mistake from a refactoring in which these methods were moved to erm. generic_token_metadata_base is extracted to contain declarations, common to both token_metadata versions. 
Templates are explicitly instantiated inside token_metadata.cc, since implementation part is also a template and it's not exposed to the header. There are no other behavioral changes in this commit, just syntax fixes to make token_metadata a template. --- api/api_init.hh | 10 +- cdc/log.hh | 6 +- cql3/statements/create_keyspace_statement.hh | 5 +- cql3/statements/ks_prop_defs.hh | 9 +- db/view/view_update_checks.hh | 6 +- locator/token_metadata.cc | 487 ++++++++++++------- locator/token_metadata.hh | 103 ++-- locator/token_metadata_fwd.hh | 8 +- locator/topology.cc | 11 + locator/topology.hh | 3 + node_ops/node_ops_ctl.hh | 6 +- 11 files changed, 420 insertions(+), 234 deletions(-) diff --git a/api/api_init.hh b/api/api_init.hh index c86c57925f..cff8089a8f 100644 --- a/api/api_init.hh +++ b/api/api_init.hh @@ -10,6 +10,7 @@ #include #include +#include "locator/host_id.hh" #include "replica/database_fwd.hh" #include "tasks/task_manager.hh" #include "seastarx.hh" @@ -32,9 +33,16 @@ namespace streaming { class stream_manager; } +namespace gms { + class inet_address; +} + namespace locator { -class token_metadata; +template +class generic_token_metadata; +using token_metadata = generic_token_metadata; +using token_metadata2 = generic_token_metadata; class shared_token_metadata; class snitch_ptr; diff --git a/cdc/log.hh b/cdc/log.hh index 0af27cd5e6..d81f98ea2f 100644 --- a/cdc/log.hh +++ b/cdc/log.hh @@ -29,13 +29,17 @@ #include "timestamp.hh" #include "tracing/trace_state.hh" #include "utils/UUID.hh" +#include "locator/host_id.hh" class schema; using schema_ptr = seastar::lw_shared_ptr; namespace locator { -class token_metadata; +template +class generic_token_metadata; +using token_metadata = generic_token_metadata; +using token_metadata2 = generic_token_metadata; } // namespace locator diff --git a/cql3/statements/create_keyspace_statement.hh b/cql3/statements/create_keyspace_statement.hh index 02946325a2..2aff726ad5 100644 --- 
a/cql3/statements/create_keyspace_statement.hh +++ b/cql3/statements/create_keyspace_statement.hh @@ -17,7 +17,10 @@ namespace locator { -class token_metadata; +template +class generic_token_metadata; +using token_metadata = generic_token_metadata; +using token_metadata2 = generic_token_metadata; }; diff --git a/cql3/statements/ks_prop_defs.hh b/cql3/statements/ks_prop_defs.hh index 32bc4bec63..11182df218 100644 --- a/cql3/statements/ks_prop_defs.hh +++ b/cql3/statements/ks_prop_defs.hh @@ -12,6 +12,7 @@ #include "cql3/statements/property_definitions.hh" #include "data_dictionary/storage_options.hh" +#include "locator/host_id.hh" #include #include @@ -20,9 +21,15 @@ namespace data_dictionary { class keyspace_metadata; } +namespace gms { + class inet_address; +} namespace locator { - class token_metadata; + template + class generic_token_metadata; + using token_metadata = generic_token_metadata; + using token_metadata2 = generic_token_metadata; class shared_token_metadata; struct snitch_ptr; class abstract_replication_strategy; diff --git a/db/view/view_update_checks.hh b/db/view/view_update_checks.hh index 77b7113c0d..ccb3eacafa 100644 --- a/db/view/view_update_checks.hh +++ b/db/view/view_update_checks.hh @@ -10,6 +10,7 @@ #include #include "streaming/stream_reason.hh" +#include "locator/host_id.hh" #include "seastarx.hh" namespace replica { @@ -23,7 +24,10 @@ class system_distributed_keyspace; } namespace locator { -class token_metadata; +template +class generic_token_metadata; +using token_metadata = generic_token_metadata; +using token_metadata2 = generic_token_metadata; } namespace db::view { diff --git a/locator/token_metadata.cc b/locator/token_metadata.cc index d0996a1f6f..a9d1639bfe 100644 --- a/locator/token_metadata.cc +++ b/locator/token_metadata.cc @@ -27,6 +27,12 @@ namespace locator { static logging::logger tlogger("token_metadata"); +template +inline static constexpr const topology::key_kind kind_for_node_id_type + = std::is_same_v + ? 
topology::key_kind::inet_address + : topology::key_kind::host_id; + template static void remove_by_value(C& container, V value) { for (auto it = container.begin(); it != container.end();) { @@ -38,9 +44,8 @@ static void remove_by_value(C& container, V value) { } } +template class token_metadata_impl final { -public: - using inet_address = gms::inet_address; private: /** * Maintains token to endpoint map of every node in the cluster. @@ -48,17 +53,17 @@ private: * multiple tokens. Hence, the BiMultiValMap collection. */ // FIXME: have to be BiMultiValMap - std::unordered_map _token_to_endpoint_map; + std::unordered_map _token_to_endpoint_map; // Track the unique set of nodes in _token_to_endpoint_map - std::unordered_set _normal_token_owners; + std::unordered_set _normal_token_owners; - std::unordered_map _bootstrap_tokens; - std::unordered_set _leaving_endpoints; + std::unordered_map _bootstrap_tokens; + std::unordered_set _leaving_endpoints; // The map between the existing node to be replaced and the replacing node - std::unordered_map _replacing_endpoints; + std::unordered_map _replacing_endpoints; - std::optional _topology_change_info; + std::optional> _topology_change_info; std::vector _sorted_tokens; @@ -94,26 +99,26 @@ private: struct shallow_copy {}; public: token_metadata_impl(shallow_copy, const token_metadata_impl& o) noexcept - : _topology(topology::config{}, topology::key_kind::inet_address) + : _topology(topology::config{}, kind_for_node_id_type) {} - token_metadata_impl(token_metadata::config cfg) noexcept : _topology(std::move(cfg.topo_cfg), topology::key_kind::inet_address) {}; + token_metadata_impl(token_metadata::config cfg) noexcept : _topology(std::move(cfg.topo_cfg), kind_for_node_id_type) {}; token_metadata_impl(const token_metadata_impl&) = delete; // it's too huge for direct copy, use clone_async() token_metadata_impl(token_metadata_impl&&) noexcept = default; const std::vector& sorted_tokens() const; - future<> 
update_normal_tokens(std::unordered_set tokens, inet_address endpoint); + future<> update_normal_tokens(std::unordered_set tokens, NodeId endpoint); const token& first_token(const token& start) const; size_t first_token_index(const token& start) const; - std::optional get_endpoint(const token& token) const; - std::vector get_tokens(const inet_address& addr) const; - const std::unordered_map& get_token_to_endpoint() const { + std::optional get_endpoint(const token& token) const; + std::vector get_tokens(const NodeId& addr) const; + const std::unordered_map& get_token_to_endpoint() const { return _token_to_endpoint_map; } - const std::unordered_set& get_leaving_endpoints() const { + const std::unordered_set& get_leaving_endpoints() const { return _leaving_endpoints; } - const std::unordered_map& get_bootstrap_tokens() const { + const std::unordered_map& get_bootstrap_tokens() const { return _bootstrap_tokens; } @@ -121,6 +126,10 @@ public: _topology.add_or_update_endpoint(ep, std::nullopt, std::move(opt_dr), std::move(opt_st), std::move(shard_count)); } + void update_topology(host_id ep, std::optional opt_dr, std::optional opt_st, std::optional shard_count = std::nullopt) { + _topology.add_or_update_endpoint(std::nullopt, ep, std::move(opt_dr), std::move(opt_st), std::move(shard_count)); + } + /** * Creates an iterable range of the sorted tokens starting at the token next * after the given one. 
@@ -129,9 +138,9 @@ public: * * @return The requested range (see the description above) */ - boost::iterator_range ring_range(const token& start) const; + boost::iterator_range::tokens_iterator> ring_range(const token& start) const; - boost::iterator_range ring_range(dht::ring_position_view pos) const; + boost::iterator_range::tokens_iterator> ring_range(dht::ring_position_view pos) const; topology& get_topology() { return _topology; @@ -164,30 +173,30 @@ public: /** @return a copy of the endpoint-to-id map for read-only operations */ std::unordered_map get_endpoint_to_host_id_map_for_reading() const; - void add_bootstrap_token(token t, inet_address endpoint); + void add_bootstrap_token(token t, NodeId endpoint); - void add_bootstrap_tokens(std::unordered_set tokens, inet_address endpoint); + void add_bootstrap_tokens(std::unordered_set tokens, NodeId endpoint); void remove_bootstrap_tokens(std::unordered_set tokens); - void add_leaving_endpoint(inet_address endpoint); - void del_leaving_endpoint(inet_address endpoint); + void add_leaving_endpoint(NodeId endpoint); + void del_leaving_endpoint(NodeId endpoint); public: - void remove_endpoint(inet_address endpoint); + void remove_endpoint(NodeId endpoint); - bool is_normal_token_owner(inet_address endpoint) const; + bool is_normal_token_owner(NodeId endpoint) const; - bool is_leaving(inet_address endpoint) const; + bool is_leaving(NodeId endpoint) const; // Is this node being replaced by another node - bool is_being_replaced(inet_address endpoint) const; + bool is_being_replaced(NodeId endpoint) const; // Is any node being replaced by another node bool is_any_node_being_replaced() const; - void add_replacing_endpoint(inet_address existing_node, inet_address replacing_node); + void add_replacing_endpoint(NodeId existing_node, NodeId replacing_node); - void del_replacing_endpoint(inet_address existing_node); + void del_replacing_endpoint(NodeId existing_node); public: /** @@ -236,8 +245,8 @@ public: static range 
interval_to_range(boost::icl::interval::interval_type i); public: - future<> update_topology_change_info(dc_rack_fn& get_dc_rack); - const std::optional& get_topology_change_info() const { + future<> update_topology_change_info(dc_rack_fn& get_dc_rack); + const std::optional>& get_topology_change_info() const { return _topology_change_info; } public: @@ -248,7 +257,7 @@ public: // node that is still joining the cluster, e.g., a node that is still // streaming data before it finishes the bootstrap process and turns into // NORMAL status. - const std::unordered_set& get_all_endpoints() const noexcept { + const std::unordered_set& get_all_endpoints() const noexcept { return _normal_token_owners; } @@ -258,11 +267,6 @@ public: private: future<> update_normal_token_owners(); public: - // returns empty vector if keyspace_name not found. - inet_address_vector_topology_change pending_endpoints_for(const token& token, const sstring& keyspace_name) const; - - std::optional endpoints_for_reading(const token& token, const sstring& keyspace_name) const; - void set_read_new(token_metadata::read_new_t read_new) { _read_new = read_new; } @@ -272,7 +276,7 @@ public: * @return a (stable copy, won't be modified) Token to Endpoint map for all the normal and bootstrapping nodes * in the cluster. 
*/ - std::map get_normal_and_bootstrapping_token_to_endpoint_map() const; + std::map get_normal_and_bootstrapping_token_to_endpoint_map() const; long get_ring_version() const { return _ring_version; @@ -289,11 +293,11 @@ public: void set_version(token_metadata::version_t version) { if (version <= 0) { on_internal_error(tlogger, - format("token_metadata_impl::set_version: invalid new version {}", version)); + format("token_metadata_impl::set_version: invalid new version {}", version)); } if (version < _version) { on_internal_error(tlogger, - format("token_metadata_impl::set_version: new version can't be smaller than the previous one, " + format("token_metadata_impl::set_version: new version can't be smaller than the previous one, " "new version {}, previous version {}", version, _version)); } _version = version; @@ -302,26 +306,31 @@ public: _version_tracker = std::move(tracker); } - friend class token_metadata; + friend class generic_token_metadata; }; -thread_local long token_metadata_impl::_static_ring_version; +template +thread_local long token_metadata_impl::_static_ring_version; -token_metadata::tokens_iterator::tokens_iterator(const token& start, const token_metadata_impl* token_metadata) +template +generic_token_metadata::tokens_iterator::tokens_iterator(const token& start, const token_metadata_impl* token_metadata) : _token_metadata(token_metadata) { _cur_it = _token_metadata->sorted_tokens().begin() + _token_metadata->first_token_index(start); _remaining = _token_metadata->sorted_tokens().size(); } -bool token_metadata::tokens_iterator::operator==(const tokens_iterator& it) const { +template +bool generic_token_metadata::tokens_iterator::operator==(const tokens_iterator& it) const { return _remaining == it._remaining; } -const token& token_metadata::tokens_iterator::operator*() const { +template +const token& generic_token_metadata::tokens_iterator::operator*() const { return *_cur_it; } -token_metadata::tokens_iterator& 
token_metadata::tokens_iterator::operator++() { +template +typename generic_token_metadata::tokens_iterator& generic_token_metadata::tokens_iterator::operator++() { ++_cur_it; if (_cur_it == _token_metadata->sorted_tokens().end()) { _cur_it = _token_metadata->sorted_tokens().begin(); @@ -330,19 +339,22 @@ token_metadata::tokens_iterator& token_metadata::tokens_iterator::operator++() { return *this; } -host_id token_metadata::get_my_id() const { +template +host_id generic_token_metadata::get_my_id() const { return get_topology().get_config().this_host_id; } +template inline -boost::iterator_range -token_metadata_impl::ring_range(const token& start) const { - auto begin = token_metadata::tokens_iterator(start, this); - auto end = token_metadata::tokens_iterator(); +boost::iterator_range::tokens_iterator> +token_metadata_impl::ring_range(const token& start) const { + auto begin = typename generic_token_metadata::tokens_iterator(start, this); + auto end = typename generic_token_metadata::tokens_iterator(); return boost::make_iterator_range(begin, end); } -future> token_metadata_impl::clone_async() const noexcept { +template +future>> token_metadata_impl::clone_async() const noexcept { auto ret = co_await clone_only_token_map(); ret->_bootstrap_tokens.reserve(_bootstrap_tokens.size()); for (const auto& p : _bootstrap_tokens) { @@ -356,7 +368,8 @@ future> token_metadata_impl::clone_async() co_return ret; } -future> token_metadata_impl::clone_only_token_map(bool clone_sorted_tokens) const noexcept { +template +future>> token_metadata_impl::clone_only_token_map(bool clone_sorted_tokens) const noexcept { auto ret = std::make_unique(shallow_copy{}, *this); ret->_token_to_endpoint_map.reserve(_token_to_endpoint_map.size()); for (const auto& p : _token_to_endpoint_map) { @@ -374,7 +387,8 @@ future> token_metadata_impl::clone_only_tok co_return ret; } -future<> token_metadata_impl::clear_gently() noexcept { +template +future<> token_metadata_impl::clear_gently() noexcept { 
co_await utils::clear_gently(_token_to_endpoint_map); co_await utils::clear_gently(_normal_token_owners); co_await utils::clear_gently(_bootstrap_tokens); @@ -386,7 +400,8 @@ future<> token_metadata_impl::clear_gently() noexcept { co_return; } -void token_metadata_impl::sort_tokens() { +template +void token_metadata_impl::sort_tokens() { std::vector sorted; sorted.reserve(_token_to_endpoint_map.size()); @@ -399,23 +414,28 @@ void token_metadata_impl::sort_tokens() { _sorted_tokens = std::move(sorted); } -const tablet_metadata& token_metadata::tablets() const { +template +const tablet_metadata& generic_token_metadata::tablets() const { return _impl->tablets(); } -tablet_metadata& token_metadata::tablets() { +template +tablet_metadata& generic_token_metadata::tablets() { return _impl->tablets(); } -void token_metadata::set_tablets(tablet_metadata tm) { +template +void generic_token_metadata::set_tablets(tablet_metadata tm) { _impl->set_tablets(std::move(tm)); } -const std::vector& token_metadata_impl::sorted_tokens() const { +template +const std::vector& token_metadata_impl::sorted_tokens() const { return _sorted_tokens; } -std::vector token_metadata_impl::get_tokens(const inet_address& addr) const { +template +std::vector token_metadata_impl::get_tokens(const NodeId& addr) const { std::vector res; for (auto&& i : _token_to_endpoint_map) { if (i.second == addr) { @@ -426,12 +446,13 @@ std::vector token_metadata_impl::get_tokens(const inet_address& addr) con return res; } -future<> token_metadata_impl::update_normal_tokens(std::unordered_set tokens, inet_address endpoint) { +template +future<> token_metadata_impl::update_normal_tokens(std::unordered_set tokens, NodeId endpoint) { if (tokens.empty()) { co_return; } - if (!_topology.has_endpoint(endpoint)) { + if (!_topology.has_node(endpoint)) { on_internal_error(tlogger, format("token_metadata_impl: {} must be a member of topology to update normal tokens", endpoint)); } @@ -465,7 +486,7 @@ future<> 
token_metadata_impl::update_normal_tokens(std::unordered_set tok for (const token& t : tokens) { co_await coroutine::maybe_yield(); - auto prev = _token_to_endpoint_map.insert(std::pair(t, endpoint)); + auto prev = _token_to_endpoint_map.insert(std::pair(t, endpoint)); should_sort_tokens |= prev.second; // new token inserted -> sort if (prev.first->second != endpoint) { tlogger.debug("Token {} changing ownership from {} to {}", t, prev.first->second, endpoint); @@ -483,7 +504,8 @@ future<> token_metadata_impl::update_normal_tokens(std::unordered_set tok co_return; } -size_t token_metadata_impl::first_token_index(const token& start) const { +template +size_t token_metadata_impl::first_token_index(const token& start) const { if (_sorted_tokens.empty()) { auto msg = format("sorted_tokens is empty in first_token_index!"); tlogger.error("{}", msg); @@ -497,11 +519,13 @@ size_t token_metadata_impl::first_token_index(const token& start) const { } } -const token& token_metadata_impl::first_token(const token& start) const { +template +const token& token_metadata_impl::first_token(const token& start) const { return _sorted_tokens[first_token_index(start)]; } -std::optional token_metadata_impl::get_endpoint(const token& token) const { +template +std::optional token_metadata_impl::get_endpoint(const token& token) const { auto it = _token_to_endpoint_map.find(token); if (it == _token_to_endpoint_map.end()) { return std::nullopt; @@ -510,7 +534,8 @@ std::optional token_metadata_impl::get_endpoint(const token& token } } -void token_metadata_impl::debug_show() const { +template +void token_metadata_impl::debug_show() const { auto reporter = std::make_shared>(); reporter->set_callback ([reporter, this] { fmt::print("Endpoint -> Token\n"); @@ -525,11 +550,13 @@ void token_metadata_impl::debug_show() const { reporter->arm_periodic(std::chrono::seconds(1)); } -void token_metadata_impl::update_host_id(const host_id& host_id, inet_address endpoint) { +template +void 
token_metadata_impl::update_host_id(const host_id& host_id, inet_address endpoint) { _topology.add_or_update_endpoint(endpoint, host_id); } -host_id token_metadata_impl::get_host_id(inet_address endpoint) const { +template +host_id token_metadata_impl::get_host_id(inet_address endpoint) const { if (const auto* node = _topology.find_node(endpoint)) [[likely]] { return node->host_id(); } else { @@ -537,7 +564,8 @@ host_id token_metadata_impl::get_host_id(inet_address endpoint) const { } } -std::optional token_metadata_impl::get_host_id_if_known(inet_address endpoint) const { +template +std::optional token_metadata_impl::get_host_id_if_known(inet_address endpoint) const { if (const auto* node = _topology.find_node(endpoint)) [[likely]] { return node->host_id(); } else { @@ -545,7 +573,8 @@ std::optional token_metadata_impl::get_host_id_if_known(inet_address en } } -std::optional token_metadata_impl::get_endpoint_for_host_id(host_id host_id) const { +template +std::optional token_metadata_impl::get_endpoint_for_host_id(host_id host_id) const { if (const auto* node = _topology.find_node(host_id)) [[likely]] { return node->endpoint(); } else { @@ -553,7 +582,8 @@ std::optional token_metadata_impl::get_endpoint_for_host_id(host_i } } -std::unordered_map token_metadata_impl::get_endpoint_to_host_id_map_for_reading() const { +template +std::unordered_map token_metadata_impl::get_endpoint_to_host_id_map_for_reading() const { const auto& nodes = _topology.get_nodes_by_endpoint(); std::unordered_map map; map.reserve(nodes.size()); @@ -571,21 +601,25 @@ std::unordered_map token_metadata_impl::get_endpoint_to_h return map; } -bool token_metadata_impl::is_normal_token_owner(inet_address endpoint) const { +template +bool token_metadata_impl::is_normal_token_owner(NodeId endpoint) const { return _normal_token_owners.contains(endpoint); } -void token_metadata_impl::add_bootstrap_token(token t, inet_address endpoint) { +template +void token_metadata_impl::add_bootstrap_token(token t, 
NodeId endpoint) { std::unordered_set tokens{t}; add_bootstrap_tokens(tokens, endpoint); } -boost::iterator_range -token_metadata_impl::ring_range(const dht::ring_position_view start) const { +template +boost::iterator_range::tokens_iterator> +token_metadata_impl::ring_range(const dht::ring_position_view start) const { return ring_range(start.token()); } -void token_metadata_impl::add_bootstrap_tokens(std::unordered_set tokens, inet_address endpoint) { +template +void token_metadata_impl::add_bootstrap_tokens(std::unordered_set tokens, NodeId endpoint) { for (auto t : tokens) { auto old_endpoint = _bootstrap_tokens.find(t); if (old_endpoint != _bootstrap_tokens.end() && (*old_endpoint).second != endpoint) { @@ -600,14 +634,15 @@ void token_metadata_impl::add_bootstrap_tokens(std::unordered_set tokens, } } - std::erase_if(_bootstrap_tokens, [endpoint] (const std::pair& n) { return n.second == endpoint; }); + std::erase_if(_bootstrap_tokens, [endpoint] (const std::pair& n) { return n.second == endpoint; }); for (auto t : tokens) { _bootstrap_tokens[t] = endpoint; } } -void token_metadata_impl::remove_bootstrap_tokens(std::unordered_set tokens) { +template +void token_metadata_impl::remove_bootstrap_tokens(std::unordered_set tokens) { if (tokens.empty()) { tlogger.warn("tokens is empty in remove_bootstrap_tokens!"); return; @@ -617,19 +652,23 @@ void token_metadata_impl::remove_bootstrap_tokens(std::unordered_set toke } } -bool token_metadata_impl::is_leaving(inet_address endpoint) const { +template +bool token_metadata_impl::is_leaving(NodeId endpoint) const { return _leaving_endpoints.contains(endpoint); } -bool token_metadata_impl::is_being_replaced(inet_address endpoint) const { +template +bool token_metadata_impl::is_being_replaced(NodeId endpoint) const { return _replacing_endpoints.contains(endpoint); } -bool token_metadata_impl::is_any_node_being_replaced() const { +template +bool token_metadata_impl::is_any_node_being_replaced() const { return 
!_replacing_endpoints.empty(); } -void token_metadata_impl::remove_endpoint(inet_address endpoint) { +template +void token_metadata_impl::remove_endpoint(NodeId endpoint) { remove_by_value(_bootstrap_tokens, endpoint); remove_by_value(_token_to_endpoint_map, endpoint); _normal_token_owners.erase(endpoint); @@ -639,7 +678,8 @@ void token_metadata_impl::remove_endpoint(inet_address endpoint) { invalidate_cached_rings(); } -token token_metadata_impl::get_predecessor(token t) const { +template +token token_metadata_impl::get_predecessor(token t) const { auto& tokens = sorted_tokens(); auto it = std::lower_bound(tokens.begin(), tokens.end(), t); if (it == tokens.end() || *it != t) { @@ -655,7 +695,8 @@ token token_metadata_impl::get_predecessor(token t) const { } } -dht::token_range_vector token_metadata_impl::get_primary_ranges_for(std::unordered_set tokens) const { +template +dht::token_range_vector token_metadata_impl::get_primary_ranges_for(std::unordered_set tokens) const { dht::token_range_vector ranges; ranges.reserve(tokens.size() + 1); // one of the ranges will wrap for (auto right : tokens) { @@ -668,12 +709,14 @@ dht::token_range_vector token_metadata_impl::get_primary_ranges_for(std::unorder return ranges; } -dht::token_range_vector token_metadata_impl::get_primary_ranges_for(token right) const { +template +dht::token_range_vector token_metadata_impl::get_primary_ranges_for(token right) const { return get_primary_ranges_for(std::unordered_set{right}); } +template boost::icl::interval::interval_type -token_metadata_impl::range_to_interval(range r) { +token_metadata_impl::range_to_interval(range r) { bool start_inclusive = false; bool end_inclusive = false; token start = dht::minimum_token(); @@ -700,8 +743,9 @@ token_metadata_impl::range_to_interval(range r) { } } +template range -token_metadata_impl::interval_to_range(boost::icl::interval::interval_type i) { +token_metadata_impl::interval_to_range(boost::icl::interval::interval_type i) { bool 
start_inclusive; bool end_inclusive; auto bounds = i.bounds().bits(); @@ -723,7 +767,8 @@ token_metadata_impl::interval_to_range(boost::icl::interval::interval_typ return range({{i.lower(), start_inclusive}}, {{i.upper(), end_inclusive}}); } -future<> token_metadata_impl::update_topology_change_info(dc_rack_fn& get_dc_rack) { +template +future<> token_metadata_impl::update_topology_change_info(dc_rack_fn& get_dc_rack) { if (_bootstrap_tokens.empty() && _leaving_endpoints.empty() && _replacing_endpoints.empty()) { co_await utils::clear_gently(_topology_change_info); _topology_change_info.reset(); @@ -736,7 +781,7 @@ future<> token_metadata_impl::update_topology_change_info(dc_rack_fn> new_normal_tokens; + std::unordered_map> new_normal_tokens; if (!_replacing_endpoints.empty()) { for (const auto& [token, inet_address]: _token_to_endpoint_map) { const auto it = _replacing_endpoints.find(inet_address); @@ -796,19 +841,21 @@ future<> token_metadata_impl::update_topology_change_info(dc_rack_fn>(std::move(target_token_metadata)), + base_token_metadata ? 
make_lw_shared>(std::move(base_token_metadata)): nullptr, std::move(all_tokens), _read_new); co_await utils::clear_gently(prev_value); } -size_t token_metadata_impl::count_normal_token_owners() const { +template +size_t token_metadata_impl::count_normal_token_owners() const { return _normal_token_owners.size(); } -future<> token_metadata_impl::update_normal_token_owners() { - std::unordered_set eps; +template +future<> token_metadata_impl::update_normal_token_owners() { + std::unordered_set eps; for (auto [t, ep]: _token_to_endpoint_map) { eps.insert(ep); co_await coroutine::maybe_yield(); @@ -816,21 +863,25 @@ future<> token_metadata_impl::update_normal_token_owners() { _normal_token_owners = std::move(eps); } -void token_metadata_impl::add_leaving_endpoint(inet_address endpoint) { +template +void token_metadata_impl::add_leaving_endpoint(NodeId endpoint) { _leaving_endpoints.emplace(endpoint); } -void token_metadata_impl::del_leaving_endpoint(inet_address endpoint) { +template +void token_metadata_impl::del_leaving_endpoint(NodeId endpoint) { _leaving_endpoints.erase(endpoint); } -void token_metadata_impl::add_replacing_endpoint(inet_address existing_node, inet_address replacing_node) { +template +void token_metadata_impl::add_replacing_endpoint(NodeId existing_node, NodeId replacing_node) { tlogger.info("Added node {} as pending replacing endpoint which replaces existing node {}", replacing_node, existing_node); _replacing_endpoints[existing_node] = replacing_node; } -void token_metadata_impl::del_replacing_endpoint(inet_address existing_node) { +template +void token_metadata_impl::del_replacing_endpoint(NodeId existing_node) { if (_replacing_endpoints.contains(existing_node)) { tlogger.info("Removed node {} as pending replacing endpoint which replaces existing node {}", _replacing_endpoints[existing_node], existing_node); @@ -838,14 +889,16 @@ void token_metadata_impl::del_replacing_endpoint(inet_address existing_node) { 
_replacing_endpoints.erase(existing_node); } -std::map token_metadata_impl::get_normal_and_bootstrapping_token_to_endpoint_map() const { - std::map ret(_token_to_endpoint_map.begin(), _token_to_endpoint_map.end()); +template +std::map token_metadata_impl::get_normal_and_bootstrapping_token_to_endpoint_map() const { + std::map ret(_token_to_endpoint_map.begin(), _token_to_endpoint_map.end()); ret.insert(_bootstrap_tokens.begin(), _bootstrap_tokens.end()); return ret; } -topology_change_info::topology_change_info(token_metadata_ptr target_token_metadata_, - token_metadata_ptr base_token_metadata_, +template +topology_change_info::topology_change_info(lw_shared_ptr> target_token_metadata_, + lw_shared_ptr> base_token_metadata_, std::vector all_tokens_, token_metadata::read_new_t read_new_) : target_token_metadata(std::move(target_token_metadata_)) @@ -855,84 +908,101 @@ topology_change_info::topology_change_info(token_metadata_ptr target_token_metad { } -future<> topology_change_info::clear_gently() { +template +future<> topology_change_info::clear_gently() { co_await utils::clear_gently(target_token_metadata); co_await utils::clear_gently(base_token_metadata); co_await utils::clear_gently(all_tokens); } -token_metadata::token_metadata(std::unique_ptr impl) +template +generic_token_metadata::generic_token_metadata(std::unique_ptr> impl) : _impl(std::move(impl)) { } -token_metadata::token_metadata(config cfg) - : _impl(std::make_unique(std::move(cfg))) { +template +generic_token_metadata::generic_token_metadata(config cfg) + : _impl(std::make_unique>(std::move(cfg))) { } -token_metadata::~token_metadata() = default; +template +generic_token_metadata::~generic_token_metadata() = default; +template +generic_token_metadata::generic_token_metadata(generic_token_metadata&&) noexcept = default; -token_metadata::token_metadata(token_metadata&&) noexcept = default; - -token_metadata& token_metadata::token_metadata::operator=(token_metadata&&) noexcept = default; +template 
+generic_token_metadata& generic_token_metadata::generic_token_metadata::operator=(generic_token_metadata&&) noexcept = default; +template const std::vector& -token_metadata::sorted_tokens() const { +generic_token_metadata::sorted_tokens() const { return _impl->sorted_tokens(); } +template future<> -token_metadata::update_normal_tokens(std::unordered_set tokens, inet_address endpoint) { +generic_token_metadata::update_normal_tokens(std::unordered_set tokens, NodeId endpoint) { return _impl->update_normal_tokens(std::move(tokens), endpoint); } +template const token& -token_metadata::first_token(const token& start) const { +generic_token_metadata::first_token(const token& start) const { return _impl->first_token(start); } +template size_t -token_metadata::first_token_index(const token& start) const { +generic_token_metadata::first_token_index(const token& start) const { return _impl->first_token_index(start); } -std::optional -token_metadata::get_endpoint(const token& token) const { +template +std::optional +generic_token_metadata::get_endpoint(const token& token) const { return _impl->get_endpoint(token); } +template std::vector -token_metadata::get_tokens(const inet_address& addr) const { +generic_token_metadata::get_tokens(const NodeId& addr) const { return _impl->get_tokens(addr); } -const std::unordered_map& -token_metadata::get_token_to_endpoint() const { +template +const std::unordered_map& +generic_token_metadata::get_token_to_endpoint() const { return _impl->get_token_to_endpoint(); } -const std::unordered_set& -token_metadata::get_leaving_endpoints() const { +template +const std::unordered_set& +generic_token_metadata::get_leaving_endpoints() const { return _impl->get_leaving_endpoints(); } -const std::unordered_map& -token_metadata::get_bootstrap_tokens() const { +template +const std::unordered_map& +generic_token_metadata::get_bootstrap_tokens() const { return _impl->get_bootstrap_tokens(); } +template void -token_metadata::update_topology(inet_address 
ep, std::optional opt_dr, std::optional opt_st, std::optional shard_count) { +generic_token_metadata::update_topology(NodeId ep, std::optional opt_dr, std::optional opt_st, std::optional shard_count) { _impl->update_topology(ep, std::move(opt_dr), std::move(opt_st), std::move(shard_count)); } -boost::iterator_range -token_metadata::ring_range(const token& start) const { +template +boost::iterator_range::tokens_iterator> +generic_token_metadata::ring_range(const token& start) const { return _impl->ring_range(start); } -boost::iterator_range -token_metadata::ring_range(dht::ring_position_view start) const { +template +boost::iterator_range::tokens_iterator> +generic_token_metadata::ring_range(dht::ring_position_view start) const { return _impl->ring_range(start); } @@ -965,210 +1035,251 @@ std::unique_ptr make_splitter(token_metadata_ptr return std::make_unique(std::move(tmptr)); } +template topology& -token_metadata::get_topology() { +generic_token_metadata::get_topology() { return _impl->get_topology(); } +template const topology& -token_metadata::get_topology() const { +generic_token_metadata::get_topology() const { return _impl->get_topology(); } +template void -token_metadata::debug_show() const { +generic_token_metadata::debug_show() const { _impl->debug_show(); } +template void -token_metadata::update_host_id(const host_id& host_id, inet_address endpoint) { +generic_token_metadata::update_host_id(const host_id& host_id, inet_address endpoint) { _impl->update_host_id(host_id, endpoint); } +template host_id -token_metadata::get_host_id(inet_address endpoint) const { +generic_token_metadata::get_host_id(inet_address endpoint) const { return _impl->get_host_id(endpoint); } +template std::optional -token_metadata::get_host_id_if_known(inet_address endpoint) const { +generic_token_metadata::get_host_id_if_known(inet_address endpoint) const { return _impl->get_host_id_if_known(endpoint); } -std::optional -token_metadata::get_endpoint_for_host_id(host_id host_id) 
const { +template +std::optional::inet_address> +generic_token_metadata::get_endpoint_for_host_id(host_id host_id) const { return _impl->get_endpoint_for_host_id(host_id); } -host_id_or_endpoint token_metadata::parse_host_id_and_endpoint(const sstring& host_id_string) const { +template +host_id_or_endpoint generic_token_metadata::parse_host_id_and_endpoint(const sstring& host_id_string) const { auto res = host_id_or_endpoint(host_id_string); res.resolve(*this); return res; } +template std::unordered_map -token_metadata::get_endpoint_to_host_id_map_for_reading() const { +generic_token_metadata::get_endpoint_to_host_id_map_for_reading() const { return _impl->get_endpoint_to_host_id_map_for_reading(); } +template void -token_metadata::add_bootstrap_token(token t, inet_address endpoint) { +generic_token_metadata::add_bootstrap_token(token t, NodeId endpoint) { _impl->add_bootstrap_token(t, endpoint); } +template void -token_metadata::add_bootstrap_tokens(std::unordered_set tokens, inet_address endpoint) { +generic_token_metadata::add_bootstrap_tokens(std::unordered_set tokens, NodeId endpoint) { _impl->add_bootstrap_tokens(std::move(tokens), endpoint); } +template void -token_metadata::remove_bootstrap_tokens(std::unordered_set tokens) { +generic_token_metadata::remove_bootstrap_tokens(std::unordered_set tokens) { _impl->remove_bootstrap_tokens(std::move(tokens)); } +template void -token_metadata::add_leaving_endpoint(inet_address endpoint) { +generic_token_metadata::add_leaving_endpoint(NodeId endpoint) { _impl->add_leaving_endpoint(endpoint); } +template void -token_metadata::del_leaving_endpoint(inet_address endpoint) { +generic_token_metadata::del_leaving_endpoint(NodeId endpoint) { _impl->del_leaving_endpoint(endpoint); } +template void -token_metadata::remove_endpoint(inet_address endpoint) { +generic_token_metadata::remove_endpoint(NodeId endpoint) { _impl->remove_endpoint(endpoint); _impl->sort_tokens(); } +template bool 
-token_metadata::is_normal_token_owner(inet_address endpoint) const { +generic_token_metadata::is_normal_token_owner(NodeId endpoint) const { return _impl->is_normal_token_owner(endpoint); } +template bool -token_metadata::is_leaving(inet_address endpoint) const { +generic_token_metadata::is_leaving(NodeId endpoint) const { return _impl->is_leaving(endpoint); } +template bool -token_metadata::is_being_replaced(inet_address endpoint) const { +generic_token_metadata::is_being_replaced(NodeId endpoint) const { return _impl->is_being_replaced(endpoint); } +template bool -token_metadata::is_any_node_being_replaced() const { +generic_token_metadata::is_any_node_being_replaced() const { return _impl->is_any_node_being_replaced(); } -void token_metadata::add_replacing_endpoint(inet_address existing_node, inet_address replacing_node) { +template +void generic_token_metadata::add_replacing_endpoint(NodeId existing_node, NodeId replacing_node) { _impl->add_replacing_endpoint(existing_node, replacing_node); } -void token_metadata::del_replacing_endpoint(inet_address existing_node) { +template +void generic_token_metadata::del_replacing_endpoint(NodeId existing_node) { _impl->del_replacing_endpoint(existing_node); } -future token_metadata::clone_async() const noexcept { - return _impl->clone_async().then([] (std::unique_ptr impl) { - return make_ready_future(std::move(impl)); +template +future> generic_token_metadata::clone_async() const noexcept { + return _impl->clone_async().then([] (std::unique_ptr> impl) { + return make_ready_future(std::move(impl)); }); } -future -token_metadata::clone_only_token_map() const noexcept { - return _impl->clone_only_token_map().then([] (std::unique_ptr impl) { - return token_metadata(std::move(impl)); +template +future> +generic_token_metadata::clone_only_token_map() const noexcept { + return _impl->clone_only_token_map().then([] (std::unique_ptr> impl) { + return generic_token_metadata(std::move(impl)); }); } -future 
-token_metadata::clone_after_all_left() const noexcept { - return _impl->clone_after_all_left().then([] (std::unique_ptr impl) { - return token_metadata(std::move(impl)); +template +future> +generic_token_metadata::clone_after_all_left() const noexcept { + return _impl->clone_after_all_left().then([] (std::unique_ptr> impl) { + return generic_token_metadata(std::move(impl)); }); } -future<> token_metadata::clear_gently() noexcept { +template +future<> generic_token_metadata::clear_gently() noexcept { return _impl->clear_gently(); } +template dht::token_range_vector -token_metadata::get_primary_ranges_for(std::unordered_set tokens) const { +generic_token_metadata::get_primary_ranges_for(std::unordered_set tokens) const { return _impl->get_primary_ranges_for(std::move(tokens)); } +template dht::token_range_vector -token_metadata::get_primary_ranges_for(token right) const { +generic_token_metadata::get_primary_ranges_for(token right) const { return _impl->get_primary_ranges_for(right); } +template boost::icl::interval::interval_type -token_metadata::range_to_interval(range r) { - return token_metadata_impl::range_to_interval(std::move(r)); +generic_token_metadata::range_to_interval(range r) { + return token_metadata_impl::range_to_interval(std::move(r)); } +template range -token_metadata::interval_to_range(boost::icl::interval::interval_type i) { - return token_metadata_impl::interval_to_range(std::move(i)); +generic_token_metadata::interval_to_range(boost::icl::interval::interval_type i) { + return token_metadata_impl::interval_to_range(std::move(i)); } +template future<> -token_metadata::update_topology_change_info(dc_rack_fn& get_dc_rack) { +generic_token_metadata::update_topology_change_info(dc_rack_fn& get_dc_rack) { return _impl->update_topology_change_info(get_dc_rack); } -const std::optional& -token_metadata::get_topology_change_info() const { +template +const std::optional>& +generic_token_metadata::get_topology_change_info() const { return 
_impl->get_topology_change_info(); } +template token -token_metadata::get_predecessor(token t) const { +generic_token_metadata::get_predecessor(token t) const { return _impl->get_predecessor(t); } -const std::unordered_set& -token_metadata::get_all_endpoints() const { +template +const std::unordered_set& +generic_token_metadata::get_all_endpoints() const { return _impl->get_all_endpoints(); } +template size_t -token_metadata::count_normal_token_owners() const { +generic_token_metadata::count_normal_token_owners() const { return _impl->count_normal_token_owners(); } +template void -token_metadata::set_read_new(read_new_t read_new) { +generic_token_metadata::set_read_new(read_new_t read_new) { _impl->set_read_new(read_new); } -std::map -token_metadata::get_normal_and_bootstrapping_token_to_endpoint_map() const { +template +std::map +generic_token_metadata::get_normal_and_bootstrapping_token_to_endpoint_map() const { return _impl->get_normal_and_bootstrapping_token_to_endpoint_map(); } +template long -token_metadata::get_ring_version() const { +generic_token_metadata::get_ring_version() const { return _impl->get_ring_version(); } +template void -token_metadata::invalidate_cached_rings() { +generic_token_metadata::invalidate_cached_rings() { _impl->invalidate_cached_rings(); } +template auto -token_metadata::get_version() const -> version_t { +generic_token_metadata::get_version() const -> version_t { return _impl->get_version(); } +template void -token_metadata::set_version(version_t version) { +generic_token_metadata::set_version(version_t version) { _impl->set_version(version); } +template void -token_metadata::set_version_tracker(version_tracker_t tracker) { +generic_token_metadata::set_version_tracker(version_tracker_t tracker) { _impl->set_version_tracker(std::move(tracker)); } @@ -1190,7 +1301,7 @@ void shared_token_metadata::set(mutable_token_metadata_ptr tmptr) noexcept { void shared_token_metadata::update_fence_version(token_metadata::version_t version) { if 
(const auto current_version = _shared->get_version(); version > current_version) { - // The token_metadata::version under no circumstance can go backwards. + // The generic_token_metadata::version under no circumstance can go backwards. // Even in case of topology change coordinator moving to another node // this condition must hold, that is why we treat its violation // as an internal error. @@ -1277,7 +1388,8 @@ host_id_or_endpoint::host_id_or_endpoint(const sstring& s, param_type restrict) } } -void host_id_or_endpoint::resolve(const token_metadata& tm) { +template +void host_id_or_endpoint::resolve(const generic_token_metadata& tm) { if (id) { auto endpoint_opt = tm.get_endpoint_for_host_id(id); if (!endpoint_opt) { @@ -1293,4 +1405,9 @@ void host_id_or_endpoint::resolve(const token_metadata& tm) { } } +template class generic_token_metadata; +template class generic_token_metadata; +template void host_id_or_endpoint::resolve(const token_metadata& tm); +template void host_id_or_endpoint::resolve(const token_metadata2& tm); + } // namespace locator diff --git a/locator/token_metadata.hh b/locator/token_metadata.hh index 841b68fd37..8b5b4310f6 100644 --- a/locator/token_metadata.hh +++ b/locator/token_metadata.hh @@ -43,7 +43,10 @@ class abstract_replication_strategy; using token = dht::token; -class token_metadata; +template +class generic_token_metadata; +using token_metadata = generic_token_metadata; +using token_metadata2 = generic_token_metadata; class tablet_metadata; struct host_id_or_endpoint { @@ -68,14 +71,16 @@ struct host_id_or_endpoint { // Map the host_id to endpoint based on whichever of them is set, // using the token_metadata - void resolve(const token_metadata& tm); + template + void resolve(const generic_token_metadata& tm); }; +template class token_metadata_impl; +template struct topology_change_info; -class token_metadata final { - std::unique_ptr _impl; +class generic_token_metadata_base { public: struct config { topology::config topo_cfg; @@ 
-83,6 +88,11 @@ public: using inet_address = gms::inet_address; using version_t = service::topology::version_t; using version_tracker_t = utils::phased_barrier::operation; +}; + +template +class generic_token_metadata final: public generic_token_metadata_base { + std::unique_ptr> _impl; private: friend class token_metadata_ring_splitter; class tokens_iterator { @@ -94,24 +104,24 @@ private: using reference = token&; public: tokens_iterator() = default; - tokens_iterator(const token& start, const token_metadata_impl* token_metadata); + tokens_iterator(const token& start, const token_metadata_impl* token_metadata); bool operator==(const tokens_iterator& it) const; const token& operator*() const; tokens_iterator& operator++(); private: std::vector::const_iterator _cur_it; size_t _remaining = 0; - const token_metadata_impl* _token_metadata = nullptr; + const token_metadata_impl* _token_metadata = nullptr; - friend class token_metadata_impl; + friend class token_metadata_impl; }; public: - token_metadata(config cfg); - explicit token_metadata(std::unique_ptr impl); - token_metadata(token_metadata&&) noexcept; // Can't use "= default;" - hits some static_assert in unique_ptr - token_metadata& operator=(token_metadata&&) noexcept; - ~token_metadata(); + generic_token_metadata(config cfg); + explicit generic_token_metadata(std::unique_ptr> impl); + generic_token_metadata(generic_token_metadata&&) noexcept; // Can't use "= default;" - hits some static_assert in unique_ptr + generic_token_metadata& operator=(generic_token_metadata&&) noexcept; + ~generic_token_metadata(); const std::vector& sorted_tokens() const; const tablet_metadata& tablets() const; tablet_metadata& tablets(); @@ -121,19 +131,19 @@ public: // // Note: the function is not exception safe! 
// It must be called only on a temporary copy of the token_metadata - future<> update_normal_tokens(std::unordered_set tokens, inet_address endpoint); + future<> update_normal_tokens(std::unordered_set tokens, NodeId endpoint); const token& first_token(const token& start) const; size_t first_token_index(const token& start) const; - std::optional get_endpoint(const token& token) const; - std::vector get_tokens(const inet_address& addr) const; - const std::unordered_map& get_token_to_endpoint() const; - const std::unordered_set& get_leaving_endpoints() const; - const std::unordered_map& get_bootstrap_tokens() const; + std::optional get_endpoint(const token& token) const; + std::vector get_tokens(const NodeId& addr) const; + const std::unordered_map& get_token_to_endpoint() const; + const std::unordered_set& get_leaving_endpoints() const; + const std::unordered_map& get_bootstrap_tokens() const; /** * Update or add endpoint given its inet_address and endpoint_dc_rack. */ - void update_topology(inet_address ep, std::optional opt_dr, std::optional opt_st = std::nullopt, + void update_topology(NodeId ep, std::optional opt_dr, std::optional opt_st = std::nullopt, std::optional shard_count = std::nullopt); /** * Creates an iterable range of the sorted tokens starting at the token t @@ -182,39 +192,39 @@ public: /// Returns host_id of the local node. 
host_id get_my_id() const; - void add_bootstrap_token(token t, inet_address endpoint); + void add_bootstrap_token(token t, NodeId endpoint); - void add_bootstrap_tokens(std::unordered_set tokens, inet_address endpoint); + void add_bootstrap_tokens(std::unordered_set tokens, NodeId endpoint); void remove_bootstrap_tokens(std::unordered_set tokens); - void add_leaving_endpoint(inet_address endpoint); - void del_leaving_endpoint(inet_address endpoint); + void add_leaving_endpoint(NodeId endpoint); + void del_leaving_endpoint(NodeId endpoint); - void remove_endpoint(inet_address endpoint); + void remove_endpoint(NodeId endpoint); // Checks if the node is part of the token ring. If yes, the node is one of // the nodes that owns the tokens and inside the set _normal_token_owners. - bool is_normal_token_owner(inet_address endpoint) const; + bool is_normal_token_owner(NodeId endpoint) const; - bool is_leaving(inet_address endpoint) const; + bool is_leaving(NodeId endpoint) const; // Is this node being replaced by another node - bool is_being_replaced(inet_address endpoint) const; + bool is_being_replaced(NodeId endpoint) const; // Is any node being replaced by another node bool is_any_node_being_replaced() const; - void add_replacing_endpoint(inet_address existing_node, inet_address replacing_node); + void add_replacing_endpoint(NodeId existing_node, NodeId replacing_node); - void del_replacing_endpoint(inet_address existing_node); + void del_replacing_endpoint(NodeId existing_node); /** * Create a full copy of token_metadata using asynchronous continuations. * The caller must ensure that the cloned object will not change if * the function yields. */ - future clone_async() const noexcept; + future clone_async() const noexcept; /** * Create a copy of TokenMetadata with only tokenToEndpointMap. That is, pending ranges, @@ -222,7 +232,7 @@ public: * The caller must ensure that the cloned object will not change if * the function yields. 
*/ - future clone_only_token_map() const noexcept; + future clone_only_token_map() const noexcept; /** * Create a copy of TokenMetadata with tokenToEndpointMap reflecting situation after all * current leave operations have finished. @@ -231,7 +241,7 @@ public: * * @return a future holding a new token metadata */ - future clone_after_all_left() const noexcept; + future clone_after_all_left() const noexcept; /** * Gently clear the token_metadata members. @@ -251,13 +261,13 @@ public: static boost::icl::interval::interval_type range_to_interval(range r); static range interval_to_range(boost::icl::interval::interval_type i); - future<> update_topology_change_info(dc_rack_fn& get_dc_rack); + future<> update_topology_change_info(dc_rack_fn& get_dc_rack); - const std::optional& get_topology_change_info() const; + const std::optional>& get_topology_change_info() const; token get_predecessor(token t) const; - const std::unordered_set& get_all_endpoints() const; + const std::unordered_set& get_all_endpoints() const; /* Returns the number of different endpoints that own tokens in the ring. * Bootstrapping tokens are not taken into account. */ @@ -275,7 +285,7 @@ public: * @return a (stable copy, won't be modified) Token to Endpoint map for all the normal and bootstrapping nodes * in the cluster. 
*/ - std::map get_normal_and_bootstrapping_token_to_endpoint_map() const; + std::map get_normal_and_bootstrapping_token_to_endpoint_map() const; long get_ring_version() const; void invalidate_cached_rings(); @@ -283,20 +293,26 @@ public: version_t get_version() const; void set_version(version_t version); - friend class token_metadata_impl; + friend class token_metadata_impl; friend class shared_token_metadata; private: void set_version_tracker(version_tracker_t tracker); }; +extern template class generic_token_metadata; +extern template class generic_token_metadata; +extern template void host_id_or_endpoint::resolve(const token_metadata& tm); +extern template void host_id_or_endpoint::resolve(const token_metadata2& tm); + +template struct topology_change_info { - token_metadata_ptr target_token_metadata; - token_metadata_ptr base_token_metadata; + lw_shared_ptr> target_token_metadata; + lw_shared_ptr> base_token_metadata; std::vector all_tokens; token_metadata::read_new_t read_new; - topology_change_info(token_metadata_ptr target_token_metadata_, - token_metadata_ptr base_token_metadata_, + topology_change_info(lw_shared_ptr> target_token_metadata_, + lw_shared_ptr> base_token_metadata_, std::vector all_tokens_, token_metadata::read_new_t read_new_); future<> clear_gently(); @@ -310,6 +326,11 @@ mutable_token_metadata_ptr make_token_metadata_ptr(Args... args) { return make_lw_shared(std::forward(args)...); } +template +mutable_token_metadata2_ptr make_token_metadata2_ptr(Args... 
args) { + return make_lw_shared(std::forward(args)...); +} + class shared_token_metadata { mutable_token_metadata_ptr _shared; token_metadata_lock_func _lock_func; diff --git a/locator/token_metadata_fwd.hh b/locator/token_metadata_fwd.hh index 07f470bca7..5e67605e3b 100644 --- a/locator/token_metadata_fwd.hh +++ b/locator/token_metadata_fwd.hh @@ -11,9 +11,13 @@ namespace locator { -class token_metadata; - +template +class generic_token_metadata; +using token_metadata = generic_token_metadata; using token_metadata_ptr = lw_shared_ptr; using mutable_token_metadata_ptr = lw_shared_ptr; +using token_metadata2 = generic_token_metadata; +using token_metadata2_ptr = lw_shared_ptr; +using mutable_token_metadata2_ptr = lw_shared_ptr; } // namespace locator diff --git a/locator/topology.cc b/locator/topology.cc index b23a6c238f..55e6631c46 100644 --- a/locator/topology.cc +++ b/locator/topology.cc @@ -491,6 +491,17 @@ bool topology::remove_endpoint(inet_address ep) return false; } +bool topology::remove_endpoint(locator::host_id host_id) +{ + auto node = find_node(host_id); + tlogger.debug("topology[{}]: remove_endpoint: host_id={}: {}", fmt::ptr(this), host_id, debug_format(node)); + if (node) { + remove_node(node); + return true; + } + return false; +} + bool topology::has_node(host_id id) const noexcept { auto node = find_node(id); tlogger.trace("topology[{}]: has_node: host_id={}: {}", fmt::ptr(this), id, debug_format(node)); diff --git a/locator/topology.hh b/locator/topology.hh index 5e87b66291..ad436af2bf 100644 --- a/locator/topology.hh +++ b/locator/topology.hh @@ -258,6 +258,8 @@ public: */ bool remove_endpoint(inet_address ep); + bool remove_endpoint(locator::host_id ep); + /** * Returns true iff contains given endpoint. 
*/ @@ -422,6 +424,7 @@ private: return _nodes_by_endpoint; }; + template friend class token_metadata_impl; public: void test_compare_endpoints(const inet_address& address, const inet_address& a1, const inet_address& a2) const; diff --git a/node_ops/node_ops_ctl.hh b/node_ops/node_ops_ctl.hh index cc2d9dfdab..3d30136bc5 100644 --- a/node_ops/node_ops_ctl.hh +++ b/node_ops/node_ops_ctl.hh @@ -13,6 +13,7 @@ #include "locator/host_id.hh" #include "node_ops/id.hh" #include "schema/schema_fwd.hh" +#include "locator/host_id.hh" #include @@ -24,7 +25,10 @@ class storage_service; } namespace locator { -class token_metadata; +template +class generic_token_metadata; +using token_metadata = generic_token_metadata; +using token_metadata2 = generic_token_metadata; } class node_ops_info { From 9edf0709e60afe20d998310caf607301d2fcbb5a Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Fri, 20 Oct 2023 17:30:42 +0400 Subject: [PATCH 06/51] token_metadata: support host_id-based version In this commit we enhance token_metadata with a pointer to the new host_id-based generic_token_metadata specialisation (token_metadata2). The idea is that in the following commits we'll go over all token_metadata modifications and make the corresponding modifications to its new host_id-based alternative. The pointer to token_metadata2 is stored in the generic_token_metadata::_new_value field. The pointer can be mutable, immutable, or absent altogether (std::monostate). It's mutable if this generic_token_metadata owns it, meaning it was created using the generic_token_metadata(config cfg) constructor. It's immutable if the generic_token_metadata(lw_shared_ptr new_value); constructor was used. This means this old token_metadata is a wrapper for new token_metadata and we can only use the get_new() method on it. The field _new_value is empty for the new host_id-based token_metadata version. The generic_token_metadata(std::unique_ptr> impl, token_metadata2 new_value); constructor is used for clone methods. 
We clone both versions, and we need to pass a cloned token_metadata2 into the constructor. There are two overloads of get_new, for mutable and immutable generic_token_metadata. Both of them throw an exception if they can't get the appropriate pointer. There is also a get_new_strong method, which returns an immutable owning pointer. This is convenient since a lot of APIs want an owning pointer. We can't make the get_new/get_new_strong API simpler and use get_new_strong everywhere since it mutates the original generic_token_metadata by incrementing the reference counter and this causes races when it's passed between shards in replicate_to_all_cores. --- locator/token_metadata.cc | 70 ++++++++++++++++++++++++++++++++------- locator/token_metadata.hh | 40 ++++++++++++++++++++++ 2 files changed, 98 insertions(+), 12 deletions(-) diff --git a/locator/token_metadata.cc b/locator/token_metadata.cc index a9d1639bfe..af8739e468 100644 --- a/locator/token_metadata.cc +++ b/locator/token_metadata.cc @@ -917,12 +917,36 @@ future<> topology_change_info::clear_gently() { template generic_token_metadata::generic_token_metadata(std::unique_ptr> impl) - : _impl(std::move(impl)) { + : _impl(std::move(impl)) +{ +} + +template +template +requires std::is_same_v +generic_token_metadata::generic_token_metadata(std::unique_ptr> impl, + token_metadata2 new_value) + : _impl(std::move(impl)) + , _new_value(make_token_metadata2_ptr(std::move(new_value))) +{ +} + +template +template +requires std::is_same_v +generic_token_metadata::generic_token_metadata(token_metadata2_ptr new_value) + : _impl(nullptr) + , _new_value(std::move(new_value)) +{ } template generic_token_metadata::generic_token_metadata(config cfg) - : _impl(std::make_unique>(std::move(cfg))) { + : _impl(std::make_unique>(cfg)) +{ + if constexpr (std::is_same_v) { + _new_value = make_token_metadata2_ptr(std::move(cfg)); + } } template @@ -1163,30 +1187,47 @@ void generic_token_metadata::del_replacing_endpoint(NodeId existing_node
template future> generic_token_metadata::clone_async() const noexcept { - return _impl->clone_async().then([] (std::unique_ptr> impl) { - return make_ready_future(std::move(impl)); - }); + if constexpr (std::is_same_v) { + co_return !holds_alternative(_new_value) + ? generic_token_metadata(co_await _impl->clone_async(), co_await get_new()->clone_async()) + : generic_token_metadata(co_await _impl->clone_async()); + } else { + co_return generic_token_metadata(co_await _impl->clone_async()); + } } template future> generic_token_metadata::clone_only_token_map() const noexcept { - return _impl->clone_only_token_map().then([] (std::unique_ptr> impl) { - return generic_token_metadata(std::move(impl)); - }); + if constexpr (std::is_same_v) { + co_return !holds_alternative(_new_value) + ? generic_token_metadata(co_await _impl->clone_only_token_map(), co_await get_new()->clone_only_token_map()) + : generic_token_metadata(co_await _impl->clone_only_token_map()); + } else { + co_return generic_token_metadata(co_await _impl->clone_only_token_map()); + } } template future> generic_token_metadata::clone_after_all_left() const noexcept { - return _impl->clone_after_all_left().then([] (std::unique_ptr> impl) { - return generic_token_metadata(std::move(impl)); - }); + if constexpr (std::is_same_v) { + co_return !holds_alternative(_new_value) + ? 
generic_token_metadata(co_await _impl->clone_after_all_left(), co_await get_new()->clone_after_all_left()) + : generic_token_metadata(co_await _impl->clone_after_all_left()); + } else { + co_return generic_token_metadata(co_await _impl->clone_after_all_left()); + } } template future<> generic_token_metadata::clear_gently() noexcept { - return _impl->clear_gently(); + co_await _impl->clear_gently(); + if constexpr (std::is_same_v) { + if (holds_alternative>(_new_value)) { + co_await get_new()->clear_gently(); + } + } } template @@ -1409,5 +1450,10 @@ template class generic_token_metadata; template class generic_token_metadata; template void host_id_or_endpoint::resolve(const token_metadata& tm); template void host_id_or_endpoint::resolve(const token_metadata2& tm); +template token_metadata2* generic_token_metadata::get_new<>(); +template const token_metadata2* generic_token_metadata::get_new<>() const; +template lw_shared_ptr generic_token_metadata::get_new_strong<>() const; +template generic_token_metadata::generic_token_metadata(std::unique_ptr>, token_metadata2); +template generic_token_metadata::generic_token_metadata(token_metadata2_ptr); } // namespace locator diff --git a/locator/token_metadata.hh b/locator/token_metadata.hh index 8b5b4310f6..64e13e6866 100644 --- a/locator/token_metadata.hh +++ b/locator/token_metadata.hh @@ -93,6 +93,7 @@ public: template class generic_token_metadata final: public generic_token_metadata_base { std::unique_ptr> _impl; + std::variant, lw_shared_ptr> _new_value; private: friend class token_metadata_ring_splitter; class tokens_iterator { @@ -119,6 +120,12 @@ private: public: generic_token_metadata(config cfg); explicit generic_token_metadata(std::unique_ptr> impl); + template + requires std::is_same_v + generic_token_metadata(std::unique_ptr> impl, token_metadata2 new_value); + template + requires std::is_same_v + generic_token_metadata(lw_shared_ptr new_value); generic_token_metadata(generic_token_metadata&&) noexcept; // 
Can't use "= default;" - hits some static_assert in unique_ptr generic_token_metadata& operator=(generic_token_metadata&&) noexcept; ~generic_token_metadata(); @@ -140,6 +147,39 @@ public: const std::unordered_set& get_leaving_endpoints() const; const std::unordered_map& get_bootstrap_tokens() const; + template + requires std::is_same_v + token_metadata2* get_new() { + if (holds_alternative>(_new_value)) { + return get>(_new_value).get(); + } + throw_with_backtrace("no mutable new value"); + } + + template + requires std::is_same_v + const token_metadata2* get_new() const { + if (holds_alternative>(_new_value)) { + return get>(_new_value).get(); + } + if (holds_alternative>(_new_value)) { + return get>(_new_value).get(); + } + throw_with_backtrace("no new value"); + } + + template + requires std::is_same_v + lw_shared_ptr get_new_strong() const { + if (holds_alternative>(_new_value)) { + return get>(_new_value); + } + if (holds_alternative>(_new_value)) { + return get>(_new_value); + } + throw_with_backtrace("no new value"); + } + /** * Update or add endpoint given its inet_address and endpoint_dc_rack. */ From 39bbe5f457781638cbc615b19f690e5e61e5261e Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Tue, 28 Nov 2023 11:41:42 +0400 Subject: [PATCH 07/51] token_metadata: add get_all_ips method This is convenient for migrating code that uses get_all_endpoints. 
--- locator/token_metadata.cc | 14 ++++++++++++++ locator/token_metadata.hh | 4 ++++ 2 files changed, 18 insertions(+) diff --git a/locator/token_metadata.cc b/locator/token_metadata.cc index af8739e468..d0b37ed29b 100644 --- a/locator/token_metadata.cc +++ b/locator/token_metadata.cc @@ -1278,6 +1278,19 @@ generic_token_metadata::get_all_endpoints() const { return _impl->get_all_endpoints(); } +template +template +requires std::is_same_v +std::unordered_set generic_token_metadata::get_all_ips() const { + const auto& host_ids = _impl->get_all_endpoints(); + std::unordered_set result; + result.reserve(host_ids.size()); + for (const auto& id: host_ids) { + result.insert(_impl->get_endpoint_for_host_id(id)); + } + return result; +} + template size_t generic_token_metadata::count_normal_token_owners() const { @@ -1455,5 +1468,6 @@ template const token_metadata2* generic_token_metadata::get_n template lw_shared_ptr generic_token_metadata::get_new_strong<>() const; template generic_token_metadata::generic_token_metadata(std::unique_ptr>, token_metadata2); template generic_token_metadata::generic_token_metadata(token_metadata2_ptr); +template std::unordered_set generic_token_metadata::get_all_ips<>() const; } // namespace locator diff --git a/locator/token_metadata.hh b/locator/token_metadata.hh index 64e13e6866..da90112b35 100644 --- a/locator/token_metadata.hh +++ b/locator/token_metadata.hh @@ -309,6 +309,10 @@ public: const std::unordered_set& get_all_endpoints() const; + template + requires std::is_same_v + std::unordered_set get_all_ips() const; + /* Returns the number of different endpoints that own tokens in the ring. * Bootstrapping tokens are not taken into account. 
*/ size_t count_normal_token_owners() const; From 08b47d645a4108b1491103558348b7244c7e243a Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Tue, 5 Dec 2023 20:01:49 +0400 Subject: [PATCH 08/51] token_metadata: get_host_id: exception -> on_internal_error It's a bug to use get_host_id on a non-existent endpoint, so on_internal_error is more appropriate. Also, it's easier to debug since it provides a backtrace. If a missing inet_address is expected, get_host_id_if_known should be used instead. We update one such case in storage_service::force_remove_completion. Other usages of get_host_id are correct. --- locator/token_metadata.cc | 2 +- service/storage_service.cc | 12 +++++------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/locator/token_metadata.cc b/locator/token_metadata.cc index d0b37ed29b..4249e9c136 100644 --- a/locator/token_metadata.cc +++ b/locator/token_metadata.cc @@ -560,7 +560,7 @@ host_id token_metadata_impl::get_host_id(inet_address endpoint) const { if (const auto* node = _topology.find_node(endpoint)) [[likely]] { return node->host_id(); } else { - throw std::runtime_error(format("host_id for endpoint {} is not found", endpoint)); + on_internal_error(tlogger, format("host_id for endpoint {} is not found", endpoint)); } } diff --git a/service/storage_service.cc b/service/storage_service.cc index 0147d37d15..19186cb7b4 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -7180,17 +7180,15 @@ future<> storage_service::force_remove_completion() { auto leaving = tm.get_leaving_endpoints(); slogger.warn("Removal not confirmed, Leaving={}", leaving); for (auto endpoint : leaving) { - locator::host_id host_id; - auto tokens = tm.get_tokens(endpoint); - try { - host_id = tm.get_host_id(endpoint); - } catch (...) 
{ + const auto host_id = tm.get_host_id_if_known(endpoint); + if (!host_id) { slogger.warn("No host_id is found for endpoint {}", endpoint); continue; } + auto tokens = tm.get_tokens(endpoint); auto permit = co_await ss._gossiper.lock_endpoint(endpoint, gms::null_permit_id); const auto& pid = permit.id(); - co_await ss._gossiper.advertise_token_removed(endpoint, host_id, pid); + co_await ss._gossiper.advertise_token_removed(endpoint, *host_id, pid); std::unordered_set tokens_set(tokens.begin(), tokens.end()); co_await ss.excise(tokens_set, endpoint, pid); @@ -7198,7 +7196,7 @@ future<> storage_service::force_remove_completion() { assert(ss._group0); bool raft_available = co_await ss._group0->wait_for_raft(); if (raft_available) { - co_await ss._group0->remove_from_group0(raft::server_id{host_id.uuid()}); + co_await ss._group0->remove_from_group0(raft::server_id{host_id->uuid()}); } } } else { From 5a1418fdba5d315ec0ed55c993a3cd7fe1bdb73f Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Tue, 5 Dec 2023 20:07:44 +0400 Subject: [PATCH 09/51] token_metadata: get_endpoint_for_host_id -> get_endpoint_for_host_id_if_known This commit fixes an inconsistency in method names: get_host_id and get_host_id_if_known are (internal_error, returns null), but there was only one method for the opposite conversion - get_endpoint_for_host_id, and it returns null. In this commit we change it to on_internal_error if it can't find the argument and add another method get_endpoint_for_host_id_if_known which returns null in this case. We can't use get_endpoint_for_host_id/get_host_id in host_id_or_endpoint::resolve since it's called from storage_service::parse_node_list -> token_metadata::parse_host_id_and_endpoint, and exceptions are caught and handled in `storage_service::parse_node_list`. 
--- db/view/view.cc | 2 +- locator/tablets.cc | 11 ++------- locator/token_metadata.cc | 26 +++++++++++++++++--- locator/token_metadata.hh | 5 +++- service/storage_proxy.cc | 2 +- service/storage_service.cc | 4 +-- test/boost/network_topology_strategy_test.cc | 8 +----- 7 files changed, 33 insertions(+), 25 deletions(-) diff --git a/db/view/view.cc b/db/view/view.cc index 34000c47a7..726974fadd 100644 --- a/db/view/view.cc +++ b/db/view/view.cc @@ -2579,7 +2579,7 @@ future check_view_build_ongoing(db::system_distributed_keyspace& sys_dist_ return sys_dist_ks.view_status(ks_name, cf_name).then([&tm] (view_statuses_type&& view_statuses) { return boost::algorithm::any_of(view_statuses, [&tm] (const view_statuses_type::value_type& view_status) { // Only consider status of known hosts. - return view_status.second == "STARTED" && tm.get_endpoint_for_host_id(view_status.first); + return view_status.second == "STARTED" && tm.get_endpoint_for_host_id_if_known(view_status.first); }); }); } diff --git a/locator/tablets.cc b/locator/tablets.cc index 5e7e81679a..3b731324b0 100644 --- a/locator/tablets.cc +++ b/locator/tablets.cc @@ -334,18 +334,11 @@ class tablet_effective_replication_map : public effective_replication_map { table_id _table; tablet_sharder _sharder; private: - gms::inet_address get_endpoint_for_host_id(host_id host) const { - auto endpoint_opt = _tmptr->get_endpoint_for_host_id(host); - if (!endpoint_opt) { - on_internal_error(tablet_logger, format("Host ID {} not found in the cluster", host)); - } - return *endpoint_opt; - } inet_address_vector_replica_set to_replica_set(const tablet_replica_set& replicas) const { inet_address_vector_replica_set result; result.reserve(replicas.size()); for (auto&& replica : replicas) { - result.emplace_back(get_endpoint_for_host_id(replica.host)); + result.emplace_back(_tmptr->get_endpoint_for_host_id(replica.host)); } return result; } @@ -406,7 +399,7 @@ public: case write_replica_set_selector::both: 
tablet_logger.trace("get_pending_endpoints({}): table={}, tablet={}, replica={}", search_token, _table, tablet, info->pending_replica); - return {get_endpoint_for_host_id(info->pending_replica.host)}; + return {_tmptr->get_endpoint_for_host_id(info->pending_replica.host)}; case write_replica_set_selector::next: return {}; } diff --git a/locator/token_metadata.cc b/locator/token_metadata.cc index 4249e9c136..a6f82aa3de 100644 --- a/locator/token_metadata.cc +++ b/locator/token_metadata.cc @@ -167,8 +167,11 @@ public: /// Return the unique host ID for an end-point or nullopt if not found. std::optional get_host_id_if_known(inet_address endpoint) const; - /** Return the end-point for a unique host ID */ - std::optional get_endpoint_for_host_id(host_id) const; + /** Return the end-point for a unique host ID or nullopt if not found.*/ + std::optional get_endpoint_for_host_id_if_known(host_id) const; + + /** Return the end-point for a unique host ID.*/ + inet_address get_endpoint_for_host_id(host_id) const; /** @return a copy of the endpoint-to-id map for read-only operations */ std::unordered_map get_endpoint_to_host_id_map_for_reading() const; @@ -574,7 +577,7 @@ std::optional token_metadata_impl::get_host_id_if_known(inet_ad } template -std::optional token_metadata_impl::get_endpoint_for_host_id(host_id host_id) const { +std::optional token_metadata_impl::get_endpoint_for_host_id_if_known(host_id host_id) const { if (const auto* node = _topology.find_node(host_id)) [[likely]] { return node->endpoint(); } else { @@ -582,6 +585,15 @@ std::optional token_metadata_impl::get_endpoint_for_host_i } } +template +inet_address token_metadata_impl::get_endpoint_for_host_id(host_id host_id) const { + if (const auto* node = _topology.find_node(host_id)) [[likely]] { + return node->endpoint(); + } else { + on_internal_error(tlogger, format("endpoint for host_id {} is not found", host_id)); + } +} + template std::unordered_map 
token_metadata_impl::get_endpoint_to_host_id_map_for_reading() const { const auto& nodes = _topology.get_nodes_by_endpoint(); @@ -1097,6 +1109,12 @@ generic_token_metadata::get_host_id_if_known(inet_address endpoint) cons template std::optional::inet_address> +generic_token_metadata::get_endpoint_for_host_id_if_known(host_id host_id) const { + return _impl->get_endpoint_for_host_id_if_known(host_id); +} + +template +typename generic_token_metadata::inet_address generic_token_metadata::get_endpoint_for_host_id(host_id host_id) const { return _impl->get_endpoint_for_host_id(host_id); } @@ -1445,7 +1463,7 @@ host_id_or_endpoint::host_id_or_endpoint(const sstring& s, param_type restrict) template void host_id_or_endpoint::resolve(const generic_token_metadata& tm) { if (id) { - auto endpoint_opt = tm.get_endpoint_for_host_id(id); + auto endpoint_opt = tm.get_endpoint_for_host_id_if_known(id); if (!endpoint_opt) { throw std::runtime_error(format("Host ID {} not found in the cluster", id)); } diff --git a/locator/token_metadata.hh b/locator/token_metadata.hh index da90112b35..e58a21d405 100644 --- a/locator/token_metadata.hh +++ b/locator/token_metadata.hh @@ -219,8 +219,11 @@ public: /// Return the unique host ID for an end-point or nullopt if not found. std::optional get_host_id_if_known(inet_address endpoint) const; + /** Return the end-point for a unique host ID or nullopt if not found. */ + std::optional get_endpoint_for_host_id_if_known(locator::host_id host_id) const; + /** Return the end-point for a unique host ID */ - std::optional get_endpoint_for_host_id(locator::host_id host_id) const; + inet_address get_endpoint_for_host_id(locator::host_id host_id) const; /// Parses the \c host_id_string either as a host uuid or as an ip address and returns the mapping. /// Throws std::invalid_argument on parse error or std::runtime_error if the host_id wasn't found. 
diff --git a/service/storage_proxy.cc b/service/storage_proxy.cc index 749ed3a23d..ee038d3b3d 100644 --- a/service/storage_proxy.cc +++ b/service/storage_proxy.cc @@ -2291,7 +2291,7 @@ replica_ids_to_endpoints(const locator::token_metadata& tm, const std::vector storage_service::handle_state_normal(inet_address endpoint, gms::permit }; // Order Matters, TM.updateHostID() should be called before TM.updateNormalToken(), (see CASSANDRA-4300). auto host_id = _gossiper.get_host_id(endpoint); - auto existing = tmptr->get_endpoint_for_host_id(host_id); + auto existing = tmptr->get_endpoint_for_host_id_if_known(host_id); if (existing && *existing != endpoint) { if (*existing == get_broadcast_address()) { slogger.warn("Not updating host ID {} for {} because it's mine", host_id, endpoint); @@ -5136,7 +5136,7 @@ future<> storage_service::removenode(locator::host_id host_id, std::listget_endpoint_for_host_id(host_id); + auto endpoint_opt = tmptr->get_endpoint_for_host_id_if_known(host_id); assert(ss._group0); auto raft_id = raft::server_id{host_id.uuid()}; bool raft_available = ss._group0->wait_for_raft().get(); diff --git a/test/boost/network_topology_strategy_test.cc b/test/boost/network_topology_strategy_test.cc index 80fd48443e..96885556f7 100644 --- a/test/boost/network_topology_strategy_test.cc +++ b/test/boost/network_topology_strategy_test.cc @@ -194,17 +194,11 @@ void full_ring_check(const tablet_map& tmap, auto& tm = *tmptr; const auto& topo = tm.get_topology(); - auto get_endpoint_for_host_id = [&] (host_id host) { - auto endpoint_opt = tm.get_endpoint_for_host_id(host); - assert(endpoint_opt); - return *endpoint_opt; - }; - auto to_endpoint_set = [&] (const tablet_replica_set& replicas) { inet_address_vector_replica_set result; result.reserve(replicas.size()); for (auto&& replica : replicas) { - result.emplace_back(get_endpoint_for_host_id(replica.host)); + result.emplace_back(tm.get_endpoint_for_host_id(replica.host)); } return result; }; From 
e4253776a1c9af6060e4108efd60516799e64c61 Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Mon, 4 Dec 2023 13:11:50 +0400 Subject: [PATCH 10/51] locator::topology: allow being_replaced and replacing nodes to have the same IP When we're replacing a node with the same IP address, we want the following behavior: * host_id -> IP mapping should work and return the same IP address for two different host_ids - old and new. * the IP -> host_id mapping should return the host_id of the old (replaced) host. This variant is most convenient for preserving the current behavior of the code, especially the functions maybe_remove_node_being_replaced, erm::get_natural_endpoints_without_node_being_replaced, erm::get_pending_endpoints. The 'being_replaced' node will be properly removed in maybe_remove_node_being_replaced and 'replacing' node will be added to the pending_endpoints. --- locator/topology.cc | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/locator/topology.cc b/locator/topology.cc index 55e6631c46..0b10606414 100644 --- a/locator/topology.cc +++ b/locator/topology.cc @@ -318,7 +318,12 @@ void topology::index_node(const node* node) { if (node->endpoint() != inet_address{}) { auto eit = _nodes_by_endpoint.find(node->endpoint()); if (eit != _nodes_by_endpoint.end()) { - if (eit->second->is_leaving() || eit->second->left()) { + if (eit->second->get_state() == node::state::replacing && node->get_state() == node::state::being_replaced) { + // replace-with-same-ip, map ip to the old node + _nodes_by_endpoint.erase(node->endpoint()); + } else if (eit->second->get_state() == node::state::being_replaced && node->get_state() == node::state::replacing) { + // replace-with-same-ip, map ip to the old node, do nothing if it's already the case + } else if (eit->second->is_leaving() || eit->second->left()) { _nodes_by_endpoint.erase(node->endpoint()); } else if (!node->is_leaving() && !node->left()) { if (node->host_id()) { From 
66c30e4f8ed81d98f434ee2f5614c75d4057f7ed Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Sun, 22 Oct 2023 18:32:06 +0400 Subject: [PATCH 11/51] topology: set self host_id on the new topology With this commit, we begin the next stage of the refactoring - updating the new version of the token_metadata in all places where the old version is currently being updated. In this commit we assign host_id of this node, both in main.cc and in boost tests. --- main.cc | 2 ++ test/lib/cql_test_env.cc | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/main.cc b/main.cc index 1a01486031..e60816bcd1 100644 --- a/main.cc +++ b/main.cc @@ -1212,6 +1212,8 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl // still be found in the config. tm.get_topology().set_host_id_cfg(host_id); tm.get_topology().add_or_update_endpoint(endpoint, host_id); + tm.get_new()->get_topology().set_host_id_cfg(host_id); + tm.get_new()->get_topology().add_or_update_endpoint(endpoint, host_id); return make_ready_future<>(); }).get(); diff --git a/test/lib/cql_test_env.cc b/test/lib/cql_test_env.cc index edd09e2478..0f6321ef25 100644 --- a/test/lib/cql_test_env.cc +++ b/test/lib/cql_test_env.cc @@ -648,6 +648,13 @@ private: std::nullopt, locator::node::state::normal, smp::count); + auto& topo_new = tm.get_new()->get_topology(); + topo_new.set_host_id_cfg(hostid); + topo_new.add_or_update_endpoint(utils::fb_utilities::get_broadcast_address(), + hostid, + std::nullopt, + locator::node::state::normal, + smp::count); return make_ready_future<>(); }).get(); From e7e1c4e63c3a3b61e6ee59ab2e3637684c171f4f Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Wed, 4 Oct 2023 12:46:52 +0200 Subject: [PATCH 12/51] storage_service: adjust update_topology_change_info to update new token_metadata Both versions of the token_metadata need to be updated. 
For the new version we provide a dc_rack_fn function which looks for dc_rack by host_id in topology_state_machine if raft topology is on. Otherwise, it looks for IP for the given host_id and falls back to the gossiper-based function get_dc_rack_for. --- service/storage_service.cc | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/service/storage_service.cc b/service/storage_service.cc index c4cdc7c0ea..586685c786 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -6111,6 +6111,23 @@ future<> storage_service::update_topology_change_info(mutable_token_metadata_ptr try { locator::dc_rack_fn get_dc_rack_from_gossiper([this] (inet_address ep) { return get_dc_rack_for(ep); }); co_await tmptr->update_topology_change_info(get_dc_rack_from_gossiper); + + locator::dc_rack_fn get_dc_rack_by_host_id([this, &tm = *tmptr->get_new()] (locator::host_id host_id) -> std::optional { + if (_raft_topology_change_enabled) { + const auto server_id = raft::server_id(host_id.uuid()); + const auto* node = _topology_state_machine._topology.find(server_id); + if (node) { + return locator::endpoint_dc_rack { + .dc = node->second.datacenter, + .rack = node->second.rack, + }; + } + return std::nullopt; + } + + return get_dc_rack_for(tm.get_endpoint_for_host_id(host_id)); + }); + co_await tmptr->get_new()->update_topology_change_info(get_dc_rack_by_host_id); } catch (...) { auto ep = std::current_exception(); slogger.error("Failed to update topology change info for {}: {}", reason, ep); From b6fbbe28aa74108576e7034c5f619fb1dfe1390b Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Thu, 23 Nov 2023 12:49:09 +0400 Subject: [PATCH 13/51] storage_service: topology_state_load: fill new token_metadata For each inet_address-based modification of token_metadata we make a corresponding host_id-based change in token_metadata->get_new(). The _gossiper.add_saved_endpoint logic is switched to the new token_metadata. 
--- service/storage_service.cc | 31 ++++++++++++++++++++++--------- service/storage_service.hh | 3 +++ 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/service/storage_service.cc b/service/storage_service.cc index 586685c786..0abcb0821b 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -399,11 +399,15 @@ future<> storage_service::topology_state_load() { co_await tmptr->clear_gently(); // drop previous state tmptr->set_version(_topology_state_machine._topology.version); + tmptr->get_new()->set_version(_topology_state_machine._topology.version); auto update_topology = [&] (locator::host_id id, inet_address ip, const replica_state& rs) { tmptr->update_topology(ip, locator::endpoint_dc_rack{rs.datacenter, rs.rack}, to_topology_node_state(rs.state), rs.shard_count); + tmptr->get_new()->update_topology(id, locator::endpoint_dc_rack{rs.datacenter, rs.rack}, + to_topology_node_state(rs.state), rs.shard_count); tmptr->update_host_id(id, ip); + tmptr->get_new()->update_host_id(id, ip); }; auto add_normal_node = [&] (raft::server_id id, const replica_state& rs) -> future<> { @@ -432,13 +436,14 @@ future<> storage_service::topology_state_load() { } update_topology(host_id, ip, rs); co_await tmptr->update_normal_tokens(rs.ring.value().tokens, ip); + co_await tmptr->get_new()->update_normal_tokens(rs.ring.value().tokens, host_id); }; for (const auto& [id, rs]: _topology_state_machine._topology.normal_nodes) { co_await add_normal_node(id, rs); } - tmptr->set_read_new(std::invoke([](std::optional state) { + const auto read_new = std::invoke([](std::optional state) { using read_new_t = locator::token_metadata::read_new_t; if (!state.has_value()) { return read_new_t::no; @@ -457,7 +462,9 @@ future<> storage_service::topology_state_load() { case topology::transition_state::write_both_read_new: return read_new_t::yes; } - }, _topology_state_machine._topology.tstate)); + }, _topology_state_machine._topology.tstate); + 
tmptr->set_read_new(read_new); + tmptr->get_new()->set_read_new(read_new); for (const auto& [id, rs]: _topology_state_machine._topology.transition_nodes) { locator::host_id host_id{id.uuid()}; @@ -484,8 +491,10 @@ future<> storage_service::topology_state_load() { // (such as the CDC generation write). // It doesn't break anything to set the tokens to normal early in this single-node case. co_await tmptr->update_normal_tokens(rs.ring.value().tokens, ip); + co_await tmptr->get_new()->update_normal_tokens(rs.ring.value().tokens, host_id); } else { tmptr->add_bootstrap_tokens(rs.ring.value().tokens, ip); + tmptr->get_new()->add_bootstrap_tokens(rs.ring.value().tokens, host_id); co_await update_topology_change_info(tmptr, ::format("bootstrapping node {}/{}", id, ip)); } } @@ -494,7 +503,9 @@ future<> storage_service::topology_state_load() { case node_state::removing: update_topology(host_id, ip, rs); co_await tmptr->update_normal_tokens(rs.ring.value().tokens, ip); + co_await tmptr->get_new()->update_normal_tokens(rs.ring.value().tokens, host_id); tmptr->add_leaving_endpoint(ip); + tmptr->get_new()->add_leaving_endpoint(host_id); co_await update_topology_change_info(tmptr, ::format("{} {}/{}", rs.state, id, ip)); break; case node_state::replacing: { @@ -507,11 +518,11 @@ future<> storage_service::topology_state_load() { on_fatal_internal_error(slogger, ::format("Cannot map id of a node being replaced {} to its ip", replaced_id)); } assert(existing_ip); - // FIXME: Topology cannot hold two IPs with different host ids yet so - // when replacing we must advertise the replaced_id for the ip, otherwise - // topology will complain about host id of a local node changing and fail. - update_topology(ip == existing_ip ? 
locator::host_id(replaced_id.uuid()) : host_id, ip, rs); + const auto replaced_host_id = locator::host_id(replaced_id.uuid()); + tmptr->get_new()->update_topology(replaced_host_id, std::nullopt, locator::node::state::being_replaced); + update_topology(host_id, ip, rs); tmptr->add_replacing_endpoint(*existing_ip, ip); + tmptr->get_new()->add_replacing_endpoint(replaced_host_id, host_id); co_await update_topology_change_info(tmptr, ::format("replacing {}/{} by {}/{}", replaced_id, *existing_ip, id, ip)); } } @@ -545,9 +556,11 @@ future<> storage_service::topology_state_load() { // of the cluster state. To work correctly, the gossiper needs to know the current // endpoints. We cannot rely on seeds alone, since it is not guaranteed that seeds // will be up to date and reachable at the time of restart. - for (const auto& e: get_token_metadata_ptr()->get_all_endpoints()) { - if (!is_me(e) && !_gossiper.get_endpoint_state_ptr(e)) { - co_await _gossiper.add_saved_endpoint(e); + const auto* tmptr = get_token_metadata_ptr()->get_new(); + for (const auto& e: tmptr->get_all_endpoints()) { + const auto ep = tmptr->get_endpoint_for_host_id(e); + if (!is_me(e) && !_gossiper.get_endpoint_state_ptr(ep)) { + co_await _gossiper.add_saved_endpoint(ep); } } diff --git a/service/storage_service.hh b/service/storage_service.hh index 99bf6f58c6..c8ad5970c4 100644 --- a/service/storage_service.hh +++ b/service/storage_service.hh @@ -270,6 +270,9 @@ private: bool is_me(inet_address addr) const noexcept { return get_token_metadata_ptr()->get_topology().is_me(addr); } + bool is_me(locator::host_id id) const noexcept { + return get_token_metadata_ptr()->get_topology().is_me(id); + } /* This abstraction maintains the token/endpoint metadata information */ shared_token_metadata& _shared_token_metadata; From 6412cd64f135524ba3205785ad7874e9f9d565b2 Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Thu, 19 Oct 2023 18:51:45 +0400 Subject: [PATCH 14/51] storage_service: handle_state_normal: fill new 
token_metadata --- service/storage_service.cc | 83 +++++++++++++++++++++++++++++++++++++- 1 file changed, 82 insertions(+), 1 deletion(-) diff --git a/service/storage_service.cc b/service/storage_service.cc index 0abcb0821b..4bff5a4b00 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -3694,29 +3694,86 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit std::unordered_set endpoints_to_remove; auto do_remove_node = [&] (gms::inet_address node) { + // this lambda is called in three cases: + // 1. old endpoint for the given host_id is ours, we remove the new endpoint; + // 2. new endpoint for the given host_id has bigger generation, we remove the old endpoint; + // 3. old endpoint for the given host_id has bigger generation, we remove the new endpoint. + // In all of these cases host_id is retained, only the IP addresses are changed. + // That's why we don't need to call remove_endpoint on tmptr->get_new(). + // However, it will be called eventually through the chain storage_service::remove_endpoint -> + // _gossiper.remove_endpoint -> storage_service::on_remove, and we should handle + // the case when we wouldn't be able to find endpoint -> ip mapping in tm->get_new(). + // This could happen e.g. when the new endpoint has bigger generation - the code + // below will remap host_id to new IP and we won't find old IP in storage_service::on_remove. + // We should just skip the remove in that case. + tmptr->remove_endpoint(node); endpoints_to_remove.insert(node); }; // Order Matters, TM.updateHostID() should be called before TM.updateNormalToken(), (see CASSANDRA-4300). auto host_id = _gossiper.get_host_id(endpoint); auto existing = tmptr->get_endpoint_for_host_id_if_known(host_id); + + // Old node in replace-with-same-IP scenario. + std::optional replaced_id; + if (existing && *existing != endpoint) { + // This branch is taken when a node changes its IP address. 
+ if (*existing == get_broadcast_address()) { slogger.warn("Not updating host ID {} for {} because it's mine", host_id, endpoint); do_remove_node(endpoint); } else if (_gossiper.compare_endpoint_startup(endpoint, *existing) > 0) { + // The new IP has greater generation than the existing one. + // Here we remap the host_id to the new IP. The 'owned_tokens' calculation logic below + // won't detect any changes - the branch 'endpoint == current_owner' will be taken. + // We still need to call 'remove_endpoint' for existing IP to remove it from system.peers. + slogger.warn("Host ID collision for {} between {} and {}; {} is the new owner", host_id, *existing, endpoint, endpoint); do_remove_node(*existing); slogger.info("Set host_id={} to be owned by node={}, existing={}", host_id, endpoint, *existing); tmptr->update_host_id(host_id, endpoint); + tmptr->get_new()->update_host_id(host_id, endpoint); } else { + // The new IP has smaller generation than the existing one, + // we are going to remove it, so we add it to the endpoints_to_remove. + // How does this relate to the tokens this endpoint may have? + // There is a condition below which checks that if endpoints_to_remove + // contains 'endpoint', then the owned_tokens must be empty, otherwise internal_error + // is triggered. This means the following is expected to be true: + // 1. each token from the tokens variable (which is read from gossiper) must have an owner node + // 2. this owner must be different from 'endpoint' + // 3. its generation must be greater than endpoint's + slogger.warn("Host ID collision for {} between {} and {}; ignored {}", host_id, *existing, endpoint, endpoint); do_remove_node(endpoint); } } else if (existing && *existing == endpoint) { + // This branch is taken for all gossiper-managed topology operations. + // For example, if this node is a member of the cluster and a new node is added, + // handle_state_normal is called on this node as the final step + // in the endpoint bootstrap process. 
+ // This method is also called for both replace scenarios - with either the same or with a different IP. + // If the new node has a different IP, the old IP is removed by the block of + // logic below - we detach the old IP from token ring, + // it gets added to candidates_for_removal, then storage_service::remove_endpoint -> + // _gossiper.remove_endpoint -> storage_service::on_remove -> remove from token_metadata. + // If the new node has the same IP, we need to explicitly remove old host_id from + // token_metadata, since no IPs will be removed in this case. + // We do this after update_normal_tokens, allowing for tokens to be properly + // migrated to the new host_id. tmptr->del_replacing_endpoint(endpoint); + if (const auto old_host_id = tmptr->get_new()->get_host_id_if_known(endpoint); old_host_id && *old_host_id != host_id) { + replaced_id = *old_host_id; + } } else { + // This branch is taken if this node wasn't involved in node_ops + // workflow (storage_service::node_ops_cmd_handler wasn't called on it) and it just + // receives the current state of the cluster from the gossiper. + // For example, a new node receives this notification for every + // existing node in the cluster. tmptr->del_replacing_endpoint(endpoint); + auto nodes = _gossiper.get_nodes_with_host_id(host_id); bool left = std::any_of(nodes.begin(), nodes.end(), [this] (const gms::inet_address& node) { return _gossiper.is_left(node); }); if (left) { @@ -3726,6 +3783,7 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit } slogger.info("Set host_id={} to be owned by node={}", host_id, endpoint); tmptr->update_host_id(host_id, endpoint); + tmptr->get_new()->update_host_id(host_id, endpoint); } // Tokens owned by the handled endpoint. 
@@ -3739,6 +3797,16 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit std::unordered_map token_to_endpoint_map = get_token_metadata().get_token_to_endpoint(); std::unordered_set candidates_for_removal; + // Here we convert tokens from gossiper to owned_tokens, which will be assigned as a new + // normal tokens to token_metadata and its new host_id-based version. + // This transformation accounts for situations where some tokens + // belong to outdated nodes - the ones with smaller generation. + // We use endpoints instead of host_ids here since gossiper operates + // with endpoints and generations are tied to endpoints, not host_ids. + // In replace-with-same-ip scenario we won't be able to distinguish + // between the old and new IP owners, so we assume the old replica + // is down and won't be resurrected. + for (auto t : tokens) { // we don't want to update if this node is responsible for the token and it has a later startup time than endpoint. auto current = token_to_endpoint_map.find(t); @@ -3806,8 +3874,21 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit do_notify_joined = true; } - tmptr->update_topology(endpoint, get_dc_rack_for(endpoint), locator::node::state::normal); + const auto dc_rack = get_dc_rack_for(endpoint); + tmptr->update_topology(endpoint, dc_rack, locator::node::state::normal); + tmptr->get_new()->update_topology(host_id, dc_rack, locator::node::state::normal); co_await tmptr->update_normal_tokens(owned_tokens, endpoint); + co_await tmptr->get_new()->update_normal_tokens(owned_tokens, host_id); + if (replaced_id) { + if (tmptr->get_new()->is_normal_token_owner(*replaced_id)) { + on_internal_error(slogger, ::format("replaced endpoint={}/{} still owns tokens {}", + endpoint, *replaced_id, tmptr->get_new()->get_tokens(*replaced_id))); + } else { + tmptr->get_new()->remove_endpoint(*replaced_id); + slogger.info("node {}/{} is removed from token_metadata since it's replaced by 
{}/{} ", + endpoint, *replaced_id, endpoint, host_id); + } + } } co_await update_topology_change_info(tmptr, ::format("handle_state_normal {}", endpoint)); From 711aaa0e2968ecac00878ff6b1157948b0b9602d Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Sun, 22 Oct 2023 13:40:18 +0400 Subject: [PATCH 15/51] storage_service: on_remove: update new token_metadata --- service/storage_service.cc | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/service/storage_service.cc b/service/storage_service.cc index 4bff5a4b00..375b2d3b30 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -3701,11 +3701,7 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit // In all of these cases host_id is retained, only the IP addresses are changed. // That's why we don't need to call remove_endpoint on tmptr->get_new(). // However, it will be called eventually through the chain storage_service::remove_endpoint -> - // _gossiper.remove_endpoint -> storage_service::on_remove, and we should handle - // the case when we wouldn't be able to find endpoint -> ip mapping in tm->get_new(). - // This could happen e.g. when the new endpoint has bigger generation - the code - // below will remap host_id to new IP and we won't find old IP in storage_service::on_remove. - // We should just skip the remove in that case. + // _gossiper.remove_endpoint -> storage_service::on_remove. tmptr->remove_endpoint(node); endpoints_to_remove.insert(node); @@ -4061,6 +4057,13 @@ future<> storage_service::on_remove(gms::inet_address endpoint, gms::permit_id p auto tmlock = co_await get_token_metadata_lock(); auto tmptr = co_await get_mutable_token_metadata_ptr(); tmptr->remove_endpoint(endpoint); + // We should handle the case when we aren't able to find endpoint -> ip mapping in tm->get_new(). + // This could happen e.g. 
when the new endpoint has bigger generation in handle_state_normal - the code + // in handle_state_normal will remap host_id to the new IP and we won't find + // old IP here. We should just skip the remove in that case. + if (const auto host_id = tmptr->get_new()->get_host_id_if_known(endpoint); host_id) { + tmptr->get_new()->remove_endpoint(*host_id); + } co_await update_topology_change_info(tmptr, ::format("on_remove {}", endpoint)); co_await replicate_to_all_cores(std::move(tmptr)); } From 23811486d893b1741280ed34f48e178f907dc4c2 Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Sun, 22 Oct 2023 13:51:28 +0400 Subject: [PATCH 16/51] storage_service: join_cluster: update new token_metadata --- service/storage_service.cc | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/service/storage_service.cc b/service/storage_service.cc index 375b2d3b30..fdd6c97929 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -4260,11 +4260,17 @@ future<> storage_service::join_cluster(sharded& // entry has been mistakenly added, delete it co_await _sys_ks.local().remove_endpoint(ep); } else { - tmptr->update_topology(ep, get_dc_rack(ep), locator::node::state::normal); + const auto dc_rack = get_dc_rack(ep); + tmptr->update_topology(ep, dc_rack, locator::node::state::normal); co_await tmptr->update_normal_tokens(tokens, ep); - if (loaded_host_ids.contains(ep)) { - tmptr->update_host_id(loaded_host_ids.at(ep), ep); + const auto hostIdIt = loaded_host_ids.find(ep); + if (hostIdIt == loaded_host_ids.end()) { + on_internal_error(slogger, format("can't find host_id for ep {}", ep)); } + tmptr->update_host_id(hostIdIt->second, ep); + tmptr->get_new()->update_topology(hostIdIt->second, dc_rack, locator::node::state::normal); + co_await tmptr->get_new()->update_normal_tokens(tokens, hostIdIt->second); + tmptr->get_new()->update_host_id(hostIdIt->second, ep); loaded_endpoints.insert(ep); co_await _gossiper.add_saved_endpoint(ep); } From 
fde20bddc071a26e2fb74dc6b1e078180e0625b9 Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Sun, 22 Oct 2023 14:01:20 +0400 Subject: [PATCH 17/51] storage_service: excise: update new token_metadata excise is called from handle_state_left, the endpoint may have already been removed from tm by then - test_raft_upgrade_majority_loss fails if we use unconditional tmptr->get_new()->get_host_id instead of get_host_id_if_known --- service/storage_service.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/service/storage_service.cc b/service/storage_service.cc index fdd6c97929..636a385a26 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -6022,6 +6022,10 @@ future<> storage_service::excise(std::unordered_set tokens, inet_address auto tmptr = co_await get_mutable_token_metadata_ptr(); tmptr->remove_endpoint(endpoint); tmptr->remove_bootstrap_tokens(tokens); + if (const auto host_id = tmptr->get_new()->get_host_id_if_known(endpoint); host_id) { + tmptr->get_new()->remove_endpoint(*host_id); + } + tmptr->get_new()->remove_bootstrap_tokens(tokens); co_await update_topology_change_info(tmptr, ::format("excise {}", endpoint)); co_await replicate_to_all_cores(std::move(tmptr)); From c20c8c653c4d6163046aa981dd1ea5bf9cb92c10 Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Tue, 5 Dec 2023 17:53:44 +0400 Subject: [PATCH 18/51] storage_service: join_token_ring: update new token_metadata --- service/storage_service.cc | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/service/storage_service.cc b/service/storage_service.cc index 636a385a26..abae6ca2a1 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -3062,6 +3062,12 @@ future<> storage_service::join_token_ring(shardedupdate_topology(*replace_address, std::move(ri->dc_rack), locator::node::state::being_replaced); co_await tmptr->update_normal_tokens(bootstrap_tokens, *replace_address); + + tmptr->get_new()->update_topology(tmptr->get_my_id(), 
std::nullopt, locator::node::state::replacing); + tmptr->get_new()->update_topology(ri->host_id, std::move(ri->dc_rack), locator::node::state::being_replaced); + co_await tmptr->get_new()->update_normal_tokens(bootstrap_tokens, ri->host_id); + tmptr->get_new()->update_host_id(ri->host_id, *replace_address); + replaced_host_id = ri->host_id; } } else if (should_bootstrap()) { @@ -3102,7 +3108,9 @@ future<> storage_service::join_token_ring(shardedupdate_topology(get_broadcast_address(), _snitch.local()->get_location(), locator::node::state::normal); + tmptr->get_new()->update_topology(tmptr->get_new()->get_my_id(), _snitch.local()->get_location(), locator::node::state::normal); co_await tmptr->update_normal_tokens(my_tokens, get_broadcast_address()); + co_await tmptr->get_new()->update_normal_tokens(my_tokens, tmptr->get_new()->get_my_id()); cdc_gen_id = co_await _sys_ks.local().get_cdc_generation_id(); if (!cdc_gen_id) { @@ -3129,6 +3137,7 @@ future<> storage_service::join_token_ring(shardedupdate_host_id(local_host_id, get_broadcast_address()); + tmptr->get_new()->update_host_id(local_host_id, get_broadcast_address()); } // Replicate the tokens early because once gossip runs other nodes @@ -3419,12 +3428,14 @@ future<> storage_service::join_token_ring(sharded future<> { // This node must know about its chosen tokens before other nodes do // since they may start sending writes to this node after it gossips status = NORMAL. // Therefore, in case we haven't updated _token_metadata with our tokens yet, do it now. 
tmptr->update_topology(get_broadcast_address(), _snitch.local()->get_location(), locator::node::state::normal); - return tmptr->update_normal_tokens(bootstrap_tokens, get_broadcast_address()); + co_await tmptr->update_normal_tokens(bootstrap_tokens, get_broadcast_address()); + tmptr->get_new()->update_topology(tmptr->get_new()->get_my_id(), _snitch.local()->get_location(), locator::node::state::normal); + co_await tmptr->get_new()->update_normal_tokens(bootstrap_tokens, tmptr->get_new()->get_my_id()); }); if (!_sys_ks.local().bootstrap_complete()) { From 2794b14a807630da0e416dad9a2589436477dd69 Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Sun, 22 Oct 2023 18:49:03 +0400 Subject: [PATCH 19/51] storage_service: bootstrap: update new token_metadata --- service/storage_service.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/service/storage_service.cc b/service/storage_service.cc index abae6ca2a1..31c5228ef6 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -3575,6 +3575,8 @@ future<> storage_service::bootstrap(std::unordered_set& bootstrap_tokens, auto endpoint = get_broadcast_address(); tmptr->update_topology(endpoint, _snitch.local()->get_location(), locator::node::state::bootstrapping); tmptr->add_bootstrap_tokens(bootstrap_tokens, endpoint); + tmptr->get_new()->update_topology(tmptr->get_new()->get_my_id(), _snitch.local()->get_location(), locator::node::state::bootstrapping); + tmptr->get_new()->add_bootstrap_tokens(bootstrap_tokens, tmptr->get_new()->get_my_id()); return update_topology_change_info(std::move(tmptr), ::format("bootstrapping node {}", endpoint)); }).get(); } From 1745a1551a35cd09da8ae322e163135ae29e9af9 Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Tue, 5 Dec 2023 14:02:49 +0400 Subject: [PATCH 20/51] storage_service: node_ops_cmd_handler: add coordinator_host_id We'll need it in the next commits to address to replacing and bootstrapping nodes by id. 
We assume this change will be shipped in 6.0 with upgrade from 5.4, where host_id already exists in client_info. We don't support upgrade between non-adjacent versions. --- service/storage_service.cc | 10 +++++++--- service/storage_service.hh | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/service/storage_service.cc b/service/storage_service.cc index 31c5228ef6..c612e83d0f 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -5462,7 +5462,7 @@ void storage_service::node_ops_insert(node_ops_id ops_uuid, on_node_ops_registered(ops_uuid); } -future storage_service::node_ops_cmd_handler(gms::inet_address coordinator, node_ops_cmd_request req) { +future storage_service::node_ops_cmd_handler(gms::inet_address coordinator, std::optional, node_ops_cmd_request req) { return seastar::async([this, coordinator, req = std::move(req)] () mutable { auto ops_uuid = req.ops_uuid; auto topo_guard = null_topology_guard; @@ -7172,8 +7172,12 @@ future storage_service::join_node_response_handler(jo void storage_service::init_messaging_service(bool raft_topology_change_enabled) { _messaging.local().register_node_ops_cmd([this] (const rpc::client_info& cinfo, node_ops_cmd_request req) { auto coordinator = cinfo.retrieve_auxiliary("baddr"); - return container().invoke_on(0, [coordinator, req = std::move(req)] (auto& ss) mutable { - return ss.node_ops_cmd_handler(coordinator, std::move(req)); + std::optional coordinator_host_id; + if (const auto* id = cinfo.retrieve_auxiliary_opt("host_id")) { + coordinator_host_id = *id; + } + return container().invoke_on(0, [coordinator, coordinator_host_id, req = std::move(req)] (auto& ss) mutable { + return ss.node_ops_cmd_handler(coordinator, coordinator_host_id, std::move(req)); }); }); if (raft_topology_change_enabled) { diff --git a/service/storage_service.hh b/service/storage_service.hh index c8ad5970c4..f12b21d0a9 100644 --- a/service/storage_service.hh +++ b/service/storage_service.hh @@ -656,7 +656,7 
@@ public: * @param hostIdString token for the node */ future<> removenode(locator::host_id host_id, std::list ignore_nodes); - future node_ops_cmd_handler(gms::inet_address coordinator, node_ops_cmd_request req); + future node_ops_cmd_handler(gms::inet_address coordinator, std::optional coordinator_host_id, node_ops_cmd_request req); void node_ops_cmd_check(gms::inet_address coordinator, const node_ops_cmd_request& req); future<> node_ops_cmd_heartbeat_updater(node_ops_cmd cmd, node_ops_id uuid, std::list nodes, lw_shared_ptr heartbeat_updater_done); void on_node_ops_registered(node_ops_id); From 278c8322859536fdb7bf9ba54f2e65d5c45fe04e Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Tue, 31 Oct 2023 23:16:56 +0400 Subject: [PATCH 21/51] storage_service: node_ops_cmd_handler: update new token_metadata --- service/storage_service.cc | 63 ++++++++++++++++++++++++++++++++------ 1 file changed, 53 insertions(+), 10 deletions(-) diff --git a/service/storage_service.cc b/service/storage_service.cc index c612e83d0f..5f834470c0 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -5462,8 +5462,8 @@ void storage_service::node_ops_insert(node_ops_id ops_uuid, on_node_ops_registered(ops_uuid); } -future storage_service::node_ops_cmd_handler(gms::inet_address coordinator, std::optional, node_ops_cmd_request req) { - return seastar::async([this, coordinator, req = std::move(req)] () mutable { +future storage_service::node_ops_cmd_handler(gms::inet_address coordinator, std::optional coordinator_host_id, node_ops_cmd_request req) { + return seastar::async([this, coordinator, coordinator_host_id, req = std::move(req)] () mutable { auto ops_uuid = req.ops_uuid; auto topo_guard = null_topology_guard; slogger.debug("node_ops_cmd_handler cmd={}, ops_uuid={}", req.cmd, ops_uuid); @@ -5506,6 +5506,7 @@ future storage_service::node_ops_cmd_handler(gms::inet_ad for (auto& node : req.leaving_nodes) { slogger.info("removenode[{}]: Added node={} as leaving node, 
coordinator={}", req.ops_uuid, node, coordinator); tmptr->add_leaving_endpoint(node); + tmptr->get_new()->add_leaving_endpoint(tmptr->get_new()->get_host_id(node)); } return update_topology_change_info(tmptr, ::format("removenode {}", req.leaving_nodes)); }).get(); @@ -5514,6 +5515,7 @@ future storage_service::node_ops_cmd_handler(gms::inet_ad for (auto& node : req.leaving_nodes) { slogger.info("removenode[{}]: Removed node={} as leaving node, coordinator={}", req.ops_uuid, node, coordinator); tmptr->del_leaving_endpoint(node); + tmptr->get_new()->del_leaving_endpoint(tmptr->get_new()->get_host_id(node)); } return update_topology_change_info(tmptr, ::format("removenode {}", req.leaving_nodes)); }); @@ -5554,6 +5556,7 @@ future storage_service::node_ops_cmd_handler(gms::inet_ad for (auto& node : req.leaving_nodes) { slogger.info("decommission[{}]: Added node={} as leaving node, coordinator={}", req.ops_uuid, node, coordinator); tmptr->add_leaving_endpoint(node); + tmptr->get_new()->add_leaving_endpoint(tmptr->get_new()->get_host_id(node)); } return update_topology_change_info(tmptr, ::format("decommission {}", req.leaving_nodes)); }).get(); @@ -5562,6 +5565,7 @@ future storage_service::node_ops_cmd_handler(gms::inet_ad for (auto& node : req.leaving_nodes) { slogger.info("decommission[{}]: Removed node={} as leaving node, coordinator={}", req.ops_uuid, node, coordinator); tmptr->del_leaving_endpoint(node); + tmptr->get_new()->del_leaving_endpoint(tmptr->get_new()->get_host_id(node)); } return update_topology_change_info(tmptr, ::format("decommission {}", req.leaving_nodes)); }); @@ -5607,23 +5611,51 @@ future storage_service::node_ops_cmd_handler(gms::inet_ad slogger.warn("{}", msg); throw std::runtime_error(msg); } - mutate_token_metadata([coordinator, &req, this] (mutable_token_metadata_ptr tmptr) mutable { + if (!coordinator_host_id) { + throw std::runtime_error("Coordinator host_id not found"); + } + mutate_token_metadata([coordinator, coordinator_host_id, &req, 
this] (mutable_token_metadata_ptr tmptr) mutable { for (auto& x: req.replace_nodes) { auto existing_node = x.first; auto replacing_node = x.second; - slogger.info("replace[{}]: Added replacing_node={} to replace existing_node={}, coordinator={}", req.ops_uuid, replacing_node, existing_node, coordinator); + const auto existing_node_id = tmptr->get_new()->get_host_id(existing_node); + const auto replacing_node_id = *coordinator_host_id; + slogger.info("replace[{}]: Added replacing_node={}/{} to replace existing_node={}/{}, coordinator={}/{}", + req.ops_uuid, replacing_node, replacing_node_id, existing_node, existing_node_id, coordinator, *coordinator_host_id); tmptr->update_topology(replacing_node, get_dc_rack_for(replacing_node), locator::node::state::replacing); tmptr->add_replacing_endpoint(existing_node, replacing_node); + + // In case of replace-with-same-ip we need to map both host_id-s + // to the same IP. The locator::topology allows this specifically in case + // where one node is being_replaced and another is replacing, + // so here we adjust the state of the original node accordingly. + // The host_id -> IP map works as usual, and IP -> host_id will map + // IP to the being_replaced node - this is what is implied by the + // current code. The IP will be placed in pending_endpoints and + // excluded from normal_endpoints (maybe_remove_node_being_replaced function). + // In handle_state_normal we'll remap the IP to the new host_id. 
+ tmptr->get_new()->update_topology(existing_node_id, std::nullopt, locator::node::state::being_replaced); + tmptr->get_new()->update_topology(replacing_node_id, get_dc_rack_for(replacing_node), locator::node::state::replacing); + tmptr->get_new()->update_host_id(replacing_node_id, replacing_node); + tmptr->get_new()->add_replacing_endpoint(existing_node_id, replacing_node_id); } return make_ready_future<>(); }).get(); - node_ops_insert(ops_uuid, coordinator, std::move(req.ignore_nodes), [this, coordinator, req = std::move(req)] () mutable { - return mutate_token_metadata([this, coordinator, req = std::move(req)] (mutable_token_metadata_ptr tmptr) mutable { + node_ops_insert(ops_uuid, coordinator, std::move(req.ignore_nodes), [this, coordinator, coordinator_host_id, req = std::move(req)] () mutable { + return mutate_token_metadata([this, coordinator, coordinator_host_id, req = std::move(req)] (mutable_token_metadata_ptr tmptr) mutable { for (auto& x: req.replace_nodes) { auto existing_node = x.first; auto replacing_node = x.second; - slogger.info("replace[{}]: Removed replacing_node={} to replace existing_node={}, coordinator={}", req.ops_uuid, replacing_node, existing_node, coordinator); + const auto existing_node_id = tmptr->get_new()->get_host_id(existing_node); + const auto replacing_node_id = *coordinator_host_id; + slogger.info("replace[{}]: Removed replacing_node={}/{} to replace existing_node={}/{}, coordinator={}/{}", + req.ops_uuid, replacing_node, replacing_node_id, existing_node, existing_node_id, coordinator, *coordinator_host_id); tmptr->del_replacing_endpoint(existing_node); + + tmptr->get_new()->del_replacing_endpoint(existing_node_id); + const auto dc_rack = get_dc_rack_for(replacing_node); + tmptr->get_new()->update_topology(existing_node_id, dc_rack, locator::node::state::normal); + tmptr->get_new()->remove_endpoint(replacing_node_id); } return update_topology_change_info(tmptr, ::format("replace {}", req.replace_nodes)); }); @@ -5659,13 +5691,23 
@@ future storage_service::node_ops_cmd_handler(gms::inet_ad slogger.warn("{}", msg); throw std::runtime_error(msg); } - mutate_token_metadata([coordinator, &req, this] (mutable_token_metadata_ptr tmptr) mutable { + if (!coordinator_host_id) { + throw std::runtime_error("Coordinator host_id not found"); + } + mutate_token_metadata([coordinator, coordinator_host_id, &req, this] (mutable_token_metadata_ptr tmptr) mutable { for (auto& x: req.bootstrap_nodes) { auto& endpoint = x.first; auto tokens = std::unordered_set(x.second.begin(), x.second.end()); - slogger.info("bootstrap[{}]: Added node={} as bootstrap, coordinator={}", req.ops_uuid, endpoint, coordinator); - tmptr->update_topology(endpoint, get_dc_rack_for(endpoint), locator::node::state::bootstrapping); + const auto host_id = *coordinator_host_id; + const auto dc_rack = get_dc_rack_for(endpoint); + slogger.info("bootstrap[{}]: Added node={}/{} as bootstrap, coordinator={}/{}", + req.ops_uuid, endpoint, host_id, coordinator, *coordinator_host_id); + tmptr->update_topology(endpoint, dc_rack, locator::node::state::bootstrapping); tmptr->add_bootstrap_tokens(tokens, endpoint); + + tmptr->get_new()->update_host_id(host_id, endpoint); + tmptr->get_new()->update_topology(host_id, dc_rack, locator::node::state::bootstrapping); + tmptr->get_new()->add_bootstrap_tokens(tokens, host_id); } return update_topology_change_info(tmptr, ::format("bootstrap {}", req.bootstrap_nodes)); }).get(); @@ -5676,6 +5718,7 @@ future storage_service::node_ops_cmd_handler(gms::inet_ad auto tokens = std::unordered_set(x.second.begin(), x.second.end()); slogger.info("bootstrap[{}]: Removed node={} as bootstrap, coordinator={}", req.ops_uuid, endpoint, coordinator); tmptr->remove_bootstrap_tokens(tokens); + tmptr->get_new()->remove_bootstrap_tokens(tokens); } return update_topology_change_info(tmptr, ::format("bootstrap {}", req.bootstrap_nodes)); }); From 0aab20d3feff6374cd070549a3356657999f663c Mon Sep 17 00:00:00 2001 From: Petr Gusev 
Date: Sun, 22 Oct 2023 19:57:57 +0400 Subject: [PATCH 22/51] storage_service: leave_ring: update new token_metadata --- service/storage_service.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/service/storage_service.cc b/service/storage_service.cc index 5f834470c0..7b74ff0eba 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -6101,6 +6101,7 @@ future<> storage_service::leave_ring() { co_await mutate_token_metadata([this] (mutable_token_metadata_ptr tmptr) { auto endpoint = get_broadcast_address(); tmptr->remove_endpoint(endpoint); + tmptr->get_new()->remove_endpoint(tmptr->get_new()->get_my_id()); return update_topology_change_info(std::move(tmptr), ::format("leave_ring {}", endpoint)); }); From 4e03ba3ede2203ebb23ed891264a5edf814c66f9 Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Sun, 22 Oct 2023 19:59:18 +0400 Subject: [PATCH 23/51] storage_service: snitch_reconfigured: update new token_metadata --- service/storage_service.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/service/storage_service.cc b/service/storage_service.cc index 7b74ff0eba..b837a728a1 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -6345,6 +6345,7 @@ future<> storage_service::snitch_reconfigured() { co_await mutate_token_metadata([&] (mutable_token_metadata_ptr tmptr) -> future<> { // re-read local rack and DC info tmptr->update_topology(get_broadcast_address(), snitch->get_location()); + tmptr->get_new()->update_topology(tmptr->get_new()->get_my_id(), snitch->get_location()); return make_ready_future<>(); }); From 5c04a47d6fd3789347a49bcfa82da93ccbb22145 Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Sun, 22 Oct 2023 20:09:51 +0400 Subject: [PATCH 24/51] storage_service: handle_state_bootstrap: update new token_metadata --- service/storage_service.cc | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/service/storage_service.cc b/service/storage_service.cc index 
b837a728a1..133c406072 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -3668,21 +3668,29 @@ future<> storage_service::handle_state_bootstrap(inet_address endpoint, gms::per // continue. auto tmlock = co_await get_token_metadata_lock(); auto tmptr = co_await get_mutable_token_metadata_ptr(); - if (tmptr->is_normal_token_owner(endpoint)) { - // If isLeaving is false, we have missed both LEAVING and LEFT. However, if - // isLeaving is true, we have only missed LEFT. Waiting time between completing - // leave operation and rebootstrapping is relatively short, so the latter is quite - // common (not enough time for gossip to spread). Therefore we report only the - // former in the log. - if (!tmptr->is_leaving(endpoint)) { - slogger.info("Node {} state jump to bootstrap", endpoint); + auto update_tm = [&](locator::generic_token_metadata& tm, NodeId n, std::optional dc_rack) { + if (tm.is_normal_token_owner(n)) { + // If isLeaving is false, we have missed both LEAVING and LEFT. However, if + // isLeaving is true, we have only missed LEFT. Waiting time between completing + // leave operation and rebootstrapping is relatively short, so the latter is quite + // common (not enough time for gossip to spread). Therefore we report only the + // former in the log. 
+ if (!tm.is_leaving(n)) { + slogger.info("Node {} state jump to bootstrap", n); + } + tm.remove_endpoint(n); } - tmptr->remove_endpoint(endpoint); - } - tmptr->update_topology(endpoint, get_dc_rack_for(endpoint), locator::node::state::bootstrapping); - tmptr->add_bootstrap_tokens(tokens, endpoint); - tmptr->update_host_id(_gossiper.get_host_id(endpoint), endpoint); + tm.update_topology(n, dc_rack, locator::node::state::bootstrapping); + tm.add_bootstrap_tokens(tokens, n); + }; + const auto dc_rack = get_dc_rack_for(endpoint); + const auto host_id = _gossiper.get_host_id(endpoint); + update_tm(*tmptr, endpoint, dc_rack); + update_tm(*tmptr->get_new(), host_id, dc_rack); + tmptr->update_host_id(host_id, endpoint); + tmptr->get_new()->update_host_id(host_id, endpoint); + co_await update_topology_change_info(tmptr, ::format("handle_state_bootstrap {}", endpoint)); co_await replicate_to_all_cores(std::move(tmptr)); } From 90234861acb8af0c644bfb2f637b177a8b8b9cad Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Sun, 22 Oct 2023 20:27:44 +0400 Subject: [PATCH 25/51] storage_service: on_alive: update new token_metadata --- service/storage_service.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/service/storage_service.cc b/service/storage_service.cc index 133c406072..c31e77324b 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -4007,7 +4007,11 @@ future<> storage_service::on_alive(gms::inet_address endpoint, gms::endpoint_sta } else { auto tmlock = co_await get_token_metadata_lock(); auto tmptr = co_await get_mutable_token_metadata_ptr(); - tmptr->update_topology(endpoint, get_dc_rack_for(endpoint)); + const auto dc_rack = get_dc_rack_for(endpoint); + tmptr->update_topology(endpoint, dc_rack); + const auto host_id = _gossiper.get_host_id(endpoint); + tmptr->get_new()->update_host_id(host_id, endpoint); + tmptr->get_new()->update_topology(host_id, dc_rack); co_await replicate_to_all_cores(std::move(tmptr)); } } From 
1960436d9315873ff2c55686d3ecd80c198d0229 Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Sun, 29 Oct 2023 22:22:01 +0400 Subject: [PATCH 26/51] network_topology_strategy_test: update new token_metadata --- test/boost/network_topology_strategy_test.cc | 41 ++++++++++++++------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/test/boost/network_topology_strategy_test.cc b/test/boost/network_topology_strategy_test.cc index 96885556f7..4f8368d308 100644 --- a/test/boost/network_topology_strategy_test.cc +++ b/test/boost/network_topology_strategy_test.cc @@ -251,13 +251,21 @@ void simple_test() { // Initialize the token_metadata stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> { - auto& topo = tm.get_topology(); - for (const auto& [ring_point, endpoint, id] : ring_points) { - std::unordered_set tokens; - tokens.insert({dht::token::kind::key, d2t(ring_point / ring_points.size())}); - topo.add_node(id, endpoint, make_endpoint_dc_rack(endpoint), locator::node::state::normal); - co_await tm.update_normal_tokens(std::move(tokens), endpoint); - } + auto update_tm = [&](generic_token_metadata& tm) -> future<> { + auto& topo = tm.get_topology(); + for (const auto& [ring_point, endpoint, id] : ring_points) { + std::unordered_set tokens; + tokens.insert({dht::token::kind::key, d2t(ring_point / ring_points.size())}); + topo.add_node(id, endpoint, make_endpoint_dc_rack(endpoint), locator::node::state::normal); + if constexpr(std::is_same_v) { + co_await tm.update_normal_tokens(std::move(tokens), endpoint); + } else { + co_await tm.update_normal_tokens(std::move(tokens), id); + } + } + }; + co_await update_tm(tm); + co_await update_tm(*tm.get_new()); }).get(); ///////////////////////////////////// @@ -294,6 +302,7 @@ void simple_test() { // stm.mutate_token_metadata([] (token_metadata& tm) { tm.invalidate_cached_rings(); + tm.get_new()->invalidate_cached_rings(); return make_ready_future<>(); }).get(); full_ring_check(ring_points, options320, ars_ptr, 
stm.get()); @@ -358,11 +367,19 @@ void heavy_origin_test() { } stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> { - auto& topo = tm.get_topology(); - for (const auto& [ring_point, endpoint, id] : ring_points) { - topo.add_node(id, endpoint, make_endpoint_dc_rack(endpoint), locator::node::state::normal); - co_await tm.update_normal_tokens(std::move(tokens[endpoint]), endpoint); - } + auto update_tm = [&](generic_token_metadata& tm) -> future<> { + auto& topo = tm.get_topology(); + for (const auto& [ring_point, endpoint, id] : ring_points) { + topo.add_node(id, endpoint, make_endpoint_dc_rack(endpoint), locator::node::state::normal); + if constexpr (std::is_same_v) { + co_await tm.update_normal_tokens(tokens[endpoint], endpoint); + } else { + co_await tm.update_normal_tokens(tokens[endpoint], id); + } + } + }; + co_await update_tm(tm); + co_await update_tm(*tm.get_new()); }).get(); auto ars_ptr = abstract_replication_strategy::create_replication_strategy( From d5b4b02b28a239e81cbc797db5bfe19b52db5f1d Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Tue, 24 Oct 2023 12:15:45 +0400 Subject: [PATCH 27/51] abstract_replication_strategy: calculate_natural_endpoints: make it work with both versions of token_metadata We've updated all the places where token_metadata is mutated, and now we can progress to the next stage of the refactoring - gradually switching the read code paths. The calculate_natural_endpoints function is at the core of all of them. It decides to what nodes the given token should be replicated to for the given token_metadata. It has a lot of usages in various contexts, we can't switch them all in one commit, so instead we allowed the function to behave in both ways. If use_host_id parameter is false, the function uses the provided token_metadata as is and returns endpoint_set as a result. If it's true, it uses get_new() on the provided token_metadata and returns host_id_set as a result. 
The scope of the whole refactoring is limited to the erm data structure, its interface will be kept inet_address based for now. This means we'll often need to resolve host_ids to inet_address-es as soon as we got a result from calculated_natural_endpoints. A new calculate_natural_ips function is added for convenience. It uses the new token_metadata and immediately resolves returned host_id-s to inet_address-es. The auxiliary declarations natural_ep_type, set_type, vector_type, get_self_id, select_tm are introduced only for the sake of migration, they will be removed later. --- inet_address_vectors.hh | 5 ++++ locator/abstract_replication_strategy.cc | 31 +++++++++++++++----- locator/abstract_replication_strategy.hh | 24 ++++++++++++++- locator/everywhere_replication_strategy.cc | 10 ++++--- locator/everywhere_replication_strategy.hh | 2 +- locator/local_strategy.cc | 6 ++-- locator/local_strategy.hh | 2 +- locator/network_topology_strategy.cc | 29 ++++++++++-------- locator/network_topology_strategy.hh | 4 +-- locator/simple_strategy.cc | 8 +++-- locator/simple_strategy.hh | 2 +- repair/repair.cc | 6 ++-- service/storage_service.cc | 2 +- test/boost/network_topology_strategy_test.cc | 2 +- 14 files changed, 93 insertions(+), 40 deletions(-) diff --git a/inet_address_vectors.hh b/inet_address_vectors.hh index 859b448adf..5408a51be4 100644 --- a/inet_address_vectors.hh +++ b/inet_address_vectors.hh @@ -9,8 +9,13 @@ #pragma once #include "gms/inet_address.hh" +#include "locator/host_id.hh" #include "utils/small_vector.hh" using inet_address_vector_replica_set = utils::small_vector; using inet_address_vector_topology_change = utils::small_vector; + +using host_id_vector_replica_set = utils::small_vector; + +using host_id_vector_topology_change = utils::small_vector; diff --git a/locator/abstract_replication_strategy.cc b/locator/abstract_replication_strategy.cc index fbdb579977..123a0b7a23 100644 --- a/locator/abstract_replication_strategy.cc +++ 
b/locator/abstract_replication_strategy.cc @@ -19,6 +19,18 @@ namespace locator { +static endpoint_set resolve_endpoints(const host_id_set& host_ids, const token_metadata2& tm) { + endpoint_set result{}; + result.reserve(host_ids.size()); + for (const auto& host_id: host_ids) { + // Empty host_id is used as a marker for local address. + // The reason for this hack is that we need local_strategy to + // work before the local host_id is loaded from the system.local table. + result.push_back(host_id ? tm.get_endpoint_for_host_id(host_id) : tm.get_topology().my_address()); + } + return result; +} + logging::logger rslogger("replication_strategy"); abstract_replication_strategy::abstract_replication_strategy( @@ -56,6 +68,11 @@ void abstract_replication_strategy::validate_replication_strategy(const sstring& } } +future abstract_replication_strategy::calculate_natural_ips(const token& search_token, const token_metadata2_ptr& tm) const { + const auto host_ids = co_await calculate_natural_endpoints(search_token, token_metadata(tm), true); + co_return resolve_endpoints(get(host_ids), *tm); +} + using strategy_class_registry = class_registry< locator::abstract_replication_strategy, const locator::replication_strategy_config_options&>; @@ -261,7 +278,7 @@ abstract_replication_strategy::get_ranges(inet_address ep, const token_metadata& // Using the common path would make the function quadratic in the number of endpoints. 
should_add = true; } else { - auto eps = co_await calculate_natural_endpoints(tok, tm); + auto eps = get(co_await calculate_natural_endpoints(tok, tm, false)); should_add = eps.contains(ep); } if (should_add) { @@ -326,7 +343,7 @@ abstract_replication_strategy::get_range_addresses(const token_metadata& tm) con std::unordered_map ret; for (auto& t : tm.sorted_tokens()) { dht::token_range_vector ranges = tm.get_primary_ranges_for(t); - auto eps = co_await calculate_natural_endpoints(t, tm); + auto eps = get(co_await calculate_natural_endpoints(t, tm, false)); for (auto& r : ranges) { ret.emplace(r, eps.get_vector()); } @@ -341,7 +358,7 @@ abstract_replication_strategy::get_pending_address_ranges(const token_metadata_p temp.update_topology(pending_address, std::move(dr)); co_await temp.update_normal_tokens(pending_tokens, pending_address); for (const auto& t : temp.sorted_tokens()) { - auto eps = co_await calculate_natural_endpoints(t, temp); + auto eps = get(co_await calculate_natural_endpoints(t, temp, false)); if (eps.contains(pending_address)) { dht::token_range_vector r = temp.get_primary_ranges_for(t); rslogger.debug("get_pending_address_ranges: token={} primary_range={} endpoint={}", t, r, pending_address); @@ -372,8 +389,8 @@ future calculate_effective_replicat const auto token = all_tokens[i]; - auto current_endpoints = co_await rs->calculate_natural_endpoints(token, base_token_metadata); - auto target_endpoints = co_await rs->calculate_natural_endpoints(token, *topology_changes->target_token_metadata); + auto current_endpoints = get(co_await rs->calculate_natural_endpoints(token, base_token_metadata, false)); + auto target_endpoints = get(co_await rs->calculate_natural_endpoints(token, *topology_changes->target_token_metadata, false)); auto add_mapping = [&](ring_mapping& target, std::unordered_set&& endpoints) { using interval = ring_mapping::interval_type; @@ -422,11 +439,11 @@ future calculate_effective_replicat } } else if (depend_on_token) { for (const 
auto &t : sorted_tokens) { - auto eps = co_await rs->calculate_natural_endpoints(t, *tmptr); + auto eps = get(co_await rs->calculate_natural_endpoints(t, *tmptr, false)); replication_map.emplace(t, std::move(eps).extract_vector()); } } else { - auto eps = co_await rs->calculate_natural_endpoints(default_replication_map_key, *tmptr); + auto eps = get(co_await rs->calculate_natural_endpoints(default_replication_map_key, *tmptr, false)); replication_map.emplace(default_replication_map_key, std::move(eps).extract_vector()); } diff --git a/locator/abstract_replication_strategy.hh b/locator/abstract_replication_strategy.hh index 5af70446a6..f296de7910 100644 --- a/locator/abstract_replication_strategy.hh +++ b/locator/abstract_replication_strategy.hh @@ -53,12 +53,19 @@ using replication_strategy_config_options = std::map; using replication_map = std::unordered_map; using endpoint_set = utils::basic_sequenced_set; +using host_id_set = utils::basic_sequenced_set; +using natural_ep_type = std::variant; +template +using set_type = std::conditional_t, endpoint_set, host_id_set>; +template +using vector_type = std::conditional_t, inet_address_vector_replica_set, host_id_vector_replica_set>; class vnode_effective_replication_map; class effective_replication_map_factory; class per_table_replication_strategy; class tablet_aware_replication_strategy; + class abstract_replication_strategy : public seastar::enable_shared_from_this { friend class vnode_effective_replication_map; friend class per_table_replication_strategy; @@ -85,6 +92,20 @@ protected: rslogger.debug(fmt, std::forward(args)...); } + template + static NodeId get_self_id(const generic_token_metadata& tm) { + if constexpr(std::is_same_v) { + return tm.get_topology().my_address(); + } else { + return NodeId{}; + } + } + + template + static future select_tm(Func&& func, const token_metadata& tm, bool use_host_id) { + return use_host_id ? 
func(*tm.template get_new()) : func(tm); + } + public: using ptr_type = seastar::shared_ptr; @@ -101,7 +122,8 @@ public: // is small, that implementation may not yield since by itself it won't cause a reactor stall (assuming practical // cluster sizes and number of tokens per node). The caller is responsible for yielding if they call this function // in a loop. - virtual future calculate_natural_endpoints(const token& search_token, const token_metadata& tm) const = 0; + virtual future calculate_natural_endpoints(const token& search_token, const token_metadata& tm, bool use_host_id) const = 0; + future calculate_natural_ips(const token& search_token, const token_metadata2_ptr& tm) const; virtual ~abstract_replication_strategy() {} static ptr_type create_replication_strategy(const sstring& strategy_name, const replication_strategy_config_options& config_options); diff --git a/locator/everywhere_replication_strategy.cc b/locator/everywhere_replication_strategy.cc index 3bf75f2021..1b88258d41 100644 --- a/locator/everywhere_replication_strategy.cc +++ b/locator/everywhere_replication_strategy.cc @@ -20,13 +20,15 @@ everywhere_replication_strategy::everywhere_replication_strategy(const replicati _natural_endpoints_depend_on_token = false; } -future everywhere_replication_strategy::calculate_natural_endpoints(const token& search_token, const token_metadata& tm) const { +future everywhere_replication_strategy::calculate_natural_endpoints(const token& search_token, const token_metadata& tm, bool use_host_id) const { + return select_tm([this](const generic_token_metadata& tm) -> future { if (tm.sorted_tokens().empty()) { - endpoint_set result{inet_address_vector_replica_set({tm.get_topology().my_address()})}; - return make_ready_future(std::move(result)); + set_type result{vector_type({this->get_self_id(tm)})}; + return make_ready_future(std::move(result)); } const auto& all_endpoints = tm.get_all_endpoints(); - return make_ready_future(endpoint_set(all_endpoints.begin(), 
all_endpoints.end())); + return make_ready_future(set_type(all_endpoints.begin(), all_endpoints.end())); + }, tm, use_host_id); } size_t everywhere_replication_strategy::get_replication_factor(const token_metadata& tm) const { diff --git a/locator/everywhere_replication_strategy.hh b/locator/everywhere_replication_strategy.hh index a3cd8ab134..ada8ea81ee 100644 --- a/locator/everywhere_replication_strategy.hh +++ b/locator/everywhere_replication_strategy.hh @@ -18,7 +18,7 @@ class everywhere_replication_strategy : public abstract_replication_strategy { public: everywhere_replication_strategy(const replication_strategy_config_options& config_options); - virtual future calculate_natural_endpoints(const token& search_token, const token_metadata& tm) const override; + virtual future calculate_natural_endpoints(const token& search_token, const token_metadata& tm, bool host_id) const override; virtual void validate_options(const gms::feature_service&) const override { /* noop */ } diff --git a/locator/local_strategy.cc b/locator/local_strategy.cc index 8f213489c4..34cc010aff 100644 --- a/locator/local_strategy.cc +++ b/locator/local_strategy.cc @@ -18,8 +18,10 @@ local_strategy::local_strategy(const replication_strategy_config_options& config _natural_endpoints_depend_on_token = false; } -future local_strategy::calculate_natural_endpoints(const token& t, const token_metadata& tm) const { - return make_ready_future(endpoint_set({tm.get_topology().my_address()})); +future local_strategy::calculate_natural_endpoints(const token& t, const token_metadata& tm, bool use_host_id) const { + return select_tm([this](const generic_token_metadata& tm) -> future { + return make_ready_future(set_type({this->get_self_id(tm)})); + }, tm, use_host_id); } void local_strategy::validate_options(const gms::feature_service&) const { diff --git a/locator/local_strategy.hh b/locator/local_strategy.hh index 60e58e1d4b..e87085e45d 100644 --- a/locator/local_strategy.hh +++ 
b/locator/local_strategy.hh @@ -27,7 +27,7 @@ public: virtual ~local_strategy() {}; virtual size_t get_replication_factor(const token_metadata&) const override; - virtual future calculate_natural_endpoints(const token& search_token, const token_metadata& tm) const override; + virtual future calculate_natural_endpoints(const token& search_token, const token_metadata& tm, bool host_id) const override; virtual void validate_options(const gms::feature_service&) const override; diff --git a/locator/network_topology_strategy.cc b/locator/network_topology_strategy.cc index e320fe96ca..7dbf89e866 100644 --- a/locator/network_topology_strategy.cc +++ b/locator/network_topology_strategy.cc @@ -76,13 +76,14 @@ network_topology_strategy::network_topology_strategy( using endpoint_dc_rack_set = std::unordered_set; +template class natural_endpoints_tracker { /** * Endpoint adder applying the replication rules for a given DC. */ struct data_center_endpoints { /** List accepted endpoints get pushed into. */ - endpoint_set& _endpoints; + set_type& _endpoints; /** * Racks encountered so far. Replicas are put into separate racks while possible. @@ -95,7 +96,7 @@ class natural_endpoints_tracker { size_t _rf_left; ssize_t _acceptable_rack_repeats; - data_center_endpoints(size_t rf, size_t rack_count, size_t node_count, endpoint_set& endpoints, endpoint_dc_rack_set& racks) + data_center_endpoints(size_t rf, size_t rack_count, size_t node_count, set_type& endpoints, endpoint_dc_rack_set& racks) : _endpoints(endpoints) , _racks(racks) // If there aren't enough nodes in this DC to fill the RF, the number of nodes is the effective RF. @@ -109,7 +110,7 @@ class natural_endpoints_tracker { * Attempts to add an endpoint to the replicas for this datacenter, adding to the endpoints set if successful. * Returns true if the endpoint was added, and this datacenter does not require further replicas. 
*/ - bool add_endpoint_and_check_if_done(const inet_address& ep, const endpoint_dc_rack& location) { + bool add_endpoint_and_check_if_done(const NodeId& ep, const endpoint_dc_rack& location) { if (done()) { return false; } @@ -160,7 +161,7 @@ class natural_endpoints_tracker { } }; - const token_metadata& _tm; + const generic_token_metadata& _tm; const topology& _tp; std::unordered_map _dc_rep_factor; @@ -168,7 +169,7 @@ class natural_endpoints_tracker { // We want to preserve insertion order so that the first added endpoint // becomes primary. // - endpoint_set _replicas; + set_type _replicas; // tracks the racks we have already placed replicas in endpoint_dc_rack_set _seen_racks; @@ -189,7 +190,7 @@ class natural_endpoints_tracker { size_t _dcs_to_fill; public: - natural_endpoints_tracker(const token_metadata& tm, const std::unordered_map& dc_rep_factor) + natural_endpoints_tracker(const generic_token_metadata& tm, const std::unordered_map& dc_rep_factor) : _tm(tm) , _tp(_tm.get_topology()) , _dc_rep_factor(dc_rep_factor) @@ -219,7 +220,7 @@ public: } } - bool add_endpoint_and_check_if_done(inet_address ep) { + bool add_endpoint_and_check_if_done(NodeId ep) { auto& loc = _tp.get_location(ep); auto i = _dcs.find(loc.dc); if (i != _dcs.end() && i->second.add_endpoint_and_check_if_done(ep, loc)) { @@ -232,27 +233,29 @@ public: return _dcs_to_fill == 0; } - endpoint_set& replicas() noexcept { + set_type& replicas() noexcept { return _replicas; } }; -future +future network_topology_strategy::calculate_natural_endpoints( - const token& search_token, const token_metadata& tm) const { + const token& search_token, const token_metadata& tm, bool use_host_id) const { - natural_endpoints_tracker tracker(tm, _dc_rep_factor); + return select_tm([&](const generic_token_metadata& tm) -> future { + natural_endpoints_tracker tracker(tm, _dc_rep_factor); for (auto& next : tm.ring_range(search_token)) { co_await coroutine::maybe_yield(); - inet_address ep = *tm.get_endpoint(next); + 
NodeId ep = *tm.get_endpoint(next); if (tracker.add_endpoint_and_check_if_done(ep)) { break; } } co_return std::move(tracker.replicas()); + }, tm, use_host_id); } void network_topology_strategy::validate_options(const gms::feature_service& fs) const { @@ -306,7 +309,7 @@ future network_topology_strategy::allocate_tablets_for_new_table(sch auto token_range = tm->ring_range(dht::token::get_random_token()); for (tablet_id tb : tablets.tablet_ids()) { - natural_endpoints_tracker tracker(*tm, _dc_rep_factor); + natural_endpoints_tracker tracker(*tm, _dc_rep_factor); while (true) { co_await coroutine::maybe_yield(); diff --git a/locator/network_topology_strategy.hh b/locator/network_topology_strategy.hh index 3f704c98b8..57b8ce1ad9 100644 --- a/locator/network_topology_strategy.hh +++ b/locator/network_topology_strategy.hh @@ -50,8 +50,8 @@ protected: * calculate endpoints in one pass through the tokens by tracking our * progress in each DC, rack etc. */ - virtual future calculate_natural_endpoints( - const token& search_token, const token_metadata& tm) const override; + virtual future calculate_natural_endpoints( + const token& search_token, const token_metadata& tm, bool host_id) const override; virtual void validate_options(const gms::feature_service&) const override; diff --git a/locator/simple_strategy.cc b/locator/simple_strategy.cc index 81006dc800..af40382c28 100644 --- a/locator/simple_strategy.cc +++ b/locator/simple_strategy.cc @@ -33,15 +33,16 @@ simple_strategy::simple_strategy(const replication_strategy_config_options& conf } } -future simple_strategy::calculate_natural_endpoints(const token& t, const token_metadata& tm) const { +future simple_strategy::calculate_natural_endpoints(const token& t, const token_metadata& tm, bool use_host_id) const { + return select_tm([&](const generic_token_metadata& tm) -> future { const std::vector& tokens = tm.sorted_tokens(); if (tokens.empty()) { - co_return endpoint_set(); + co_return set_type{}; } size_t replicas = 
_replication_factor; - endpoint_set endpoints; + set_type endpoints; endpoints.reserve(replicas); for (auto& token : tm.ring_range(t)) { @@ -61,6 +62,7 @@ future simple_strategy::calculate_natural_endpoints(const token& t } co_return endpoints; + }, tm, use_host_id); } size_t simple_strategy::get_replication_factor(const token_metadata&) const { diff --git a/locator/simple_strategy.hh b/locator/simple_strategy.hh index a04e3e2ccd..427aa2a24b 100644 --- a/locator/simple_strategy.hh +++ b/locator/simple_strategy.hh @@ -26,7 +26,7 @@ public: return true; } - virtual future calculate_natural_endpoints(const token& search_token, const token_metadata& tm) const override; + virtual future calculate_natural_endpoints(const token& search_token, const token_metadata& tm, bool host_id) const override; private: size_t _replication_factor = 1; }; diff --git a/repair/repair.cc b/repair/repair.cc index 0363322339..92415b5f9e 100644 --- a/repair/repair.cc +++ b/repair/repair.cc @@ -1719,7 +1719,7 @@ future<> repair_service::do_decommission_removenode_with_repair(locator::token_m // Find (for each range) all nodes that store replicas for these ranges as well for (auto& r : ranges) { auto end_token = r.end() ? r.end()->value() : dht::maximum_token(); - auto eps = strat.calculate_natural_endpoints(end_token, *tmptr).get0(); + auto eps = get(strat.calculate_natural_endpoints(end_token, *tmptr, false).get0()); current_replica_endpoints.emplace(r, std::move(eps)); seastar::thread::maybe_yield(); } @@ -1738,7 +1738,7 @@ future<> repair_service::do_decommission_removenode_with_repair(locator::token_m ops->check_abort(); } auto end_token = r.end() ? 
r.end()->value() : dht::maximum_token(); - const auto new_eps = strat.calculate_natural_endpoints(end_token, temp).get0(); + const auto new_eps = get(strat.calculate_natural_endpoints(end_token, temp, false).get0()); const auto& current_eps = current_replica_endpoints[r]; std::unordered_set neighbors_set = new_eps.get_set(); bool skip_this_range = false; @@ -1929,7 +1929,7 @@ future<> repair_service::do_rebuild_replace_with_repair(locator::token_metadata_ auto& r = *it; seastar::thread::maybe_yield(); auto end_token = r.end() ? r.end()->value() : dht::maximum_token(); - auto neighbors = boost::copy_range>(strat.calculate_natural_endpoints(end_token, *tmptr).get0() | + auto neighbors = boost::copy_range>(get(strat.calculate_natural_endpoints(end_token, *tmptr, false).get0()) | boost::adaptors::filtered([myip, &source_dc, &topology, &ignore_nodes] (const gms::inet_address& node) { if (node == myip) { return false; diff --git a/service/storage_service.cc b/service/storage_service.cc index c31e77324b..a7afc1069e 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -5960,7 +5960,7 @@ storage_service::get_changed_ranges_for_leaving(locator::vnode_effective_replica const auto& rs = erm->get_replication_strategy(); for (auto& r : ranges) { auto end_token = r.end() ? 
r.end()->value() : dht::maximum_token(); - auto new_replica_endpoints = co_await rs.calculate_natural_endpoints(end_token, temp); + auto new_replica_endpoints = get(co_await rs.calculate_natural_endpoints(end_token, temp, false)); auto rg = current_replica_endpoints.equal_range(r); for (auto it = rg.first; it != rg.second; it++) { diff --git a/test/boost/network_topology_strategy_test.cc b/test/boost/network_topology_strategy_test.cc index 4f8368d308..c37016ddc5 100644 --- a/test/boost/network_topology_strategy_test.cc +++ b/test/boost/network_topology_strategy_test.cc @@ -671,7 +671,7 @@ static void test_equivalence(const shared_token_metadata& stm, const locator::to for (size_t i = 0; i < 1000; ++i) { auto token = dht::token::get_random_token(); auto expected = calculate_natural_endpoints(token, tm, topo, datacenters); - auto actual = nts.calculate_natural_endpoints(token, tm).get0(); + auto actual = get(nts.calculate_natural_endpoints(token, tm, false).get0()); // Because the old algorithm does not put the nodes in the correct order in the case where more replicas // are required than there are racks in a dc, we accept different order as long as the primary From fe3c543c4e76d98b4b88fbe476822e4770f4e493 Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Tue, 24 Oct 2023 12:16:54 +0400 Subject: [PATCH 28/51] calculate_natural_endpoints: fix formatting --- locator/everywhere_replication_strategy.cc | 10 ++--- locator/network_topology_strategy.cc | 16 ++++---- locator/simple_strategy.cc | 44 +++++++++++----------- 3 files changed, 35 insertions(+), 35 deletions(-) diff --git a/locator/everywhere_replication_strategy.cc b/locator/everywhere_replication_strategy.cc index 1b88258d41..1105ff3b2c 100644 --- a/locator/everywhere_replication_strategy.cc +++ b/locator/everywhere_replication_strategy.cc @@ -22,12 +22,12 @@ everywhere_replication_strategy::everywhere_replication_strategy(const replicati future everywhere_replication_strategy::calculate_natural_endpoints(const 
token& search_token, const token_metadata& tm, bool use_host_id) const { return select_tm([this](const generic_token_metadata& tm) -> future { - if (tm.sorted_tokens().empty()) { + if (tm.sorted_tokens().empty()) { set_type result{vector_type({this->get_self_id(tm)})}; - return make_ready_future(std::move(result)); - } - const auto& all_endpoints = tm.get_all_endpoints(); - return make_ready_future(set_type(all_endpoints.begin(), all_endpoints.end())); + return make_ready_future(std::move(result)); + } + const auto& all_endpoints = tm.get_all_endpoints(); + return make_ready_future(set_type(all_endpoints.begin(), all_endpoints.end())); }, tm, use_host_id); } diff --git a/locator/network_topology_strategy.cc b/locator/network_topology_strategy.cc index 7dbf89e866..b2f7cc31ac 100644 --- a/locator/network_topology_strategy.cc +++ b/locator/network_topology_strategy.cc @@ -243,18 +243,18 @@ network_topology_strategy::calculate_natural_endpoints( const token& search_token, const token_metadata& tm, bool use_host_id) const { return select_tm([&](const generic_token_metadata& tm) -> future { - natural_endpoints_tracker tracker(tm, _dc_rep_factor); + natural_endpoints_tracker tracker(tm, _dc_rep_factor); - for (auto& next : tm.ring_range(search_token)) { - co_await coroutine::maybe_yield(); + for (auto& next : tm.ring_range(search_token)) { + co_await coroutine::maybe_yield(); - NodeId ep = *tm.get_endpoint(next); - if (tracker.add_endpoint_and_check_if_done(ep)) { - break; + NodeId ep = *tm.get_endpoint(next); + if (tracker.add_endpoint_and_check_if_done(ep)) { + break; + } } - } - co_return std::move(tracker.replicas()); + co_return std::move(tracker.replicas()); }, tm, use_host_id); } diff --git a/locator/simple_strategy.cc b/locator/simple_strategy.cc index af40382c28..dfbc0d7dad 100644 --- a/locator/simple_strategy.cc +++ b/locator/simple_strategy.cc @@ -35,33 +35,33 @@ simple_strategy::simple_strategy(const replication_strategy_config_options& conf future 
simple_strategy::calculate_natural_endpoints(const token& t, const token_metadata& tm, bool use_host_id) const { return select_tm([&](const generic_token_metadata& tm) -> future { - const std::vector& tokens = tm.sorted_tokens(); + const std::vector& tokens = tm.sorted_tokens(); - if (tokens.empty()) { - co_return set_type{}; - } - - size_t replicas = _replication_factor; - set_type endpoints; - endpoints.reserve(replicas); - - for (auto& token : tm.ring_range(t)) { - // If the number of nodes in the cluster is smaller than the desired - // replication factor we should return the loop when endpoints already - // contains all the nodes in the cluster because no more nodes could be - // added to endpoints lists. - if (endpoints.size() == replicas || endpoints.size() == tm.count_normal_token_owners()) { - break; + if (tokens.empty()) { + co_return set_type{}; } - auto ep = tm.get_endpoint(token); - assert(ep); + size_t replicas = _replication_factor; + set_type endpoints; + endpoints.reserve(replicas); - endpoints.push_back(*ep); - co_await coroutine::maybe_yield(); - } + for (auto& token : tm.ring_range(t)) { + // If the number of nodes in the cluster is smaller than the desired + // replication factor we should return the loop when endpoints already + // contains all the nodes in the cluster because no more nodes could be + // added to endpoints lists. + if (endpoints.size() == replicas || endpoints.size() == tm.count_normal_token_owners()) { + break; + } - co_return endpoints; + auto ep = tm.get_endpoint(token); + assert(ep); + + endpoints.push_back(*ep); + co_await coroutine::maybe_yield(); + } + + co_return endpoints; }, tm, use_host_id); } From f5038f6c72e7af12830a0cc5627802852bfbccec Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Sat, 4 Nov 2023 20:17:40 +0400 Subject: [PATCH 29/51] calculate_effective_replication_map: use new token_metadata In this commit we switch the function calculate_effective_replication_map to use the new token_metadata. 
We do this by employing our new helper calculate_natural_ips function. We can't use this helper for current_endpoints/target_endpoints though, since in that case we won't add the IP to the pending_endpoints in the replace-with-same-ip scenario The token_metadata_test is migrated to host_ids in the same commit to make it pass. Other tests work because they fill both versions of the token_metadata, but for this test it was simpler to just migrate it straight away. The test constructs the old token_metadata over the new token_metadata, this means only the get_new() method will work on it. That's why we also need to switch some other functions (maybe_remove_node_being_replaced, do_get_natural_endpoints, get_replication_factor) to the new version in the same commit. All the boost and topology tests pass with this change. --- locator/abstract_replication_strategy.cc | 36 ++--- locator/everywhere_replication_strategy.cc | 2 +- test/boost/token_metadata_test.cc | 157 +++++++++++++-------- 3 files changed, 122 insertions(+), 73 deletions(-) diff --git a/locator/abstract_replication_strategy.cc b/locator/abstract_replication_strategy.cc index 123a0b7a23..f66e10d8bc 100644 --- a/locator/abstract_replication_strategy.cc +++ b/locator/abstract_replication_strategy.cc @@ -90,7 +90,7 @@ inet_address_vector_replica_set vnode_effective_replication_map::get_natural_end void maybe_remove_node_being_replaced(const token_metadata& tm, const abstract_replication_strategy& rs, inet_address_vector_replica_set& natural_endpoints) { - if (tm.is_any_node_being_replaced() && + if (tm.get_new()->is_any_node_being_replaced() && rs.allow_remove_node_being_replaced_from_natural_endpoints()) { // When a new node is started to replace an existing dead node, we want // to make the replacing node take writes but do not count it for @@ -104,7 +104,8 @@ void maybe_remove_node_being_replaced(const token_metadata& tm, // as the natural_endpoints and the node will not appear in the // pending_endpoints. 
auto it = boost::range::remove_if(natural_endpoints, [&] (gms::inet_address& p) { - return tm.is_being_replaced(p); + const auto host_id = tm.get_new()->get_host_id(p); + return tm.get_new()->is_being_replaced(host_id); }); natural_endpoints.erase(it, natural_endpoints.end()); } @@ -376,21 +377,22 @@ future calculate_effective_replicat ring_mapping pending_endpoints; ring_mapping read_endpoints; const auto depend_on_token = rs->natural_endpoints_depend_on_token(); - const auto& sorted_tokens = tmptr->sorted_tokens(); + auto tmpr_new = tmptr->get_new_strong(); + const auto& sorted_tokens = tmpr_new->sorted_tokens(); replication_map.reserve(depend_on_token ? sorted_tokens.size() : 1); - if (const auto& topology_changes = tmptr->get_topology_change_info(); topology_changes) { + if (const auto& topology_changes = tmpr_new->get_topology_change_info(); topology_changes) { const auto& all_tokens = topology_changes->all_tokens; const auto& base_token_metadata = topology_changes->base_token_metadata - ? *topology_changes->base_token_metadata - : *tmptr; - const auto& current_tokens = tmptr->get_token_to_endpoint(); + ? 
topology_changes->base_token_metadata + : tmpr_new; + const auto& current_tokens = tmpr_new->get_token_to_endpoint(); for (size_t i = 0, size = all_tokens.size(); i < size; ++i) { co_await coroutine::maybe_yield(); const auto token = all_tokens[i]; - auto current_endpoints = get(co_await rs->calculate_natural_endpoints(token, base_token_metadata, false)); - auto target_endpoints = get(co_await rs->calculate_natural_endpoints(token, *topology_changes->target_token_metadata, false)); + auto current_endpoints = co_await rs->calculate_natural_endpoints(token, *base_token_metadata); + auto target_endpoints = co_await rs->calculate_natural_endpoints(token, *topology_changes->target_token_metadata); auto add_mapping = [&](ring_mapping& target, std::unordered_set&& endpoints) { using interval = ring_mapping::interval_type; @@ -413,37 +415,37 @@ future calculate_effective_replicat }; { - std::unordered_set endpoints_diff; + host_id_set endpoints_diff; for (const auto& e: target_endpoints) { if (!current_endpoints.contains(e)) { endpoints_diff.insert(e); } } if (!endpoints_diff.empty()) { - add_mapping(pending_endpoints, std::move(endpoints_diff)); + add_mapping(pending_endpoints, resolve_endpoints(endpoints_diff, *base_token_metadata).extract_set()); } } // in order not to waste memory, we update read_endpoints only if the // new endpoints differs from the old one if (topology_changes->read_new && target_endpoints.get_vector() != current_endpoints.get_vector()) { - add_mapping(read_endpoints, std::move(target_endpoints).extract_set()); + add_mapping(read_endpoints, resolve_endpoints(target_endpoints, *base_token_metadata).extract_set()); } if (!depend_on_token) { - replication_map.emplace(default_replication_map_key, std::move(current_endpoints).extract_vector()); + replication_map.emplace(default_replication_map_key, resolve_endpoints(current_endpoints, *base_token_metadata).extract_vector()); break; } else if (current_tokens.contains(token)) { - 
replication_map.emplace(token, std::move(current_endpoints).extract_vector()); + replication_map.emplace(token, resolve_endpoints(current_endpoints, *base_token_metadata).extract_vector()); } } } else if (depend_on_token) { for (const auto &t : sorted_tokens) { - auto eps = get(co_await rs->calculate_natural_endpoints(t, *tmptr, false)); + auto eps = co_await rs->calculate_natural_ips(t, tmpr_new); replication_map.emplace(t, std::move(eps).extract_vector()); } } else { - auto eps = get(co_await rs->calculate_natural_endpoints(default_replication_map_key, *tmptr, false)); + auto eps = co_await rs->calculate_natural_ips(default_replication_map_key, tmpr_new); replication_map.emplace(default_replication_map_key, std::move(eps).extract_vector()); } @@ -476,7 +478,7 @@ const inet_address_vector_replica_set& vnode_effective_replication_map::do_get_n bool is_vnode) const { const token& key_token = _rs->natural_endpoints_depend_on_token() - ? (is_vnode ? tok : _tmptr->first_token(tok)) + ? (is_vnode ? tok : _tmptr->get_new()->first_token(tok)) : default_replication_map_key; const auto it = _replication_map.find(key_token); return it->second; diff --git a/locator/everywhere_replication_strategy.cc b/locator/everywhere_replication_strategy.cc index 1105ff3b2c..87851df8d5 100644 --- a/locator/everywhere_replication_strategy.cc +++ b/locator/everywhere_replication_strategy.cc @@ -32,7 +32,7 @@ future everywhere_replication_strategy::calculate_natural_endpo } size_t everywhere_replication_strategy::get_replication_factor(const token_metadata& tm) const { - return tm.sorted_tokens().empty() ? 1 : tm.count_normal_token_owners(); + return tm.get_new()->sorted_tokens().empty() ? 
1 : tm.get_new()->count_normal_token_owners(); } using registry = class_registrator; diff --git a/test/boost/token_metadata_test.cc b/test/boost/token_metadata_test.cc index e4251bf248..8aed089f80 100644 --- a/test/boost/token_metadata_test.cc +++ b/test/boost/token_metadata_test.cc @@ -17,43 +17,50 @@ using namespace locator; namespace { const auto ks_name = sstring("test-ks"); - endpoint_dc_rack get_dc_rack(gms::inet_address) { + host_id gen_id(int id) { + return host_id{utils::UUID(0, id)}; + } + + endpoint_dc_rack get_dc_rack(host_id) { return { .dc = "unk-dc", .rack = "unk-rack" }; } - mutable_token_metadata_ptr create_token_metadata(inet_address this_endpoint) { - return make_lw_shared(token_metadata::config { + mutable_token_metadata2_ptr create_token_metadata(host_id this_host_id) { + return make_lw_shared(token_metadata::config { topology::config { - .this_endpoint = this_endpoint, - .this_cql_address = this_endpoint, - .local_dc_rack = get_dc_rack(this_endpoint) + .this_host_id = this_host_id, + .local_dc_rack = get_dc_rack(this_host_id) } }); } template - mutable_vnode_erm_ptr create_erm(mutable_token_metadata_ptr tmptr, replication_strategy_config_options opts = {}) { - dc_rack_fn get_dc_rack_fn = get_dc_rack; + mutable_vnode_erm_ptr create_erm(mutable_token_metadata2_ptr tmptr, replication_strategy_config_options opts = {}) { + dc_rack_fn get_dc_rack_fn = get_dc_rack; tmptr->update_topology_change_info(get_dc_rack_fn).get(); auto strategy = seastar::make_shared(std::move(opts)); - return calculate_effective_replication_map(std::move(strategy), std::move(tmptr)).get0(); + return calculate_effective_replication_map(std::move(strategy), make_token_metadata_ptr(tmptr)).get0(); } } SEASTAR_THREAD_TEST_CASE(test_pending_and_read_endpoints_for_everywhere_strategy) { const auto e1 = inet_address("192.168.0.1"); const auto e2 = inet_address("192.168.0.2"); + const auto e1_id = gen_id(1); + const auto e2_id = gen_id(2); const auto t1 = 
dht::token::from_int64(10); const auto t2 = dht::token::from_int64(20); - auto token_metadata = create_token_metadata(e1); - token_metadata->update_topology(e1, get_dc_rack(e1)); - token_metadata->update_topology(e2, get_dc_rack(e2)); - token_metadata->update_normal_tokens({t1}, e1).get(); - token_metadata->add_bootstrap_token(t2, e2); + auto token_metadata = create_token_metadata(e1_id); + token_metadata->update_host_id(e1_id, e1); + token_metadata->update_host_id(e2_id, e2); + token_metadata->update_topology(e1_id, get_dc_rack(e1_id)); + token_metadata->update_topology(e2_id, get_dc_rack(e2_id)); + token_metadata->update_normal_tokens({t1}, e1_id).get(); + token_metadata->add_bootstrap_token(t2, e2_id); token_metadata->set_read_new(token_metadata::read_new_t::yes); auto erm = create_erm(token_metadata); @@ -68,12 +75,16 @@ SEASTAR_THREAD_TEST_CASE(test_pending_endpoints_for_bootstrap_second_node) { const auto t1 = dht::token::from_int64(1); const auto e2 = inet_address("192.168.0.2"); const auto t2 = dht::token::from_int64(100); + const auto e1_id = gen_id(1); + const auto e2_id = gen_id(2); - auto token_metadata = create_token_metadata(e1); - token_metadata->update_topology(e1, get_dc_rack(e1)); - token_metadata->update_topology(e2, get_dc_rack(e2)); - token_metadata->update_normal_tokens({t1}, e1).get(); - token_metadata->add_bootstrap_token(t2, e2); + auto token_metadata = create_token_metadata(e1_id); + token_metadata->update_host_id(e1_id, e1); + token_metadata->update_host_id(e2_id, e2); + token_metadata->update_topology(e1_id, get_dc_rack(e1_id)); + token_metadata->update_topology(e2_id, get_dc_rack(e2_id)); + token_metadata->update_normal_tokens({t1}, e1_id).get(); + token_metadata->add_bootstrap_token(t2, e2_id); auto erm = create_erm(token_metadata, {{"replication_factor", "1"}}); BOOST_REQUIRE_EQUAL(erm->get_pending_endpoints(dht::token::from_int64(0)), @@ -96,14 +107,20 @@ SEASTAR_THREAD_TEST_CASE(test_pending_endpoints_for_bootstrap_with_replicas) { 
const auto e1 = inet_address("192.168.0.1"); const auto e2 = inet_address("192.168.0.2"); const auto e3 = inet_address("192.168.0.3"); + const auto e1_id = gen_id(1); + const auto e2_id = gen_id(2); + const auto e3_id = gen_id(3); - auto token_metadata = create_token_metadata(e1); - token_metadata->update_topology(e1, get_dc_rack(e1)); - token_metadata->update_topology(e2, get_dc_rack(e2)); - token_metadata->update_topology(e3, get_dc_rack(e3)); - token_metadata->update_normal_tokens({t1, t1000}, e2).get(); - token_metadata->update_normal_tokens({t10}, e3).get(); - token_metadata->add_bootstrap_token(t100, e1); + auto token_metadata = create_token_metadata(e1_id); + token_metadata->update_host_id(e1_id, e1); + token_metadata->update_host_id(e2_id, e2); + token_metadata->update_host_id(e3_id, e3); + token_metadata->update_topology(e1_id, get_dc_rack(e1_id)); + token_metadata->update_topology(e2_id, get_dc_rack(e2_id)); + token_metadata->update_topology(e3_id, get_dc_rack(e3_id)); + token_metadata->update_normal_tokens({t1, t1000}, e2_id).get(); + token_metadata->update_normal_tokens({t10}, e3_id).get(); + token_metadata->add_bootstrap_token(t100, e1_id); auto erm = create_erm(token_metadata, {{"replication_factor", "2"}}); BOOST_REQUIRE_EQUAL(erm->get_pending_endpoints(dht::token::from_int64(1)), @@ -126,15 +143,21 @@ SEASTAR_THREAD_TEST_CASE(test_pending_endpoints_for_leave_with_replicas) { const auto e1 = inet_address("192.168.0.1"); const auto e2 = inet_address("192.168.0.2"); const auto e3 = inet_address("192.168.0.3"); + const auto e1_id = gen_id(1); + const auto e2_id = gen_id(2); + const auto e3_id = gen_id(3); - auto token_metadata = create_token_metadata(e1); - token_metadata->update_topology(e1, get_dc_rack(e1)); - token_metadata->update_topology(e2, get_dc_rack(e2)); - token_metadata->update_topology(e3, get_dc_rack(e3)); - token_metadata->update_normal_tokens({t1, t1000}, e2).get(); - token_metadata->update_normal_tokens({t10}, e3).get(); - 
token_metadata->update_normal_tokens({t100}, e1).get(); - token_metadata->add_leaving_endpoint(e1); + auto token_metadata = create_token_metadata(e1_id); + token_metadata->update_host_id(e1_id, e1); + token_metadata->update_host_id(e2_id, e2); + token_metadata->update_host_id(e3_id, e3); + token_metadata->update_topology(e1_id, get_dc_rack(e1_id)); + token_metadata->update_topology(e2_id, get_dc_rack(e2_id)); + token_metadata->update_topology(e3_id, get_dc_rack(e3_id)); + token_metadata->update_normal_tokens({t1, t1000}, e2_id).get(); + token_metadata->update_normal_tokens({t10}, e3_id).get(); + token_metadata->update_normal_tokens({t100}, e1_id).get(); + token_metadata->add_leaving_endpoint(e1_id); auto erm = create_erm(token_metadata, {{"replication_factor", "2"}}); BOOST_REQUIRE_EQUAL(erm->get_pending_endpoints(dht::token::from_int64(1)), @@ -158,16 +181,24 @@ SEASTAR_THREAD_TEST_CASE(test_pending_endpoints_for_replace_with_replicas) { const auto e2 = inet_address("192.168.0.2"); const auto e3 = inet_address("192.168.0.3"); const auto e4 = inet_address("192.168.0.4"); + const auto e1_id = gen_id(1); + const auto e2_id = gen_id(2); + const auto e3_id = gen_id(3); + const auto e4_id = gen_id(4); - auto token_metadata = create_token_metadata(e1); - token_metadata->update_topology(e1, get_dc_rack(e1)); - token_metadata->update_topology(e2, get_dc_rack(e2)); - token_metadata->update_topology(e3, get_dc_rack(e3)); - token_metadata->update_topology(e4, get_dc_rack(e4)); - token_metadata->update_normal_tokens({t1000}, e1).get(); - token_metadata->update_normal_tokens({t1, t100}, e2).get(); - token_metadata->update_normal_tokens({t10}, e3).get(); - token_metadata->add_replacing_endpoint(e3, e4); + auto token_metadata = create_token_metadata(e1_id); + token_metadata->update_host_id(e1_id, e1); + token_metadata->update_host_id(e2_id, e2); + token_metadata->update_host_id(e3_id, e3); + token_metadata->update_host_id(e4_id, e4); + token_metadata->update_topology(e1_id, 
get_dc_rack(e1_id)); + token_metadata->update_topology(e2_id, get_dc_rack(e2_id)); + token_metadata->update_topology(e3_id, get_dc_rack(e3_id)); + token_metadata->update_topology(e4_id, get_dc_rack(e4_id)); + token_metadata->update_normal_tokens({t1000}, e1_id).get(); + token_metadata->update_normal_tokens({t1, t100}, e2_id).get(); + token_metadata->update_normal_tokens({t10}, e3_id).get(); + token_metadata->add_replacing_endpoint(e3_id, e4_id); auto erm = create_erm(token_metadata, {{"replication_factor", "2"}}); BOOST_REQUIRE_EQUAL(erm->get_pending_endpoints(dht::token::from_int64(100)), @@ -194,14 +225,20 @@ SEASTAR_THREAD_TEST_CASE(test_endpoints_for_reading_when_bootstrap_with_replicas const auto e1 = inet_address("192.168.0.1"); const auto e2 = inet_address("192.168.0.2"); const auto e3 = inet_address("192.168.0.3"); + const auto e1_id = gen_id(1); + const auto e2_id = gen_id(2); + const auto e3_id = gen_id(3); - auto token_metadata = create_token_metadata(e1); - token_metadata->update_topology(e1, get_dc_rack(e1)); - token_metadata->update_topology(e2, get_dc_rack(e2)); - token_metadata->update_topology(e3, get_dc_rack(e3)); - token_metadata->update_normal_tokens({t1, t1000}, e2).get(); - token_metadata->update_normal_tokens({t10}, e3).get(); - token_metadata->add_bootstrap_token(t100, e1); + auto token_metadata = create_token_metadata(e1_id); + token_metadata->update_host_id(e1_id, e1); + token_metadata->update_host_id(e2_id, e2); + token_metadata->update_host_id(e3_id, e3); + token_metadata->update_topology(e1_id, get_dc_rack(e1_id)); + token_metadata->update_topology(e2_id, get_dc_rack(e2_id)); + token_metadata->update_topology(e3_id, get_dc_rack(e3_id)); + token_metadata->update_normal_tokens({t1, t1000}, e2_id).get(); + token_metadata->update_normal_tokens({t10}, e3_id).get(); + token_metadata->add_bootstrap_token(t100, e1_id); auto check_endpoints = [](mutable_vnode_erm_ptr erm, int64_t t, inet_address_vector_replica_set expected_replicas, @@ -246,14 
+283,24 @@ SEASTAR_THREAD_TEST_CASE(test_endpoints_for_reading_when_bootstrap_with_replicas SEASTAR_THREAD_TEST_CASE(test_replace_node_with_same_endpoint) { const auto t1 = dht::token::from_int64(1); const auto e1 = inet_address("192.168.0.1"); + const auto e1_id1 = gen_id(1); + const auto e1_id2 = gen_id(2); - auto token_metadata = create_token_metadata(e1); - token_metadata->update_topology(e1, get_dc_rack(e1)); - token_metadata->update_normal_tokens({t1}, e1).get(); - token_metadata->add_replacing_endpoint(e1, e1); + auto token_metadata = create_token_metadata(e1_id2); + token_metadata->update_host_id(e1_id1, e1); + token_metadata->update_topology(e1_id1, get_dc_rack(e1_id1), node::state::being_replaced); + token_metadata->update_normal_tokens({t1}, e1_id1).get(); + + token_metadata->update_topology(e1_id2, get_dc_rack(e1_id2), node::state::replacing); + token_metadata->update_host_id(e1_id2, e1); + + token_metadata->add_replacing_endpoint(e1_id1, e1_id2); auto erm = create_erm(token_metadata, {{"replication_factor", "2"}}); + BOOST_REQUIRE_EQUAL(token_metadata->get_host_id(e1), e1_id1); BOOST_REQUIRE_EQUAL(erm->get_pending_endpoints(dht::token::from_int64(1)), inet_address_vector_topology_change{e1}); - BOOST_REQUIRE_EQUAL(token_metadata->get_endpoint(t1), e1); + BOOST_REQUIRE_EQUAL(erm->get_natural_endpoints_without_node_being_replaced(dht::token::from_int64(1)), + inet_address_vector_replica_set{}); + BOOST_REQUIRE_EQUAL(token_metadata->get_endpoint(t1), e1_id1); } From d9283bd025b4a4e525137fa8f077ff7517ba0173 Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Thu, 2 Nov 2023 13:15:32 +0400 Subject: [PATCH 30/51] tablets: switch to token_metadata2 locator_topology_test, network_topology_strategy_test and tablets_test are fully switched to the host_id-based token_metadata, meaning they no longer populate the old token_metadata. All the boost and topology tests pass with this change. 
--- locator/abstract_replication_strategy.cc | 4 +- locator/load_sketch.hh | 4 +- locator/network_topology_strategy.cc | 15 +- locator/tablet_metadata_guard.hh | 2 +- locator/tablet_sharder.hh | 4 +- locator/tablets.cc | 20 +- replica/table.cc | 2 +- service/storage_service.cc | 8 +- service/tablet_allocator.cc | 10 +- service/tablet_allocator.hh | 2 +- test/boost/locator_topology_test.cc | 19 +- test/boost/network_topology_strategy_test.cc | 190 +++++++++---------- test/boost/tablets_test.cc | 185 +++++++++--------- 13 files changed, 225 insertions(+), 240 deletions(-) diff --git a/locator/abstract_replication_strategy.cc b/locator/abstract_replication_strategy.cc index f66e10d8bc..f06a4924e4 100644 --- a/locator/abstract_replication_strategy.cc +++ b/locator/abstract_replication_strategy.cc @@ -221,7 +221,7 @@ insert_token_range_to_sorted_container_while_unwrapping( dht::token_range_vector vnode_effective_replication_map::do_get_ranges(noncopyable_function consider_range_for_endpoint) const { dht::token_range_vector ret; - const auto& tm = *_tmptr; + const auto& tm = *_tmptr->get_new(); const auto& sorted_tokens = tm.sorted_tokens(); if (sorted_tokens.empty()) { on_internal_error(rslogger, "Token metadata is empty"); @@ -305,7 +305,7 @@ vnode_effective_replication_map::get_primary_ranges(inet_address ep) const { dht::token_range_vector vnode_effective_replication_map::get_primary_ranges_within_dc(inet_address ep) const { - const topology& topo = _tmptr->get_topology(); + const topology& topo = _tmptr->get_new()->get_topology(); sstring local_dc = topo.get_datacenter(ep); std::unordered_set local_dc_nodes = topo.get_datacenter_endpoints().at(local_dc); // The callback function below is called for each endpoint diff --git a/locator/load_sketch.hh b/locator/load_sketch.hh index df4ac4bbfe..fb663ad5aa 100644 --- a/locator/load_sketch.hh +++ b/locator/load_sketch.hh @@ -56,7 +56,7 @@ class load_sketch { } }; std::unordered_map _nodes; - token_metadata_ptr _tm; + 
token_metadata2_ptr _tm; private: tablet_replica_set get_replicas_for_tablet_load(const tablet_info& ti, const tablet_transition_info* trinfo) const { // We reflect migrations in the load as if they already happened, @@ -65,7 +65,7 @@ private: } public: - load_sketch(token_metadata_ptr tm) + load_sketch(token_metadata2_ptr tm) : _tm(std::move(tm)) { } diff --git a/locator/network_topology_strategy.cc b/locator/network_topology_strategy.cc index b2f7cc31ac..4a18963820 100644 --- a/locator/network_topology_strategy.cc +++ b/locator/network_topology_strategy.cc @@ -300,23 +300,23 @@ future network_topology_strategy::allocate_tablets_for_new_table(sch } tablet_map tablets(tablet_count); - load_sketch load(tm); + load_sketch load(tm->get_new_strong()); co_await load.populate(); // FIXME: Don't use tokens to distribute nodes. // The following reuses the existing token-based algorithm used by NetworkTopologyStrategy. - assert(!tm->sorted_tokens().empty()); - auto token_range = tm->ring_range(dht::token::get_random_token()); + assert(!tm->get_new()->sorted_tokens().empty()); + auto token_range = tm->get_new()->ring_range(dht::token::get_random_token()); for (tablet_id tb : tablets.tablet_ids()) { - natural_endpoints_tracker tracker(*tm, _dc_rep_factor); + natural_endpoints_tracker tracker(*tm->get_new(), _dc_rep_factor); while (true) { co_await coroutine::maybe_yield(); if (token_range.begin() == token_range.end()) { - token_range = tm->ring_range(dht::minimum_token()); + token_range = tm->get_new()->ring_range(dht::minimum_token()); } - inet_address ep = *tm->get_endpoint(*token_range.begin()); + locator::host_id ep = *tm->get_new()->get_endpoint(*token_range.begin()); token_range.drop_front(); if (tracker.add_endpoint_and_check_if_done(ep)) { break; @@ -325,8 +325,7 @@ future network_topology_strategy::allocate_tablets_for_new_table(sch tablet_replica_set replicas; for (auto&& ep : tracker.replicas()) { - auto host = tm->get_host_id(ep); - 
replicas.emplace_back(tablet_replica{host, load.next_shard(host)}); + replicas.emplace_back(tablet_replica{ep, load.next_shard(ep)}); } tablets.set_tablet(tb, tablet_info{std::move(replicas)}); diff --git a/locator/tablet_metadata_guard.hh b/locator/tablet_metadata_guard.hh index 0731ea8faa..127a0ce137 100644 --- a/locator/tablet_metadata_guard.hh +++ b/locator/tablet_metadata_guard.hh @@ -52,7 +52,7 @@ public: /// Returns tablet_map for the table of the tablet associated with this guard. /// The result is valid until the next deferring point. const locator::tablet_map& get_tablet_map() { - return get_token_metadata()->tablets().get_tablet_map(_tablet.table); + return get_token_metadata()->get_new()->tablets().get_tablet_map(_tablet.table); } }; diff --git a/locator/tablet_sharder.hh b/locator/tablet_sharder.hh index b133d272db..74a5c1fdd5 100644 --- a/locator/tablet_sharder.hh +++ b/locator/tablet_sharder.hh @@ -17,7 +17,7 @@ namespace locator { /// Implements sharder object which reflects assignment of tablets of a given table to local shards. /// Token ranges which don't have local tablets are reported to belong to shard 0. 
class tablet_sharder : public dht::sharder { - const token_metadata& _tm; + const token_metadata2& _tm; table_id _table; mutable const tablet_map* _tmap = nullptr; private: @@ -29,7 +29,7 @@ private: } } public: - tablet_sharder(const token_metadata& tm, table_id table) + tablet_sharder(const token_metadata2& tm, table_id table) : _tm(tm) , _table(table) { } diff --git a/locator/tablets.cc b/locator/tablets.cc index 3b731324b0..38a55852fa 100644 --- a/locator/tablets.cc +++ b/locator/tablets.cc @@ -115,7 +115,7 @@ const tablet_map& tablet_metadata::get_tablet_map(table_id id) const { try { return _tablets.at(id); } catch (const std::out_of_range&) { - throw std::runtime_error(format("Tablet map not found for table {}", id)); + throw_with_backtrace(format("Tablet map not found for table {}", id)); } } @@ -338,12 +338,12 @@ private: inet_address_vector_replica_set result; result.reserve(replicas.size()); for (auto&& replica : replicas) { - result.emplace_back(_tmptr->get_endpoint_for_host_id(replica.host)); + result.emplace_back(_tmptr->get_new()->get_endpoint_for_host_id(replica.host)); } return result; } const tablet_map& get_tablet_map() const { - return _tmptr->tablets().get_tablet_map(_table); + return _tmptr->get_new()->tablets().get_tablet_map(_table); } public: tablet_effective_replication_map(table_id table, @@ -352,7 +352,7 @@ public: size_t replication_factor) : effective_replication_map(std::move(rs), std::move(tmptr), replication_factor) , _table(table) - , _sharder(*_tmptr, table) + , _sharder(*_tmptr->get_new(), table) { } virtual ~tablet_effective_replication_map() = default; @@ -399,7 +399,7 @@ public: case write_replica_set_selector::both: tablet_logger.trace("get_pending_endpoints({}): table={}, tablet={}, replica={}", search_token, _table, tablet, info->pending_replica); - return {_tmptr->get_endpoint_for_host_id(info->pending_replica.host)}; + return {_tmptr->get_new()->get_endpoint_for_host_id(info->pending_replica.host)}; case 
write_replica_set_selector::next: return {}; } @@ -466,7 +466,7 @@ public: } virtual bool has_pending_ranges(inet_address endpoint) const override { - const auto host_id = _tmptr->get_host_id_if_known(endpoint); + const auto host_id = _tmptr->get_new()->get_host_id_if_known(endpoint); if (!host_id.has_value()) { return false; } @@ -480,11 +480,11 @@ public: virtual std::unique_ptr make_splitter() const override { class splitter : public token_range_splitter { - token_metadata_ptr _tmptr; // To keep the tablet map alive. + token_metadata2_ptr _tmptr; // To keep the tablet map alive. const tablet_map& _tmap; std::optional _next; public: - splitter(token_metadata_ptr tmptr, const tablet_map& tmap) + splitter(token_metadata2_ptr tmptr, const tablet_map& tmap) : _tmptr(std::move(tmptr)) , _tmap(tmap) { } @@ -502,7 +502,7 @@ public: return t; } }; - return std::make_unique(_tmptr, get_tablet_map()); + return std::make_unique(_tmptr->get_new_strong(), get_tablet_map()); } const dht::sharder& get_sharder(const schema& s) const override { @@ -554,7 +554,7 @@ effective_replication_map_ptr tablet_aware_replication_strategy::do_make_replica void tablet_metadata_guard::check() noexcept { auto erm = _table->get_effective_replication_map(); - auto& tmap = erm->get_token_metadata_ptr()->tablets().get_tablet_map(_tablet.table); + auto& tmap = erm->get_token_metadata_ptr()->get_new()->tablets().get_tablet_map(_tablet.table); auto* trinfo = tmap.get_tablet_transition_info(_tablet.tablet); if (bool(_stage) != bool(trinfo) || (_stage && _stage != trinfo->stage)) { _abort_source.request_abort(); diff --git a/replica/table.cc b/replica/table.cc index 2488a507b7..2da718f9fd 100644 --- a/replica/table.cc +++ b/replica/table.cc @@ -569,7 +569,7 @@ private: const locator::tablet_map& tablet_map() const { // FIXME: cheaper way to retrieve tablet_map than looking up every time in tablet_metadata's map. 
auto& tm = erm()->get_token_metadata(); - return tm.tablets().get_tablet_map(schema()->id()); + return tm.get_new()->tablets().get_tablet_map(schema()->id()); } public: tablet_compaction_group_manager(replica::table& t) : _t(t) {} diff --git a/service/storage_service.cc b/service/storage_service.cc index a7afc1069e..ee1f63a677 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -1608,7 +1608,7 @@ class topology_coordinator { schema_ptr, locator::global_tablet_id, const locator::tablet_transition_info&)> func) { - auto tm = get_token_metadata_ptr(); + auto tm = get_token_metadata_ptr()->get_new(); for (auto&& [table, tmap] : tm->tablets().all_tables()) { co_await coroutine::maybe_yield(); auto s = _db.find_schema(table); @@ -1622,7 +1622,7 @@ class topology_coordinator { void generate_migration_update(std::vector& out, const group0_guard& guard, const tablet_migration_info& mig) { auto s = _db.find_schema(mig.tablet.table); - auto& tmap = get_token_metadata_ptr()->tablets().get_tablet_map(mig.tablet.table); + auto& tmap = get_token_metadata_ptr()->get_new()->tablets().get_tablet_map(mig.tablet.table); auto last_token = tmap.get_last_token(mig.tablet.tablet); if (tmap.get_tablet_transition_info(mig.tablet.tablet)) { slogger.warn("Tablet already in transition, ignoring migration: {}", mig); @@ -1781,7 +1781,7 @@ class topology_coordinator { } } if (!preempt) { - auto plan = co_await _tablet_allocator.balance_tablets(get_token_metadata_ptr()); + auto plan = co_await _tablet_allocator.balance_tablets(get_token_metadata_ptr()->get_new_strong()); if (!drain || plan.has_nodes_to_drain()) { co_await generate_migration_updates(updates, guard, plan); } @@ -2562,7 +2562,7 @@ future topology_coordinator::maybe_start_tablet_migration(group0_guard gua slogger.debug("raft topology: Evaluating tablet balance"); auto tm = get_token_metadata_ptr(); - auto plan = co_await _tablet_allocator.balance_tablets(tm); + auto plan = co_await 
_tablet_allocator.balance_tablets(tm->get_new_strong()); if (plan.empty()) { slogger.debug("raft topology: Tablets are balanced"); co_return false; diff --git a/service/tablet_allocator.cc b/service/tablet_allocator.cc index b29e825ede..e66f9127c9 100644 --- a/service/tablet_allocator.cc +++ b/service/tablet_allocator.cc @@ -199,7 +199,7 @@ class load_balancer { std::optional target_load_sketch; - future get_load_sketch(const token_metadata_ptr& tm) { + future get_load_sketch(const token_metadata2_ptr& tm) { if (!target_load_sketch) { target_load_sketch.emplace(tm); co_await target_load_sketch->populate(id); @@ -255,7 +255,7 @@ class load_balancer { const size_t max_write_streaming_load = 2; const size_t max_read_streaming_load = 4; - token_metadata_ptr _tm; + token_metadata2_ptr _tm; load_balancer_stats_manager& _stats; private: tablet_replica_set get_replicas_for_tablet_load(const tablet_info& ti, const tablet_transition_info* trinfo) const { @@ -290,7 +290,7 @@ private: } public: - load_balancer(token_metadata_ptr tm, load_balancer_stats_manager& stats) + load_balancer(token_metadata2_ptr tm, load_balancer_stats_manager& stats) : _tm(std::move(tm)) , _stats(stats) { } @@ -819,7 +819,7 @@ public: _stopped = true; } - future balance_tablets(token_metadata_ptr tm) { + future balance_tablets(token_metadata2_ptr tm) { load_balancer lb(tm, _load_balancer_stats); co_return co_await lb.make_plan(); } @@ -869,7 +869,7 @@ future<> tablet_allocator::stop() { return impl().stop(); } -future tablet_allocator::balance_tablets(locator::token_metadata_ptr tm) { +future tablet_allocator::balance_tablets(locator::token_metadata2_ptr tm) { return impl().balance_tablets(tm); } diff --git a/service/tablet_allocator.hh b/service/tablet_allocator.hh index 4402f40cb1..3b90e6d208 100644 --- a/service/tablet_allocator.hh +++ b/service/tablet_allocator.hh @@ -90,7 +90,7 @@ public: /// /// The algorithm takes care of limiting the streaming load on the system, also by taking active 
migrations into account. /// - future balance_tablets(locator::token_metadata_ptr); + future balance_tablets(locator::token_metadata2_ptr); /// Should be called when the node is no longer a leader. void on_leadership_lost(); diff --git a/test/boost/locator_topology_test.cc b/test/boost/locator_topology_test.cc index 5245e96825..730f33b8dd 100644 --- a/test/boost/locator_topology_test.cc +++ b/test/boost/locator_topology_test.cc @@ -263,23 +263,24 @@ SEASTAR_THREAD_TEST_CASE(test_load_sketch) { shared_token_metadata stm([&sem] () noexcept { return get_units(sem, 1); }, locator::token_metadata::config{ topology::config{ .this_endpoint = ip1, + .this_host_id = host1 } }); stm.mutate_token_metadata([&] (token_metadata& tm) { - tm.update_host_id(host1, ip1); - tm.update_host_id(host2, ip2); - tm.update_host_id(host3, ip3); - tm.update_topology(ip1, locator::endpoint_dc_rack::default_location, std::nullopt, node1_shard_count); - tm.update_topology(ip2, locator::endpoint_dc_rack::default_location, std::nullopt, node2_shard_count); - tm.update_topology(ip3, locator::endpoint_dc_rack::default_location, std::nullopt, node3_shard_count); + tm.get_new()->update_host_id(host1, ip1); + tm.get_new()->update_host_id(host2, ip2); + tm.get_new()->update_host_id(host3, ip3); + tm.get_new()->update_topology(host1, locator::endpoint_dc_rack::default_location, std::nullopt, node1_shard_count); + tm.get_new()->update_topology(host2, locator::endpoint_dc_rack::default_location, std::nullopt, node2_shard_count); + tm.get_new()->update_topology(host3, locator::endpoint_dc_rack::default_location, std::nullopt, node3_shard_count); return make_ready_future<>(); }).get(); // Check that allocation is even when starting from empty state { auto tm = stm.get(); - load_sketch load(tm); + load_sketch load(tm->get_new_strong()); load.populate().get(); std::vector node1_shards(node1_shard_count, 0); @@ -341,13 +342,13 @@ SEASTAR_THREAD_TEST_CASE(test_load_sketch) { auto table = 
table_id(utils::make_random_uuid()); tab_meta.set_tablet_map(table, tmap); - tm.set_tablets(std::move(tab_meta)); + tm.get_new()->set_tablets(std::move(tab_meta)); return make_ready_future<>(); }).get(); { auto tm = stm.get(); - load_sketch load(tm); + load_sketch load(tm->get_new_strong()); load.populate().get(); // host3 has max shard load of 3 and 3 shards, and 4 tablets allocated. diff --git a/test/boost/network_topology_strategy_test.cc b/test/boost/network_topology_strategy_test.cc index c37016ddc5..eeb6ed8896 100644 --- a/test/boost/network_topology_strategy_test.cc +++ b/test/boost/network_topology_strategy_test.cc @@ -72,7 +72,7 @@ static void check_ranges_are_sorted(vnode_effective_replication_map_ptr erm, gms void strategy_sanity_check( replication_strategy_ptr ars_ptr, - const token_metadata& tm, + const token_metadata2_ptr& tm, const std::map& options) { const network_topology_strategy* nts_ptr = @@ -90,16 +90,16 @@ void strategy_sanity_check( total_rf += rf; } - BOOST_CHECK(ars_ptr->get_replication_factor(tm) == total_rf); + BOOST_CHECK(ars_ptr->get_replication_factor(token_metadata(tm)) == total_rf); } void endpoints_check( replication_strategy_ptr ars_ptr, - const token_metadata& tm, + const token_metadata2_ptr& tm, const inet_address_vector_replica_set& endpoints, const locator::topology& topo) { - auto&& nodes_per_dc = tm.get_topology().get_datacenter_endpoints(); + auto&& nodes_per_dc = tm->get_topology().get_datacenter_endpoints(); const network_topology_strategy* nts_ptr = dynamic_cast(ars_ptr.get()); @@ -111,7 +111,7 @@ void endpoints_check( // Check the total RF BOOST_CHECK(endpoints.size() == total_rf); - BOOST_CHECK(total_rf <= ars_ptr->get_replication_factor(tm)); + BOOST_CHECK(total_rf <= ars_ptr->get_replication_factor(token_metadata(tm))); // Check the uniqueness std::unordered_set ep_set(endpoints.begin(), endpoints.end()); @@ -156,19 +156,19 @@ auto d2t = [](double d) -> int64_t { void full_ring_check(const std::vector& ring_points, 
const std::map& options, replication_strategy_ptr ars_ptr, - locator::token_metadata_ptr tmptr) { + locator::token_metadata2_ptr tmptr) { auto& tm = *tmptr; const auto& topo = tm.get_topology(); - strategy_sanity_check(ars_ptr, tm, options); + strategy_sanity_check(ars_ptr, tmptr, options); - auto erm = calculate_effective_replication_map(ars_ptr, tmptr).get0(); + auto erm = calculate_effective_replication_map(ars_ptr, make_token_metadata_ptr(tmptr)).get0(); for (auto& rp : ring_points) { double cur_point1 = rp.point - 0.5; token t1(dht::token::kind::key, d2t(cur_point1 / ring_points.size())); auto endpoints1 = erm->get_natural_endpoints(t1); - endpoints_check(ars_ptr, tm, endpoints1, topo); + endpoints_check(ars_ptr, tmptr, endpoints1, topo); print_natural_endpoints(cur_point1, endpoints1); @@ -181,7 +181,7 @@ void full_ring_check(const std::vector& ring_points, token t2(dht::token::kind::key, d2t(cur_point2 / ring_points.size())); auto endpoints2 = erm->get_natural_endpoints(t2); - endpoints_check(ars_ptr, tm, endpoints2, topo); + endpoints_check(ars_ptr, tmptr, endpoints2, topo); check_ranges_are_sorted(erm, rp.host); BOOST_CHECK(endpoints1 == endpoints2); } @@ -190,7 +190,7 @@ void full_ring_check(const std::vector& ring_points, void full_ring_check(const tablet_map& tmap, const std::map& options, replication_strategy_ptr rs_ptr, - locator::token_metadata_ptr tmptr) { + locator::token_metadata2_ptr tmptr) { auto& tm = *tmptr; const auto& topo = tm.get_topology(); @@ -204,7 +204,7 @@ void full_ring_check(const tablet_map& tmap, }; for (tablet_id tb : tmap.tablet_ids()) { - endpoints_check(rs_ptr, tm, to_endpoint_set(tmap.get_tablet_info(tb).replicas), topo); + endpoints_check(rs_ptr, tmptr, to_endpoint_set(tmap.get_tablet_info(tb).replicas), topo); } } @@ -251,21 +251,13 @@ void simple_test() { // Initialize the token_metadata stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> { - auto update_tm = [&](generic_token_metadata& tm) -> future<> { - auto& 
topo = tm.get_topology(); - for (const auto& [ring_point, endpoint, id] : ring_points) { - std::unordered_set tokens; - tokens.insert({dht::token::kind::key, d2t(ring_point / ring_points.size())}); - topo.add_node(id, endpoint, make_endpoint_dc_rack(endpoint), locator::node::state::normal); - if constexpr(std::is_same_v) { - co_await tm.update_normal_tokens(std::move(tokens), endpoint); - } else { - co_await tm.update_normal_tokens(std::move(tokens), id); - } - } - }; - co_await update_tm(tm); - co_await update_tm(*tm.get_new()); + auto& topo = tm.get_new()->get_topology(); + for (const auto& [ring_point, endpoint, id] : ring_points) { + std::unordered_set tokens; + tokens.insert({dht::token::kind::key, d2t(ring_point / ring_points.size())}); + topo.add_node(id, endpoint, make_endpoint_dc_rack(endpoint), locator::node::state::normal); + co_await tm.get_new()->update_normal_tokens(std::move(tokens), id); + } }).get(); ///////////////////////////////////// @@ -279,7 +271,7 @@ void simple_test() { auto ars_ptr = abstract_replication_strategy::create_replication_strategy( "NetworkTopologyStrategy", options323); - full_ring_check(ring_points, options323, ars_ptr, stm.get()); + full_ring_check(ring_points, options323, ars_ptr, stm.get()->get_new_strong()); /////////////// // Create the replication strategy @@ -292,7 +284,7 @@ void simple_test() { ars_ptr = abstract_replication_strategy::create_replication_strategy( "NetworkTopologyStrategy", options320); - full_ring_check(ring_points, options320, ars_ptr, stm.get()); + full_ring_check(ring_points, options320, ars_ptr, stm.get()->get_new_strong()); // // Check cache invalidation: invalidate the cache and run a full ring @@ -301,11 +293,10 @@ void simple_test() { // corresponding check will fail. 
// stm.mutate_token_metadata([] (token_metadata& tm) { - tm.invalidate_cached_rings(); tm.get_new()->invalidate_cached_rings(); return make_ready_future<>(); }).get(); - full_ring_check(ring_points, options320, ars_ptr, stm.get()); + full_ring_check(ring_points, options320, ars_ptr, stm.get()->get_new_strong()); } // Run in a seastar thread. @@ -367,25 +358,17 @@ void heavy_origin_test() { } stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> { - auto update_tm = [&](generic_token_metadata& tm) -> future<> { - auto& topo = tm.get_topology(); - for (const auto& [ring_point, endpoint, id] : ring_points) { - topo.add_node(id, endpoint, make_endpoint_dc_rack(endpoint), locator::node::state::normal); - if constexpr (std::is_same_v) { - co_await tm.update_normal_tokens(tokens[endpoint], endpoint); - } else { - co_await tm.update_normal_tokens(tokens[endpoint], id); - } - } - }; - co_await update_tm(tm); - co_await update_tm(*tm.get_new()); + auto& topo = tm.get_new()->get_topology(); + for (const auto& [ring_point, endpoint, id] : ring_points) { + topo.add_node(id, endpoint, make_endpoint_dc_rack(endpoint), locator::node::state::normal); + co_await tm.get_new()->update_normal_tokens(tokens[endpoint], id); + } }).get(); auto ars_ptr = abstract_replication_strategy::create_replication_strategy( "NetworkTopologyStrategy", config_options); - full_ring_check(ring_points, config_options, ars_ptr, stm.get()); + full_ring_check(ring_points, config_options, ars_ptr, stm.get()->get_new_strong()); } @@ -431,13 +414,13 @@ SEASTAR_THREAD_TEST_CASE(NetworkTopologyStrategy_tablets_test) { // Initialize the token_metadata stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> { - auto& topo = tm.get_topology(); + auto& topo = tm.get_new()->get_topology(); for (const auto& [ring_point, endpoint, id] : ring_points) { std::unordered_set tokens; tokens.insert({dht::token::kind::key, d2t(ring_point / ring_points.size())}); topo.add_node(id, endpoint, 
make_endpoint_dc_rack(endpoint), locator::node::state::normal, 1); - tm.update_host_id(id, endpoint); - co_await tm.update_normal_tokens(std::move(tokens), endpoint); + tm.get_new()->update_host_id(id, endpoint); + co_await tm.get_new()->update_normal_tokens(std::move(tokens), id); } }).get(); @@ -462,7 +445,7 @@ SEASTAR_THREAD_TEST_CASE(NetworkTopologyStrategy_tablets_test) { .build(); auto tmap = tab_awr_ptr->allocate_tablets_for_new_table(s, stm.get()).get0(); - full_ring_check(tmap, options323, ars_ptr, stm.get()); + full_ring_check(tmap, options323, ars_ptr, stm.get()->get_new_strong()); /////////////// // Create the replication strategy @@ -479,7 +462,7 @@ SEASTAR_THREAD_TEST_CASE(NetworkTopologyStrategy_tablets_test) { BOOST_REQUIRE(tab_awr_ptr); tmap = tab_awr_ptr->allocate_tablets_for_new_table(s, stm.get()).get0(); - full_ring_check(tmap, options320, ars_ptr, stm.get()); + full_ring_check(tmap, options320, ars_ptr, stm.get()->get_new_strong()); // Test the case of not enough nodes to meet RF in DC 102 std::map options324 = { @@ -495,7 +478,7 @@ SEASTAR_THREAD_TEST_CASE(NetworkTopologyStrategy_tablets_test) { BOOST_REQUIRE(tab_awr_ptr); tmap = tab_awr_ptr->allocate_tablets_for_new_table(s, stm.get()).get0(); - full_ring_check(tmap, options324, ars_ptr, stm.get()); + full_ring_check(tmap, options324, ars_ptr, stm.get()->get_new_strong()); } /** @@ -508,7 +491,7 @@ static size_t get_replication_factor(const sstring& dc, } static bool has_sufficient_replicas(const sstring& dc, - const std::unordered_map>& dc_replicas, + const std::unordered_map>& dc_replicas, const std::unordered_map>& all_endpoints, const std::unordered_map& datacenters) noexcept { auto dc_replicas_it = dc_replicas.find(dc); @@ -526,7 +509,7 @@ static bool has_sufficient_replicas(const sstring& dc, } static bool has_sufficient_replicas( - const std::unordered_map>& dc_replicas, + const std::unordered_map>& dc_replicas, const std::unordered_map>& all_endpoints, const std::unordered_map& 
datacenters) noexcept { @@ -540,18 +523,18 @@ static bool has_sufficient_replicas( return true; } -static locator::endpoint_set calculate_natural_endpoints( - const token& search_token, const token_metadata& tm, +static locator::host_id_set calculate_natural_endpoints( + const token& search_token, const token_metadata2& tm, const locator::topology& topo, const std::unordered_map& datacenters) { // // We want to preserve insertion order so that the first added endpoint // becomes primary. // - locator::endpoint_set replicas; + locator::host_id_set replicas; // replicas we have found in each DC - std::unordered_map> dc_replicas; + std::unordered_map> dc_replicas; // tracks the racks we have already placed replicas in std::unordered_map> seen_racks; // @@ -559,7 +542,7 @@ static locator::endpoint_set calculate_natural_endpoints( // when we relax the rack uniqueness we can append this to the current // result so we don't have to wind back the iterator // - std::unordered_map + std::unordered_map skipped_dc_endpoints; // @@ -600,7 +583,7 @@ static locator::endpoint_set calculate_natural_endpoints( break; } - inet_address ep = *tm.get_endpoint(next); + host_id ep = *tm.get_endpoint(next); sstring dc = topo.get_location(ep).dc; auto& seen_racks_dc_set = seen_racks[dc]; @@ -639,7 +622,7 @@ static locator::endpoint_set calculate_natural_endpoints( auto skipped_it = skipped_dc_endpoints_set.begin(); while (skipped_it != skipped_dc_endpoints_set.end() && !has_sufficient_replicas(dc, dc_replicas, all_endpoints, datacenters)) { - inet_address skipped = *skipped_it++; + host_id skipped = *skipped_it++; dc_replicas_dc_set.insert(skipped); replicas.push_back(skipped); } @@ -667,25 +650,25 @@ static void test_equivalence(const shared_token_metadata& stm, const locator::to return std::make_pair(p.first, to_sstring(p.second)); }))); - const token_metadata& tm = *stm.get(); + const token_metadata2& tm = *stm.get()->get_new(); for (size_t i = 0; i < 1000; ++i) { auto token = 
dht::token::get_random_token(); auto expected = calculate_natural_endpoints(token, tm, topo, datacenters); - auto actual = get(nts.calculate_natural_endpoints(token, tm, false).get0()); + auto actual = get(nts.calculate_natural_endpoints(token, token_metadata(stm.get()->get_new_strong()), true).get0()); // Because the old algorithm does not put the nodes in the correct order in the case where more replicas // are required than there are racks in a dc, we accept different order as long as the primary // replica is the same. BOOST_REQUIRE_EQUAL(expected[0], actual[0]); - BOOST_REQUIRE_EQUAL(std::set(expected.begin(), expected.end()), - std::set(actual.begin(), actual.end())); + BOOST_REQUIRE_EQUAL(std::set(expected.begin(), expected.end()), + std::set(actual.begin(), actual.end())); } } -void generate_topology(topology& topo, const std::unordered_map datacenters, const std::vector& nodes) { +void generate_topology(topology& topo, const std::unordered_map datacenters, const std::vector& nodes) { auto& e1 = seastar::testing::local_random_engine; std::unordered_map racks_per_dc; @@ -705,11 +688,12 @@ void generate_topology(topology& topo, const std::unordered_map out = std::fill_n(out, rf, std::cref(dc)); } + unsigned i = 0; for (auto& node : nodes) { const sstring& dc = dcs[udist(0, dcs.size() - 1)(e1)]; auto rc = racks_per_dc.at(dc); auto r = udist(0, rc)(e1); - topo.add_node(host_id::create_random_id(), node, {dc, to_sstring(r)}, locator::node::state::normal); + topo.add_node(node, inet_address((127u << 24) | ++i), {dc, to_sstring(r)}, locator::node::state::normal); } } @@ -730,10 +714,10 @@ SEASTAR_THREAD_TEST_CASE(testCalculateEndpoints) { { "rf5_2", 5 }, { "rf5_3", 5 }, }; - std::vector nodes; + std::vector nodes; nodes.reserve(NODES); std::generate_n(std::back_inserter(nodes), NODES, [i = 0u]() mutable { - return inet_address((127u << 24) | ++i); + return host_id{utils::UUID(0, ++i)}; }); for (size_t run = 0; run < RUNS; ++run) { @@ -744,7 +728,7 @@ 
SEASTAR_THREAD_TEST_CASE(testCalculateEndpoints) { while (random_tokens.size() < nodes.size() * VNODES) { random_tokens.insert(dht::token::get_random_token()); } - std::unordered_map> endpoint_tokens; + std::unordered_map> endpoint_tokens; auto next_token_it = random_tokens.begin(); for (auto& node : nodes) { for (size_t i = 0; i < VNODES; ++i) { @@ -752,14 +736,14 @@ SEASTAR_THREAD_TEST_CASE(testCalculateEndpoints) { next_token_it++; } } - + stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> { - generate_topology(tm.get_topology(), datacenters, nodes); + generate_topology(tm.get_new()->get_topology(), datacenters, nodes); for (auto&& i : endpoint_tokens) { - co_await tm.update_normal_tokens(std::move(i.second), i.first); + co_await tm.get_new()->update_normal_tokens(std::move(i.second), i.first); } }).get(); - test_equivalence(stm, stm.get()->get_topology(), datacenters); + test_equivalence(stm, stm.get()->get_new()->get_topology(), datacenters); } } @@ -837,27 +821,27 @@ SEASTAR_THREAD_TEST_CASE(test_topology_compare_endpoints) { { "rf2", 2 }, { "rf3", 3 }, }; - std::vector nodes; + std::vector nodes; nodes.reserve(NODES); auto make_address = [] (unsigned i) { - return inet_address((127u << 24) | i); + return host_id{utils::UUID(0, i)}; }; std::generate_n(std::back_inserter(nodes), NODES, [&, i = 0u]() mutable { return make_address(++i); }); - auto bogus_address = make_address(NODES + 1); + auto bogus_address = inet_address((127u << 24) | static_cast(NODES + 1)); semaphore sem(1); shared_token_metadata stm([&sem] () noexcept { return get_units(sem, 1); }, tm_cfg); stm.mutate_token_metadata([&] (token_metadata& tm) { - auto& topo = tm.get_topology(); + auto& topo = tm.get_new()->get_topology(); generate_topology(topo, datacenters, nodes); - const auto& address = nodes[tests::random::get_int(0, NODES-1)]; - const auto& a1 = nodes[tests::random::get_int(0, NODES-1)]; - const auto& a2 = nodes[tests::random::get_int(0, NODES-1)]; + const auto& address = 
tm.get_new()->get_endpoint_for_host_id(nodes[tests::random::get_int(0, NODES-1)]); + const auto& a1 = tm.get_new()->get_endpoint_for_host_id(nodes[tests::random::get_int(0, NODES-1)]); + const auto& a2 = tm.get_new()->get_endpoint_for_host_id(nodes[tests::random::get_int(0, NODES-1)]); topo.test_compare_endpoints(address, address, address); topo.test_compare_endpoints(address, address, a1); @@ -896,23 +880,23 @@ SEASTAR_THREAD_TEST_CASE(test_topology_tracks_local_node) { // get_location() should work before any node is added - BOOST_REQUIRE(stm.get()->get_topology().get_location() == ip1_dc_rack); + BOOST_REQUIRE(stm.get()->get_new()->get_topology().get_location() == ip1_dc_rack); stm.mutate_token_metadata([&] (token_metadata& tm) { - tm.update_host_id(host2, ip2); - tm.update_host_id(host1, ip1); // this_node added last on purpose + tm.get_new()->update_host_id(host2, ip2); + tm.get_new()->update_host_id(host1, ip1); // this_node added last on purpose return make_ready_future<>(); }).get(); - const node* n1 = stm.get()->get_topology().find_node(host1); + const node* n1 = stm.get()->get_new()->get_topology().find_node(host1); BOOST_REQUIRE(n1); BOOST_REQUIRE(bool(n1->is_this_node())); BOOST_REQUIRE_EQUAL(n1->host_id(), host1); BOOST_REQUIRE_EQUAL(n1->endpoint(), ip1); BOOST_REQUIRE(n1->dc_rack() == ip1_dc_rack); - BOOST_REQUIRE(stm.get()->get_topology().get_location() == ip1_dc_rack); + BOOST_REQUIRE(stm.get()->get_new()->get_topology().get_location() == ip1_dc_rack); - const node* n2 = stm.get()->get_topology().find_node(host2); + const node* n2 = stm.get()->get_new()->get_topology().find_node(host2); BOOST_REQUIRE(n2); BOOST_REQUIRE(!bool(n2->is_this_node())); BOOST_REQUIRE_EQUAL(n2->host_id(), host2); @@ -922,45 +906,45 @@ SEASTAR_THREAD_TEST_CASE(test_topology_tracks_local_node) { // Removing local node stm.mutate_token_metadata([&] (token_metadata& tm) { - tm.remove_endpoint(ip1); - tm.update_host_id(host3, ip3); + tm.get_new()->remove_endpoint(host1); + 
tm.get_new()->update_host_id(host3, ip3); return make_ready_future<>(); }).get(); - n1 = stm.get()->get_topology().find_node(host1); + n1 = stm.get()->get_new()->get_topology().find_node(host1); BOOST_REQUIRE(!n1); - n1 = stm.get()->get_topology().find_node(ip1); + n1 = stm.get()->get_new()->get_topology().find_node(ip1); BOOST_REQUIRE(!n1); // Removing node with no local node stm.mutate_token_metadata([&] (token_metadata& tm) { - tm.remove_endpoint(ip2); + tm.get_new()->remove_endpoint(host2); return make_ready_future<>(); }).get(); - n2 = stm.get()->get_topology().find_node(host2); + n2 = stm.get()->get_new()->get_topology().find_node(host2); BOOST_REQUIRE(!n2); - n2 = stm.get()->get_topology().find_node(ip2); + n2 = stm.get()->get_new()->get_topology().find_node(ip2); BOOST_REQUIRE(!n2); // Repopulate after clear_gently() stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> { - co_await tm.clear_gently(); - tm.update_host_id(host2, ip2); - tm.update_host_id(host1, ip1); // this_node added last on purpose + co_await tm.get_new()->clear_gently(); + tm.get_new()->update_host_id(host2, ip2); + tm.get_new()->update_host_id(host1, ip1); // this_node added last on purpose }).get(); - n1 = stm.get()->get_topology().find_node(host1); + n1 = stm.get()->get_new()->get_topology().find_node(host1); BOOST_REQUIRE(n1); BOOST_REQUIRE(bool(n1->is_this_node())); BOOST_REQUIRE_EQUAL(n1->host_id(), host1); BOOST_REQUIRE_EQUAL(n1->endpoint(), ip1); BOOST_REQUIRE(n1->dc_rack() == ip1_dc_rack); - BOOST_REQUIRE(stm.get()->get_topology().get_location() == ip1_dc_rack); + BOOST_REQUIRE(stm.get()->get_new()->get_topology().get_location() == ip1_dc_rack); - n2 = stm.get()->get_topology().find_node(host2); + n2 = stm.get()->get_new()->get_topology().find_node(host2); BOOST_REQUIRE(n2); BOOST_REQUIRE(!bool(n2->is_this_node())); BOOST_REQUIRE_EQUAL(n2->host_id(), host2); @@ -970,15 +954,15 @@ SEASTAR_THREAD_TEST_CASE(test_topology_tracks_local_node) { // get_location() should pick 
up endpoint_dc_rack from node info stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> { - co_await tm.clear_gently(); - tm.get_topology().add_or_update_endpoint(ip1, host1, ip1_dc_rack_v2, node::state::being_decommissioned); + co_await tm.get_new()->clear_gently(); + tm.get_new()->get_topology().add_or_update_endpoint(ip1, host1, ip1_dc_rack_v2, node::state::being_decommissioned); }).get(); - n1 = stm.get()->get_topology().find_node(host1); + n1 = stm.get()->get_new()->get_topology().find_node(host1); BOOST_REQUIRE(n1); BOOST_REQUIRE(bool(n1->is_this_node())); BOOST_REQUIRE_EQUAL(n1->host_id(), host1); BOOST_REQUIRE_EQUAL(n1->endpoint(), ip1); BOOST_REQUIRE(n1->dc_rack() == ip1_dc_rack_v2); - BOOST_REQUIRE(stm.get()->get_topology().get_location() == ip1_dc_rack_v2); + BOOST_REQUIRE(stm.get()->get_new()->get_topology().get_location() == ip1_dc_rack_v2); } diff --git a/test/boost/tablets_test.cc b/test/boost/tablets_test.cc index 754b216849..75430f33f5 100644 --- a/test/boost/tablets_test.cc +++ b/test/boost/tablets_test.cc @@ -433,7 +433,7 @@ SEASTAR_TEST_CASE(test_sharder) { auto table1 = table_id(utils::UUID_gen::get_time_UUID()); - token_metadata tokm(token_metadata::config{ .topo_cfg{ .this_host_id = h1 } }); + token_metadata2 tokm(token_metadata::config{ .topo_cfg{ .this_host_id = h1 } }); tokm.get_topology().add_or_update_endpoint(tokm.get_topology().my_address(), h1); std::vector tablet_ids; @@ -591,7 +591,7 @@ SEASTAR_THREAD_TEST_CASE(test_token_ownership_splitting) { // Reflects the plan in a given token metadata as if the migrations were fully executed. 
static -void apply_plan(token_metadata& tm, const migration_plan& plan) { +void apply_plan(token_metadata2& tm, const migration_plan& plan) { for (auto&& mig : plan.migrations()) { tablet_map& tmap = tm.tablets().get_tablet_map(mig.tablet.table); auto tinfo = tmap.get_tablet_info(mig.tablet.tablet); @@ -611,7 +611,7 @@ tablet_transition_info migration_to_transition_info(const tablet_migration_info& // Reflects the plan in a given token metadata as if the migrations were started but not yet executed. static -void apply_plan_as_in_progress(token_metadata& tm, const migration_plan& plan) { +void apply_plan_as_in_progress(token_metadata2& tm, const migration_plan& plan) { for (auto&& mig : plan.migrations()) { tablet_map& tmap = tm.tablets().get_tablet_map(mig.tablet.table); auto tinfo = tmap.get_tablet_info(mig.tablet.tablet); @@ -622,12 +622,12 @@ void apply_plan_as_in_progress(token_metadata& tm, const migration_plan& plan) { static void rebalance_tablets(tablet_allocator& talloc, shared_token_metadata& stm) { while (true) { - auto plan = talloc.balance_tablets(stm.get()).get0(); + auto plan = talloc.balance_tablets(stm.get()->get_new_strong()).get0(); if (plan.empty()) { break; } stm.mutate_token_metadata([&] (token_metadata& tm) { - apply_plan(tm, plan); + apply_plan(*tm.get_new(), plan); return make_ready_future<>(); }).get(); } @@ -636,12 +636,12 @@ void rebalance_tablets(tablet_allocator& talloc, shared_token_metadata& stm) { static void rebalance_tablets_as_in_progress(tablet_allocator& talloc, shared_token_metadata& stm) { while (true) { - auto plan = talloc.balance_tablets(stm.get()).get0(); + auto plan = talloc.balance_tablets(stm.get()->get_new_strong()).get0(); if (plan.empty()) { break; } stm.mutate_token_metadata([&] (token_metadata& tm) { - apply_plan_as_in_progress(tm, plan); + apply_plan_as_in_progress(*tm.get_new(), plan); return make_ready_future<>(); }).get(); } @@ -651,7 +651,7 @@ void rebalance_tablets_as_in_progress(tablet_allocator& talloc, 
shared_token_met static void execute_transitions(shared_token_metadata& stm) { stm.mutate_token_metadata([&] (token_metadata& tm) { - for (auto&& [tablet, tmap_] : tm.tablets().all_tables()) { + for (auto&& [tablet, tmap_] : tm.get_new()->tablets().all_tables()) { auto& tmap = tmap_; for (auto&& [tablet, trinfo]: tmap.transitions()) { auto ti = tmap.get_tablet_info(tablet); @@ -690,12 +690,12 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_with_empty_node) { }); stm.mutate_token_metadata([&] (auto& tm) { - tm.update_host_id(host1, ip1); - tm.update_host_id(host2, ip2); - tm.update_host_id(host3, ip3); - tm.update_topology(ip1, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); - tm.update_topology(ip2, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); - tm.update_topology(ip3, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); + tm.get_new()->update_host_id(host1, ip1); + tm.get_new()->update_host_id(host2, ip2); + tm.get_new()->update_host_id(host3, ip3); + tm.get_new()->update_topology(host1, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); + tm.get_new()->update_topology(host2, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); + tm.get_new()->update_topology(host3, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); tablet_map tmap(4); auto tid = tmap.first_tablet(); @@ -728,13 +728,13 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_with_empty_node) { }); tablet_metadata tmeta; tmeta.set_tablet_map(table1, std::move(tmap)); - tm.set_tablets(std::move(tmeta)); + tm.get_new()->set_tablets(std::move(tmeta)); return make_ready_future<>(); }).get(); // Sanity check { - load_sketch load(stm.get()); + load_sketch load(stm.get()->get_new_strong()); load.populate().get(); BOOST_REQUIRE_EQUAL(load.get_load(host1), 4); BOOST_REQUIRE_EQUAL(load.get_avg_shard_load(host1), 2); @@ -747,7 +747,7 @@ 
SEASTAR_THREAD_TEST_CASE(test_load_balancing_with_empty_node) { rebalance_tablets(e.get_tablet_allocator().local(), stm); { - load_sketch load(stm.get()); + load_sketch load(stm.get()->template get_new_strong()); load.populate().get(); for (auto h : {host1, host2, host3}) { @@ -786,12 +786,12 @@ SEASTAR_THREAD_TEST_CASE(test_decommission_rf_met) { stm.mutate_token_metadata([&](auto& tm) { const unsigned shard_count = 2; - tm.update_host_id(host1, ip1); - tm.update_host_id(host2, ip2); - tm.update_host_id(host3, ip3); - tm.update_topology(ip1, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); - tm.update_topology(ip2, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); - tm.update_topology(ip3, locator::endpoint_dc_rack::default_location, node::state::being_decommissioned, + tm.get_new()->update_host_id(host1, ip1); + tm.get_new()->update_host_id(host2, ip2); + tm.get_new()->update_host_id(host3, ip3); + tm.get_new()->update_topology(host1, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); + tm.get_new()->update_topology(host2, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); + tm.get_new()->update_topology(host3, locator::endpoint_dc_rack::default_location, node::state::being_decommissioned, shard_count); tablet_map tmap(4); @@ -825,14 +825,14 @@ SEASTAR_THREAD_TEST_CASE(test_decommission_rf_met) { }); tablet_metadata tmeta; tmeta.set_tablet_map(table1, std::move(tmap)); - tm.set_tablets(std::move(tmeta)); + tm.get_new()->set_tablets(std::move(tmeta)); return make_ready_future<>(); }).get(); rebalance_tablets(e.get_tablet_allocator().local(), stm); { - load_sketch load(stm.get()); + load_sketch load(stm.get()->get_new_strong()); load.populate().get(); BOOST_REQUIRE(load.get_avg_shard_load(host1) == 2); BOOST_REQUIRE(load.get_avg_shard_load(host2) == 2); @@ -840,14 +840,14 @@ SEASTAR_THREAD_TEST_CASE(test_decommission_rf_met) { } stm.mutate_token_metadata([&](auto& tm) { - 
tm.update_topology(ip3, locator::endpoint_dc_rack::default_location, node::state::left); + tm.get_new()->update_topology(host3, locator::endpoint_dc_rack::default_location, node::state::left); return make_ready_future<>(); }).get(); rebalance_tablets(e.get_tablet_allocator().local(), stm); { - load_sketch load(stm.get()); + load_sketch load(stm.get()->get_new_strong()); load.populate().get(); BOOST_REQUIRE(load.get_avg_shard_load(host1) == 2); BOOST_REQUIRE(load.get_avg_shard_load(host2) == 2); @@ -888,14 +888,14 @@ SEASTAR_THREAD_TEST_CASE(test_decommission_two_racks) { stm.mutate_token_metadata([&](auto& tm) { const unsigned shard_count = 1; - tm.update_host_id(host1, ip1); - tm.update_host_id(host2, ip2); - tm.update_host_id(host3, ip3); - tm.update_host_id(host4, ip4); - tm.update_topology(ip1, racks[0], std::nullopt, shard_count); - tm.update_topology(ip2, racks[1], std::nullopt, shard_count); - tm.update_topology(ip3, racks[0], std::nullopt, shard_count); - tm.update_topology(ip4, racks[1], node::state::being_decommissioned, + tm.get_new()->update_host_id(host1, ip1); + tm.get_new()->update_host_id(host2, ip2); + tm.get_new()->update_host_id(host3, ip3); + tm.get_new()->update_host_id(host4, ip4); + tm.get_new()->update_topology(host1, racks[0], std::nullopt, shard_count); + tm.get_new()->update_topology(host2, racks[1], std::nullopt, shard_count); + tm.get_new()->update_topology(host3, racks[0], std::nullopt, shard_count); + tm.get_new()->update_topology(host4, racks[1], node::state::being_decommissioned, shard_count); tablet_map tmap(4); @@ -929,14 +929,14 @@ SEASTAR_THREAD_TEST_CASE(test_decommission_two_racks) { }); tablet_metadata tmeta; tmeta.set_tablet_map(table1, std::move(tmap)); - tm.set_tablets(std::move(tmeta)); + tm.get_new()->set_tablets(std::move(tmeta)); return make_ready_future<>(); }).get(); rebalance_tablets(e.get_tablet_allocator().local(), stm); { - load_sketch load(stm.get()); + load_sketch load(stm.get()->get_new_strong()); 
load.populate().get(); BOOST_REQUIRE(load.get_avg_shard_load(host1) >= 2); BOOST_REQUIRE(load.get_avg_shard_load(host2) >= 2); @@ -947,10 +947,10 @@ SEASTAR_THREAD_TEST_CASE(test_decommission_two_racks) { // Verify replicas are not collocated on racks { auto tm = stm.get(); - auto& tmap = tm->tablets().get_tablet_map(table1); + auto& tmap = tm->get_new()->tablets().get_tablet_map(table1); tmap.for_each_tablet([&](auto tid, auto& tinfo) { - auto rack1 = tm->get_topology().get_rack(tinfo.replicas[0].host); - auto rack2 = tm->get_topology().get_rack(tinfo.replicas[1].host); + auto rack1 = tm->get_new()->get_topology().get_rack(tinfo.replicas[0].host); + auto rack2 = tm->get_new()->get_topology().get_rack(tinfo.replicas[1].host); BOOST_REQUIRE(rack1 != rack2); }).get(); } @@ -989,14 +989,14 @@ SEASTAR_THREAD_TEST_CASE(test_decommission_rack_load_failure) { stm.mutate_token_metadata([&](auto& tm) { const unsigned shard_count = 1; - tm.update_host_id(host1, ip1); - tm.update_host_id(host2, ip2); - tm.update_host_id(host3, ip3); - tm.update_host_id(host4, ip4); - tm.update_topology(ip1, racks[0], std::nullopt, shard_count); - tm.update_topology(ip2, racks[0], std::nullopt, shard_count); - tm.update_topology(ip3, racks[0], std::nullopt, shard_count); - tm.update_topology(ip4, racks[1], node::state::being_decommissioned, + tm.get_new()->update_host_id(host1, ip1); + tm.get_new()->update_host_id(host2, ip2); + tm.get_new()->update_host_id(host3, ip3); + tm.get_new()->update_host_id(host4, ip4); + tm.get_new()->update_topology(host1, racks[0], std::nullopt, shard_count); + tm.get_new()->update_topology(host2, racks[0], std::nullopt, shard_count); + tm.get_new()->update_topology(host3, racks[0], std::nullopt, shard_count); + tm.get_new()->update_topology(host4, racks[1], node::state::being_decommissioned, shard_count); tablet_map tmap(4); @@ -1030,7 +1030,7 @@ SEASTAR_THREAD_TEST_CASE(test_decommission_rack_load_failure) { }); tablet_metadata tmeta; 
tmeta.set_tablet_map(table1, std::move(tmap)); - tm.set_tablets(std::move(tmeta)); + tm.get_new()->set_tablets(std::move(tmeta)); return make_ready_future<>(); }).get(); @@ -1063,12 +1063,12 @@ SEASTAR_THREAD_TEST_CASE(test_decommission_rf_not_met) { stm.mutate_token_metadata([&](auto& tm) { const unsigned shard_count = 2; - tm.update_host_id(host1, ip1); - tm.update_host_id(host2, ip2); - tm.update_host_id(host3, ip3); - tm.update_topology(ip1, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); - tm.update_topology(ip2, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); - tm.update_topology(ip3, locator::endpoint_dc_rack::default_location, node::state::being_decommissioned, + tm.get_new()->update_host_id(host1, ip1); + tm.get_new()->update_host_id(host2, ip2); + tm.get_new()->update_host_id(host3, ip3); + tm.get_new()->update_topology(host1, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); + tm.get_new()->update_topology(host2, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); + tm.get_new()->update_topology(host3, locator::endpoint_dc_rack::default_location, node::state::being_decommissioned, shard_count); tablet_map tmap(1); @@ -1082,7 +1082,7 @@ SEASTAR_THREAD_TEST_CASE(test_decommission_rf_not_met) { }); tablet_metadata tmeta; tmeta.set_tablet_map(table1, std::move(tmap)); - tm.set_tablets(std::move(tmeta)); + tm.get_new()->set_tablets(std::move(tmeta)); return make_ready_future<>(); }).get(); @@ -1118,12 +1118,12 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_works_with_in_progress_transitions) }); stm.mutate_token_metadata([&] (auto& tm) { - tm.update_host_id(host1, ip1); - tm.update_host_id(host2, ip2); - tm.update_host_id(host3, ip3); - tm.update_topology(ip1, locator::endpoint_dc_rack::default_location, std::nullopt, 1); - tm.update_topology(ip2, locator::endpoint_dc_rack::default_location, std::nullopt, 1); - tm.update_topology(ip3, 
locator::endpoint_dc_rack::default_location, std::nullopt, 2); + tm.get_new()->update_host_id(host1, ip1); + tm.get_new()->update_host_id(host2, ip2); + tm.get_new()->update_host_id(host3, ip3); + tm.get_new()->update_topology(host1, locator::endpoint_dc_rack::default_location, std::nullopt, 1); + tm.get_new()->update_topology(host2, locator::endpoint_dc_rack::default_location, std::nullopt, 1); + tm.get_new()->update_topology(host3, locator::endpoint_dc_rack::default_location, std::nullopt, 2); tablet_map tmap(4); std::optional tid = tmap.first_tablet(); @@ -1146,7 +1146,7 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_works_with_in_progress_transitions) }); tablet_metadata tmeta; tmeta.set_tablet_map(table1, std::move(tmap)); - tm.set_tablets(std::move(tmeta)); + tm.get_new()->set_tablets(std::move(tmeta)); return make_ready_future<>(); }).get(); @@ -1154,7 +1154,7 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_works_with_in_progress_transitions) execute_transitions(stm); { - load_sketch load(stm.get()); + load_sketch load(stm.get()->get_new_strong()); load.populate().get(); for (auto h : {host1, host2, host3}) { @@ -1187,12 +1187,12 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancer_shuffle_mode) { }); stm.mutate_token_metadata([&] (auto& tm) { - tm.update_host_id(host1, ip1); - tm.update_host_id(host2, ip2); - tm.update_host_id(host3, ip3); - tm.update_topology(ip1, locator::endpoint_dc_rack::default_location, std::nullopt, 1); - tm.update_topology(ip2, locator::endpoint_dc_rack::default_location, std::nullopt, 1); - tm.update_topology(ip3, locator::endpoint_dc_rack::default_location, std::nullopt, 2); + tm.get_new()->update_host_id(host1, ip1); + tm.get_new()->update_host_id(host2, ip2); + tm.get_new()->update_host_id(host3, ip3); + tm.get_new()->update_topology(host1, locator::endpoint_dc_rack::default_location, std::nullopt, 1); + tm.get_new()->update_topology(host2, locator::endpoint_dc_rack::default_location, std::nullopt, 1); + 
tm.get_new()->update_topology(host3, locator::endpoint_dc_rack::default_location, std::nullopt, 2); tablet_map tmap(4); std::optional tid = tmap.first_tablet(); @@ -1207,20 +1207,20 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancer_shuffle_mode) { } tablet_metadata tmeta; tmeta.set_tablet_map(table1, std::move(tmap)); - tm.set_tablets(std::move(tmeta)); + tm.get_new()->set_tablets(std::move(tmeta)); return make_ready_future<>(); }).get(); rebalance_tablets(e.get_tablet_allocator().local(), stm); - BOOST_REQUIRE(e.get_tablet_allocator().local().balance_tablets(stm.get()).get0().empty()); + BOOST_REQUIRE(e.get_tablet_allocator().local().balance_tablets(stm.get()->get_new_strong()).get0().empty()); utils::get_local_injector().enable("tablet_allocator_shuffle"); auto disable_injection = seastar::defer([&] { utils::get_local_injector().disable("tablet_allocator_shuffle"); }); - BOOST_REQUIRE(!e.get_tablet_allocator().local().balance_tablets(stm.get()).get0().empty()); + BOOST_REQUIRE(!e.get_tablet_allocator().local().balance_tablets(stm.get()->get_new_strong()).get0().empty()); }).get(); } #endif @@ -1250,14 +1250,14 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_with_two_empty_nodes) { }); stm.mutate_token_metadata([&] (auto& tm) { - tm.update_host_id(host1, ip1); - tm.update_host_id(host2, ip2); - tm.update_host_id(host3, ip3); - tm.update_host_id(host4, ip4); - tm.update_topology(ip1, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); - tm.update_topology(ip2, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); - tm.update_topology(ip3, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); - tm.update_topology(ip4, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); + tm.get_new()->update_host_id(host1, ip1); + tm.get_new()->update_host_id(host2, ip2); + tm.get_new()->update_host_id(host3, ip3); + tm.get_new()->update_host_id(host4, ip4); + tm.get_new()->update_topology(host1, 
locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); + tm.get_new()->update_topology(host2, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); + tm.get_new()->update_topology(host3, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); + tm.get_new()->update_topology(host4, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); tablet_map tmap(16); for (auto tid : tmap.tablet_ids()) { @@ -1270,14 +1270,14 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_with_two_empty_nodes) { } tablet_metadata tmeta; tmeta.set_tablet_map(table1, std::move(tmap)); - tm.set_tablets(std::move(tmeta)); + tm.get_new()->set_tablets(std::move(tmeta)); return make_ready_future<>(); }).get(); rebalance_tablets(e.get_tablet_allocator().local(), stm); { - load_sketch load(stm.get()); + load_sketch load(stm.get()->get_new_strong()); load.populate().get(); for (auto h : {host1, host2, host3, host4}) { @@ -1312,8 +1312,8 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancer_disabling) { stm.mutate_token_metadata([&] (auto& tm) { tm.update_host_id(host1, ip1); tm.update_host_id(host2, ip2); - tm.update_topology(ip1, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); - tm.update_topology(ip2, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); + tm.update_topology(host1, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); + tm.update_topology(host2, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); tablet_map tmap(16); for (auto tid : tmap.tablet_ids()) { @@ -1399,6 +1399,7 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_with_random_load) { shared_token_metadata stm([&sem]() noexcept { return get_units(sem, 1); }, locator::token_metadata::config { locator::topology::config { .this_endpoint = inet_address("192.168.0.1"), + .this_host_id = hosts[0], .local_dc_rack = racks[1] } }); @@ -1411,9 +1412,9 @@ 
SEASTAR_THREAD_TEST_CASE(test_load_balancing_with_random_load) { for (auto h : hosts) { auto ip = inet_address(format("192.168.0.{}", ++i)); auto shard_count = 2; - tm.update_host_id(h, ip); + tm.get_new()->update_host_id(h, ip); auto rack = racks[i % racks.size()]; - tm.update_topology(ip, rack, std::nullopt, shard_count); + tm.get_new()->update_topology(h, rack, std::nullopt, shard_count); if (h != hosts[0]) { // Leave the first host empty by making it invisible to allocation algorithm. hosts_by_rack[rack.rack].push_back(h); @@ -1444,7 +1445,7 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_with_random_load) { } tablet_replica_set replicas; for (auto h : replica_hosts) { - auto shard_count = tm.get_topology().find_node(h)->get_shard_count(); + auto shard_count = tm.get_new()->get_topology().find_node(h)->get_shard_count(); auto shard = tests::random::get_int(0, shard_count - 1); replicas.push_back(tablet_replica {h, shard}); } @@ -1453,17 +1454,17 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_with_random_load) { total_tablet_count += tmap.tablet_count(); tmeta.set_tablet_map(table, std::move(tmap)); } - tm.set_tablets(std::move(tmeta)); + tm.get_new()->set_tablets(std::move(tmeta)); return make_ready_future<>(); }).get(); - testlog.debug("tablet metadata: {}", stm.get()->tablets()); + testlog.debug("tablet metadata: {}", stm.get()->get_new()->tablets()); testlog.info("Total tablet count: {}, hosts: {}", total_tablet_count, hosts.size()); rebalance_tablets(e.get_tablet_allocator().local(), stm); { - load_sketch load(stm.get()); + load_sketch load(stm.get()->get_new_strong()); load.populate().get(); min_max_tracker min_max_load; @@ -1473,7 +1474,7 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_with_random_load) { min_max_load.update(l); } - testlog.debug("tablet metadata: {}", stm.get()->tablets()); + testlog.debug("tablet metadata: {}", stm.get()->get_new()->tablets()); testlog.debug("Min load: {}, max load: {}", min_max_load.min(), min_max_load.max()); // 
FIXME: The algorithm cannot achieve balance in all cases yet, so we only check that it stops. From 93263bf9e71f166f0f54dba223c1218052ee268f Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Thu, 2 Nov 2023 16:28:06 +0400 Subject: [PATCH 31/51] bootstrap: use new token_metadata Just mechanical changes to the new token_metadata. All the boost and topology tests pass with this change. --- dht/boot_strapper.hh | 10 +++--- dht/range_streamer.cc | 15 ++++----- dht/range_streamer.hh | 16 +++++----- locator/abstract_replication_strategy.cc | 23 +++++++------- locator/abstract_replication_strategy.hh | 2 +- repair/repair.cc | 20 ++++++------ repair/row_level.hh | 2 +- service/storage_service.cc | 39 ++++++++++-------------- 8 files changed, 63 insertions(+), 64 deletions(-) diff --git a/dht/boot_strapper.hh b/dht/boot_strapper.hh index 0c50c043e7..599ea0ee84 100644 --- a/dht/boot_strapper.hh +++ b/dht/boot_strapper.hh @@ -29,21 +29,23 @@ using check_token_endpoint = bool_class; class boot_strapper { using inet_address = gms::inet_address; using token_metadata = locator::token_metadata; + using token_metadata2 = locator::token_metadata2; using token_metadata_ptr = locator::token_metadata_ptr; + using token_metadata2_ptr = locator::token_metadata2_ptr; using token = dht::token; distributed& _db; sharded& _stream_manager; abort_source& _abort_source; /* endpoint that needs to be bootstrapped */ - inet_address _address; + locator::host_id _address; /* its DC/RACK info */ locator::endpoint_dc_rack _dr; /* token of the node being bootstrapped. 
*/ std::unordered_set _tokens; - const token_metadata_ptr _token_metadata_ptr; + const locator::token_metadata2_ptr _token_metadata_ptr; public: boot_strapper(distributed& db, sharded& sm, abort_source& abort_source, - inet_address addr, locator::endpoint_dc_rack dr, std::unordered_set tokens, const token_metadata_ptr tmptr) + locator::host_id addr, locator::endpoint_dc_rack dr, std::unordered_set tokens, const token_metadata2_ptr tmptr) : _db(db) , _stream_manager(sm) , _abort_source(abort_source) @@ -91,7 +93,7 @@ public: #endif private: - const token_metadata& get_token_metadata() { + const token_metadata2& get_token_metadata() { return *_token_metadata_ptr; } }; diff --git a/dht/range_streamer.cc b/dht/range_streamer.cc index 8d2d4ba9e7..f27d3a48e7 100644 --- a/dht/range_streamer.cc +++ b/dht/range_streamer.cc @@ -88,6 +88,7 @@ range_streamer::get_all_ranges_with_sources_for(const sstring& keyspace_name, lo logger.debug("keyspace={}, desired_ranges.size={}, range_addresses.size={}", keyspace_name, desired_ranges.size(), range_addresses.size()); std::unordered_map> range_sources; + const auto address_ep = get_token_metadata().get_endpoint_for_host_id(_address); for (auto& desired_range : desired_ranges) { auto found = false; for (auto& x : range_addresses) { @@ -97,7 +98,7 @@ range_streamer::get_all_ranges_with_sources_for(const sstring& keyspace_name, lo const range& src_range = x.first; if (src_range.contains(desired_range, dht::operator<=>)) { inet_address_vector_replica_set preferred(x.second.begin(), x.second.end()); - get_token_metadata().get_topology().sort_by_proximity(_address, preferred); + get_token_metadata().get_topology().sort_by_proximity(address_ep, preferred); for (inet_address& p : preferred) { range_sources[desired_range].push_back(p); } @@ -122,14 +123,14 @@ range_streamer::get_all_ranges_with_strict_sources_for(const sstring& keyspace_n auto& strat = erm->get_replication_strategy(); //Active ranges - auto metadata_clone = 
get_token_metadata().clone_only_token_map().get0(); - auto range_addresses = strat.get_range_addresses(metadata_clone).get0(); + auto metadata_clone = locator::make_token_metadata2_ptr(get_token_metadata().clone_only_token_map().get0()); + auto range_addresses = strat.get_range_addresses(token_metadata(metadata_clone)).get0(); //Pending ranges - metadata_clone.update_topology(_address, _dr); - metadata_clone.update_normal_tokens(_tokens, _address).get(); - auto pending_range_addresses = strat.get_range_addresses(metadata_clone).get0(); - metadata_clone.clear_gently().get(); + metadata_clone->update_topology(_address, _dr); + metadata_clone->update_normal_tokens(_tokens, _address).get(); + auto pending_range_addresses = strat.get_range_addresses(token_metadata(metadata_clone)).get0(); + metadata_clone->clear_gently().get(); //Collects the source that will have its range moved to the new node std::unordered_map> range_sources; diff --git a/dht/range_streamer.hh b/dht/range_streamer.hh index 31a14d813d..75ceb1b01b 100644 --- a/dht/range_streamer.hh +++ b/dht/range_streamer.hh @@ -37,7 +37,9 @@ class range_streamer { public: using inet_address = gms::inet_address; using token_metadata = locator::token_metadata; + using token_metadata2 = locator::token_metadata2; using token_metadata_ptr = locator::token_metadata_ptr; + using token_metadata2_ptr = locator::token_metadata2_ptr; using stream_plan = streaming::stream_plan; using stream_state = streaming::stream_state; public: @@ -77,8 +79,8 @@ public: } }; - range_streamer(distributed& db, sharded& sm, const token_metadata_ptr tmptr, abort_source& abort_source, std::unordered_set tokens, - inet_address address, locator::endpoint_dc_rack dr, sstring description, streaming::stream_reason reason, + range_streamer(distributed& db, sharded& sm, const token_metadata2_ptr tmptr, abort_source& abort_source, std::unordered_set tokens, + locator::host_id address, locator::endpoint_dc_rack dr, sstring description, 
streaming::stream_reason reason, service::frozen_topology_guard topo_guard, std::vector tables = {}) : _db(db) @@ -96,8 +98,8 @@ public: _abort_source.check(); } - range_streamer(distributed& db, sharded& sm, const token_metadata_ptr tmptr, abort_source& abort_source, - inet_address address, locator::endpoint_dc_rack dr, sstring description, streaming::stream_reason reason, service::frozen_topology_guard topo_guard, std::vector tables = {}) + range_streamer(distributed& db, sharded& sm, const token_metadata2_ptr tmptr, abort_source& abort_source, + locator::host_id address, locator::endpoint_dc_rack dr, sstring description, streaming::stream_reason reason, service::frozen_topology_guard topo_guard, std::vector tables = {}) : range_streamer(db, sm, std::move(tmptr), abort_source, std::unordered_set(), address, std::move(dr), description, reason, std::move(topo_guard), std::move(tables)) { } @@ -145,7 +147,7 @@ private: #endif // Can be called only before stream_async(). - const token_metadata& get_token_metadata() { + const token_metadata2& get_token_metadata() { return *_token_metadata_ptr; } public: @@ -154,10 +156,10 @@ public: private: distributed& _db; sharded& _stream_manager; - token_metadata_ptr _token_metadata_ptr; + token_metadata2_ptr _token_metadata_ptr; abort_source& _abort_source; std::unordered_set _tokens; - inet_address _address; + locator::host_id _address; locator::endpoint_dc_rack _dr; sstring _description; streaming::stream_reason _reason; diff --git a/locator/abstract_replication_strategy.cc b/locator/abstract_replication_strategy.cc index f06a4924e4..162c4e0a71 100644 --- a/locator/abstract_replication_strategy.cc +++ b/locator/abstract_replication_strategy.cc @@ -342,9 +342,10 @@ vnode_effective_replication_map::get_range_addresses() const { future> abstract_replication_strategy::get_range_addresses(const token_metadata& tm) const { std::unordered_map ret; - for (auto& t : tm.sorted_tokens()) { - dht::token_range_vector ranges = 
tm.get_primary_ranges_for(t); - auto eps = get(co_await calculate_natural_endpoints(t, tm, false)); + auto tm_new = tm.get_new_strong(); + for (auto& t : tm_new->sorted_tokens()) { + dht::token_range_vector ranges = tm_new->get_primary_ranges_for(t); + auto eps = co_await calculate_natural_ips(t, tm_new); for (auto& r : ranges) { ret.emplace(r, eps.get_vector()); } @@ -353,20 +354,20 @@ abstract_replication_strategy::get_range_addresses(const token_metadata& tm) con } future -abstract_replication_strategy::get_pending_address_ranges(const token_metadata_ptr tmptr, std::unordered_set pending_tokens, inet_address pending_address, locator::endpoint_dc_rack dr) const { +abstract_replication_strategy::get_pending_address_ranges(const token_metadata2_ptr tmptr, std::unordered_set pending_tokens, locator::host_id pending_address, locator::endpoint_dc_rack dr) const { dht::token_range_vector ret; - token_metadata temp = co_await tmptr->clone_only_token_map(); - temp.update_topology(pending_address, std::move(dr)); - co_await temp.update_normal_tokens(pending_tokens, pending_address); - for (const auto& t : temp.sorted_tokens()) { - auto eps = get(co_await calculate_natural_endpoints(t, temp, false)); + auto temp = make_token_metadata2_ptr(co_await tmptr->clone_only_token_map()); + temp->update_topology(pending_address, std::move(dr)); + co_await temp->update_normal_tokens(pending_tokens, pending_address); + for (const auto& t : temp->sorted_tokens()) { + auto eps = get(co_await calculate_natural_endpoints(t, token_metadata(temp), true)); if (eps.contains(pending_address)) { - dht::token_range_vector r = temp.get_primary_ranges_for(t); + dht::token_range_vector r = temp->get_primary_ranges_for(t); rslogger.debug("get_pending_address_ranges: token={} primary_range={} endpoint={}", t, r, pending_address); ret.insert(ret.end(), r.begin(), r.end()); } } - co_await temp.clear_gently(); + co_await temp->clear_gently(); co_return ret; } diff --git 
a/locator/abstract_replication_strategy.hh b/locator/abstract_replication_strategy.hh index f296de7910..3da8fce7a3 100644 --- a/locator/abstract_replication_strategy.hh +++ b/locator/abstract_replication_strategy.hh @@ -174,7 +174,7 @@ public: // Caller must ensure that token_metadata will not change throughout the call. future> get_range_addresses(const token_metadata& tm) const; - future get_pending_address_ranges(const token_metadata_ptr tmptr, std::unordered_set pending_tokens, inet_address pending_address, locator::endpoint_dc_rack dr) const; + future get_pending_address_ranges(const token_metadata2_ptr tmptr, std::unordered_set pending_tokens, locator::host_id pending_address, locator::endpoint_dc_rack dr) const; }; using ring_mapping = boost::icl::interval_map>; diff --git a/repair/repair.cc b/repair/repair.cc index 92415b5f9e..b0fe70444a 100644 --- a/repair/repair.cc +++ b/repair/repair.cc @@ -1492,7 +1492,7 @@ std::optional repair::data_sync_repair_task_impl::expected_children_numb return smp::count; } -future<> repair_service::bootstrap_with_repair(locator::token_metadata_ptr tmptr, std::unordered_set bootstrap_tokens) { +future<> repair_service::bootstrap_with_repair(locator::token_metadata2_ptr tmptr, std::unordered_set bootstrap_tokens) { assert(this_shard_id() == 0); using inet_address = gms::inet_address; return seastar::async([this, tmptr = std::move(tmptr), tokens = std::move(bootstrap_tokens)] () mutable { @@ -1500,7 +1500,7 @@ future<> repair_service::bootstrap_with_repair(locator::token_metadata_ptr tmptr auto ks_erms = db.get_non_local_strategy_keyspaces_erms(); auto& topology = tmptr->get_topology(); auto myloc = topology.get_location(); - auto myip = topology.my_address(); + auto myid = tmptr->get_my_id(); auto reason = streaming::stream_reason::bootstrap; // Calculate number of ranges to sync data size_t nr_ranges_total = 0; @@ -1509,7 +1509,7 @@ future<> repair_service::bootstrap_with_repair(locator::token_metadata_ptr tmptr continue; } 
auto& strat = erm->get_replication_strategy(); - dht::token_range_vector desired_ranges = strat.get_pending_address_ranges(tmptr, tokens, myip, myloc).get0(); + dht::token_range_vector desired_ranges = strat.get_pending_address_ranges(tmptr, tokens, myid, myloc).get0(); seastar::thread::maybe_yield(); auto nr_tables = get_nr_tables(db, keyspace_name); nr_ranges_total += desired_ranges.size() * nr_tables; @@ -1525,20 +1525,20 @@ future<> repair_service::bootstrap_with_repair(locator::token_metadata_ptr tmptr continue; } auto& strat = erm->get_replication_strategy(); - dht::token_range_vector desired_ranges = strat.get_pending_address_ranges(tmptr, tokens, myip, myloc).get0(); + dht::token_range_vector desired_ranges = strat.get_pending_address_ranges(tmptr, tokens, myid, myloc).get0(); bool find_node_in_local_dc_only = strat.get_type() == locator::replication_strategy_type::network_topology; bool everywhere_topology = strat.get_type() == locator::replication_strategy_type::everywhere_topology; auto replication_factor = erm->get_replication_factor(); //Active ranges - auto metadata_clone = tmptr->clone_only_token_map().get0(); - auto range_addresses = strat.get_range_addresses(metadata_clone).get0(); + auto metadata_clone = locator::make_token_metadata2_ptr(tmptr->clone_only_token_map().get0()); + auto range_addresses = strat.get_range_addresses(locator::token_metadata(metadata_clone)).get0(); //Pending ranges - metadata_clone.update_topology(myip, myloc, locator::node::state::bootstrapping); - metadata_clone.update_normal_tokens(tokens, myip).get(); - auto pending_range_addresses = strat.get_range_addresses(metadata_clone).get0(); - metadata_clone.clear_gently().get(); + metadata_clone->update_topology(myid, myloc, locator::node::state::bootstrapping); + metadata_clone->update_normal_tokens(tokens, myid).get(); + auto pending_range_addresses = strat.get_range_addresses(locator::token_metadata(metadata_clone)).get0(); + metadata_clone->clear_gently().get(); 
//Collects the source that will have its range moved to the new node std::unordered_map range_sources; diff --git a/repair/row_level.hh b/repair/row_level.hh index e21f4df2d2..61251f1bb8 100644 --- a/repair/row_level.hh +++ b/repair/row_level.hh @@ -138,7 +138,7 @@ public: // The tokens are the tokens assigned to the bootstrap node. // all repair-based node operation entry points must be called on shard 0 - future<> bootstrap_with_repair(locator::token_metadata_ptr tmptr, std::unordered_set bootstrap_tokens); + future<> bootstrap_with_repair(locator::token_metadata2_ptr tmptr, std::unordered_set bootstrap_tokens); future<> decommission_with_repair(locator::token_metadata_ptr tmptr); future<> removenode_with_repair(locator::token_metadata_ptr tmptr, gms::inet_address leaving_node, shared_ptr ops); future<> rebuild_with_repair(locator::token_metadata_ptr tmptr, sstring source_dc); diff --git a/service/storage_service.cc b/service/storage_service.cc index ee1f63a677..eb79d949f5 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -3598,7 +3598,7 @@ future<> storage_service::bootstrap(std::unordered_set& bootstrap_tokens, slogger.info("sleeping {} ms for pending range setup", get_ring_delay().count()); _gossiper.wait_for_range_setup().get(); - dht::boot_strapper bs(_db, _stream_manager, _abort_source, get_broadcast_address(), _snitch.local()->get_location(), bootstrap_tokens, get_token_metadata_ptr()); + dht::boot_strapper bs(_db, _stream_manager, _abort_source, get_token_metadata_ptr()->get_my_id(), _snitch.local()->get_location(), bootstrap_tokens, get_token_metadata_ptr()->get_new_strong()); slogger.info("Starting to bootstrap..."); bs.bootstrap(streaming::stream_reason::bootstrap, _gossiper, null_topology_guard).get(); } else { @@ -5099,7 +5099,7 @@ void storage_service::run_bootstrap_ops(std::unordered_set& bootstrap_tok ctl.prepare(node_ops_cmd::bootstrap_prepare).get(); // Step 5: Sync data for bootstrap - 
_repair.local().bootstrap_with_repair(get_token_metadata_ptr(), bootstrap_tokens).get(); + _repair.local().bootstrap_with_repair(get_token_metadata_ptr()->get_new_strong(), bootstrap_tokens).get(); on_streaming_finished(); // Step 6: Finish @@ -5152,7 +5152,7 @@ void storage_service::run_replace_ops(std::unordered_set& bootstrap_token _repair.local().replace_with_repair(get_token_metadata_ptr(), bootstrap_tokens, ctl.ignore_nodes).get(); } else { slogger.info("replace[{}]: Using streaming based node ops to sync data", uuid); - dht::boot_strapper bs(_db, _stream_manager, _abort_source, get_broadcast_address(), _snitch.local()->get_location(), bootstrap_tokens, get_token_metadata_ptr()); + dht::boot_strapper bs(_db, _stream_manager, _abort_source, get_token_metadata_ptr()->get_new()->get_my_id(), _snitch.local()->get_location(), bootstrap_tokens, get_token_metadata_ptr()->get_new_strong()); bs.bootstrap(streaming::stream_reason::replace, _gossiper, null_topology_guard, replace_address).get(); } on_streaming_finished(); @@ -5892,8 +5892,8 @@ future<> storage_service::rebuild(sstring source_dc) { if (ss.is_repair_based_node_ops_enabled(streaming::stream_reason::rebuild)) { co_await ss._repair.local().rebuild_with_repair(tmptr, std::move(source_dc)); } else { - auto streamer = make_lw_shared(ss._db, ss._stream_manager, tmptr, ss._abort_source, - ss.get_broadcast_address(), ss._snitch.local()->get_location(), "Rebuild", streaming::stream_reason::rebuild, null_topology_guard); + auto streamer = make_lw_shared(ss._db, ss._stream_manager, tmptr->get_new_strong(), ss._abort_source, + tmptr->get_new()->get_my_id(), ss._snitch.local()->get_location(), "Rebuild", streaming::stream_reason::rebuild, null_topology_guard); streamer->add_source_filter(std::make_unique(ss._gossiper.get_unreachable_members())); if (source_dc != "") { streamer->add_source_filter(std::make_unique(source_dc)); @@ -6071,7 +6071,7 @@ future<> storage_service::removenode_with_stream(gms::inet_address 
leaving_node, as.request_abort(); } }); - auto streamer = make_lw_shared(_db, _stream_manager, tmptr, as, get_broadcast_address(), _snitch.local()->get_location(), "Removenode", streaming::stream_reason::removenode, topo_guard); + auto streamer = make_lw_shared(_db, _stream_manager, tmptr->get_new_strong(), as, tmptr->get_my_id(), _snitch.local()->get_location(), "Removenode", streaming::stream_reason::removenode, topo_guard); removenode_add_ranges(streamer, leaving_node).get(); try { streamer->stream_async().get(); @@ -6127,12 +6127,7 @@ future<> storage_service::leave_ring() { future<> storage_service::stream_ranges(std::unordered_map> ranges_to_stream_by_keyspace) { - auto streamer = dht::range_streamer(_db, _stream_manager, get_token_metadata_ptr(), _abort_source, - get_broadcast_address(), - _snitch.local()->get_location(), - "Unbootstrap", - streaming::stream_reason::decommission, - null_topology_guard); + auto streamer = dht::range_streamer(_db, _stream_manager, get_token_metadata_ptr()->get_new_strong(), _abort_source, get_token_metadata_ptr()->get_new()->get_my_id(), _snitch.local()->get_location(), "Unbootstrap", streaming::stream_reason::decommission, null_topology_guard); for (auto& entry : ranges_to_stream_by_keyspace) { const auto& keyspace = entry.first; auto& ranges_with_endpoints = entry.second; @@ -6507,10 +6502,10 @@ future storage_service::raft_topology_cmd_handler(raft if (!_topology_state_machine._topology.normal_nodes.empty()) { // stream only if there is a node in normal state co_await retrier(_bootstrap_result, coroutine::lambda([&] () -> future<> { if (is_repair_based_node_ops_enabled(streaming::stream_reason::bootstrap)) { - co_await _repair.local().bootstrap_with_repair(get_token_metadata_ptr(), rs.ring.value().tokens); + co_await _repair.local().bootstrap_with_repair(get_token_metadata_ptr()->get_new_strong(), rs.ring.value().tokens); } else { - dht::boot_strapper bs(_db, _stream_manager, _abort_source, get_broadcast_address(), - 
locator::endpoint_dc_rack{rs.datacenter, rs.rack}, rs.ring.value().tokens, get_token_metadata_ptr()); + dht::boot_strapper bs(_db, _stream_manager, _abort_source, get_token_metadata_ptr()->get_new()->get_my_id(), + locator::endpoint_dc_rack{rs.datacenter, rs.rack}, rs.ring.value().tokens, get_token_metadata_ptr()->get_new_strong()); co_await bs.bootstrap(streaming::stream_reason::bootstrap, _gossiper, _topology_state_machine._topology.session); } })); @@ -6533,8 +6528,8 @@ future storage_service::raft_topology_cmd_handler(raft } co_await _repair.local().replace_with_repair(get_token_metadata_ptr(), rs.ring.value().tokens, std::move(ignored_ips)); } else { - dht::boot_strapper bs(_db, _stream_manager, _abort_source, get_broadcast_address(), - locator::endpoint_dc_rack{rs.datacenter, rs.rack}, rs.ring.value().tokens, get_token_metadata_ptr()); + dht::boot_strapper bs(_db, _stream_manager, _abort_source, get_token_metadata_ptr()->get_new()->get_my_id(), + locator::endpoint_dc_rack{rs.datacenter, rs.rack}, rs.ring.value().tokens, get_token_metadata_ptr()->get_new_strong()); auto replaced_id = std::get(_topology_state_machine._topology.req_param[raft_server.id()]).replaced_id; auto existing_ip = _group0->address_map().find(replaced_id); assert(existing_ip); @@ -6604,9 +6599,8 @@ future storage_service::raft_topology_cmd_handler(raft if (is_repair_based_node_ops_enabled(streaming::stream_reason::rebuild)) { co_await _repair.local().rebuild_with_repair(tmptr, std::move(source_dc)); } else { - auto streamer = make_lw_shared(_db, _stream_manager, tmptr, _abort_source, - get_broadcast_address(), _snitch.local()->get_location(), "Rebuild", streaming::stream_reason::rebuild, - _topology_state_machine._topology.session); + auto streamer = make_lw_shared(_db, _stream_manager, tmptr->get_new_strong(), _abort_source, + tmptr->get_new()->get_my_id(), _snitch.local()->get_location(), "Rebuild", streaming::stream_reason::rebuild, _topology_state_machine._topology.session); 
streamer->add_source_filter(std::make_unique(_gossiper.get_unreachable_members())); if (source_dc != "") { streamer->add_source_filter(std::make_unique(source_dc)); @@ -6795,10 +6789,9 @@ future<> storage_service::stream_tablet(locator::global_tablet_id tablet) { auto& table = _db.local().find_column_family(tablet.table); std::vector tables = {table.schema()->cf_name()}; - auto streamer = make_lw_shared(_db, _stream_manager, std::move(tm), guard.get_abort_source(), - get_broadcast_address(), _snitch.local()->get_location(), + auto streamer = make_lw_shared(_db, _stream_manager, tm->get_new_strong(), guard.get_abort_source(), + tm->get_new()->get_my_id(), _snitch.local()->get_location(), "Tablet migration", streaming::stream_reason::tablet_migration, topo_guard, std::move(tables)); - tm = nullptr; streamer->add_source_filter(std::make_unique( _gossiper.get_unreachable_members())); From ef534ac8764cec02544ec95492de2dd61c165280 Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Sat, 4 Nov 2023 20:53:34 +0400 Subject: [PATCH 32/51] rebuild_with_repair, replace_with_repair: use new token_metadata Just mechanical changes to the new token_metadata. All the boost and topology tests pass with this change. --- locator/abstract_replication_strategy.cc | 10 +++++----- locator/abstract_replication_strategy.hh | 4 ++-- repair/repair.cc | 20 ++++++++++---------- repair/row_level.hh | 6 +++--- service/storage_service.cc | 8 ++++---- 5 files changed, 24 insertions(+), 24 deletions(-) diff --git a/locator/abstract_replication_strategy.cc b/locator/abstract_replication_strategy.cc index 162c4e0a71..751c1519a5 100644 --- a/locator/abstract_replication_strategy.cc +++ b/locator/abstract_replication_strategy.cc @@ -256,18 +256,18 @@ vnode_effective_replication_map::get_ranges(inet_address ep) const { // Caller must ensure that token_metadata will not change throughout the call. 
future -abstract_replication_strategy::get_ranges(inet_address ep, token_metadata_ptr tmptr) const { +abstract_replication_strategy::get_ranges(locator::host_id ep, token_metadata_ptr tmptr) const { co_return co_await get_ranges(ep, *tmptr); } // Caller must ensure that token_metadata will not change throughout the call. future -abstract_replication_strategy::get_ranges(inet_address ep, const token_metadata& tm) const { +abstract_replication_strategy::get_ranges(locator::host_id ep, const token_metadata& tm) const { dht::token_range_vector ret; - if (!tm.is_normal_token_owner(ep)) { + if (!tm.get_new()->is_normal_token_owner(ep)) { co_return ret; } - const auto& sorted_tokens = tm.sorted_tokens(); + const auto& sorted_tokens = tm.get_new()->sorted_tokens(); if (sorted_tokens.empty()) { on_internal_error(rslogger, "Token metadata is empty"); } @@ -279,7 +279,7 @@ abstract_replication_strategy::get_ranges(inet_address ep, const token_metadata& // Using the common path would make the function quadratic in the number of endpoints. should_add = true; } else { - auto eps = get(co_await calculate_natural_endpoints(tok, tm, false)); + auto eps = get(co_await calculate_natural_endpoints(tok, tm, true)); should_add = eps.contains(ep); } if (should_add) { diff --git a/locator/abstract_replication_strategy.hh b/locator/abstract_replication_strategy.hh index 3da8fce7a3..87d81f1e97 100644 --- a/locator/abstract_replication_strategy.hh +++ b/locator/abstract_replication_strategy.hh @@ -168,8 +168,8 @@ public: // Use the token_metadata provided by the caller instead of _token_metadata // Note: must be called with initialized, non-empty token_metadata. 
- future get_ranges(inet_address ep, token_metadata_ptr tmptr) const; - future get_ranges(inet_address ep, const token_metadata& tm) const; + future get_ranges(locator::host_id ep, token_metadata_ptr tmptr) const; + future get_ranges(locator::host_id ep, const token_metadata& tm) const; // Caller must ensure that token_metadata will not change throughout the call. future> get_range_addresses(const token_metadata& tm) const; diff --git a/repair/repair.cc b/repair/repair.cc index b0fe70444a..96bfc988de 100644 --- a/repair/repair.cc +++ b/repair/repair.cc @@ -1883,12 +1883,13 @@ future<> repair_service::removenode_with_repair(locator::token_metadata_ptr tmpt }); } -future<> repair_service::do_rebuild_replace_with_repair(locator::token_metadata_ptr tmptr, sstring op, sstring source_dc, streaming::stream_reason reason, std::unordered_set ignore_nodes) { +future<> repair_service::do_rebuild_replace_with_repair(locator::token_metadata2_ptr tmptr, sstring op, sstring source_dc, streaming::stream_reason reason, std::unordered_set ignore_nodes) { assert(this_shard_id() == 0); return seastar::async([this, tmptr = std::move(tmptr), source_dc = std::move(source_dc), op = std::move(op), reason, ignore_nodes = std::move(ignore_nodes)] () mutable { auto& db = get_db().local(); auto ks_erms = db.get_non_local_strategy_keyspaces_erms(); auto myip = tmptr->get_topology().my_address(); + auto myid = tmptr->get_my_id(); size_t nr_ranges_total = 0; for (const auto& [keyspace_name, erm] : ks_erms) { if (!db.has_keyspace(keyspace_name)) { @@ -1896,7 +1897,7 @@ future<> repair_service::do_rebuild_replace_with_repair(locator::token_metadata_ } auto& strat = erm->get_replication_strategy(); // Okay to yield since tm is immutable - dht::token_range_vector ranges = strat.get_ranges(myip, tmptr).get0(); + dht::token_range_vector ranges = strat.get_ranges(myid, locator::token_metadata(tmptr)).get0(); auto nr_tables = get_nr_tables(db, keyspace_name); nr_ranges_total += ranges.size() * nr_tables; 
@@ -1920,7 +1921,7 @@ future<> repair_service::do_rebuild_replace_with_repair(locator::token_metadata_ continue; } auto& strat = erm->get_replication_strategy(); - dht::token_range_vector ranges = strat.get_ranges(myip, tmptr).get0(); + dht::token_range_vector ranges = strat.get_ranges(myid, locator::token_metadata(tmptr)).get0(); auto& topology = erm->get_token_metadata().get_topology(); std::unordered_map range_sources; auto nr_tables = get_nr_tables(db, keyspace_name); @@ -1929,7 +1930,7 @@ future<> repair_service::do_rebuild_replace_with_repair(locator::token_metadata_ auto& r = *it; seastar::thread::maybe_yield(); auto end_token = r.end() ? r.end()->value() : dht::maximum_token(); - auto neighbors = boost::copy_range>(get(strat.calculate_natural_endpoints(end_token, *tmptr, false).get0()) | + auto neighbors = boost::copy_range>(strat.calculate_natural_ips(end_token, tmptr).get0() | boost::adaptors::filtered([myip, &source_dc, &topology, &ignore_nodes] (const gms::inet_address& node) { if (node == myip) { return false; @@ -1967,7 +1968,7 @@ future<> repair_service::do_rebuild_replace_with_repair(locator::token_metadata_ }); } -future<> repair_service::rebuild_with_repair(locator::token_metadata_ptr tmptr, sstring source_dc) { +future<> repair_service::rebuild_with_repair(locator::token_metadata2_ptr tmptr, sstring source_dc) { assert(this_shard_id() == 0); auto op = sstring("rebuild_with_repair"); if (source_dc.empty()) { @@ -1983,19 +1984,18 @@ future<> repair_service::rebuild_with_repair(locator::token_metadata_ptr tmptr, }); } -future<> repair_service::replace_with_repair(locator::token_metadata_ptr tmptr, std::unordered_set replacing_tokens, std::unordered_set ignore_nodes) { +future<> repair_service::replace_with_repair(locator::token_metadata2_ptr tmptr, std::unordered_set replacing_tokens, std::unordered_set ignore_nodes) { assert(this_shard_id() == 0); auto cloned_tm = co_await tmptr->clone_async(); auto op = sstring("replace_with_repair"); auto& 
topology = tmptr->get_topology(); - auto myip = topology.my_address(); auto myloc = topology.get_location(); auto reason = streaming::stream_reason::replace; // update a cloned version of tmptr // no need to set the original version - auto cloned_tmptr = make_token_metadata_ptr(std::move(cloned_tm)); - cloned_tmptr->update_topology(myip, myloc, locator::node::state::replacing); - co_await cloned_tmptr->update_normal_tokens(replacing_tokens, myip); + auto cloned_tmptr = make_token_metadata2_ptr(std::move(cloned_tm)); + cloned_tmptr->update_topology(tmptr->get_my_id(), myloc, locator::node::state::replacing); + co_await cloned_tmptr->update_normal_tokens(replacing_tokens, tmptr->get_my_id()); co_return co_await do_rebuild_replace_with_repair(std::move(cloned_tmptr), std::move(op), myloc.dc, reason, std::move(ignore_nodes)); } diff --git a/repair/row_level.hh b/repair/row_level.hh index 61251f1bb8..9e27aae540 100644 --- a/repair/row_level.hh +++ b/repair/row_level.hh @@ -141,11 +141,11 @@ public: future<> bootstrap_with_repair(locator::token_metadata2_ptr tmptr, std::unordered_set bootstrap_tokens); future<> decommission_with_repair(locator::token_metadata_ptr tmptr); future<> removenode_with_repair(locator::token_metadata_ptr tmptr, gms::inet_address leaving_node, shared_ptr ops); - future<> rebuild_with_repair(locator::token_metadata_ptr tmptr, sstring source_dc); - future<> replace_with_repair(locator::token_metadata_ptr tmptr, std::unordered_set replacing_tokens, std::unordered_set ignore_nodes); + future<> rebuild_with_repair(locator::token_metadata2_ptr tmptr, sstring source_dc); + future<> replace_with_repair(locator::token_metadata2_ptr tmptr, std::unordered_set replacing_tokens, std::unordered_set ignore_nodes); private: future<> do_decommission_removenode_with_repair(locator::token_metadata_ptr tmptr, gms::inet_address leaving_node, shared_ptr ops); - future<> do_rebuild_replace_with_repair(locator::token_metadata_ptr tmptr, sstring op, sstring source_dc, 
streaming::stream_reason reason, std::unordered_set ignore_nodes); + future<> do_rebuild_replace_with_repair(locator::token_metadata2_ptr tmptr, sstring op, sstring source_dc, streaming::stream_reason reason, std::unordered_set ignore_nodes); // Must be called on shard 0 future<> sync_data_using_repair(sstring keyspace, diff --git a/service/storage_service.cc b/service/storage_service.cc index eb79d949f5..baa8badd44 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -5149,7 +5149,7 @@ void storage_service::run_replace_ops(std::unordered_set& bootstrap_token // Step 7: Sync data for replace if (is_repair_based_node_ops_enabled(streaming::stream_reason::replace)) { slogger.info("replace[{}]: Using repair based node ops to sync data", uuid); - _repair.local().replace_with_repair(get_token_metadata_ptr(), bootstrap_tokens, ctl.ignore_nodes).get(); + _repair.local().replace_with_repair(get_token_metadata_ptr()->get_new_strong(), bootstrap_tokens, ctl.ignore_nodes).get(); } else { slogger.info("replace[{}]: Using streaming based node ops to sync data", uuid); dht::boot_strapper bs(_db, _stream_manager, _abort_source, get_token_metadata_ptr()->get_new()->get_my_id(), _snitch.local()->get_location(), bootstrap_tokens, get_token_metadata_ptr()->get_new_strong()); @@ -5890,7 +5890,7 @@ future<> storage_service::rebuild(sstring source_dc) { slogger.info("rebuild from dc: {}", source_dc == "" ? 
"(any dc)" : source_dc); auto tmptr = ss.get_token_metadata_ptr(); if (ss.is_repair_based_node_ops_enabled(streaming::stream_reason::rebuild)) { - co_await ss._repair.local().rebuild_with_repair(tmptr, std::move(source_dc)); + co_await ss._repair.local().rebuild_with_repair(tmptr->get_new_strong(), std::move(source_dc)); } else { auto streamer = make_lw_shared(ss._db, ss._stream_manager, tmptr->get_new_strong(), ss._abort_source, tmptr->get_new()->get_my_id(), ss._snitch.local()->get_location(), "Rebuild", streaming::stream_reason::rebuild, null_topology_guard); @@ -6526,7 +6526,7 @@ future storage_service::raft_topology_cmd_handler(raft } ignored_ips.insert(*ip); } - co_await _repair.local().replace_with_repair(get_token_metadata_ptr(), rs.ring.value().tokens, std::move(ignored_ips)); + co_await _repair.local().replace_with_repair(get_token_metadata_ptr()->get_new_strong(), rs.ring.value().tokens, std::move(ignored_ips)); } else { dht::boot_strapper bs(_db, _stream_manager, _abort_source, get_token_metadata_ptr()->get_new()->get_my_id(), locator::endpoint_dc_rack{rs.datacenter, rs.rack}, rs.ring.value().tokens, get_token_metadata_ptr()->get_new_strong()); @@ -6597,7 +6597,7 @@ future storage_service::raft_topology_cmd_handler(raft co_await retrier(_rebuild_result, [&] () -> future<> { auto tmptr = get_token_metadata_ptr(); if (is_repair_based_node_ops_enabled(streaming::stream_reason::rebuild)) { - co_await _repair.local().rebuild_with_repair(tmptr, std::move(source_dc)); + co_await _repair.local().rebuild_with_repair(tmptr->get_new_strong(), std::move(source_dc)); } else { auto streamer = make_lw_shared(_db, _stream_manager, tmptr->get_new_strong(), _abort_source, tmptr->get_new()->get_my_id(), _snitch.local()->get_location(), "Rebuild", streaming::stream_reason::rebuild, _topology_state_machine._topology.session); From 7c7dbe377944d730c4e34234c4c36acefa5e5d4a Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Sat, 4 Nov 2023 19:32:52 +0400 Subject: [PATCH 33/51] 
decommission_with_repair, removenode_with_repair -> new token_metadata Just mechanical changes to the new token_metadata. All the boost and topology tests pass with this change. --- repair/repair.cc | 19 ++++++++++--------- repair/row_level.hh | 6 +++--- service/storage_service.cc | 6 +++--- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/repair/repair.cc b/repair/repair.cc index 96bfc988de..b60f4ce76b 100644 --- a/repair/repair.cc +++ b/repair/repair.cc @@ -1669,13 +1669,14 @@ future<> repair_service::bootstrap_with_repair(locator::token_metadata2_ptr tmpt }); } -future<> repair_service::do_decommission_removenode_with_repair(locator::token_metadata_ptr tmptr, gms::inet_address leaving_node, shared_ptr ops) { +future<> repair_service::do_decommission_removenode_with_repair(locator::token_metadata2_ptr tmptr, gms::inet_address leaving_node, shared_ptr ops) { assert(this_shard_id() == 0); using inet_address = gms::inet_address; return seastar::async([this, tmptr = std::move(tmptr), leaving_node = std::move(leaving_node), ops] () mutable { auto& db = get_db().local(); auto& topology = tmptr->get_topology(); auto myip = topology.my_address(); + const auto leaving_node_id = tmptr->get_host_id(leaving_node); auto ks_erms = db.get_non_local_strategy_keyspaces_erms(); auto local_dc = topology.get_datacenter(); bool is_removenode = myip != leaving_node; @@ -1719,15 +1720,15 @@ future<> repair_service::do_decommission_removenode_with_repair(locator::token_m // Find (for each range) all nodes that store replicas for these ranges as well for (auto& r : ranges) { auto end_token = r.end() ? 
r.end()->value() : dht::maximum_token(); - auto eps = get(strat.calculate_natural_endpoints(end_token, *tmptr, false).get0()); + auto eps = strat.calculate_natural_ips(end_token, tmptr).get0(); current_replica_endpoints.emplace(r, std::move(eps)); seastar::thread::maybe_yield(); } - auto temp = tmptr->clone_after_all_left().get0(); + auto temp = locator::make_token_metadata2_ptr(tmptr->clone_after_all_left().get0()); // leaving_node might or might not be 'leaving'. If it was not leaving (that is, removenode // command was used), it is still present in temp and must be removed. - if (temp.is_normal_token_owner(leaving_node)) { - temp.remove_endpoint(leaving_node); + if (temp->is_normal_token_owner(leaving_node_id)) { + temp->remove_endpoint(leaving_node_id); } std::unordered_map range_sources; dht::token_range_vector ranges_for_removenode; @@ -1738,7 +1739,7 @@ future<> repair_service::do_decommission_removenode_with_repair(locator::token_m ops->check_abort(); } auto end_token = r.end() ? 
r.end()->value() : dht::maximum_token(); - const auto new_eps = get(strat.calculate_natural_endpoints(end_token, temp, false).get0()); + const auto new_eps = strat.calculate_natural_ips(end_token, temp).get0(); const auto& current_eps = current_replica_endpoints[r]; std::unordered_set neighbors_set = new_eps.get_set(); bool skip_this_range = false; @@ -1842,7 +1843,7 @@ future<> repair_service::do_decommission_removenode_with_repair(locator::token_m } } } - temp.clear_gently().get(); + temp->clear_gently().get(); if (reason == streaming::stream_reason::decommission) { container().invoke_on_all([nr_ranges_skipped] (repair_service& rs) { rs.get_metrics().decommission_finished_ranges += nr_ranges_skipped; @@ -1864,13 +1865,13 @@ future<> repair_service::do_decommission_removenode_with_repair(locator::token_m }); } -future<> repair_service::decommission_with_repair(locator::token_metadata_ptr tmptr) { +future<> repair_service::decommission_with_repair(locator::token_metadata2_ptr tmptr) { assert(this_shard_id() == 0); auto my_address = tmptr->get_topology().my_address(); return do_decommission_removenode_with_repair(std::move(tmptr), my_address, {}); } -future<> repair_service::removenode_with_repair(locator::token_metadata_ptr tmptr, gms::inet_address leaving_node, shared_ptr ops) { +future<> repair_service::removenode_with_repair(locator::token_metadata2_ptr tmptr, gms::inet_address leaving_node, shared_ptr ops) { assert(this_shard_id() == 0); return do_decommission_removenode_with_repair(std::move(tmptr), std::move(leaving_node), std::move(ops)).then([this] { rlogger.debug("Triggering off-strategy compaction for all non-system tables on removenode completion"); diff --git a/repair/row_level.hh b/repair/row_level.hh index 9e27aae540..d82ac83f9b 100644 --- a/repair/row_level.hh +++ b/repair/row_level.hh @@ -139,12 +139,12 @@ public: // The tokens are the tokens assigned to the bootstrap node. 
// all repair-based node operation entry points must be called on shard 0 future<> bootstrap_with_repair(locator::token_metadata2_ptr tmptr, std::unordered_set bootstrap_tokens); - future<> decommission_with_repair(locator::token_metadata_ptr tmptr); - future<> removenode_with_repair(locator::token_metadata_ptr tmptr, gms::inet_address leaving_node, shared_ptr ops); + future<> decommission_with_repair(locator::token_metadata2_ptr tmptr); + future<> removenode_with_repair(locator::token_metadata2_ptr tmptr, gms::inet_address leaving_node, shared_ptr ops); future<> rebuild_with_repair(locator::token_metadata2_ptr tmptr, sstring source_dc); future<> replace_with_repair(locator::token_metadata2_ptr tmptr, std::unordered_set replacing_tokens, std::unordered_set ignore_nodes); private: - future<> do_decommission_removenode_with_repair(locator::token_metadata_ptr tmptr, gms::inet_address leaving_node, shared_ptr ops); + future<> do_decommission_removenode_with_repair(locator::token_metadata2_ptr tmptr, gms::inet_address leaving_node, shared_ptr ops); future<> do_rebuild_replace_with_repair(locator::token_metadata2_ptr tmptr, sstring op, sstring source_dc, streaming::stream_reason reason, std::unordered_set ignore_nodes); // Must be called on shard 0 diff --git a/service/storage_service.cc b/service/storage_service.cc index baa8badd44..1075182dce 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -5548,7 +5548,7 @@ future storage_service::node_ops_cmd_handler(gms::inet_ad for (auto& node : req.leaving_nodes) { if (is_repair_based_node_ops_enabled(streaming::stream_reason::removenode)) { slogger.info("removenode[{}]: Started to sync data for removing node={} using repair, coordinator={}", req.ops_uuid, node, coordinator); - _repair.local().removenode_with_repair(get_token_metadata_ptr(), node, ops).get(); + _repair.local().removenode_with_repair(get_token_metadata_ptr()->get_new_strong(), node, ops).get(); } else { slogger.info("removenode[{}]: 
Started to sync data for removing node={} using stream, coordinator={}", req.ops_uuid, node, coordinator); removenode_with_stream(node, topo_guard, as).get(); @@ -5999,7 +5999,7 @@ future<> storage_service::unbootstrap() { slogger.info("Finished batchlog replay for decommission"); if (is_repair_based_node_ops_enabled(streaming::stream_reason::decommission)) { - co_await _repair.local().decommission_with_repair(get_token_metadata_ptr()); + co_await _repair.local().decommission_with_repair(get_token_metadata_ptr()->get_new_strong()); } else { std::unordered_map> ranges_to_stream; @@ -6583,7 +6583,7 @@ future storage_service::raft_topology_cmd_handler(raft ignored_ips.push_back(*ip); } auto ops = seastar::make_shared(node_ops_id::create_random_id(), as, std::move(ignored_ips)); - return _repair.local().removenode_with_repair(get_token_metadata_ptr(), *ip, ops); + return _repair.local().removenode_with_repair(get_token_metadata_ptr()->get_new_strong(), *ip, ops); } else { return removenode_with_stream(*ip, _topology_state_machine._topology.session, as); } From 933acb0f72a03a68baed700573f605d71b5f1519 Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Mon, 6 Nov 2023 10:37:14 +0400 Subject: [PATCH 34/51] storage_service: get_changed_ranges_for_leaving: use new token_metadata --- service/storage_service.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/service/storage_service.cc b/service/storage_service.cc index 1075182dce..cb269cb2e0 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -5942,12 +5942,12 @@ storage_service::get_changed_ranges_for_leaving(locator::vnode_effective_replica co_await coroutine::maybe_yield(); } - auto temp = co_await get_token_metadata_ptr()->clone_after_all_left(); + auto temp = locator::make_token_metadata2_ptr(co_await get_token_metadata_ptr()->get_new()->clone_after_all_left()); // endpoint might or might not be 'leaving'. 
If it was not leaving (that is, removenode // command was used), it is still present in temp and must be removed. - if (temp.is_normal_token_owner(endpoint)) { - temp.remove_endpoint(endpoint); + if (const auto host_id = temp->get_host_id_if_known(endpoint); host_id && temp->is_normal_token_owner(*host_id)) { + temp->remove_endpoint(*host_id); } std::unordered_multimap changed_ranges; @@ -5960,7 +5960,7 @@ storage_service::get_changed_ranges_for_leaving(locator::vnode_effective_replica const auto& rs = erm->get_replication_strategy(); for (auto& r : ranges) { auto end_token = r.end() ? r.end()->value() : dht::maximum_token(); - auto new_replica_endpoints = get(co_await rs.calculate_natural_endpoints(end_token, temp, false)); + auto new_replica_endpoints = co_await rs.calculate_natural_ips(end_token, temp); auto rg = current_replica_endpoints.equal_range(r); for (auto it = rg.first; it != rg.second; it++) { @@ -5988,7 +5988,7 @@ storage_service::get_changed_ranges_for_leaving(locator::vnode_effective_replica // E.g. everywhere_replication_strategy co_await coroutine::maybe_yield(); } - co_await temp.clear_gently(); + co_await temp->clear_gently(); co_return changed_ranges; } From 80ccbc0d535b098b80bc9f6b0f2117e88b8ec738 Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Tue, 21 Nov 2023 11:43:28 +0400 Subject: [PATCH 35/51] calculate_natural_endpoints: switch to token_metadata2 All usages of calculate_natural_endpoints are migrated, now we can change its interface to take token_metadata2 instead of token_metadata. 
--- locator/abstract_replication_strategy.cc | 8 +++--- locator/abstract_replication_strategy.hh | 21 +------------- locator/everywhere_replication_strategy.cc | 10 +++---- locator/everywhere_replication_strategy.hh | 2 +- locator/local_strategy.cc | 6 ++-- locator/local_strategy.hh | 2 +- locator/network_topology_strategy.cc | 29 +++++++++----------- locator/network_topology_strategy.hh | 4 +-- locator/simple_strategy.cc | 8 ++---- locator/simple_strategy.hh | 2 +- test/boost/network_topology_strategy_test.cc | 2 +- 11 files changed, 33 insertions(+), 61 deletions(-) diff --git a/locator/abstract_replication_strategy.cc b/locator/abstract_replication_strategy.cc index 751c1519a5..3fe2dfe257 100644 --- a/locator/abstract_replication_strategy.cc +++ b/locator/abstract_replication_strategy.cc @@ -69,8 +69,8 @@ void abstract_replication_strategy::validate_replication_strategy(const sstring& } future abstract_replication_strategy::calculate_natural_ips(const token& search_token, const token_metadata2_ptr& tm) const { - const auto host_ids = co_await calculate_natural_endpoints(search_token, token_metadata(tm), true); - co_return resolve_endpoints(get(host_ids), *tm); + const auto host_ids = co_await calculate_natural_endpoints(search_token, *tm); + co_return resolve_endpoints(host_ids, *tm); } using strategy_class_registry = class_registry< @@ -279,7 +279,7 @@ abstract_replication_strategy::get_ranges(locator::host_id ep, const token_metad // Using the common path would make the function quadratic in the number of endpoints. 
should_add = true; } else { - auto eps = get(co_await calculate_natural_endpoints(tok, tm, true)); + auto eps = co_await calculate_natural_endpoints(tok, *tm.get_new()); should_add = eps.contains(ep); } if (should_add) { @@ -360,7 +360,7 @@ abstract_replication_strategy::get_pending_address_ranges(const token_metadata2_ temp->update_topology(pending_address, std::move(dr)); co_await temp->update_normal_tokens(pending_tokens, pending_address); for (const auto& t : temp->sorted_tokens()) { - auto eps = get(co_await calculate_natural_endpoints(t, token_metadata(temp), true)); + auto eps = co_await calculate_natural_endpoints(t, *temp); if (eps.contains(pending_address)) { dht::token_range_vector r = temp->get_primary_ranges_for(t); rslogger.debug("get_pending_address_ranges: token={} primary_range={} endpoint={}", t, r, pending_address); diff --git a/locator/abstract_replication_strategy.hh b/locator/abstract_replication_strategy.hh index 87d81f1e97..2e4a0eddd3 100644 --- a/locator/abstract_replication_strategy.hh +++ b/locator/abstract_replication_strategy.hh @@ -54,11 +54,6 @@ using replication_map = std::unordered_map; using host_id_set = utils::basic_sequenced_set; -using natural_ep_type = std::variant; -template -using set_type = std::conditional_t, endpoint_set, host_id_set>; -template -using vector_type = std::conditional_t, inet_address_vector_replica_set, host_id_vector_replica_set>; class vnode_effective_replication_map; class effective_replication_map_factory; @@ -92,20 +87,6 @@ protected: rslogger.debug(fmt, std::forward(args)...); } - template - static NodeId get_self_id(const generic_token_metadata& tm) { - if constexpr(std::is_same_v) { - return tm.get_topology().my_address(); - } else { - return NodeId{}; - } - } - - template - static future select_tm(Func&& func, const token_metadata& tm, bool use_host_id) { - return use_host_id ? 
func(*tm.template get_new()) : func(tm); - } - public: using ptr_type = seastar::shared_ptr; @@ -122,7 +103,7 @@ public: // is small, that implementation may not yield since by itself it won't cause a reactor stall (assuming practical // cluster sizes and number of tokens per node). The caller is responsible for yielding if they call this function // in a loop. - virtual future calculate_natural_endpoints(const token& search_token, const token_metadata& tm, bool use_host_id) const = 0; + virtual future calculate_natural_endpoints(const token& search_token, const token_metadata2& tm) const = 0; future calculate_natural_ips(const token& search_token, const token_metadata2_ptr& tm) const; virtual ~abstract_replication_strategy() {} diff --git a/locator/everywhere_replication_strategy.cc b/locator/everywhere_replication_strategy.cc index 87851df8d5..d946bb9d48 100644 --- a/locator/everywhere_replication_strategy.cc +++ b/locator/everywhere_replication_strategy.cc @@ -20,15 +20,13 @@ everywhere_replication_strategy::everywhere_replication_strategy(const replicati _natural_endpoints_depend_on_token = false; } -future everywhere_replication_strategy::calculate_natural_endpoints(const token& search_token, const token_metadata& tm, bool use_host_id) const { - return select_tm([this](const generic_token_metadata& tm) -> future { +future everywhere_replication_strategy::calculate_natural_endpoints(const token& search_token, const token_metadata2& tm) const { if (tm.sorted_tokens().empty()) { - set_type result{vector_type({this->get_self_id(tm)})}; - return make_ready_future(std::move(result)); + host_id_set result{host_id_vector_replica_set({host_id{}})}; + return make_ready_future(std::move(result)); } const auto& all_endpoints = tm.get_all_endpoints(); - return make_ready_future(set_type(all_endpoints.begin(), all_endpoints.end())); - }, tm, use_host_id); + return make_ready_future(host_id_set(all_endpoints.begin(), all_endpoints.end())); } size_t 
everywhere_replication_strategy::get_replication_factor(const token_metadata& tm) const { diff --git a/locator/everywhere_replication_strategy.hh b/locator/everywhere_replication_strategy.hh index ada8ea81ee..6e072d3fc6 100644 --- a/locator/everywhere_replication_strategy.hh +++ b/locator/everywhere_replication_strategy.hh @@ -18,7 +18,7 @@ class everywhere_replication_strategy : public abstract_replication_strategy { public: everywhere_replication_strategy(const replication_strategy_config_options& config_options); - virtual future calculate_natural_endpoints(const token& search_token, const token_metadata& tm, bool host_id) const override; + virtual future calculate_natural_endpoints(const token& search_token, const token_metadata2& tm) const override; virtual void validate_options(const gms::feature_service&) const override { /* noop */ } diff --git a/locator/local_strategy.cc b/locator/local_strategy.cc index 34cc010aff..71bba87e73 100644 --- a/locator/local_strategy.cc +++ b/locator/local_strategy.cc @@ -18,10 +18,8 @@ local_strategy::local_strategy(const replication_strategy_config_options& config _natural_endpoints_depend_on_token = false; } -future local_strategy::calculate_natural_endpoints(const token& t, const token_metadata& tm, bool use_host_id) const { - return select_tm([this](const generic_token_metadata& tm) -> future { - return make_ready_future(set_type({this->get_self_id(tm)})); - }, tm, use_host_id); +future local_strategy::calculate_natural_endpoints(const token& t, const token_metadata2& tm) const { + return make_ready_future(host_id_set{host_id{}}); } void local_strategy::validate_options(const gms::feature_service&) const { diff --git a/locator/local_strategy.hh b/locator/local_strategy.hh index e87085e45d..a93515b94d 100644 --- a/locator/local_strategy.hh +++ b/locator/local_strategy.hh @@ -27,7 +27,7 @@ public: virtual ~local_strategy() {}; virtual size_t get_replication_factor(const token_metadata&) const override; - virtual future 
calculate_natural_endpoints(const token& search_token, const token_metadata& tm, bool host_id) const override; + virtual future calculate_natural_endpoints(const token& search_token, const token_metadata2& tm) const override; virtual void validate_options(const gms::feature_service&) const override; diff --git a/locator/network_topology_strategy.cc b/locator/network_topology_strategy.cc index 4a18963820..67d06b3100 100644 --- a/locator/network_topology_strategy.cc +++ b/locator/network_topology_strategy.cc @@ -76,14 +76,13 @@ network_topology_strategy::network_topology_strategy( using endpoint_dc_rack_set = std::unordered_set; -template class natural_endpoints_tracker { /** * Endpoint adder applying the replication rules for a given DC. */ struct data_center_endpoints { /** List accepted endpoints get pushed into. */ - set_type& _endpoints; + host_id_set& _endpoints; /** * Racks encountered so far. Replicas are put into separate racks while possible. @@ -96,7 +95,7 @@ class natural_endpoints_tracker { size_t _rf_left; ssize_t _acceptable_rack_repeats; - data_center_endpoints(size_t rf, size_t rack_count, size_t node_count, set_type& endpoints, endpoint_dc_rack_set& racks) + data_center_endpoints(size_t rf, size_t rack_count, size_t node_count, host_id_set& endpoints, endpoint_dc_rack_set& racks) : _endpoints(endpoints) , _racks(racks) // If there aren't enough nodes in this DC to fill the RF, the number of nodes is the effective RF. @@ -110,7 +109,7 @@ class natural_endpoints_tracker { * Attempts to add an endpoint to the replicas for this datacenter, adding to the endpoints set if successful. * Returns true if the endpoint was added, and this datacenter does not require further replicas. 
*/ - bool add_endpoint_and_check_if_done(const NodeId& ep, const endpoint_dc_rack& location) { + bool add_endpoint_and_check_if_done(const host_id& ep, const endpoint_dc_rack& location) { if (done()) { return false; } @@ -161,7 +160,7 @@ class natural_endpoints_tracker { } }; - const generic_token_metadata& _tm; + const token_metadata2& _tm; const topology& _tp; std::unordered_map _dc_rep_factor; @@ -169,7 +168,7 @@ class natural_endpoints_tracker { // We want to preserve insertion order so that the first added endpoint // becomes primary. // - set_type _replicas; + host_id_set _replicas; // tracks the racks we have already placed replicas in endpoint_dc_rack_set _seen_racks; @@ -190,7 +189,7 @@ class natural_endpoints_tracker { size_t _dcs_to_fill; public: - natural_endpoints_tracker(const generic_token_metadata& tm, const std::unordered_map& dc_rep_factor) + natural_endpoints_tracker(const token_metadata2& tm, const std::unordered_map& dc_rep_factor) : _tm(tm) , _tp(_tm.get_topology()) , _dc_rep_factor(dc_rep_factor) @@ -220,7 +219,7 @@ public: } } - bool add_endpoint_and_check_if_done(NodeId ep) { + bool add_endpoint_and_check_if_done(host_id ep) { auto& loc = _tp.get_location(ep); auto i = _dcs.find(loc.dc); if (i != _dcs.end() && i->second.add_endpoint_and_check_if_done(ep, loc)) { @@ -233,29 +232,27 @@ public: return _dcs_to_fill == 0; } - set_type& replicas() noexcept { + host_id_set& replicas() noexcept { return _replicas; } }; -future +future network_topology_strategy::calculate_natural_endpoints( - const token& search_token, const token_metadata& tm, bool use_host_id) const { + const token& search_token, const token_metadata2& tm) const { - return select_tm([&](const generic_token_metadata& tm) -> future { - natural_endpoints_tracker tracker(tm, _dc_rep_factor); + natural_endpoints_tracker tracker(tm, _dc_rep_factor); for (auto& next : tm.ring_range(search_token)) { co_await coroutine::maybe_yield(); - NodeId ep = *tm.get_endpoint(next); + host_id ep = 
*tm.get_endpoint(next); if (tracker.add_endpoint_and_check_if_done(ep)) { break; } } co_return std::move(tracker.replicas()); - }, tm, use_host_id); } void network_topology_strategy::validate_options(const gms::feature_service& fs) const { @@ -309,7 +306,7 @@ future network_topology_strategy::allocate_tablets_for_new_table(sch auto token_range = tm->get_new()->ring_range(dht::token::get_random_token()); for (tablet_id tb : tablets.tablet_ids()) { - natural_endpoints_tracker tracker(*tm->get_new(), _dc_rep_factor); + natural_endpoints_tracker tracker(*tm->get_new(), _dc_rep_factor); while (true) { co_await coroutine::maybe_yield(); diff --git a/locator/network_topology_strategy.hh b/locator/network_topology_strategy.hh index 57b8ce1ad9..afc24bf610 100644 --- a/locator/network_topology_strategy.hh +++ b/locator/network_topology_strategy.hh @@ -50,8 +50,8 @@ protected: * calculate endpoints in one pass through the tokens by tracking our * progress in each DC, rack etc. */ - virtual future calculate_natural_endpoints( - const token& search_token, const token_metadata& tm, bool host_id) const override; + virtual future calculate_natural_endpoints( + const token& search_token, const token_metadata2& tm) const override; virtual void validate_options(const gms::feature_service&) const override; diff --git a/locator/simple_strategy.cc b/locator/simple_strategy.cc index dfbc0d7dad..6ba63db0cb 100644 --- a/locator/simple_strategy.cc +++ b/locator/simple_strategy.cc @@ -33,16 +33,15 @@ simple_strategy::simple_strategy(const replication_strategy_config_options& conf } } -future simple_strategy::calculate_natural_endpoints(const token& t, const token_metadata& tm, bool use_host_id) const { - return select_tm([&](const generic_token_metadata& tm) -> future { +future simple_strategy::calculate_natural_endpoints(const token& t, const token_metadata2& tm) const { const std::vector& tokens = tm.sorted_tokens(); if (tokens.empty()) { - co_return set_type{}; + co_return host_id_set{}; 
} size_t replicas = _replication_factor; - set_type endpoints; + host_id_set endpoints; endpoints.reserve(replicas); for (auto& token : tm.ring_range(t)) { @@ -62,7 +61,6 @@ future simple_strategy::calculate_natural_endpoints(const token } co_return endpoints; - }, tm, use_host_id); } size_t simple_strategy::get_replication_factor(const token_metadata&) const { diff --git a/locator/simple_strategy.hh b/locator/simple_strategy.hh index 427aa2a24b..9385510147 100644 --- a/locator/simple_strategy.hh +++ b/locator/simple_strategy.hh @@ -26,7 +26,7 @@ public: return true; } - virtual future calculate_natural_endpoints(const token& search_token, const token_metadata& tm, bool host_id) const override; + virtual future calculate_natural_endpoints(const token& search_token, const token_metadata2& tm) const override; private: size_t _replication_factor = 1; }; diff --git a/test/boost/network_topology_strategy_test.cc b/test/boost/network_topology_strategy_test.cc index eeb6ed8896..97e70f8fb1 100644 --- a/test/boost/network_topology_strategy_test.cc +++ b/test/boost/network_topology_strategy_test.cc @@ -654,7 +654,7 @@ static void test_equivalence(const shared_token_metadata& stm, const locator::to for (size_t i = 0; i < 1000; ++i) { auto token = dht::token::get_random_token(); auto expected = calculate_natural_endpoints(token, tm, topo, datacenters); - auto actual = get(nts.calculate_natural_endpoints(token, token_metadata(stm.get()->get_new_strong()), true).get0()); + auto actual = nts.calculate_natural_endpoints(token, *stm.get()->get_new()).get0(); // Because the old algorithm does not put the nodes in the correct order in the case where more replicas // are required than there are racks in a dc, we accept different order as long as the primary From b2fb650098aa3add6b239c2bec246eb8bb0f4a2c Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Wed, 6 Dec 2023 12:13:29 +0400 Subject: [PATCH 36/51] calculate_natural_endpoints: fix indentation --- 
locator/everywhere_replication_strategy.cc | 12 +++--- locator/local_strategy.cc | 2 +- locator/network_topology_strategy.cc | 16 ++++---- locator/simple_strategy.cc | 44 +++++++++++----------- 4 files changed, 37 insertions(+), 37 deletions(-) diff --git a/locator/everywhere_replication_strategy.cc b/locator/everywhere_replication_strategy.cc index d946bb9d48..48a757b6da 100644 --- a/locator/everywhere_replication_strategy.cc +++ b/locator/everywhere_replication_strategy.cc @@ -21,12 +21,12 @@ everywhere_replication_strategy::everywhere_replication_strategy(const replicati } future everywhere_replication_strategy::calculate_natural_endpoints(const token& search_token, const token_metadata2& tm) const { - if (tm.sorted_tokens().empty()) { - host_id_set result{host_id_vector_replica_set({host_id{}})}; - return make_ready_future(std::move(result)); - } - const auto& all_endpoints = tm.get_all_endpoints(); - return make_ready_future(host_id_set(all_endpoints.begin(), all_endpoints.end())); + if (tm.sorted_tokens().empty()) { + host_id_set result{host_id_vector_replica_set({host_id{}})}; + return make_ready_future(std::move(result)); + } + const auto& all_endpoints = tm.get_all_endpoints(); + return make_ready_future(host_id_set(all_endpoints.begin(), all_endpoints.end())); } size_t everywhere_replication_strategy::get_replication_factor(const token_metadata& tm) const { diff --git a/locator/local_strategy.cc b/locator/local_strategy.cc index 71bba87e73..d5dcbae3a1 100644 --- a/locator/local_strategy.cc +++ b/locator/local_strategy.cc @@ -19,7 +19,7 @@ local_strategy::local_strategy(const replication_strategy_config_options& config } future local_strategy::calculate_natural_endpoints(const token& t, const token_metadata2& tm) const { - return make_ready_future(host_id_set{host_id{}}); + return make_ready_future(host_id_set{host_id{}}); } void local_strategy::validate_options(const gms::feature_service&) const { diff --git a/locator/network_topology_strategy.cc 
b/locator/network_topology_strategy.cc index 67d06b3100..ca25ff2acd 100644 --- a/locator/network_topology_strategy.cc +++ b/locator/network_topology_strategy.cc @@ -241,18 +241,18 @@ future network_topology_strategy::calculate_natural_endpoints( const token& search_token, const token_metadata2& tm) const { - natural_endpoints_tracker tracker(tm, _dc_rep_factor); + natural_endpoints_tracker tracker(tm, _dc_rep_factor); - for (auto& next : tm.ring_range(search_token)) { - co_await coroutine::maybe_yield(); + for (auto& next : tm.ring_range(search_token)) { + co_await coroutine::maybe_yield(); - host_id ep = *tm.get_endpoint(next); - if (tracker.add_endpoint_and_check_if_done(ep)) { - break; - } + host_id ep = *tm.get_endpoint(next); + if (tracker.add_endpoint_and_check_if_done(ep)) { + break; } + } - co_return std::move(tracker.replicas()); + co_return std::move(tracker.replicas()); } void network_topology_strategy::validate_options(const gms::feature_service& fs) const { diff --git a/locator/simple_strategy.cc b/locator/simple_strategy.cc index 6ba63db0cb..1cde3d6014 100644 --- a/locator/simple_strategy.cc +++ b/locator/simple_strategy.cc @@ -34,33 +34,33 @@ simple_strategy::simple_strategy(const replication_strategy_config_options& conf } future simple_strategy::calculate_natural_endpoints(const token& t, const token_metadata2& tm) const { - const std::vector& tokens = tm.sorted_tokens(); + const std::vector& tokens = tm.sorted_tokens(); - if (tokens.empty()) { - co_return host_id_set{}; + if (tokens.empty()) { + co_return host_id_set{}; + } + + size_t replicas = _replication_factor; + host_id_set endpoints; + endpoints.reserve(replicas); + + for (auto& token : tm.ring_range(t)) { + // If the number of nodes in the cluster is smaller than the desired + // replication factor we should return the loop when endpoints already + // contains all the nodes in the cluster because no more nodes could be + // added to endpoints lists. 
+ if (endpoints.size() == replicas || endpoints.size() == tm.count_normal_token_owners()) { + break; } - size_t replicas = _replication_factor; - host_id_set endpoints; - endpoints.reserve(replicas); + auto ep = tm.get_endpoint(token); + assert(ep); - for (auto& token : tm.ring_range(t)) { - // If the number of nodes in the cluster is smaller than the desired - // replication factor we should return the loop when endpoints already - // contains all the nodes in the cluster because no more nodes could be - // added to endpoints lists. - if (endpoints.size() == replicas || endpoints.size() == tm.count_normal_token_owners()) { - break; - } + endpoints.push_back(*ep); + co_await coroutine::maybe_yield(); + } - auto ep = tm.get_endpoint(token); - assert(ep); - - endpoints.push_back(*ep); - co_await coroutine::maybe_yield(); - } - - co_return endpoints; + co_return endpoints; } size_t simple_strategy::get_replication_factor(const token_metadata&) const { From 7eb78636351e46ff7cc69bbb59bb7b14462fbc95 Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Mon, 6 Nov 2023 12:33:55 +0400 Subject: [PATCH 37/51] cdc: switch to token_metadata2 Change the token_metadata type to token_metadata2 in the signatures of CDC-related methods in storage_service and cdc/generation. Use get_new_strong to get a pointer to the new host_id-based token_metadata from the inet_address-based one, living in the shared_token_metadata. The starting point of the patch is in storage_service::handle_global_request. We change the tmptr type to token_metadata2 and propagate the change down the call chains. This includes token-related methods of the boot_strapper class. 
--- cdc/generation.cc | 19 ++++++++++--------- cdc/generation.hh | 4 ++-- dht/boot_strapper.cc | 6 +++--- dht/boot_strapper.hh | 6 +++--- service/storage_service.cc | 27 ++++++++++----------------- 5 files changed, 28 insertions(+), 34 deletions(-) diff --git a/cdc/generation.cc b/cdc/generation.cc index 1705d2e8f1..76a6552724 100644 --- a/cdc/generation.cc +++ b/cdc/generation.cc @@ -192,7 +192,7 @@ bool should_propose_first_generation(const gms::inet_address& me, const gms::gos }) == stop_iteration::no; } -bool is_cdc_generation_optimal(const cdc::topology_description& gen, const locator::token_metadata& tm) { +bool is_cdc_generation_optimal(const cdc::topology_description& gen, const locator::token_metadata2& tm) { if (tm.sorted_tokens().size() != gen.entries().size()) { // We probably have garbage streams from old generations cdc_log.info("Generation size does not match the token ring"); @@ -324,7 +324,7 @@ topology_description limit_number_of_streams_if_needed(topology_description&& de } // Compute a set of tokens that split the token ring into vnodes. 
-static auto get_tokens(const std::unordered_set& bootstrap_tokens, const locator::token_metadata_ptr tmptr) { +static auto get_tokens(const std::unordered_set& bootstrap_tokens, const locator::token_metadata2_ptr tmptr) { auto tokens = tmptr->sorted_tokens(); auto it = tokens.insert(tokens.end(), bootstrap_tokens.begin(), bootstrap_tokens.end()); std::sort(it, tokens.end()); @@ -352,7 +352,7 @@ static token_range_description create_token_range_description( cdc::topology_description make_new_generation_description( const std::unordered_set& bootstrap_tokens, const noncopyable_function(dht::token)>& get_sharding_info, - const locator::token_metadata_ptr tmptr) { + const locator::token_metadata2_ptr tmptr) { const auto tokens = get_tokens(bootstrap_tokens, tmptr); utils::chunked_vector vnode_descriptions; @@ -378,7 +378,7 @@ db_clock::time_point new_generation_timestamp(bool add_delay, std::chrono::milli } future generation_service::legacy_make_new_generation(const std::unordered_set& bootstrap_tokens, bool add_delay) { - const locator::token_metadata_ptr tmptr = _token_metadata.get(); + const locator::token_metadata2_ptr tmptr = _token_metadata.get()->get_new_strong(); // Fetch sharding parameters for a node that owns vnode ending with this token // using gossiped application states. @@ -391,8 +391,9 @@ future generation_service::legacy_make_new_generation(const throw std::runtime_error( format("Can't find endpoint for token {}", end)); } - auto sc = get_shard_count(*endpoint, _gossiper); - return {sc > 0 ? sc : 1, get_sharding_ignore_msb(*endpoint, _gossiper)}; + const auto ep = tmptr->get_endpoint_for_host_id(*endpoint); + auto sc = get_shard_count(ep, _gossiper); + return {sc > 0 ? 
sc : 1, get_sharding_ignore_msb(ep, _gossiper)}; } }; @@ -845,7 +846,7 @@ future<> generation_service::check_and_repair_cdc_streams() { } }); - auto tmptr = _token_metadata.get(); + auto tmptr = _token_metadata.get()->get_new_strong(); auto sys_dist_ks = get_sys_dist_ks(); bool should_regenerate = false; @@ -987,7 +988,7 @@ future<> generation_service::legacy_handle_cdc_generation(std::optionalcount_normal_token_owners(); }, + [tmptr = _token_metadata.get()->get_new_strong()] { return tmptr->count_normal_token_owners(); }, _abort_src); } } @@ -1004,7 +1005,7 @@ void generation_service::legacy_async_handle_cdc_generation(cdc::generation_id g if (using_this_gen) { cdc_log.info("Starting to use generation {}", gen_id); co_await update_streams_description(gen_id, svc->get_sys_dist_ks(), - [tmptr = svc->_token_metadata.get()] { return tmptr->count_normal_token_owners(); }, + [tmptr = svc->_token_metadata.get()->get_new_strong()] { return tmptr->count_normal_token_owners(); }, svc->_abort_src); } co_return; diff --git a/cdc/generation.hh b/cdc/generation.hh index 71d5a09637..d61a800611 100644 --- a/cdc/generation.hh +++ b/cdc/generation.hh @@ -137,7 +137,7 @@ bool should_propose_first_generation(const gms::inet_address& me, const gms::gos * Checks if the CDC generation is optimal, which is true if its `topology_description` is consistent * with `token_metadata`. 
*/ -bool is_cdc_generation_optimal(const cdc::topology_description& gen, const locator::token_metadata& tm); +bool is_cdc_generation_optimal(const cdc::topology_description& gen, const locator::token_metadata2& tm); /* * Generate a set of CDC stream identifiers such that for each shard @@ -157,7 +157,7 @@ bool is_cdc_generation_optimal(const cdc::topology_description& gen, const locat cdc::topology_description make_new_generation_description( const std::unordered_set& bootstrap_tokens, const noncopyable_function (dht::token)>& get_sharding_info, - const locator::token_metadata_ptr); + const locator::token_metadata2_ptr); db_clock::time_point new_generation_timestamp(bool add_delay, std::chrono::milliseconds ring_delay); diff --git a/dht/boot_strapper.cc b/dht/boot_strapper.cc index 0de4e84e0d..b9fc52a361 100644 --- a/dht/boot_strapper.cc +++ b/dht/boot_strapper.cc @@ -63,7 +63,7 @@ future<> boot_strapper::bootstrap(streaming::stream_reason reason, gms::gossiper } } -std::unordered_set boot_strapper::get_random_bootstrap_tokens(const token_metadata_ptr tmptr, size_t num_tokens, dht::check_token_endpoint check) { +std::unordered_set boot_strapper::get_random_bootstrap_tokens(const token_metadata2_ptr tmptr, size_t num_tokens, dht::check_token_endpoint check) { if (num_tokens < 1) { throw std::runtime_error("num_tokens must be >= 1"); } @@ -77,7 +77,7 @@ std::unordered_set boot_strapper::get_random_bootstrap_tokens(const token return tokens; } -std::unordered_set boot_strapper::get_bootstrap_tokens(const token_metadata_ptr tmptr, const db::config& cfg, dht::check_token_endpoint check) { +std::unordered_set boot_strapper::get_bootstrap_tokens(const token_metadata2_ptr tmptr, const db::config& cfg, dht::check_token_endpoint check) { std::unordered_set initial_tokens; sstring tokens_string = cfg.initial_token(); try { @@ -104,7 +104,7 @@ std::unordered_set boot_strapper::get_bootstrap_tokens(const token_metada return get_random_bootstrap_tokens(tmptr, cfg.num_tokens(), 
check); } -std::unordered_set boot_strapper::get_random_tokens(const token_metadata_ptr tmptr, size_t num_tokens) { +std::unordered_set boot_strapper::get_random_tokens(const token_metadata2_ptr tmptr, size_t num_tokens) { std::unordered_set tokens; while (tokens.size() < num_tokens) { auto token = dht::token::get_random_token(); diff --git a/dht/boot_strapper.hh b/dht/boot_strapper.hh index 599ea0ee84..5f88257f05 100644 --- a/dht/boot_strapper.hh +++ b/dht/boot_strapper.hh @@ -62,14 +62,14 @@ public: * otherwise, if num_tokens == 1, pick a token to assume half the load of the most-loaded node. * else choose num_tokens tokens at random */ - static std::unordered_set get_bootstrap_tokens(const token_metadata_ptr tmptr, const db::config& cfg, check_token_endpoint check); + static std::unordered_set get_bootstrap_tokens(const token_metadata2_ptr tmptr, const db::config& cfg, check_token_endpoint check); /** * Same as above but does not consult initialtoken config */ - static std::unordered_set get_random_bootstrap_tokens(const token_metadata_ptr tmptr, size_t num_tokens, check_token_endpoint check); + static std::unordered_set get_random_bootstrap_tokens(const token_metadata2_ptr tmptr, size_t num_tokens, check_token_endpoint check); - static std::unordered_set get_random_tokens(const token_metadata_ptr tmptr, size_t num_tokens); + static std::unordered_set get_random_tokens(const token_metadata2_ptr tmptr, size_t num_tokens); #if 0 public static class StringSerializer implements IVersionedSerializer { diff --git a/service/storage_service.cc b/service/storage_service.cc index cb269cb2e0..0eefa7f8bf 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -1208,7 +1208,7 @@ class topology_coordinator { // If there's a bootstrapping node, its tokens should be included in the new generation. // Pass them and a reference to the bootstrapping node's replica_state through `binfo`. 
future>> prepare_new_cdc_generation_data( - locator::token_metadata_ptr tmptr, const group0_guard& guard, std::optional binfo) { + locator::token_metadata2_ptr tmptr, const group0_guard& guard, std::optional binfo) { auto get_sharding_info = [&] (dht::token end) -> std::pair { if (binfo && binfo->bootstrap_tokens.contains(end)) { return {binfo->rs.shard_count, binfo->rs.ignore_msb}; @@ -1223,18 +1223,11 @@ class topology_coordinator { " can't find endpoint for token {}", end)); } - auto id = tmptr->get_host_id_if_known(*ep); - if (!id) { - on_internal_error(slogger, ::format( - "raft topology: make_new_cdc_generation_data: get_sharding_info:" - " can't find host ID for endpoint {}, owner of token {}", *ep, end)); - } - - auto ptr = _topo_sm._topology.find(raft::server_id{id->uuid()}); + auto ptr = _topo_sm._topology.find(raft::server_id{ep->uuid()}); if (!ptr) { on_internal_error(slogger, ::format( "raft topology: make_new_cdc_generation_data: get_sharding_info:" - " couldn't find node {} in topology, owner of token {}", *id, end)); + " couldn't find node {} in topology, owner of token {}", *ep, end)); } auto& rs = ptr->second; @@ -1277,7 +1270,7 @@ class topology_coordinator { // (bootstrapping is quick if there is no data in the cluster, but usually if one has 100 nodes they // have tons of data, so indeed streaming/repair will take much longer (hours/days)). 
future> prepare_and_broadcast_cdc_generation_data( - locator::token_metadata_ptr tmptr, group0_guard guard, std::optional binfo) { + locator::token_metadata2_ptr tmptr, group0_guard guard, std::optional binfo) { auto [gen_uuid, gen_mutations] = co_await prepare_new_cdc_generation_data(tmptr, guard, binfo); if (gen_mutations.empty()) { @@ -1450,7 +1443,7 @@ class topology_coordinator { case global_topology_request::new_cdc_generation: { slogger.info("raft topology: new CDC generation requested"); - auto tmptr = get_token_metadata_ptr(); + auto tmptr = get_token_metadata_ptr()->get_new_strong(); auto [gen_uuid, guard_, mutation] = co_await prepare_and_broadcast_cdc_generation_data(tmptr, std::move(guard), std::nullopt); guard = std::move(guard_); @@ -1920,7 +1913,7 @@ class topology_coordinator { auto num_tokens = std::get(node.req_param.value()).num_tokens; // A node have just been accepted and does not have tokens assigned yet // Need to assign random tokens to the node - auto tmptr = get_token_metadata_ptr(); + auto tmptr = get_token_metadata_ptr()->get_new_strong(); auto bootstrap_tokens = dht::boot_strapper::get_random_bootstrap_tokens( tmptr, num_tokens, dht::check_token_endpoint::yes); @@ -3363,9 +3356,9 @@ future<> storage_service::join_token_ring(shardedget_new_strong(); - if (tmptr->is_normal_token_owner(get_broadcast_address())) { + if (tmptr->is_normal_token_owner(tmptr->get_my_id())) { throw std::runtime_error("This node is already a member of the token ring; bootstrap aborted. 
(If replacing a dead node, remove the old one from the ring first.)"); } slogger.info("getting bootstrap token"); @@ -3415,7 +3408,7 @@ future<> storage_service::join_token_ring(shardedget_new_strong(), _db.local().get_config(), dht::check_token_endpoint::no); co_await _sys_ks.local().update_tokens(bootstrap_tokens); } else { size_t num_tokens = _db.local().get_config().num_tokens(); @@ -5855,7 +5848,7 @@ future<> storage_service::raft_check_and_repair_cdc_streams() { slogger.error("check_and_repair_cdc_streams: no current CDC generation, requesting a new one."); } else { auto gen = co_await _sys_ks.local().read_cdc_generation(curr_gen->id); - if (cdc::is_cdc_generation_optimal(gen, get_token_metadata())) { + if (cdc::is_cdc_generation_optimal(gen, *get_token_metadata().get_new())) { cdc_log.info("CDC generation {} does not need repair", curr_gen); co_return; } From b2d3dc33e2ed8d95ba7a827ec2804716cd3acc13 Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Mon, 27 Nov 2023 18:32:45 +0400 Subject: [PATCH 38/51] storage_service::on_change: switch to new token_metadata The check *ep == endpoint is needed when a node changes its IP - on_change can be called by the gossiper for old IP as part of its removal, after handle_state_normal has already been called for the new one. Without the check, the do_update_system_peers_table call overwrites the IP back to its old value. Previously token_metadata used endpoint as the key and the *ep == endpoint condition was followed from the is_normal_token_owner check. Now with host_id-s we have an additional layer of indirection, and we need *ep == endpoint check to get the same end condition. 
This case was revealed by the dtest update_cluster_layout_tests.py::TestUpdateClusterLayout::test_change_node_ip --- service/storage_service.cc | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/service/storage_service.cc b/service/storage_service.cc index 0eefa7f8bf..fe1781f7f1 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -4042,8 +4042,17 @@ future<> storage_service::on_change(inet_address endpoint, application_state sta slogger.debug("Ignoring state change for dead or unknown endpoint: {}", endpoint); co_return; } - if (get_token_metadata().is_normal_token_owner(endpoint)) { - slogger.debug("endpoint={} on_change: updating system.peers table", endpoint); + const auto host_id = _gossiper.get_host_id(endpoint); + const auto& tm = *get_token_metadata().get_new(); + const auto ep = tm.get_endpoint_for_host_id_if_known(host_id); + // The check *ep == endpoint is needed when a node changes + // its IP - on_change can be called by the gossiper for old IP as part + // of its removal, after handle_state_normal has already been called for + // the new one. Without the check, the do_update_system_peers_table call + // overwrites the IP back to its old value. + // In essence, the code under the 'if' should fire if the given IP is a normal_token_owner. 
+ if (ep && *ep == endpoint && tm.is_normal_token_owner(host_id)) { + slogger.debug("endpoint={}/{} on_change: updating system.peers table", endpoint, host_id); co_await do_update_system_peers_table(endpoint, state, value); if (state == application_state::RPC_READY) { slogger.debug("Got application_state::RPC_READY for node {}, is_cql_ready={}", endpoint, ep_state->is_cql_ready()); From 0e4c90dca66f7ab19589eaa49c65db696e8feb48 Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Mon, 27 Nov 2023 21:01:14 +0400 Subject: [PATCH 39/51] api/token_metadata: switch to new version --- api/token_metadata.cc | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/api/token_metadata.cc b/api/token_metadata.cc index 1eb2d021eb..199a949a7e 100644 --- a/api/token_metadata.cc +++ b/api/token_metadata.cc @@ -20,25 +20,34 @@ using namespace json; void set_token_metadata(http_context& ctx, routes& r, sharded& tm) { ss::local_hostid.set(r, [&tm](std::unique_ptr req) { - auto id = tm.local().get()->get_my_id(); + auto id = tm.local().get()->get_new()->get_my_id(); return make_ready_future(id.to_sstring()); }); ss::get_tokens.set(r, [&tm] (std::unique_ptr req) { - return make_ready_future(stream_range_as_array(tm.local().get()->sorted_tokens(), [](const dht::token& i) { + return make_ready_future(stream_range_as_array(tm.local().get()->get_new()->sorted_tokens(), [](const dht::token& i) { return fmt::to_string(i); })); }); ss::get_node_tokens.set(r, [&tm] (std::unique_ptr req) { gms::inet_address addr(req->param["endpoint"]); - return make_ready_future(stream_range_as_array(tm.local().get()->get_tokens(addr), [](const dht::token& i) { - return fmt::to_string(i); - })); + auto& local_tm = *tm.local().get()->get_new(); + const auto host_id = local_tm.get_host_id_if_known(addr); + return make_ready_future(stream_range_as_array(host_id ? 
local_tm.get_tokens(*host_id): std::vector{}, [](const dht::token& i) { + return fmt::to_string(i); + })); }); ss::get_leaving_nodes.set(r, [&tm](const_req req) { - return container_to_vec(tm.local().get()->get_leaving_endpoints()); + const auto& local_tm = *tm.local().get()->get_new(); + const auto& leaving_host_ids = local_tm.get_leaving_endpoints(); + std::unordered_set eps; + eps.reserve(leaving_host_ids.size()); + for (const auto host_id: leaving_host_ids) { + eps.insert(local_tm.get_endpoint_for_host_id(host_id)); + } + return container_to_vec(eps); }); ss::get_moving_nodes.set(r, [](const_req req) { @@ -47,17 +56,19 @@ void set_token_metadata(http_context& ctx, routes& r, shardedget_bootstrap_tokens(); - std::unordered_set addr; - for (auto i: points) { - addr.insert(fmt::to_string(i.second)); + const auto& local_tm = *tm.local().get()->get_new(); + const auto& points = local_tm.get_bootstrap_tokens(); + std::unordered_set eps; + eps.reserve(points.size()); + for (const auto& [token, host_id]: points) { + eps.insert(local_tm.get_endpoint_for_host_id(host_id)); } - return container_to_vec(addr); + return container_to_vec(eps); }); ss::get_host_id_map.set(r, [&tm](const_req req) { std::vector res; - return map_to_key_value(tm.local().get()->get_endpoint_to_host_id_map_for_reading(), res); + return map_to_key_value(tm.local().get()->get_new()->get_endpoint_to_host_id_map_for_reading(), res); }); static auto host_or_broadcast = [&tm](const_req req) { @@ -66,7 +77,7 @@ void set_token_metadata(http_context& ctx, routes& r, shardedget_topology(); + auto& topology = tm.local().get()->get_new()->get_topology(); auto ep = host_or_broadcast(req); if (!topology.has_endpoint(ep)) { // Cannot return error here, nodetool status can race, request @@ -77,7 +88,7 @@ void set_token_metadata(http_context& ctx, routes& r, shardedget_topology(); + auto& topology = tm.local().get()->get_new()->get_topology(); auto ep = host_or_broadcast(req); if (!topology.has_endpoint(ep)) { // 
Cannot return error here, nodetool status can race, request From f53f34f9892395c18f6bd84fe419d04667c377bb Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Wed, 6 Dec 2023 18:07:56 +0400 Subject: [PATCH 40/51] storage_service: get_token_to_endpoint_map: use new token_metadata The token_metadata::get_normal_and_bootstrapping_token_to_endpoint_map method was used only here. It's inlined in this commit since it's too specific and incurs the overhead of creating an intermediate map. --- locator/token_metadata.cc | 19 ------------------- locator/token_metadata.hh | 6 ------ service/storage_service.cc | 10 +++++++++- 3 files changed, 9 insertions(+), 26 deletions(-) diff --git a/locator/token_metadata.cc b/locator/token_metadata.cc index a6f82aa3de..78a2a99c6f 100644 --- a/locator/token_metadata.cc +++ b/locator/token_metadata.cc @@ -275,12 +275,6 @@ public: } public: - /** - * @return a (stable copy, won't be modified) Token to Endpoint map for all the normal and bootstrapping nodes - * in the cluster. 
- */ - std::map get_normal_and_bootstrapping_token_to_endpoint_map() const; - long get_ring_version() const { return _ring_version; } @@ -901,13 +895,6 @@ void token_metadata_impl::del_replacing_endpoint(NodeId existing_node) { _replacing_endpoints.erase(existing_node); } -template -std::map token_metadata_impl::get_normal_and_bootstrapping_token_to_endpoint_map() const { - std::map ret(_token_to_endpoint_map.begin(), _token_to_endpoint_map.end()); - ret.insert(_bootstrap_tokens.begin(), _bootstrap_tokens.end()); - return ret; -} - template topology_change_info::topology_change_info(lw_shared_ptr> target_token_metadata_, lw_shared_ptr> base_token_metadata_, @@ -1321,12 +1308,6 @@ generic_token_metadata::set_read_new(read_new_t read_new) { _impl->set_read_new(read_new); } -template -std::map -generic_token_metadata::get_normal_and_bootstrapping_token_to_endpoint_map() const { - return _impl->get_normal_and_bootstrapping_token_to_endpoint_map(); -} - template long generic_token_metadata::get_ring_version() const { diff --git a/locator/token_metadata.hh b/locator/token_metadata.hh index e58a21d405..d8113f19e7 100644 --- a/locator/token_metadata.hh +++ b/locator/token_metadata.hh @@ -328,12 +328,6 @@ public: using read_new_t = bool_class; void set_read_new(read_new_t value); - /** - * @return a (stable copy, won't be modified) Token to Endpoint map for all the normal and bootstrapping nodes - * in the cluster. 
- */ - std::map get_normal_and_bootstrapping_token_to_endpoint_map() const; - long get_ring_version() const; void invalidate_cached_rings(); diff --git a/service/storage_service.cc b/service/storage_service.cc index fe1781f7f1..843bd6aa5c 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -6242,7 +6242,15 @@ storage_service::construct_range_to_endpoint_map( std::map storage_service::get_token_to_endpoint_map() { - return get_token_metadata().get_normal_and_bootstrapping_token_to_endpoint_map(); + const auto& tm = *get_token_metadata().get_new(); + std::map result; + for (const auto [t, id]: tm.get_token_to_endpoint()) { + result.insert({t, tm.get_endpoint_for_host_id(id)}); + } + for (const auto [t, id]: tm.get_bootstrap_tokens()) { + result.insert({t, tm.get_endpoint_for_host_id(id)}); + } + return result; } std::chrono::milliseconds storage_service::get_ring_delay() { From 309e08e5974c32e1d3596e270e52426c5565b0ee Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Mon, 6 Nov 2023 14:53:15 +0400 Subject: [PATCH 41/51] storage_service: get_token_metadata -> token_metadata2 In this commit we change the return type of storage_service::get_token_metadata_ptr() to token_metadata2_ptr and fix whatever breaks. All the boost and topology tests pass with this change. 
--- db/virtual_tables.cc | 6 +- node_ops/node_ops_ctl.hh | 2 +- service/storage_service.cc | 147 ++++++++++++++++++++----------------- service/storage_service.hh | 15 ++-- 4 files changed, 94 insertions(+), 76 deletions(-) diff --git a/db/virtual_tables.cc b/db/virtual_tables.cc index 915ed07f78..2089714cef 100644 --- a/db/virtual_tables.cc +++ b/db/virtual_tables.cc @@ -65,7 +65,7 @@ public: future<> execute(std::function mutation_sink) override { return _ss.get_ownership().then([&, mutation_sink] (std::map ownership) { - const locator::token_metadata& tm = _ss.get_token_metadata(); + const locator::token_metadata2& tm = _ss.get_token_metadata(); _gossiper.for_each_endpoint_state([&] (const gms::inet_address& endpoint, const gms::endpoint_state&) { mutation m(schema(), partition_key::from_single_value(*schema(), data_value(endpoint).serialize_nonnull())); @@ -80,7 +80,7 @@ public: set_cell(cr, "host_id", hostid->uuid()); } - if (tm.is_normal_token_owner(endpoint)) { + if (hostid && tm.is_normal_token_owner(*hostid)) { sstring dc = tm.get_topology().get_location(endpoint).dc; set_cell(cr, "dc", dc); } @@ -89,7 +89,7 @@ public: set_cell(cr, "owns", ownership[endpoint]); } - set_cell(cr, "tokens", int32_t(tm.get_tokens(endpoint).size())); + set_cell(cr, "tokens", int32_t(hostid ? tm.get_tokens(*hostid).size() : 0)); mutation_sink(std::move(m)); }); diff --git a/node_ops/node_ops_ctl.hh b/node_ops/node_ops_ctl.hh index 3d30136bc5..e452b90172 100644 --- a/node_ops/node_ops_ctl.hh +++ b/node_ops/node_ops_ctl.hh @@ -139,7 +139,7 @@ public: sstring desc; locator::host_id host_id; // Host ID of the node operand (i.e. added, replaced, or leaving node) gms::inet_address endpoint; // IP address of the node operand (i.e. 
added, replaced, or leaving node) - lw_shared_ptr tmptr; + lw_shared_ptr tmptr; std::unordered_set sync_nodes; std::unordered_set ignore_nodes; node_ops_cmd_request req; diff --git a/service/storage_service.cc b/service/storage_service.cc index 843bd6aa5c..7c20d468e1 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -283,6 +283,16 @@ static future<> set_gossip_tokens(gms::gossiper& g, }); } +static std::unordered_map get_token_to_endpoint(const locator::token_metadata2& tm) { + const auto& map = tm.get_token_to_endpoint(); + std::unordered_map result; + result.reserve(map.size()); + for (const auto [t, id]: map) { + result.insert({t, tm.get_endpoint_for_host_id(id)}); + } + return result; +} + /* * The helper waits for two things * 1) for schema agreement @@ -556,7 +566,7 @@ future<> storage_service::topology_state_load() { // of the cluster state. To work correctly, the gossiper needs to know the current // endpoints. We cannot rely on seeds alone, since it is not guaranteed that seeds // will be up to date and reachable at the time of restart. - const auto* tmptr = get_token_metadata_ptr()->get_new(); + const auto tmptr = get_token_metadata_ptr(); for (const auto& e: tmptr->get_all_endpoints()) { const auto ep = tmptr->get_endpoint_for_host_id(e); if (!is_me(e) && !_gossiper.get_endpoint_state_ptr(ep)) { @@ -926,12 +936,12 @@ class topology_coordinator { // True if an ongoing topology change should be rolled back bool _rollback = false; - const locator::token_metadata& get_token_metadata() const noexcept { - return *_shared_tm.get(); + const locator::token_metadata2& get_token_metadata() const noexcept { + return *_shared_tm.get()->get_new(); } - locator::token_metadata_ptr get_token_metadata_ptr() const noexcept { - return _shared_tm.get(); + locator::token_metadata2_ptr get_token_metadata_ptr() const noexcept { + return _shared_tm.get()->get_new_strong(); } // This is a topology snapshot for a given node. 
It contains pointers into the topology state machine @@ -1443,7 +1453,7 @@ class topology_coordinator { case global_topology_request::new_cdc_generation: { slogger.info("raft topology: new CDC generation requested"); - auto tmptr = get_token_metadata_ptr()->get_new_strong(); + auto tmptr = get_token_metadata_ptr(); auto [gen_uuid, guard_, mutation] = co_await prepare_and_broadcast_cdc_generation_data(tmptr, std::move(guard), std::nullopt); guard = std::move(guard_); @@ -1601,7 +1611,7 @@ class topology_coordinator { schema_ptr, locator::global_tablet_id, const locator::tablet_transition_info&)> func) { - auto tm = get_token_metadata_ptr()->get_new(); + auto tm = get_token_metadata_ptr(); for (auto&& [table, tmap] : tm->tablets().all_tables()) { co_await coroutine::maybe_yield(); auto s = _db.find_schema(table); @@ -1615,7 +1625,7 @@ class topology_coordinator { void generate_migration_update(std::vector& out, const group0_guard& guard, const tablet_migration_info& mig) { auto s = _db.find_schema(mig.tablet.table); - auto& tmap = get_token_metadata_ptr()->get_new()->tablets().get_tablet_map(mig.tablet.table); + auto& tmap = get_token_metadata_ptr()->tablets().get_tablet_map(mig.tablet.table); auto last_token = tmap.get_last_token(mig.tablet.tablet); if (tmap.get_tablet_transition_info(mig.tablet.tablet)) { slogger.warn("Tablet already in transition, ignoring migration: {}", mig); @@ -1774,7 +1784,7 @@ class topology_coordinator { } } if (!preempt) { - auto plan = co_await _tablet_allocator.balance_tablets(get_token_metadata_ptr()->get_new_strong()); + auto plan = co_await _tablet_allocator.balance_tablets(get_token_metadata_ptr()); if (!drain || plan.has_nodes_to_drain()) { co_await generate_migration_updates(updates, guard, plan); } @@ -1913,7 +1923,7 @@ class topology_coordinator { auto num_tokens = std::get(node.req_param.value()).num_tokens; // A node have just been accepted and does not have tokens assigned yet // Need to assign random tokens to the node - auto 
tmptr = get_token_metadata_ptr()->get_new_strong(); + auto tmptr = get_token_metadata_ptr(); auto bootstrap_tokens = dht::boot_strapper::get_random_bootstrap_tokens( tmptr, num_tokens, dht::check_token_endpoint::yes); @@ -2555,7 +2565,7 @@ future topology_coordinator::maybe_start_tablet_migration(group0_guard gua slogger.debug("raft topology: Evaluating tablet balance"); auto tm = get_token_metadata_ptr(); - auto plan = co_await _tablet_allocator.balance_tablets(tm->get_new_strong()); + auto plan = co_await _tablet_allocator.balance_tablets(tm); if (plan.empty()) { slogger.debug("raft topology: Tablets are balanced"); co_return false; @@ -3356,7 +3366,7 @@ future<> storage_service::join_token_ring(shardedget_new_strong(); + auto tmptr = get_token_metadata_ptr(); if (tmptr->is_normal_token_owner(tmptr->get_my_id())) { throw std::runtime_error("This node is already a member of the token ring; bootstrap aborted. (If replacing a dead node, remove the old one from the ring first.)"); @@ -3384,7 +3394,7 @@ future<> storage_service::join_token_ring(shardedget_endpoint(token); if (existing) { - auto eps = _gossiper.get_endpoint_state_ptr(*existing); + auto eps = _gossiper.get_endpoint_state_ptr(tmptr->get_endpoint_for_host_id(*existing)); if (eps && eps->get_update_timestamp() > gms::gossiper::clk::now() - delay) { throw std::runtime_error("Cannot replace a live node..."); } @@ -3408,7 +3418,7 @@ future<> storage_service::join_token_ring(shardedget_new_strong(), _db.local().get_config(), dht::check_token_endpoint::no); + bootstrap_tokens = boot_strapper::get_bootstrap_tokens(get_token_metadata_ptr(), _db.local().get_config(), dht::check_token_endpoint::no); co_await _sys_ks.local().update_tokens(bootstrap_tokens); } else { size_t num_tokens = _db.local().get_config().num_tokens(); @@ -3502,7 +3512,7 @@ future<> storage_service::mark_existing_views_as_built() { }); } -std::unordered_set storage_service::parse_node_list(sstring comma_separated_list, const token_metadata& tm) 
{ +std::unordered_set storage_service::parse_node_list(sstring comma_separated_list, const token_metadata2& tm) { std::vector ignore_nodes_strs = utils::split_comma_separated_list(std::move(comma_separated_list)); std::unordered_set ignore_nodes; for (const sstring& n : ignore_nodes_strs) { @@ -3591,7 +3601,7 @@ future<> storage_service::bootstrap(std::unordered_set& bootstrap_tokens, slogger.info("sleeping {} ms for pending range setup", get_ring_delay().count()); _gossiper.wait_for_range_setup().get(); - dht::boot_strapper bs(_db, _stream_manager, _abort_source, get_token_metadata_ptr()->get_my_id(), _snitch.local()->get_location(), bootstrap_tokens, get_token_metadata_ptr()->get_new_strong()); + dht::boot_strapper bs(_db, _stream_manager, _abort_source, get_token_metadata_ptr()->get_my_id(), _snitch.local()->get_location(), bootstrap_tokens, get_token_metadata_ptr()); slogger.info("Starting to bootstrap..."); bs.bootstrap(streaming::stream_reason::bootstrap, _gossiper, null_topology_guard).get(); } else { @@ -3804,7 +3814,7 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit // token_to_endpoint_map is used to track the current token owners for the purpose of removing replaced endpoints. // when any token is replaced by a new owner, we track the existing owner in `candidates_for_removal` // and eventually, if any candidate for removal ends up owning no tokens, it is removed from token_metadata. 
- std::unordered_map token_to_endpoint_map = get_token_metadata().get_token_to_endpoint(); + std::unordered_map token_to_endpoint_map = get_token_to_endpoint(get_token_metadata()); std::unordered_set candidates_for_removal; // Here we convert tokens from gossiper to owned_tokens, which will be assigned as a new @@ -3927,7 +3937,7 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit const auto& tm = get_token_metadata(); auto ver = tm.get_ring_version(); for (auto& x : tm.get_token_to_endpoint()) { - slogger.debug("handle_state_normal: token_metadata.ring_version={}, token={} -> endpoint={}", ver, x.first, x.second); + slogger.debug("handle_state_normal: token_metadata.ring_version={}, token={} -> endpoint={}/{}", ver, x.first, tm.get_endpoint_for_host_id(x.second), x.second); } } _normal_state_handled_on_boot.insert(endpoint); @@ -3945,8 +3955,9 @@ future<> storage_service::handle_state_left(inet_address endpoint, std::vector storage_service::handle_state_left(inet_address endpoint, std::vector(tokens_from_tm.begin(), tokens_from_tm.end()); } co_await excise(tokens, endpoint, extract_expire_time(pieces), pid); @@ -3974,9 +3985,10 @@ future<> storage_service::handle_state_removed(inet_address endpoint, std::vecto } co_return; } - if (get_token_metadata().is_normal_token_owner(endpoint)) { + const auto host_id = _gossiper.get_host_id(endpoint); + if (get_token_metadata().is_normal_token_owner(host_id)) { auto state = pieces[0]; - auto remove_tokens = get_token_metadata().get_tokens(endpoint); + auto remove_tokens = get_token_metadata().get_tokens(host_id); std::unordered_set tmp(remove_tokens.begin(), remove_tokens.end()); co_await excise(std::move(tmp), endpoint, extract_expire_time(pieces), pid); } else { // now that the gossiper has told us about this nonexistent member, notify the gossiper to remove it @@ -3993,8 +4005,10 @@ future<> storage_service::on_join(gms::inet_address endpoint, gms::endpoint_stat } future<> 
storage_service::on_alive(gms::inet_address endpoint, gms::endpoint_state_ptr state, gms::permit_id pid) { - slogger.debug("endpoint={} on_alive: permit_id={}", endpoint, pid); - bool is_normal_token_owner = get_token_metadata().is_normal_token_owner(endpoint); + const auto& tm = get_token_metadata(); + const auto tm_host_id_opt = tm.get_host_id_if_known(endpoint); + slogger.debug("endpoint={}/{} on_alive: permit_id={}", endpoint, tm_host_id_opt, pid); + bool is_normal_token_owner = tm_host_id_opt && tm.is_normal_token_owner(*tm_host_id_opt); if (is_normal_token_owner) { co_await notify_up(endpoint); } else { @@ -4043,7 +4057,7 @@ future<> storage_service::on_change(inet_address endpoint, application_state sta co_return; } const auto host_id = _gossiper.get_host_id(endpoint); - const auto& tm = *get_token_metadata().get_new(); + const auto& tm = get_token_metadata(); const auto ep = tm.get_endpoint_for_host_id_if_known(host_id); // The check *ep == endpoint is needed when a node changes // its IP - on_change can be called by the gossiper for old IP as part @@ -4623,9 +4637,9 @@ future> storage_service::get_ownership() { // describeOwnership returns tokens in an unspecified order, let's re-order them std::map ownership; for (auto entry : token_map) { - gms::inet_address endpoint = tm.get_endpoint(entry.first).value(); + locator::host_id id = tm.get_endpoint(entry.first).value(); auto token_ownership = entry.second; - ownership[endpoint] += token_ownership; + ownership[tm.get_endpoint_for_host_id(id)] += token_ownership; } return ownership; }); @@ -4915,7 +4929,7 @@ future<> storage_service::decommission() { uuid = ctl.uuid(); auto endpoint = ctl.endpoint; const auto& tmptr = ctl.tmptr; - if (!tmptr->is_normal_token_owner(endpoint)) { + if (!tmptr->is_normal_token_owner(ctl.host_id)) { throw std::runtime_error("local node is not a member of the token ring yet"); } // We assume that we're a member of group 0 if we're in decommission()` and Raft is enabled. 
@@ -5101,7 +5115,7 @@ void storage_service::run_bootstrap_ops(std::unordered_set& bootstrap_tok ctl.prepare(node_ops_cmd::bootstrap_prepare).get(); // Step 5: Sync data for bootstrap - _repair.local().bootstrap_with_repair(get_token_metadata_ptr()->get_new_strong(), bootstrap_tokens).get(); + _repair.local().bootstrap_with_repair(get_token_metadata_ptr(), bootstrap_tokens).get(); on_streaming_finished(); // Step 6: Finish @@ -5151,10 +5165,10 @@ void storage_service::run_replace_ops(std::unordered_set& bootstrap_token // Step 7: Sync data for replace if (is_repair_based_node_ops_enabled(streaming::stream_reason::replace)) { slogger.info("replace[{}]: Using repair based node ops to sync data", uuid); - _repair.local().replace_with_repair(get_token_metadata_ptr()->get_new_strong(), bootstrap_tokens, ctl.ignore_nodes).get(); + _repair.local().replace_with_repair(get_token_metadata_ptr(), bootstrap_tokens, ctl.ignore_nodes).get(); } else { slogger.info("replace[{}]: Using streaming based node ops to sync data", uuid); - dht::boot_strapper bs(_db, _stream_manager, _abort_source, get_token_metadata_ptr()->get_new()->get_my_id(), _snitch.local()->get_location(), bootstrap_tokens, get_token_metadata_ptr()->get_new_strong()); + dht::boot_strapper bs(_db, _stream_manager, _abort_source, get_token_metadata_ptr()->get_my_id(), _snitch.local()->get_location(), bootstrap_tokens, get_token_metadata_ptr()); bs.bootstrap(streaming::stream_reason::replace, _gossiper, null_topology_guard, replace_address).get(); } on_streaming_finished(); @@ -5325,7 +5339,7 @@ future<> storage_service::removenode(locator::host_id host_id, std::listget_tokens(endpoint); + auto tokens = tmptr->get_tokens(host_id); try { // Step 3: Start heartbeat updater @@ -5550,7 +5564,7 @@ future storage_service::node_ops_cmd_handler(gms::inet_ad for (auto& node : req.leaving_nodes) { if (is_repair_based_node_ops_enabled(streaming::stream_reason::removenode)) { slogger.info("removenode[{}]: Started to sync data for 
removing node={} using repair, coordinator={}", req.ops_uuid, node, coordinator); - _repair.local().removenode_with_repair(get_token_metadata_ptr()->get_new_strong(), node, ops).get(); + _repair.local().removenode_with_repair(get_token_metadata_ptr(), node, ops).get(); } else { slogger.info("removenode[{}]: Started to sync data for removing node={} using stream, coordinator={}", req.ops_uuid, node, coordinator); removenode_with_stream(node, topo_guard, as).get(); @@ -5595,13 +5609,14 @@ future storage_service::node_ops_cmd_handler(gms::inet_ad check_again = false; for (auto& node : req.leaving_nodes) { auto tmptr = get_token_metadata_ptr(); - if (tmptr->is_normal_token_owner(node)) { + const auto host_id = tmptr->get_host_id_if_known(node); + if (host_id && tmptr->is_normal_token_owner(*host_id)) { check_again = true; if (std::chrono::steady_clock::now() > start_time + std::chrono::seconds(60)) { - auto msg = ::format("decommission[{}]: Node {} is still in the cluster", req.ops_uuid, node); + auto msg = ::format("decommission[{}]: Node {}/{} is still in the cluster", req.ops_uuid, node, host_id); throw std::runtime_error(msg); } - slogger.warn("decommission[{}]: Node {} is still in the cluster, sleep and check again", req.ops_uuid, node); + slogger.warn("decommission[{}]: Node {}/{} is still in the cluster, sleep and check again", req.ops_uuid, node, host_id); sleep_abortable(std::chrono::milliseconds(500), _abort_source).get(); break; } @@ -5857,7 +5872,7 @@ future<> storage_service::raft_check_and_repair_cdc_streams() { slogger.error("check_and_repair_cdc_streams: no current CDC generation, requesting a new one."); } else { auto gen = co_await _sys_ks.local().read_cdc_generation(curr_gen->id); - if (cdc::is_cdc_generation_optimal(gen, *get_token_metadata().get_new())) { + if (cdc::is_cdc_generation_optimal(gen, get_token_metadata())) { cdc_log.info("CDC generation {} does not need repair", curr_gen); co_return; } @@ -5892,10 +5907,10 @@ future<> 
storage_service::rebuild(sstring source_dc) { slogger.info("rebuild from dc: {}", source_dc == "" ? "(any dc)" : source_dc); auto tmptr = ss.get_token_metadata_ptr(); if (ss.is_repair_based_node_ops_enabled(streaming::stream_reason::rebuild)) { - co_await ss._repair.local().rebuild_with_repair(tmptr->get_new_strong(), std::move(source_dc)); + co_await ss._repair.local().rebuild_with_repair(tmptr, std::move(source_dc)); } else { - auto streamer = make_lw_shared(ss._db, ss._stream_manager, tmptr->get_new_strong(), ss._abort_source, - tmptr->get_new()->get_my_id(), ss._snitch.local()->get_location(), "Rebuild", streaming::stream_reason::rebuild, null_topology_guard); + auto streamer = make_lw_shared(ss._db, ss._stream_manager, tmptr, ss._abort_source, + tmptr->get_my_id(), ss._snitch.local()->get_location(), "Rebuild", streaming::stream_reason::rebuild, null_topology_guard); streamer->add_source_filter(std::make_unique(ss._gossiper.get_unreachable_members())); if (source_dc != "") { streamer->add_source_filter(std::make_unique(source_dc)); @@ -5944,7 +5959,7 @@ storage_service::get_changed_ranges_for_leaving(locator::vnode_effective_replica co_await coroutine::maybe_yield(); } - auto temp = locator::make_token_metadata2_ptr(co_await get_token_metadata_ptr()->get_new()->clone_after_all_left()); + auto temp = locator::make_token_metadata2_ptr(co_await get_token_metadata_ptr()->clone_after_all_left()); // endpoint might or might not be 'leaving'. If it was not leaving (that is, removenode // command was used), it is still present in temp and must be removed. 
@@ -6001,7 +6016,7 @@ future<> storage_service::unbootstrap() { slogger.info("Finished batchlog replay for decommission"); if (is_repair_based_node_ops_enabled(streaming::stream_reason::decommission)) { - co_await _repair.local().decommission_with_repair(get_token_metadata_ptr()->get_new_strong()); + co_await _repair.local().decommission_with_repair(get_token_metadata_ptr()); } else { std::unordered_map> ranges_to_stream; @@ -6073,7 +6088,7 @@ future<> storage_service::removenode_with_stream(gms::inet_address leaving_node, as.request_abort(); } }); - auto streamer = make_lw_shared(_db, _stream_manager, tmptr->get_new_strong(), as, tmptr->get_my_id(), _snitch.local()->get_location(), "Removenode", streaming::stream_reason::removenode, topo_guard); + auto streamer = make_lw_shared(_db, _stream_manager, tmptr, as, tmptr->get_my_id(), _snitch.local()->get_location(), "Removenode", streaming::stream_reason::removenode, topo_guard); removenode_add_ranges(streamer, leaving_node).get(); try { streamer->stream_async().get(); @@ -6129,7 +6144,7 @@ future<> storage_service::leave_ring() { future<> storage_service::stream_ranges(std::unordered_map> ranges_to_stream_by_keyspace) { - auto streamer = dht::range_streamer(_db, _stream_manager, get_token_metadata_ptr()->get_new_strong(), _abort_source, get_token_metadata_ptr()->get_new()->get_my_id(), _snitch.local()->get_location(), "Unbootstrap", streaming::stream_reason::decommission, null_topology_guard); + auto streamer = dht::range_streamer(_db, _stream_manager, get_token_metadata_ptr(), _abort_source, get_token_metadata_ptr()->get_my_id(), _snitch.local()->get_location(), "Unbootstrap", streaming::stream_reason::decommission, null_topology_guard); for (auto& entry : ranges_to_stream_by_keyspace) { const auto& keyspace = entry.first; auto& ranges_with_endpoints = entry.second; @@ -6242,7 +6257,7 @@ storage_service::construct_range_to_endpoint_map( std::map storage_service::get_token_to_endpoint_map() { - const auto& tm = 
*get_token_metadata().get_new(); + const auto& tm = get_token_metadata(); std::map result; for (const auto [t, id]: tm.get_token_to_endpoint()) { result.insert({t, tm.get_endpoint_for_host_id(id)}); @@ -6512,10 +6527,10 @@ future storage_service::raft_topology_cmd_handler(raft if (!_topology_state_machine._topology.normal_nodes.empty()) { // stream only if there is a node in normal state co_await retrier(_bootstrap_result, coroutine::lambda([&] () -> future<> { if (is_repair_based_node_ops_enabled(streaming::stream_reason::bootstrap)) { - co_await _repair.local().bootstrap_with_repair(get_token_metadata_ptr()->get_new_strong(), rs.ring.value().tokens); + co_await _repair.local().bootstrap_with_repair(get_token_metadata_ptr(), rs.ring.value().tokens); } else { - dht::boot_strapper bs(_db, _stream_manager, _abort_source, get_token_metadata_ptr()->get_new()->get_my_id(), - locator::endpoint_dc_rack{rs.datacenter, rs.rack}, rs.ring.value().tokens, get_token_metadata_ptr()->get_new_strong()); + dht::boot_strapper bs(_db, _stream_manager, _abort_source, get_token_metadata_ptr()->get_my_id(), + locator::endpoint_dc_rack{rs.datacenter, rs.rack}, rs.ring.value().tokens, get_token_metadata_ptr()); co_await bs.bootstrap(streaming::stream_reason::bootstrap, _gossiper, _topology_state_machine._topology.session); } })); @@ -6536,10 +6551,10 @@ future storage_service::raft_topology_cmd_handler(raft } ignored_ips.insert(*ip); } - co_await _repair.local().replace_with_repair(get_token_metadata_ptr()->get_new_strong(), rs.ring.value().tokens, std::move(ignored_ips)); + co_await _repair.local().replace_with_repair(get_token_metadata_ptr(), rs.ring.value().tokens, std::move(ignored_ips)); } else { - dht::boot_strapper bs(_db, _stream_manager, _abort_source, get_token_metadata_ptr()->get_new()->get_my_id(), - locator::endpoint_dc_rack{rs.datacenter, rs.rack}, rs.ring.value().tokens, get_token_metadata_ptr()->get_new_strong()); + dht::boot_strapper bs(_db, _stream_manager, 
_abort_source, get_token_metadata_ptr()->get_my_id(), + locator::endpoint_dc_rack{rs.datacenter, rs.rack}, rs.ring.value().tokens, get_token_metadata_ptr()); auto replaced_id = std::get(_topology_state_machine._topology.req_param[raft_server.id()]).replaced_id; auto existing_ip = _group0->address_map().find(replaced_id); assert(existing_ip); @@ -6593,7 +6608,7 @@ future storage_service::raft_topology_cmd_handler(raft ignored_ips.push_back(*ip); } auto ops = seastar::make_shared(node_ops_id::create_random_id(), as, std::move(ignored_ips)); - return _repair.local().removenode_with_repair(get_token_metadata_ptr()->get_new_strong(), *ip, ops); + return _repair.local().removenode_with_repair(get_token_metadata_ptr(), *ip, ops); } else { return removenode_with_stream(*ip, _topology_state_machine._topology.session, as); } @@ -6607,10 +6622,10 @@ future storage_service::raft_topology_cmd_handler(raft co_await retrier(_rebuild_result, [&] () -> future<> { auto tmptr = get_token_metadata_ptr(); if (is_repair_based_node_ops_enabled(streaming::stream_reason::rebuild)) { - co_await _repair.local().rebuild_with_repair(tmptr->get_new_strong(), std::move(source_dc)); + co_await _repair.local().rebuild_with_repair(tmptr, std::move(source_dc)); } else { - auto streamer = make_lw_shared(_db, _stream_manager, tmptr->get_new_strong(), _abort_source, - tmptr->get_new()->get_my_id(), _snitch.local()->get_location(), "Rebuild", streaming::stream_reason::rebuild, _topology_state_machine._topology.session); + auto streamer = make_lw_shared(_db, _stream_manager, tmptr, _abort_source, + tmptr->get_my_id(), _snitch.local()->get_location(), "Rebuild", streaming::stream_reason::rebuild, _topology_state_machine._topology.session); streamer->add_source_filter(std::make_unique(_gossiper.get_unreachable_members())); if (source_dc != "") { streamer->add_source_filter(std::make_unique(source_dc)); @@ -7380,24 +7395,24 @@ future<> storage_service::force_remove_completion() { if 
(!tm.get_leaving_endpoints().empty()) { auto leaving = tm.get_leaving_endpoints(); slogger.warn("Removal not confirmed, Leaving={}", leaving); - for (auto endpoint : leaving) { - const auto host_id = tm.get_host_id_if_known(endpoint); - if (!host_id) { - slogger.warn("No host_id is found for endpoint {}", endpoint); + for (auto host_id : leaving) { + const auto endpoint = tm.get_endpoint_for_host_id_if_known(host_id); + if (!endpoint) { + slogger.warn("No endpoint is found for host_id {}", host_id); continue; } - auto tokens = tm.get_tokens(endpoint); - auto permit = co_await ss._gossiper.lock_endpoint(endpoint, gms::null_permit_id); + auto tokens = tm.get_tokens(host_id); + auto permit = co_await ss._gossiper.lock_endpoint(*endpoint, gms::null_permit_id); const auto& pid = permit.id(); - co_await ss._gossiper.advertise_token_removed(endpoint, *host_id, pid); + co_await ss._gossiper.advertise_token_removed(*endpoint, host_id, pid); std::unordered_set tokens_set(tokens.begin(), tokens.end()); - co_await ss.excise(tokens_set, endpoint, pid); + co_await ss.excise(tokens_set, *endpoint, pid); - slogger.info("force_remove_completion: removing endpoint {} from group 0", endpoint); + slogger.info("force_remove_completion: removing endpoint {} from group 0", *endpoint); assert(ss._group0); bool raft_available = co_await ss._group0->wait_for_raft(); if (raft_available) { - co_await ss._group0->remove_from_group0(raft::server_id{host_id->uuid()}); + co_await ss._group0->remove_from_group0(raft::server_id{host_id.uuid()}); } } } else { diff --git a/service/storage_service.hh b/service/storage_service.hh index f12b21d0a9..ff52d39678 100644 --- a/service/storage_service.hh +++ b/service/storage_service.hh @@ -113,9 +113,12 @@ private: using endpoint_details = dht::endpoint_details; using boot_strapper = dht::boot_strapper; using token_metadata = locator::token_metadata; + using token_metadata2 = locator::token_metadata2; using shared_token_metadata = 
locator::shared_token_metadata; using token_metadata_ptr = locator::token_metadata_ptr; + using token_metadata2_ptr = locator::token_metadata2_ptr; using mutable_token_metadata_ptr = locator::mutable_token_metadata_ptr; + using mutable_token_metadata2_ptr = locator::mutable_token_metadata2_ptr; using token_metadata_lock = locator::token_metadata_lock; using application_state = gms::application_state; using inet_address = gms::inet_address; @@ -223,7 +226,7 @@ private: future<> snitch_reconfigured(); future get_mutable_token_metadata_ptr() noexcept { - return get_token_metadata_ptr()->clone_async().then([] (token_metadata tm) { + return _shared_token_metadata.get()->clone_async().then([] (token_metadata tm) { // bump the token_metadata ring_version // to invalidate cached token/replication mappings // when the modified token_metadata is committed. @@ -255,12 +258,12 @@ public: return _erm_factory; } - token_metadata_ptr get_token_metadata_ptr() const noexcept { - return _shared_token_metadata.get(); + token_metadata2_ptr get_token_metadata_ptr() const noexcept { + return _shared_token_metadata.get()->get_new_strong(); } - const locator::token_metadata& get_token_metadata() const noexcept { - return *_shared_token_metadata.get(); + const locator::token_metadata2& get_token_metadata() const noexcept { + return *_shared_token_metadata.get()->get_new(); } private: @@ -325,7 +328,7 @@ private: public: - static std::unordered_set parse_node_list(sstring comma_separated_list, const locator::token_metadata& tm); + static std::unordered_set parse_node_list(sstring comma_separated_list, const locator::token_metadata2& tm); future<> check_for_endpoint_collision(std::unordered_set initial_contact_nodes, const std::unordered_map& loaded_peer_features); From 11cc21d0a9fc1cefbcb4e47a6ba2c9638e66b59b Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Mon, 6 Nov 2023 16:28:20 +0400 Subject: [PATCH 42/51] erm: switch to the new token_metadata In this commit we replace token_metadata 
with token_metadata2 in the erm interface and field types. To accommodate the change some of strategy-related methods are also updated. All the boost and topology tests pass with this change. --- locator/abstract_replication_strategy.cc | 37 ++++++++++---------- locator/abstract_replication_strategy.hh | 24 ++++++------- locator/everywhere_replication_strategy.cc | 4 +-- locator/everywhere_replication_strategy.hh | 2 +- locator/local_strategy.cc | 2 +- locator/local_strategy.hh | 2 +- locator/network_topology_strategy.cc | 16 ++++----- locator/network_topology_strategy.hh | 6 ++-- locator/simple_strategy.cc | 2 +- locator/simple_strategy.hh | 2 +- locator/tablet_metadata_guard.hh | 4 +-- locator/tablet_replication_strategy.hh | 4 +-- locator/tablets.cc | 18 +++++----- locator/token_metadata.cc | 10 +++--- locator/token_range_splitter.hh | 2 +- locator/topology.hh | 2 +- locator/util.cc | 2 +- repair/repair.cc | 4 +-- repair/row_level.cc | 8 ++--- replica/database.cc | 4 +-- replica/table.cc | 2 +- service/storage_proxy.cc | 4 +-- service/storage_service.cc | 10 +++--- service/tablet_allocator.cc | 2 +- test/boost/network_topology_strategy_test.cc | 12 +++---- test/boost/storage_proxy_test.cc | 21 ++++++----- test/boost/token_metadata_test.cc | 2 +- tombstone_gc.cc | 2 +- 28 files changed, 106 insertions(+), 104 deletions(-) diff --git a/locator/abstract_replication_strategy.cc b/locator/abstract_replication_strategy.cc index 3fe2dfe257..c173512919 100644 --- a/locator/abstract_replication_strategy.cc +++ b/locator/abstract_replication_strategy.cc @@ -87,10 +87,10 @@ inet_address_vector_replica_set vnode_effective_replication_map::get_natural_end return natural_endpoints; } -void maybe_remove_node_being_replaced(const token_metadata& tm, +void maybe_remove_node_being_replaced(const token_metadata2& tm, const abstract_replication_strategy& rs, inet_address_vector_replica_set& natural_endpoints) { - if (tm.get_new()->is_any_node_being_replaced() && + if 
(tm.is_any_node_being_replaced() && rs.allow_remove_node_being_replaced_from_natural_endpoints()) { // When a new node is started to replace an existing dead node, we want // to make the replacing node take writes but do not count it for @@ -104,8 +104,8 @@ void maybe_remove_node_being_replaced(const token_metadata& tm, // as the natural_endpoints and the node will not appear in the // pending_endpoints. auto it = boost::range::remove_if(natural_endpoints, [&] (gms::inet_address& p) { - const auto host_id = tm.get_new()->get_host_id(p); - return tm.get_new()->is_being_replaced(host_id); + const auto host_id = tm.get_host_id(p); + return tm.is_being_replaced(host_id); }); natural_endpoints.erase(it, natural_endpoints.end()); } @@ -221,7 +221,7 @@ insert_token_range_to_sorted_container_while_unwrapping( dht::token_range_vector vnode_effective_replication_map::do_get_ranges(noncopyable_function consider_range_for_endpoint) const { dht::token_range_vector ret; - const auto& tm = *_tmptr->get_new(); + const auto& tm = *_tmptr; const auto& sorted_tokens = tm.sorted_tokens(); if (sorted_tokens.empty()) { on_internal_error(rslogger, "Token metadata is empty"); @@ -305,7 +305,7 @@ vnode_effective_replication_map::get_primary_ranges(inet_address ep) const { dht::token_range_vector vnode_effective_replication_map::get_primary_ranges_within_dc(inet_address ep) const { - const topology& topo = _tmptr->get_new()->get_topology(); + const topology& topo = _tmptr->get_topology(); sstring local_dc = topo.get_datacenter(ep); std::unordered_set local_dc_nodes = topo.get_datacenter_endpoints().at(local_dc); // The callback function below is called for each endpoint @@ -327,7 +327,7 @@ vnode_effective_replication_map::get_primary_ranges_within_dc(inet_address ep) c future> vnode_effective_replication_map::get_range_addresses() const { - const token_metadata& tm = *_tmptr; + const token_metadata2& tm = *_tmptr; std::unordered_map ret; for (auto& t : tm.sorted_tokens()) { 
dht::token_range_vector ranges = tm.get_primary_ranges_for(t); @@ -373,20 +373,19 @@ abstract_replication_strategy::get_pending_address_ranges(const token_metadata2_ static const auto default_replication_map_key = dht::token::from_int64(0); -future calculate_effective_replication_map(replication_strategy_ptr rs, token_metadata_ptr tmptr) { +future calculate_effective_replication_map(replication_strategy_ptr rs, token_metadata2_ptr tmptr) { replication_map replication_map; ring_mapping pending_endpoints; ring_mapping read_endpoints; const auto depend_on_token = rs->natural_endpoints_depend_on_token(); - auto tmpr_new = tmptr->get_new_strong(); - const auto& sorted_tokens = tmpr_new->sorted_tokens(); + const auto& sorted_tokens = tmptr->sorted_tokens(); replication_map.reserve(depend_on_token ? sorted_tokens.size() : 1); - if (const auto& topology_changes = tmpr_new->get_topology_change_info(); topology_changes) { + if (const auto& topology_changes = tmptr->get_topology_change_info(); topology_changes) { const auto& all_tokens = topology_changes->all_tokens; const auto& base_token_metadata = topology_changes->base_token_metadata ? 
topology_changes->base_token_metadata - : tmpr_new; - const auto& current_tokens = tmpr_new->get_token_to_endpoint(); + : tmptr; + const auto& current_tokens = tmptr->get_token_to_endpoint(); for (size_t i = 0, size = all_tokens.size(); i < size; ++i) { co_await coroutine::maybe_yield(); @@ -442,11 +441,11 @@ future calculate_effective_replicat } } else if (depend_on_token) { for (const auto &t : sorted_tokens) { - auto eps = co_await rs->calculate_natural_ips(t, tmpr_new); + auto eps = co_await rs->calculate_natural_ips(t, tmptr); replication_map.emplace(t, std::move(eps).extract_vector()); } } else { - auto eps = co_await rs->calculate_natural_ips(default_replication_map_key, tmpr_new); + auto eps = co_await rs->calculate_natural_ips(default_replication_map_key, tmptr); replication_map.emplace(default_replication_map_key, std::move(eps).extract_vector()); } @@ -479,7 +478,7 @@ const inet_address_vector_replica_set& vnode_effective_replication_map::do_get_n bool is_vnode) const { const token& key_token = _rs->natural_endpoints_depend_on_token() - ? (is_vnode ? tok : _tmptr->get_new()->first_token(tok)) + ? (is_vnode ? 
tok : _tmptr->first_token(tok)) : default_replication_map_key; const auto it = _replication_map.find(key_token); return it->second; @@ -513,7 +512,7 @@ vnode_effective_replication_map::~vnode_effective_replication_map() { } effective_replication_map::effective_replication_map(replication_strategy_ptr rs, - token_metadata_ptr tmptr, + token_metadata2_ptr tmptr, size_t replication_factor) noexcept : _rs(std::move(rs)) , _tmptr(std::move(tmptr)) @@ -521,11 +520,11 @@ effective_replication_map::effective_replication_map(replication_strategy_ptr rs , _validity_abort_source(std::make_unique()) { } -vnode_effective_replication_map::factory_key vnode_effective_replication_map::make_factory_key(const replication_strategy_ptr& rs, const token_metadata_ptr& tmptr) { +vnode_effective_replication_map::factory_key vnode_effective_replication_map::make_factory_key(const replication_strategy_ptr& rs, const token_metadata2_ptr& tmptr) { return factory_key(rs->get_type(), rs->get_config_options(), tmptr->get_ring_version()); } -future effective_replication_map_factory::create_effective_replication_map(replication_strategy_ptr rs, token_metadata_ptr tmptr) { +future effective_replication_map_factory::create_effective_replication_map(replication_strategy_ptr rs, token_metadata2_ptr tmptr) { // lookup key on local shard auto key = vnode_effective_replication_map::make_factory_key(rs, tmptr); auto erm = find_effective_replication_map(key); diff --git a/locator/abstract_replication_strategy.hh b/locator/abstract_replication_strategy.hh index 2e4a0eddd3..99bd927663 100644 --- a/locator/abstract_replication_strategy.hh +++ b/locator/abstract_replication_strategy.hh @@ -119,7 +119,7 @@ public: virtual void validate_options(const gms::feature_service&) const = 0; virtual std::optional> recognized_options(const topology&) const = 0; - virtual size_t get_replication_factor(const token_metadata& tm) const = 0; + virtual size_t get_replication_factor(const token_metadata2& tm) const = 0; // 
Decide if the replication strategy allow removing the node being // replaced from the natural endpoints when a node is being replaced in the // cluster. LocalStrategy is the not allowed to do so because it always @@ -175,17 +175,17 @@ using mutable_replication_strategy_ptr = seastar::shared_ptr _validity_abort_source; public: - effective_replication_map(replication_strategy_ptr, token_metadata_ptr, size_t replication_factor) noexcept; + effective_replication_map(replication_strategy_ptr, token_metadata2_ptr, size_t replication_factor) noexcept; effective_replication_map(effective_replication_map&&) noexcept = default; virtual ~effective_replication_map() = default; const abstract_replication_strategy& get_replication_strategy() const noexcept { return *_rs; } - const token_metadata& get_token_metadata() const noexcept { return *_tmptr; } - const token_metadata_ptr& get_token_metadata_ptr() const noexcept { return _tmptr; } + const token_metadata2& get_token_metadata() const noexcept { return *_tmptr; } + const token_metadata2_ptr& get_token_metadata_ptr() const noexcept { return _tmptr; } const topology& get_topology() const noexcept { return _tmptr->get_topology(); } size_t get_replication_factor() const noexcept { return _replication_factor; } @@ -255,7 +255,7 @@ protected: } public: virtual ~per_table_replication_strategy() = default; - virtual effective_replication_map_ptr make_replication_map(table_id, token_metadata_ptr) const = 0; + virtual effective_replication_map_ptr make_replication_map(table_id, token_metadata2_ptr) const = 0; }; // Holds the full replication_map resulting from applying the @@ -302,7 +302,7 @@ public: // effective_replication_map std::unique_ptr make_splitter() const override; const dht::sharder& get_sharder(const schema& s) const override; public: - explicit vnode_effective_replication_map(replication_strategy_ptr rs, token_metadata_ptr tmptr, replication_map replication_map, + explicit 
vnode_effective_replication_map(replication_strategy_ptr rs, token_metadata2_ptr tmptr, replication_map replication_map, ring_mapping pending_endpoints, ring_mapping read_endpoints, size_t replication_factor) noexcept : effective_replication_map(std::move(rs), std::move(tmptr), replication_factor) , _replication_map(std::move(replication_map)) @@ -357,7 +357,7 @@ private: const inet_address_vector_replica_set& do_get_natural_endpoints(const token& tok, bool is_vnode) const; public: - static factory_key make_factory_key(const replication_strategy_ptr& rs, const token_metadata_ptr& tmptr); + static factory_key make_factory_key(const replication_strategy_ptr& rs, const token_metadata2_ptr& tmptr); const factory_key& get_factory_key() const noexcept { return *_factory_key; @@ -382,7 +382,7 @@ using mutable_vnode_effective_replication_map_ptr = shared_ptr( std::move(rs), std::move(tmptr), std::move(replication_map), @@ -390,7 +390,7 @@ inline mutable_vnode_erm_ptr make_effective_replication_map(replication_strategy } // Apply the replication strategy over the current configuration and the given token_metadata. -future calculate_effective_replication_map(replication_strategy_ptr rs, token_metadata_ptr tmptr); +future calculate_effective_replication_map(replication_strategy_ptr rs, token_metadata2_ptr tmptr); // Class to hold a coherent view of a keyspace // effective replication map on all shards @@ -478,7 +478,7 @@ public: // vnode_effective_replication_map for the local shard. // // Therefore create should be called first on shard 0, then on all other shards. 
- future create_effective_replication_map(replication_strategy_ptr rs, token_metadata_ptr tmptr); + future create_effective_replication_map(replication_strategy_ptr rs, token_metadata2_ptr tmptr); future<> stop() noexcept; @@ -497,7 +497,7 @@ private: friend class vnode_effective_replication_map; }; -void maybe_remove_node_being_replaced(const token_metadata&, +void maybe_remove_node_being_replaced(const token_metadata2&, const abstract_replication_strategy&, inet_address_vector_replica_set& natural_endpoints); diff --git a/locator/everywhere_replication_strategy.cc b/locator/everywhere_replication_strategy.cc index 48a757b6da..cb25cb5b47 100644 --- a/locator/everywhere_replication_strategy.cc +++ b/locator/everywhere_replication_strategy.cc @@ -29,8 +29,8 @@ future everywhere_replication_strategy::calculate_natural_endpoints return make_ready_future(host_id_set(all_endpoints.begin(), all_endpoints.end())); } -size_t everywhere_replication_strategy::get_replication_factor(const token_metadata& tm) const { - return tm.get_new()->sorted_tokens().empty() ? 1 : tm.get_new()->count_normal_token_owners(); +size_t everywhere_replication_strategy::get_replication_factor(const token_metadata2& tm) const { + return tm.sorted_tokens().empty() ? 
1 : tm.count_normal_token_owners(); } using registry = class_registrator; diff --git a/locator/everywhere_replication_strategy.hh b/locator/everywhere_replication_strategy.hh index 6e072d3fc6..f6de1823bc 100644 --- a/locator/everywhere_replication_strategy.hh +++ b/locator/everywhere_replication_strategy.hh @@ -27,7 +27,7 @@ public: return std::nullopt; } - virtual size_t get_replication_factor(const token_metadata& tm) const override; + virtual size_t get_replication_factor(const token_metadata2& tm) const override; virtual bool allow_remove_node_being_replaced_from_natural_endpoints() const override { return true; diff --git a/locator/local_strategy.cc b/locator/local_strategy.cc index d5dcbae3a1..a0539b2888 100644 --- a/locator/local_strategy.cc +++ b/locator/local_strategy.cc @@ -30,7 +30,7 @@ std::optional> local_strategy::recognized_options(co return {}; } -size_t local_strategy::get_replication_factor(const token_metadata&) const { +size_t local_strategy::get_replication_factor(const token_metadata2&) const { return 1; } diff --git a/locator/local_strategy.hh b/locator/local_strategy.hh index a93515b94d..ffaa18d493 100644 --- a/locator/local_strategy.hh +++ b/locator/local_strategy.hh @@ -25,7 +25,7 @@ class local_strategy : public abstract_replication_strategy { public: local_strategy(const replication_strategy_config_options& config_options); virtual ~local_strategy() {}; - virtual size_t get_replication_factor(const token_metadata&) const override; + virtual size_t get_replication_factor(const token_metadata2&) const override; virtual future calculate_natural_endpoints(const token& search_token, const token_metadata2& tm) const override; diff --git a/locator/network_topology_strategy.cc b/locator/network_topology_strategy.cc index ca25ff2acd..01e167f048 100644 --- a/locator/network_topology_strategy.cc +++ b/locator/network_topology_strategy.cc @@ -281,14 +281,14 @@ std::optional> network_topology_strategy::recognized return opts; } 
-effective_replication_map_ptr network_topology_strategy::make_replication_map(table_id table, token_metadata_ptr tm) const { +effective_replication_map_ptr network_topology_strategy::make_replication_map(table_id table, token_metadata2_ptr tm) const { if (!uses_tablets()) { on_internal_error(rslogger, format("make_replication_map() called for table {} but replication strategy not configured to use tablets", table)); } return do_make_replication_map(table, shared_from_this(), std::move(tm), _rep_factor); } -future network_topology_strategy::allocate_tablets_for_new_table(schema_ptr s, token_metadata_ptr tm) const { +future network_topology_strategy::allocate_tablets_for_new_table(schema_ptr s, token_metadata2_ptr tm) const { auto tablet_count = get_initial_tablets(); auto aligned_tablet_count = 1ul << log2ceil(tablet_count); if (tablet_count != aligned_tablet_count) { @@ -297,23 +297,23 @@ future network_topology_strategy::allocate_tablets_for_new_table(sch } tablet_map tablets(tablet_count); - load_sketch load(tm->get_new_strong()); + load_sketch load(tm); co_await load.populate(); // FIXME: Don't use tokens to distribute nodes. // The following reuses the existing token-based algorithm used by NetworkTopologyStrategy. 
- assert(!tm->get_new()->sorted_tokens().empty()); - auto token_range = tm->get_new()->ring_range(dht::token::get_random_token()); + assert(!tm->sorted_tokens().empty()); + auto token_range = tm->ring_range(dht::token::get_random_token()); for (tablet_id tb : tablets.tablet_ids()) { - natural_endpoints_tracker tracker(*tm->get_new(), _dc_rep_factor); + natural_endpoints_tracker tracker(*tm, _dc_rep_factor); while (true) { co_await coroutine::maybe_yield(); if (token_range.begin() == token_range.end()) { - token_range = tm->get_new()->ring_range(dht::minimum_token()); + token_range = tm->ring_range(dht::minimum_token()); } - locator::host_id ep = *tm->get_new()->get_endpoint(*token_range.begin()); + locator::host_id ep = *tm->get_endpoint(*token_range.begin()); token_range.drop_front(); if (tracker.add_endpoint_and_check_if_done(ep)) { break; diff --git a/locator/network_topology_strategy.hh b/locator/network_topology_strategy.hh index afc24bf610..3ce065e98b 100644 --- a/locator/network_topology_strategy.hh +++ b/locator/network_topology_strategy.hh @@ -25,7 +25,7 @@ public: network_topology_strategy( const replication_strategy_config_options& config_options); - virtual size_t get_replication_factor(const token_metadata&) const override { + virtual size_t get_replication_factor(const token_metadata2&) const override { return _rep_factor; } @@ -43,8 +43,8 @@ public: } public: // tablet_aware_replication_strategy - virtual effective_replication_map_ptr make_replication_map(table_id, token_metadata_ptr) const override; - virtual future allocate_tablets_for_new_table(schema_ptr, token_metadata_ptr) const override; + virtual effective_replication_map_ptr make_replication_map(table_id, token_metadata2_ptr) const override; + virtual future allocate_tablets_for_new_table(schema_ptr, token_metadata2_ptr) const override; protected: /** * calculate endpoints in one pass through the tokens by tracking our diff --git a/locator/simple_strategy.cc b/locator/simple_strategy.cc 
index 1cde3d6014..22b9f780a7 100644 --- a/locator/simple_strategy.cc +++ b/locator/simple_strategy.cc @@ -63,7 +63,7 @@ future simple_strategy::calculate_natural_endpoints(const token& t, co_return endpoints; } -size_t simple_strategy::get_replication_factor(const token_metadata&) const { +size_t simple_strategy::get_replication_factor(const token_metadata2&) const { return _replication_factor; } diff --git a/locator/simple_strategy.hh b/locator/simple_strategy.hh index 9385510147..cb75372048 100644 --- a/locator/simple_strategy.hh +++ b/locator/simple_strategy.hh @@ -19,7 +19,7 @@ class simple_strategy : public abstract_replication_strategy { public: simple_strategy(const replication_strategy_config_options& config_options); virtual ~simple_strategy() {}; - virtual size_t get_replication_factor(const token_metadata& tm) const override; + virtual size_t get_replication_factor(const token_metadata2& tm) const override; virtual void validate_options(const gms::feature_service&) const override; virtual std::optional> recognized_options(const topology&) const override; virtual bool allow_remove_node_being_replaced_from_natural_endpoints() const override { diff --git a/locator/tablet_metadata_guard.hh b/locator/tablet_metadata_guard.hh index 127a0ce137..ca5187d26f 100644 --- a/locator/tablet_metadata_guard.hh +++ b/locator/tablet_metadata_guard.hh @@ -45,14 +45,14 @@ public: return _abort_source; } - locator::token_metadata_ptr get_token_metadata() { + locator::token_metadata2_ptr get_token_metadata() { return _erm->get_token_metadata_ptr(); } /// Returns tablet_map for the table of the tablet associated with this guard. /// The result is valid until the next deferring point. 
const locator::tablet_map& get_tablet_map() { - return get_token_metadata()->get_new()->tablets().get_tablet_map(_tablet.table); + return get_token_metadata()->tablets().get_tablet_map(_tablet.table); } }; diff --git a/locator/tablet_replication_strategy.hh b/locator/tablet_replication_strategy.hh index 9403cf7c8c..03257ed0c2 100644 --- a/locator/tablet_replication_strategy.hh +++ b/locator/tablet_replication_strategy.hh @@ -38,13 +38,13 @@ protected: size_t get_initial_tablets() const { return _initial_tablets; } effective_replication_map_ptr do_make_replication_map(table_id, replication_strategy_ptr, - token_metadata_ptr, + token_metadata2_ptr, size_t replication_factor) const; public: /// Generates tablet_map for a new table. /// Runs under group0 guard. - virtual future allocate_tablets_for_new_table(schema_ptr, token_metadata_ptr) const = 0; + virtual future allocate_tablets_for_new_table(schema_ptr, token_metadata2_ptr) const = 0; }; } // namespace locator diff --git a/locator/tablets.cc b/locator/tablets.cc index 38a55852fa..2a145d75fe 100644 --- a/locator/tablets.cc +++ b/locator/tablets.cc @@ -338,21 +338,21 @@ private: inet_address_vector_replica_set result; result.reserve(replicas.size()); for (auto&& replica : replicas) { - result.emplace_back(_tmptr->get_new()->get_endpoint_for_host_id(replica.host)); + result.emplace_back(_tmptr->get_endpoint_for_host_id(replica.host)); } return result; } const tablet_map& get_tablet_map() const { - return _tmptr->get_new()->tablets().get_tablet_map(_table); + return _tmptr->tablets().get_tablet_map(_table); } public: tablet_effective_replication_map(table_id table, replication_strategy_ptr rs, - token_metadata_ptr tmptr, + token_metadata2_ptr tmptr, size_t replication_factor) : effective_replication_map(std::move(rs), std::move(tmptr), replication_factor) , _table(table) - , _sharder(*_tmptr->get_new(), table) + , _sharder(*_tmptr, table) { } virtual ~tablet_effective_replication_map() = default; @@ -399,7 +399,7 @@ 
public: case write_replica_set_selector::both: tablet_logger.trace("get_pending_endpoints({}): table={}, tablet={}, replica={}", search_token, _table, tablet, info->pending_replica); - return {_tmptr->get_new()->get_endpoint_for_host_id(info->pending_replica.host)}; + return {_tmptr->get_endpoint_for_host_id(info->pending_replica.host)}; case write_replica_set_selector::next: return {}; } @@ -466,7 +466,7 @@ public: } virtual bool has_pending_ranges(inet_address endpoint) const override { - const auto host_id = _tmptr->get_new()->get_host_id_if_known(endpoint); + const auto host_id = _tmptr->get_host_id_if_known(endpoint); if (!host_id.has_value()) { return false; } @@ -502,7 +502,7 @@ public: return t; } }; - return std::make_unique(_tmptr->get_new_strong(), get_tablet_map()); + return std::make_unique(_tmptr, get_tablet_map()); } const dht::sharder& get_sharder(const schema& s) const override { @@ -548,13 +548,13 @@ std::unordered_set tablet_aware_replication_strategy::recognized_tablet } effective_replication_map_ptr tablet_aware_replication_strategy::do_make_replication_map( - table_id table, replication_strategy_ptr rs, token_metadata_ptr tm, size_t replication_factor) const { + table_id table, replication_strategy_ptr rs, token_metadata2_ptr tm, size_t replication_factor) const { return seastar::make_shared(table, std::move(rs), std::move(tm), replication_factor); } void tablet_metadata_guard::check() noexcept { auto erm = _table->get_effective_replication_map(); - auto& tmap = erm->get_token_metadata_ptr()->get_new()->tablets().get_tablet_map(_tablet.table); + auto& tmap = erm->get_token_metadata_ptr()->tablets().get_tablet_map(_tablet.table); auto* trinfo = tmap.get_tablet_transition_info(_tablet.tablet); if (bool(_stage) != bool(trinfo) || (_stage && _stage != trinfo->stage)) { _abort_source.request_abort(); diff --git a/locator/token_metadata.cc b/locator/token_metadata.cc index 78a2a99c6f..e6c1e4cd1b 100644 --- a/locator/token_metadata.cc +++ 
b/locator/token_metadata.cc @@ -1030,13 +1030,13 @@ generic_token_metadata::ring_range(dht::ring_position_view start) const } class token_metadata_ring_splitter : public locator::token_range_splitter { - token_metadata_ptr _tmptr; - boost::iterator_range _range; + token_metadata2_ptr _tmptr; + boost::iterator_range _range; public: - token_metadata_ring_splitter(token_metadata_ptr tmptr) + token_metadata_ring_splitter(token_metadata2_ptr tmptr) : _tmptr(std::move(tmptr)) , _range(_tmptr->sorted_tokens().empty() // ring_range() throws if the ring is empty - ? boost::make_iterator_range(token_metadata::tokens_iterator(), token_metadata::tokens_iterator()) + ? boost::make_iterator_range(token_metadata2::tokens_iterator(), token_metadata2::tokens_iterator()) : _tmptr->ring_range(dht::minimum_token())) { } @@ -1054,7 +1054,7 @@ public: } }; -std::unique_ptr make_splitter(token_metadata_ptr tmptr) { +std::unique_ptr make_splitter(token_metadata2_ptr tmptr) { return std::make_unique(std::move(tmptr)); } diff --git a/locator/token_range_splitter.hh b/locator/token_range_splitter.hh index ac55d65fd1..0222f14b0b 100644 --- a/locator/token_range_splitter.hh +++ b/locator/token_range_splitter.hh @@ -40,6 +40,6 @@ public: virtual std::optional next_token() = 0; }; -std::unique_ptr make_splitter(token_metadata_ptr); +std::unique_ptr make_splitter(token_metadata2_ptr); } \ No newline at end of file diff --git a/locator/topology.hh b/locator/topology.hh index ad436af2bf..034ac92c44 100644 --- a/locator/topology.hh +++ b/locator/topology.hh @@ -326,7 +326,7 @@ public: } auto get_local_dc_filter() const noexcept { - return [ this, local_dc = get_datacenter() ] (inet_address ep) { + return [ this, local_dc = get_datacenter() ] (auto ep) { return get_datacenter(ep) == local_dc; }; }; diff --git a/locator/util.cc b/locator/util.cc index 1a16fd1313..8283cee2ad 100644 --- a/locator/util.cc +++ b/locator/util.cc @@ -54,7 +54,7 @@ 
get_range_to_address_map(locator::effective_replication_map_ptr erm, // Caller is responsible to hold token_metadata valid until the returned future is resolved static future> -get_tokens_in_local_dc(const locator::token_metadata& tm) { +get_tokens_in_local_dc(const locator::token_metadata2& tm) { std::vector filtered_tokens; auto local_dc_filter = tm.get_topology().get_local_dc_filter(); for (auto token : tm.sorted_tokens()) { diff --git a/repair/repair.cc b/repair/repair.cc index b60f4ce76b..6edaabd09d 100644 --- a/repair/repair.cc +++ b/repair/repair.cc @@ -221,7 +221,7 @@ static std::vector get_neighbors( dht::token tok = range.end() ? range.end()->value() : dht::maximum_token(); auto ret = erm.get_natural_endpoints(tok); if (small_table_optimization) { - auto normal_nodes = erm.get_token_metadata().get_all_endpoints(); + auto normal_nodes = erm.get_token_metadata().get_all_ips(); ret = inet_address_vector_replica_set(normal_nodes.begin(), normal_nodes.end()); } auto my_address = erm.get_topology().my_address(); @@ -1231,7 +1231,7 @@ future<> repair::user_requested_repair_task_impl::run() { bool hints_batchlog_flushed = false; std::list participants; if (_small_table_optimization) { - auto normal_nodes = germs->get().get_token_metadata().get_all_endpoints(); + auto normal_nodes = germs->get().get_token_metadata().get_all_ips(); participants = std::list(normal_nodes.begin(), normal_nodes.end()); } else { participants = get_hosts_participating_in_repair(germs->get(), keyspace, ranges, data_centers, hosts, ignore_nodes).get(); diff --git a/repair/row_level.cc b/repair/row_level.cc index 8ce475bde7..3b3a621edc 100644 --- a/repair/row_level.cc +++ b/repair/row_level.cc @@ -669,7 +669,7 @@ void flush_rows(schema_ptr s, std::list& rows, lw_shared_ptr last_dk; bool do_small_table_optimization = erm && small_table_optimization; auto* strat = do_small_table_optimization ? &erm->get_replication_strategy() : nullptr; - auto* tm = do_small_table_optimization ? 
&erm->get_token_metadata() : nullptr; + auto tm = do_small_table_optimization ? erm->get_token_metadata_ptr() : nullptr; auto myip = do_small_table_optimization ? erm->get_topology().my_address() : gms::inet_address(); for (auto& r : rows) { thread::maybe_yield(); @@ -679,7 +679,7 @@ void flush_rows(schema_ptr s, std::list& rows, lw_shared_ptrdk; if (do_small_table_optimization) { // Check if the token is owned by the node - auto eps = strat->calculate_natural_endpoints(dk.token(), *tm).get0(); + auto eps = strat->calculate_natural_ips(dk.token(), tm).get0(); if (!eps.contains(myip)) { rlogger.trace("master: ignore row, token={}", dk.token()); continue; @@ -1900,12 +1900,12 @@ public: } if (small_table_optimization) { auto& strat = erm.get_replication_strategy(); - auto& tm = erm.get_token_metadata(); + auto& tm = erm.get_token_metadata_ptr(); std::list tmp; for (auto& row : row_diff) { repair_row r = std::move(row); const auto& dk = r.get_dk_with_hash()->dk; - auto eps = co_await strat.calculate_natural_endpoints(dk.token(), tm); + auto eps = co_await strat.calculate_natural_ips(dk.token(), tm); if (eps.contains(remote_node)) { tmp.push_back(std::move(r)); } else { diff --git a/replica/database.cc b/replica/database.cc index a458eeb2f8..8cef7e40c4 100644 --- a/replica/database.cc +++ b/replica/database.cc @@ -998,7 +998,7 @@ future<> database::add_column_family(keyspace& ks, schema_ptr schema, column_fam auto&& rs = ks.get_replication_strategy(); locator::effective_replication_map_ptr erm; if (auto pt_rs = rs.maybe_as_per_table()) { - erm = pt_rs->make_replication_map(schema->id(), _shared_token_metadata.get()); + erm = pt_rs->make_replication_map(schema->id(), _shared_token_metadata.get()->get_new_strong()); } else { erm = ks.get_effective_replication_map(); } @@ -1313,7 +1313,7 @@ keyspace::create_replication_strategy(const locator::shared_token_metadata& stm) rslogger.debug("replication strategy for keyspace {} is {}, opts={}", _metadata->name(), 
_metadata->strategy_name(), _metadata->strategy_options()); if (!_replication_strategy->is_per_table()) { - auto erm = co_await _erm_factory.create_effective_replication_map(_replication_strategy, stm.get()); + auto erm = co_await _erm_factory.create_effective_replication_map(_replication_strategy, stm.get()->get_new_strong()); update_effective_replication_map(std::move(erm)); } } diff --git a/replica/table.cc b/replica/table.cc index 2da718f9fd..2488a507b7 100644 --- a/replica/table.cc +++ b/replica/table.cc @@ -569,7 +569,7 @@ private: const locator::tablet_map& tablet_map() const { // FIXME: cheaper way to retrieve tablet_map than looking up every time in tablet_metadata's map. auto& tm = erm()->get_token_metadata(); - return tm.get_new()->tablets().get_tablet_map(schema()->id()); + return tm.tablets().get_tablet_map(schema()->id()); } public: tablet_compaction_group_manager(replica::table& t) : _t(t) {} diff --git a/service/storage_proxy.cc b/service/storage_proxy.cc index ee038d3b3d..a1469aeff2 100644 --- a/service/storage_proxy.cc +++ b/service/storage_proxy.cc @@ -2286,7 +2286,7 @@ bool paxos_response_handler::learned(gms::inet_address ep) { } static inet_address_vector_replica_set -replica_ids_to_endpoints(const locator::token_metadata& tm, const std::vector& replica_ids) { +replica_ids_to_endpoints(const locator::token_metadata2& tm, const std::vector& replica_ids) { inet_address_vector_replica_set endpoints; endpoints.reserve(replica_ids.size()); @@ -2300,7 +2300,7 @@ replica_ids_to_endpoints(const locator::token_metadata& tm, const std::vector -endpoints_to_replica_ids(const locator::token_metadata& tm, const inet_address_vector_replica_set& endpoints) { +endpoints_to_replica_ids(const locator::token_metadata2& tm, const inet_address_vector_replica_set& endpoints) { std::vector replica_ids; replica_ids.reserve(endpoints.size()); diff --git a/service/storage_service.cc b/service/storage_service.cc index 7c20d468e1..2292166b8f 100644 --- 
a/service/storage_service.cc +++ b/service/storage_service.cc @@ -4389,7 +4389,7 @@ future<> storage_service::replicate_to_all_cores(mutable_token_metadata_ptr tmpt if (rs->is_per_table()) { continue; } - auto erm = co_await get_erm_factory().create_effective_replication_map(rs, tmptr); + auto erm = co_await get_erm_factory().create_effective_replication_map(rs, tmptr->get_new_strong()); pending_effective_replication_maps[base_shard].emplace(ks_name, std::move(erm)); } co_await container().invoke_on_others([&] (storage_service& ss) -> future<> { @@ -4400,7 +4400,7 @@ future<> storage_service::replicate_to_all_cores(mutable_token_metadata_ptr tmpt continue; } auto tmptr = pending_token_metadata_ptr[this_shard_id()]; - auto erm = co_await ss.get_erm_factory().create_effective_replication_map(rs, std::move(tmptr)); + auto erm = co_await ss.get_erm_factory().create_effective_replication_map(rs, tmptr->get_new_strong()); pending_effective_replication_maps[this_shard_id()].emplace(ks_name, std::move(erm)); } }); @@ -4412,7 +4412,7 @@ future<> storage_service::replicate_to_all_cores(mutable_token_metadata_ptr tmpt auto rs = db.find_keyspace(table->schema()->keypace_name()).get_replication_strategy_ptr(); locator::effective_replication_map_ptr erm; if (auto pt_rs = rs->maybe_as_per_table()) { - erm = pt_rs->make_replication_map(id, tmptr); + erm = pt_rs->make_replication_map(id, tmptr->get_new_strong()); } else { erm = pending_effective_replication_maps[this_shard_id()][table->schema()->keypace_name()]; } @@ -6814,8 +6814,8 @@ future<> storage_service::stream_tablet(locator::global_tablet_id tablet) { auto& table = _db.local().find_column_family(tablet.table); std::vector tables = {table.schema()->cf_name()}; - auto streamer = make_lw_shared(_db, _stream_manager, tm->get_new_strong(), guard.get_abort_source(), - tm->get_new()->get_my_id(), _snitch.local()->get_location(), + auto streamer = make_lw_shared(_db, _stream_manager, tm, guard.get_abort_source(), + 
tm->get_my_id(), _snitch.local()->get_location(), "Tablet migration", streaming::stream_reason::tablet_migration, topo_guard, std::move(tables)); streamer->add_source_filter(std::make_unique( _gossiper.get_unreachable_members())); diff --git a/service/tablet_allocator.cc b/service/tablet_allocator.cc index e66f9127c9..89bdac9320 100644 --- a/service/tablet_allocator.cc +++ b/service/tablet_allocator.cc @@ -828,7 +828,7 @@ public: auto rs = abstract_replication_strategy::create_replication_strategy(ksm.strategy_name(), ksm.strategy_options()); if (auto&& tablet_rs = rs->maybe_as_tablet_aware()) { auto tm = _db.get_shared_token_metadata().get(); - auto map = tablet_rs->allocate_tablets_for_new_table(s.shared_from_this(), tm).get0(); + auto map = tablet_rs->allocate_tablets_for_new_table(s.shared_from_this(), tm->get_new_strong()).get0(); muts.emplace_back(tablet_map_to_mutation(map, s.id(), s.keypace_name(), s.cf_name(), ts).get0()); } } diff --git a/test/boost/network_topology_strategy_test.cc b/test/boost/network_topology_strategy_test.cc index 97e70f8fb1..5ab8bab42f 100644 --- a/test/boost/network_topology_strategy_test.cc +++ b/test/boost/network_topology_strategy_test.cc @@ -90,7 +90,7 @@ void strategy_sanity_check( total_rf += rf; } - BOOST_CHECK(ars_ptr->get_replication_factor(token_metadata(tm)) == total_rf); + BOOST_CHECK(ars_ptr->get_replication_factor(*tm) == total_rf); } void endpoints_check( @@ -111,7 +111,7 @@ void endpoints_check( // Check the total RF BOOST_CHECK(endpoints.size() == total_rf); - BOOST_CHECK(total_rf <= ars_ptr->get_replication_factor(token_metadata(tm))); + BOOST_CHECK(total_rf <= ars_ptr->get_replication_factor(*tm)); // Check the uniqueness std::unordered_set ep_set(endpoints.begin(), endpoints.end()); @@ -161,7 +161,7 @@ void full_ring_check(const std::vector& ring_points, const auto& topo = tm.get_topology(); strategy_sanity_check(ars_ptr, tmptr, options); - auto erm = calculate_effective_replication_map(ars_ptr, 
make_token_metadata_ptr(tmptr)).get0(); + auto erm = calculate_effective_replication_map(ars_ptr, tmptr).get0(); for (auto& rp : ring_points) { double cur_point1 = rp.point - 0.5; @@ -444,7 +444,7 @@ SEASTAR_THREAD_TEST_CASE(NetworkTopologyStrategy_tablets_test) { .with_column("v", utf8_type) .build(); - auto tmap = tab_awr_ptr->allocate_tablets_for_new_table(s, stm.get()).get0(); + auto tmap = tab_awr_ptr->allocate_tablets_for_new_table(s, stm.get()->get_new_strong()).get0(); full_ring_check(tmap, options323, ars_ptr, stm.get()->get_new_strong()); /////////////// @@ -461,7 +461,7 @@ SEASTAR_THREAD_TEST_CASE(NetworkTopologyStrategy_tablets_test) { tab_awr_ptr = ars_ptr->maybe_as_tablet_aware(); BOOST_REQUIRE(tab_awr_ptr); - tmap = tab_awr_ptr->allocate_tablets_for_new_table(s, stm.get()).get0(); + tmap = tab_awr_ptr->allocate_tablets_for_new_table(s, stm.get()->get_new_strong()).get0(); full_ring_check(tmap, options320, ars_ptr, stm.get()->get_new_strong()); // Test the case of not enough nodes to meet RF in DC 102 @@ -477,7 +477,7 @@ SEASTAR_THREAD_TEST_CASE(NetworkTopologyStrategy_tablets_test) { tab_awr_ptr = ars_ptr->maybe_as_tablet_aware(); BOOST_REQUIRE(tab_awr_ptr); - tmap = tab_awr_ptr->allocate_tablets_for_new_table(s, stm.get()).get0(); + tmap = tab_awr_ptr->allocate_tablets_for_new_table(s, stm.get()->get_new_strong()).get0(); full_ring_check(tmap, options324, ars_ptr, stm.get()->get_new_strong()); } diff --git a/test/boost/storage_proxy_test.cc b/test/boost/storage_proxy_test.cc index 1a40d21938..d0d2bb94ad 100644 --- a/test/boost/storage_proxy_test.cc +++ b/test/boost/storage_proxy_test.cc @@ -41,7 +41,7 @@ SEASTAR_TEST_CASE(test_get_restricted_ranges) { std::vector ring = make_ring(s, 10); - auto check = [&s](locator::token_metadata_ptr tmptr, dht::partition_range input, + auto check = [&s](locator::token_metadata2_ptr tmptr, dht::partition_range input, dht::partition_range_vector expected) { query_ranges_to_vnodes_generator 
ranges_to_vnodes(locator::make_splitter(tmptr), s, {input}); auto actual = ranges_to_vnodes(1000); @@ -54,9 +54,10 @@ SEASTAR_TEST_CASE(test_get_restricted_ranges) { { // Ring with minimum token - auto tmptr = locator::make_token_metadata_ptr(locator::token_metadata::config{}); - tmptr->update_topology(gms::inet_address("10.0.0.1"), locator::endpoint_dc_rack{"dc1", "rack1"}); - tmptr->update_normal_tokens(std::unordered_set({dht::minimum_token()}), gms::inet_address("10.0.0.1")).get(); + auto tmptr = locator::make_token_metadata2_ptr(locator::token_metadata::config{}); + const auto host_id = locator::host_id{utils::UUID(0, 1)}; + tmptr->update_topology(host_id, locator::endpoint_dc_rack{"dc1", "rack1"}); + tmptr->update_normal_tokens(std::unordered_set({dht::minimum_token()}), host_id).get(); check(tmptr, dht::partition_range::make_singular(ring[0]), { dht::partition_range::make_singular(ring[0]) @@ -68,11 +69,13 @@ SEASTAR_TEST_CASE(test_get_restricted_ranges) { } { - auto tmptr = locator::make_token_metadata_ptr(locator::token_metadata::config{}); - tmptr->update_topology(gms::inet_address("10.0.0.1"), locator::endpoint_dc_rack{"dc1", "rack1"}); - tmptr->update_normal_tokens(std::unordered_set({ring[2].token()}), gms::inet_address("10.0.0.1")).get(); - tmptr->update_topology(gms::inet_address("10.0.0.2"), locator::endpoint_dc_rack{"dc1", "rack1"}); - tmptr->update_normal_tokens(std::unordered_set({ring[5].token()}), gms::inet_address("10.0.0.2")).get(); + auto tmptr = locator::make_token_metadata2_ptr(locator::token_metadata::config{}); + const auto id1 = locator::host_id{utils::UUID(0, 1)}; + const auto id2 = locator::host_id{utils::UUID(0, 2)}; + tmptr->update_topology(id1, locator::endpoint_dc_rack{"dc1", "rack1"}); + tmptr->update_normal_tokens(std::unordered_set({ring[2].token()}), id1).get(); + tmptr->update_topology(id2, locator::endpoint_dc_rack{"dc1", "rack1"}); + tmptr->update_normal_tokens(std::unordered_set({ring[5].token()}), id2).get(); check(tmptr, 
dht::partition_range::make_singular(ring[0]), { dht::partition_range::make_singular(ring[0]) diff --git a/test/boost/token_metadata_test.cc b/test/boost/token_metadata_test.cc index 8aed089f80..5f5454abf1 100644 --- a/test/boost/token_metadata_test.cc +++ b/test/boost/token_metadata_test.cc @@ -42,7 +42,7 @@ namespace { dc_rack_fn get_dc_rack_fn = get_dc_rack; tmptr->update_topology_change_info(get_dc_rack_fn).get(); auto strategy = seastar::make_shared(std::move(opts)); - return calculate_effective_replication_map(std::move(strategy), make_token_metadata_ptr(tmptr)).get0(); + return calculate_effective_replication_map(std::move(strategy), tmptr).get0(); } } diff --git a/tombstone_gc.cc b/tombstone_gc.cc index 2bed1374cd..93744f60d9 100644 --- a/tombstone_gc.cc +++ b/tombstone_gc.cc @@ -181,7 +181,7 @@ static bool needs_repair_before_gc(const replica::database& db, sstring ks_name) auto& ks = db.find_keyspace(ks_name); auto& rs = ks.get_replication_strategy(); bool needs_repair = rs.get_type() != locator::replication_strategy_type::local - && rs.get_replication_factor(db.get_token_metadata()) != 1; + && rs.get_replication_factor(*db.get_token_metadata().get_new()) != 1; return needs_repair; } From e50dbef3e288672774be06656608791872fd1f03 Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Mon, 6 Nov 2023 18:23:02 +0400 Subject: [PATCH 43/51] database: get_token_metadata -> new token_metadata database::get_token_metadata() is switched to token_metadata2. get_all_ips method is added to the host_id-based token_metadata, since it's convenient and will be used in several places. It returns all current nodes converted to inet_address by means of the topology contained within token_metadata. hint_sender::can_send: if the node has already left the cluster we may not find its host_id. This case is handled in the same way as if it's not a normal token owner - we simply send a hint to all replicas.
--- db/hints/internal/hint_sender.cc | 4 +++- db/view/view.cc | 4 ++-- db/view/view_update_checks.hh | 2 +- repair/repair.cc | 2 +- replica/database.hh | 2 +- tombstone_gc.cc | 2 +- 6 files changed, 9 insertions(+), 7 deletions(-) diff --git a/db/hints/internal/hint_sender.cc b/db/hints/internal/hint_sender.cc index abd2b9c690..561a3a5677 100644 --- a/db/hints/internal/hint_sender.cc +++ b/db/hints/internal/hint_sender.cc @@ -101,7 +101,9 @@ bool hint_sender::can_send() noexcept { return true; } else { if (!_state.contains(state::ep_state_left_the_ring)) { - _state.set_if(!_shard_manager.local_db().get_token_metadata().is_normal_token_owner(end_point_key())); + const auto& tm = _shard_manager.local_db().get_token_metadata(); + const auto host_id = tm.get_host_id_if_known(end_point_key()); + _state.set_if(!host_id || !tm.is_normal_token_owner(*host_id)); } // send the hints out if the destination Node is part of the ring - we will send to all new replicas in this case return _state.contains(state::ep_state_left_the_ring); diff --git a/db/view/view.cc b/db/view/view.cc index 726974fadd..f2df589afe 100644 --- a/db/view/view.cc +++ b/db/view/view.cc @@ -2573,7 +2573,7 @@ update_backlog node_update_backlog::add_fetch(unsigned shard, update_backlog bac return std::max(backlog, _max.load(std::memory_order_relaxed)); } -future check_view_build_ongoing(db::system_distributed_keyspace& sys_dist_ks, const locator::token_metadata& tm, const sstring& ks_name, +future check_view_build_ongoing(db::system_distributed_keyspace& sys_dist_ks, const locator::token_metadata2& tm, const sstring& ks_name, const sstring& cf_name) { using view_statuses_type = std::unordered_map; return sys_dist_ks.view_status(ks_name, cf_name).then([&tm] (view_statuses_type&& view_statuses) { @@ -2584,7 +2584,7 @@ future check_view_build_ongoing(db::system_distributed_keyspace& sys_dist_ }); } -future check_needs_view_update_path(db::system_distributed_keyspace& sys_dist_ks, const locator::token_metadata& 
tm, const replica::table& t, +future check_needs_view_update_path(db::system_distributed_keyspace& sys_dist_ks, const locator::token_metadata2& tm, const replica::table& t, streaming::stream_reason reason) { if (is_internal_keyspace(t.schema()->ks_name())) { return make_ready_future(false); diff --git a/db/view/view_update_checks.hh b/db/view/view_update_checks.hh index ccb3eacafa..6ee8e9b173 100644 --- a/db/view/view_update_checks.hh +++ b/db/view/view_update_checks.hh @@ -32,7 +32,7 @@ using token_metadata2 = generic_token_metadata; namespace db::view { -future check_needs_view_update_path(db::system_distributed_keyspace& sys_dist_ks, const locator::token_metadata& tm, const replica::table& t, +future check_needs_view_update_path(db::system_distributed_keyspace& sys_dist_ks, const locator::token_metadata2& tm, const replica::table& t, streaming::stream_reason reason); } diff --git a/repair/repair.cc b/repair/repair.cc index 6edaabd09d..f59eb4f90d 100644 --- a/repair/repair.cc +++ b/repair/repair.cc @@ -1237,7 +1237,7 @@ future<> repair::user_requested_repair_task_impl::run() { participants = get_hosts_participating_in_repair(germs->get(), keyspace, ranges, data_centers, hosts, ignore_nodes).get(); } if (needs_flush_before_repair) { - auto waiting_nodes = db.get_token_metadata().get_all_endpoints(); + auto waiting_nodes = db.get_token_metadata().get_all_ips(); std::erase_if(waiting_nodes, [&] (const auto& addr) { return ignore_nodes.contains(addr); }); diff --git a/replica/database.hh b/replica/database.hh index bff06cbd54..da6827b76a 100644 --- a/replica/database.hh +++ b/replica/database.hh @@ -1561,7 +1561,7 @@ public: } const locator::shared_token_metadata& get_shared_token_metadata() const { return _shared_token_metadata; } - const locator::token_metadata& get_token_metadata() const { return *_shared_token_metadata.get(); } + const locator::token_metadata2& get_token_metadata() const { return *_shared_token_metadata.get()->get_new(); } wasm::manager& wasm() 
noexcept { return _wasm; } const wasm::manager& wasm() const noexcept { return _wasm; } diff --git a/tombstone_gc.cc b/tombstone_gc.cc index 93744f60d9..2bed1374cd 100644 --- a/tombstone_gc.cc +++ b/tombstone_gc.cc @@ -181,7 +181,7 @@ static bool needs_repair_before_gc(const replica::database& db, sstring ks_name) auto& ks = db.find_keyspace(ks_name); auto& rs = ks.get_replication_strategy(); bool needs_repair = rs.get_type() != locator::replication_strategy_type::local - && rs.get_replication_factor(*db.get_token_metadata().get_new()) != 1; + && rs.get_replication_factor(db.get_token_metadata()) != 1; return needs_repair; } From c7314aa8e25926b3162c1b9c5e68fad846a9c92b Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Mon, 6 Nov 2023 23:31:12 +0400 Subject: [PATCH 44/51] gossiper: use new token_metadata --- gms/gossiper.cc | 27 +++++++++++++++------------ gms/gossiper.hh | 2 +- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/gms/gossiper.cc b/gms/gossiper.cc index 14482a8b4f..9de00489bd 100644 --- a/gms/gossiper.cc +++ b/gms/gossiper.cc @@ -755,8 +755,9 @@ future<> gossiper::do_status_check() { // check for dead state removal auto expire_time = get_expire_time_for_endpoint(endpoint); + const auto host_id = get_host_id(endpoint); if (!is_alive && (now > expire_time) - && (!get_token_metadata_ptr()->is_normal_token_owner(endpoint))) { + && (!get_token_metadata_ptr()->is_normal_token_owner(host_id))) { logger.debug("time is expiring for endpoint : {} ({})", endpoint, expire_time.time_since_epoch().count()); co_await evict_from_membership(endpoint, pid); } @@ -1138,7 +1139,7 @@ std::set gossiper::get_live_members() const { std::set gossiper::get_live_token_owners() const { std::set token_owners; - auto normal_token_owners = get_token_metadata_ptr()->get_all_endpoints(); + auto normal_token_owners = get_token_metadata_ptr()->get_all_ips(); for (auto& node: normal_token_owners) { if (is_alive(node)) { token_owners.insert(node); @@ -1149,7 +1150,7 @@ 
std::set gossiper::get_live_token_owners() const { std::set gossiper::get_unreachable_token_owners() const { std::set token_owners; - auto normal_token_owners = get_token_metadata_ptr()->get_all_endpoints(); + auto normal_token_owners = get_token_metadata_ptr()->get_all_ips(); for (auto& node: normal_token_owners) { if (!is_alive(node)) { token_owners.insert(node); @@ -1306,7 +1307,8 @@ future<> gossiper::assassinate_endpoint(sstring address) { std::vector tokens; logger.warn("Assassinating {} via gossip", endpoint); if (es) { - tokens = gossiper.get_token_metadata_ptr()->get_tokens(endpoint); + const auto host_id = gossiper.get_host_id(endpoint); + tokens = gossiper.get_token_metadata_ptr()->get_tokens(host_id); if (tokens.empty()) { logger.warn("Unable to calculate tokens for {}. Will use a random one", address); throw std::runtime_error(format("Unable to calculate tokens for {}", endpoint)); @@ -1391,7 +1393,8 @@ bool gossiper::is_gossip_only_member(inet_address endpoint) const { if (!es) { return false; } - return !is_dead_state(*es) && !get_token_metadata_ptr()->is_normal_token_owner(endpoint); + const auto host_id = get_host_id(endpoint); + return !is_dead_state(*es) && !get_token_metadata_ptr()->is_normal_token_owner(host_id); } clk::time_point gossiper::get_expire_time_for_endpoint(inet_address endpoint) const noexcept { @@ -2088,14 +2091,14 @@ future<> gossiper::add_saved_endpoint(inet_address ep) { ep_state.set_heart_beat_state_and_update_timestamp(heart_beat_state()); } const auto tmptr = get_token_metadata_ptr(); - auto tokens = tmptr->get_tokens(ep); - if (!tokens.empty()) { - std::unordered_set tokens_set(tokens.begin(), tokens.end()); - ep_state.add_application_state(gms::application_state::TOKENS, versioned_value::tokens(tokens_set)); - } auto host_id = tmptr->get_host_id_if_known(ep); if (host_id) { ep_state.add_application_state(gms::application_state::HOST_ID, versioned_value::host_id(host_id.value())); + auto tokens = 
tmptr->get_tokens(*host_id); + if (!tokens.empty()) { + std::unordered_set tokens_set(tokens.begin(), tokens.end()); + ep_state.add_application_state(gms::application_state::TOKENS, versioned_value::tokens(tokens_set)); + } } auto generation = ep_state.get_heart_beat_state().get_generation(); co_await replicate(ep, std::move(ep_state), permit.id()); @@ -2631,8 +2634,8 @@ void gossiper::append_endpoint_state(std::stringstream& ss, const endpoint_state } } -locator::token_metadata_ptr gossiper::get_token_metadata_ptr() const noexcept { - return _shared_token_metadata.get(); +locator::token_metadata2_ptr gossiper::get_token_metadata_ptr() const noexcept { + return _shared_token_metadata.get()->get_new_strong(); } } // namespace gms diff --git a/gms/gossiper.hh b/gms/gossiper.hh index dee8d910e9..6d6fedd670 100644 --- a/gms/gossiper.hh +++ b/gms/gossiper.hh @@ -674,7 +674,7 @@ private: gossip_config _gcfg; // Get features supported by a particular node std::set get_supported_features(inet_address endpoint) const; - locator::token_metadata_ptr get_token_metadata_ptr() const noexcept; + locator::token_metadata2_ptr get_token_metadata_ptr() const noexcept; public: void check_knows_remote_features(std::set& local_features, const std::unordered_map& loaded_peer_features) const; // Get features supported by all the nodes this node knows about From 799f747c8fb657c5ab500aff27a36a21d913fab5 Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Tue, 7 Nov 2023 11:01:14 +0400 Subject: [PATCH 45/51] shared_token_metadata: switch to the new token_metadata --- api/token_metadata.cc | 16 +- cdc/generation.cc | 8 +- cql3/statements/create_keyspace_statement.cc | 2 +- cql3/statements/create_keyspace_statement.hh | 2 +- cql3/statements/ks_prop_defs.cc | 6 +- cql3/statements/ks_prop_defs.hh | 4 +- gms/gossiper.cc | 2 +- locator/token_metadata.cc | 14 +- locator/token_metadata.hh | 20 +- main.cc | 4 +- replica/database.cc | 4 +- replica/database.hh | 2 +- service/forward_service.cc | 2 +- 
service/forward_service.hh | 2 +- service/migration_manager.cc | 3 +- service/storage_proxy.cc | 2 +- service/storage_proxy.hh | 2 +- service/storage_service.cc | 231 ++++++++----------- service/storage_service.hh | 16 +- service/tablet_allocator.cc | 2 +- test/boost/locator_topology_test.cc | 22 +- test/boost/network_topology_strategy_test.cc | 122 +++++----- test/boost/tablets_test.cc | 200 ++++++++-------- test/lib/cql_test_env.cc | 9 +- 24 files changed, 319 insertions(+), 378 deletions(-) diff --git a/api/token_metadata.cc b/api/token_metadata.cc index 199a949a7e..58442def02 100644 --- a/api/token_metadata.cc +++ b/api/token_metadata.cc @@ -20,19 +20,19 @@ using namespace json; void set_token_metadata(http_context& ctx, routes& r, sharded& tm) { ss::local_hostid.set(r, [&tm](std::unique_ptr req) { - auto id = tm.local().get()->get_new()->get_my_id(); + auto id = tm.local().get()->get_my_id(); return make_ready_future(id.to_sstring()); }); ss::get_tokens.set(r, [&tm] (std::unique_ptr req) { - return make_ready_future(stream_range_as_array(tm.local().get()->get_new()->sorted_tokens(), [](const dht::token& i) { + return make_ready_future(stream_range_as_array(tm.local().get()->sorted_tokens(), [](const dht::token& i) { return fmt::to_string(i); })); }); ss::get_node_tokens.set(r, [&tm] (std::unique_ptr req) { gms::inet_address addr(req->param["endpoint"]); - auto& local_tm = *tm.local().get()->get_new(); + auto& local_tm = *tm.local().get(); const auto host_id = local_tm.get_host_id_if_known(addr); return make_ready_future(stream_range_as_array(host_id ? 
local_tm.get_tokens(*host_id): std::vector{}, [](const dht::token& i) { return fmt::to_string(i); @@ -40,7 +40,7 @@ void set_token_metadata(http_context& ctx, routes& r, shardedget_new(); + const auto& local_tm = *tm.local().get(); const auto& leaving_host_ids = local_tm.get_leaving_endpoints(); std::unordered_set eps; eps.reserve(leaving_host_ids.size()); @@ -56,7 +56,7 @@ void set_token_metadata(http_context& ctx, routes& r, shardedget_new(); + const auto& local_tm = *tm.local().get(); const auto& points = local_tm.get_bootstrap_tokens(); std::unordered_set eps; eps.reserve(points.size()); @@ -68,7 +68,7 @@ void set_token_metadata(http_context& ctx, routes& r, sharded res; - return map_to_key_value(tm.local().get()->get_new()->get_endpoint_to_host_id_map_for_reading(), res); + return map_to_key_value(tm.local().get()->get_endpoint_to_host_id_map_for_reading(), res); }); static auto host_or_broadcast = [&tm](const_req req) { @@ -77,7 +77,7 @@ void set_token_metadata(http_context& ctx, routes& r, shardedget_new()->get_topology(); + auto& topology = tm.local().get()->get_topology(); auto ep = host_or_broadcast(req); if (!topology.has_endpoint(ep)) { // Cannot return error here, nodetool status can race, request @@ -88,7 +88,7 @@ void set_token_metadata(http_context& ctx, routes& r, shardedget_new()->get_topology(); + auto& topology = tm.local().get()->get_topology(); auto ep = host_or_broadcast(req); if (!topology.has_endpoint(ep)) { // Cannot return error here, nodetool status can race, request diff --git a/cdc/generation.cc b/cdc/generation.cc index 76a6552724..2946872a4e 100644 --- a/cdc/generation.cc +++ b/cdc/generation.cc @@ -378,7 +378,7 @@ db_clock::time_point new_generation_timestamp(bool add_delay, std::chrono::milli } future generation_service::legacy_make_new_generation(const std::unordered_set& bootstrap_tokens, bool add_delay) { - const locator::token_metadata2_ptr tmptr = _token_metadata.get()->get_new_strong(); + const locator::token_metadata2_ptr 
tmptr = _token_metadata.get(); // Fetch sharding parameters for a node that owns vnode ending with this token // using gossiped application states. @@ -846,7 +846,7 @@ future<> generation_service::check_and_repair_cdc_streams() { } }); - auto tmptr = _token_metadata.get()->get_new_strong(); + auto tmptr = _token_metadata.get(); auto sys_dist_ks = get_sys_dist_ks(); bool should_regenerate = false; @@ -988,7 +988,7 @@ future<> generation_service::legacy_handle_cdc_generation(std::optionalget_new_strong()] { return tmptr->count_normal_token_owners(); }, + [tmptr = _token_metadata.get()] { return tmptr->count_normal_token_owners(); }, _abort_src); } } @@ -1005,7 +1005,7 @@ void generation_service::legacy_async_handle_cdc_generation(cdc::generation_id g if (using_this_gen) { cdc_log.info("Starting to use generation {}", gen_id); co_await update_streams_description(gen_id, svc->get_sys_dist_ks(), - [tmptr = svc->_token_metadata.get()->get_new_strong()] { return tmptr->count_normal_token_owners(); }, + [tmptr = svc->_token_metadata.get()] { return tmptr->count_normal_token_owners(); }, svc->_abort_src); } co_return; diff --git a/cql3/statements/create_keyspace_statement.cc b/cql3/statements/create_keyspace_statement.cc index 3f5260cda6..eb89dd354e 100644 --- a/cql3/statements/create_keyspace_statement.cc +++ b/cql3/statements/create_keyspace_statement.cc @@ -253,7 +253,7 @@ create_keyspace_statement::execute(query_processor& qp, service::query_state& st }); } -lw_shared_ptr create_keyspace_statement::get_keyspace_metadata(const locator::token_metadata& tm) { +lw_shared_ptr create_keyspace_statement::get_keyspace_metadata(const locator::token_metadata2& tm) { _attrs->validate(); return _attrs->as_ks_metadata(_name, tm); } diff --git a/cql3/statements/create_keyspace_statement.hh b/cql3/statements/create_keyspace_statement.hh index 2aff726ad5..533aeeab2c 100644 --- a/cql3/statements/create_keyspace_statement.hh +++ b/cql3/statements/create_keyspace_statement.hh @@ -76,7 
+76,7 @@ public: virtual future<::shared_ptr> execute(query_processor& qp, service::query_state& state, const query_options& options, std::optional guard) const override; - lw_shared_ptr get_keyspace_metadata(const locator::token_metadata& tm); + lw_shared_ptr get_keyspace_metadata(const locator::token_metadata2& tm); }; std::vector check_against_restricted_replication_strategies( diff --git a/cql3/statements/ks_prop_defs.cc b/cql3/statements/ks_prop_defs.cc index 0739f7a9f8..7120852a45 100644 --- a/cql3/statements/ks_prop_defs.cc +++ b/cql3/statements/ks_prop_defs.cc @@ -20,7 +20,7 @@ namespace statements { static std::map prepare_options( const sstring& strategy_class, - const locator::token_metadata& tm, + const locator::token_metadata2& tm, std::map options, const std::map& old_options = {}) { options.erase(ks_prop_defs::REPLICATION_STRATEGY_CLASS_KEY); @@ -111,13 +111,13 @@ std::optional ks_prop_defs::get_replication_strategy_class() const { return _strategy_class; } -lw_shared_ptr ks_prop_defs::as_ks_metadata(sstring ks_name, const locator::token_metadata& tm) { +lw_shared_ptr ks_prop_defs::as_ks_metadata(sstring ks_name, const locator::token_metadata2& tm) { auto sc = get_replication_strategy_class().value(); return data_dictionary::keyspace_metadata::new_keyspace(ks_name, sc, prepare_options(sc, tm, get_replication_options()), get_boolean(KW_DURABLE_WRITES, true), std::vector{}, get_storage_options()); } -lw_shared_ptr ks_prop_defs::as_ks_metadata_update(lw_shared_ptr old, const locator::token_metadata& tm) { +lw_shared_ptr ks_prop_defs::as_ks_metadata_update(lw_shared_ptr old, const locator::token_metadata2& tm) { std::map options; const auto& old_options = old->strategy_options(); auto sc = get_replication_strategy_class(); diff --git a/cql3/statements/ks_prop_defs.hh b/cql3/statements/ks_prop_defs.hh index 11182df218..8459997dd0 100644 --- a/cql3/statements/ks_prop_defs.hh +++ b/cql3/statements/ks_prop_defs.hh @@ -54,8 +54,8 @@ public: std::map 
get_replication_options() const; std::optional get_replication_strategy_class() const; data_dictionary::storage_options get_storage_options() const; - lw_shared_ptr as_ks_metadata(sstring ks_name, const locator::token_metadata&); - lw_shared_ptr as_ks_metadata_update(lw_shared_ptr old, const locator::token_metadata&); + lw_shared_ptr as_ks_metadata(sstring ks_name, const locator::token_metadata2&); + lw_shared_ptr as_ks_metadata_update(lw_shared_ptr old, const locator::token_metadata2&); #if 0 public KSMetaData asKSMetadataUpdate(KSMetaData old) throws RequestValidationException diff --git a/gms/gossiper.cc b/gms/gossiper.cc index 9de00489bd..f404b04d28 100644 --- a/gms/gossiper.cc +++ b/gms/gossiper.cc @@ -2635,7 +2635,7 @@ void gossiper::append_endpoint_state(std::stringstream& ss, const endpoint_state } locator::token_metadata2_ptr gossiper::get_token_metadata_ptr() const noexcept { - return _shared_token_metadata.get()->get_new_strong(); + return _shared_token_metadata.get(); } } // namespace gms diff --git a/locator/token_metadata.cc b/locator/token_metadata.cc index e6c1e4cd1b..320ca7ae4b 100644 --- a/locator/token_metadata.cc +++ b/locator/token_metadata.cc @@ -1336,7 +1336,7 @@ generic_token_metadata::set_version_tracker(version_tracker_t tracker) { _impl->set_version_tracker(std::move(tracker)); } -void shared_token_metadata::set(mutable_token_metadata_ptr tmptr) noexcept { +void shared_token_metadata::set(mutable_token_metadata2_ptr tmptr) noexcept { if (_shared->get_ring_version() >= tmptr->get_ring_version()) { on_internal_error(tlogger, format("shared_token_metadata: must not set non-increasing ring_version: {} -> {}", _shared->get_ring_version(), tmptr->get_ring_version())); } @@ -1375,7 +1375,7 @@ void shared_token_metadata::update_fence_version(token_metadata::version_t versi tlogger.debug("new fence_version is set, version {}", _fence_version); } -future<> shared_token_metadata::mutate_token_metadata(seastar::noncopyable_function (token_metadata&)> 
func) { +future<> shared_token_metadata::mutate_token_metadata(seastar::noncopyable_function (token_metadata2&)> func) { auto lk = co_await get_lock(); auto tm = co_await _shared->clone_async(); // bump the token_metadata ring_version @@ -1383,17 +1383,17 @@ future<> shared_token_metadata::mutate_token_metadata(seastar::noncopyable_funct // when the modified token_metadata is committed. tm.invalidate_cached_rings(); co_await func(tm); - set(make_token_metadata_ptr(std::move(tm))); + set(make_token_metadata2_ptr(std::move(tm))); } -future<> shared_token_metadata::mutate_on_all_shards(sharded& stm, seastar::noncopyable_function (token_metadata&)> func) { +future<> shared_token_metadata::mutate_on_all_shards(sharded& stm, seastar::noncopyable_function (token_metadata2&)> func) { auto base_shard = this_shard_id(); assert(base_shard == 0); auto lk = co_await stm.local().get_lock(); - std::vector pending_token_metadata_ptr; + std::vector pending_token_metadata_ptr; pending_token_metadata_ptr.resize(smp::count); - auto tmptr = make_token_metadata_ptr(co_await stm.local().get()->clone_async()); + auto tmptr = make_token_metadata2_ptr(co_await stm.local().get()->clone_async()); auto& tm = *tmptr; // bump the token_metadata ring_version // to invalidate cached token/replication mappings @@ -1404,7 +1404,7 @@ future<> shared_token_metadata::mutate_on_all_shards(sharded future<> { - pending_token_metadata_ptr[this_shard_id()] = make_token_metadata_ptr(co_await tm.clone_async()); + pending_token_metadata_ptr[this_shard_id()] = make_token_metadata2_ptr(co_await tm.clone_async()); }); co_await stm.invoke_on_all([&] (shared_token_metadata& stm) { diff --git a/locator/token_metadata.hh b/locator/token_metadata.hh index d8113f19e7..45752a6f21 100644 --- a/locator/token_metadata.hh +++ b/locator/token_metadata.hh @@ -373,7 +373,7 @@ mutable_token_metadata2_ptr make_token_metadata2_ptr(Args... 
args) { } class shared_token_metadata { - mutable_token_metadata_ptr _shared; + mutable_token_metadata2_ptr _shared; token_metadata_lock_func _lock_func; // We use this barrier during the transition to a new token_metadata version to ensure that the @@ -392,13 +392,13 @@ class shared_token_metadata { // includes its own invocation as an operation in the new phase. utils::phased_barrier _versions_barrier; shared_future<> _stale_versions_in_use{make_ready_future<>()}; - token_metadata::version_t _fence_version = 0; + token_metadata2::version_t _fence_version = 0; public: // used to construct the shared object as a sharded<> instance // lock_func returns semaphore_units<> - explicit shared_token_metadata(token_metadata_lock_func lock_func, token_metadata::config cfg) - : _shared(make_token_metadata_ptr(std::move(cfg))) + explicit shared_token_metadata(token_metadata_lock_func lock_func, token_metadata2::config cfg) + : _shared(make_token_metadata2_ptr(std::move(cfg))) , _lock_func(std::move(lock_func)) { _shared->set_version_tracker(_versions_barrier.start()); @@ -407,18 +407,18 @@ public: shared_token_metadata(const shared_token_metadata& x) = delete; shared_token_metadata(shared_token_metadata&& x) = default; - token_metadata_ptr get() const noexcept { + token_metadata2_ptr get() const noexcept { return _shared; } - void set(mutable_token_metadata_ptr tmptr) noexcept; + void set(mutable_token_metadata2_ptr tmptr) noexcept; future<> stale_versions_in_use() const { return _stale_versions_in_use.get_future(); } - void update_fence_version(token_metadata::version_t version); - token_metadata::version_t get_fence_version() const noexcept { + void update_fence_version(token_metadata2::version_t version); + token_metadata2::version_t get_fence_version() const noexcept { return _fence_version; } @@ -438,7 +438,7 @@ public: // If the functor is successful, the mutated clone // is set back to to the shared_token_metadata, // otherwise, the clone is destroyed. 
- future<> mutate_token_metadata(seastar::noncopyable_function (token_metadata&)> func); + future<> mutate_token_metadata(seastar::noncopyable_function (token_metadata2&)> func); // mutate_token_metadata_on_all_shards acquires the shared_token_metadata lock, // clones the token_metadata (using clone_async) @@ -450,7 +450,7 @@ public: // otherwise, the clone is destroyed. // // Must be called on shard 0. - static future<> mutate_on_all_shards(sharded& stm, seastar::noncopyable_function (token_metadata&)> func); + static future<> mutate_on_all_shards(sharded& stm, seastar::noncopyable_function (token_metadata2&)> func); }; } diff --git a/main.cc b/main.cc index e60816bcd1..4ce8097e55 100644 --- a/main.cc +++ b/main.cc @@ -1206,14 +1206,12 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl const auto listen_address = utils::resolve(cfg->listen_address, family).get0(); const auto host_id = initialize_local_info_thread(sys_ks, snitch, listen_address, *cfg, broadcast_addr, broadcast_rpc_addr); - shared_token_metadata::mutate_on_all_shards(token_metadata, [host_id, endpoint = broadcast_addr] (locator::token_metadata& tm) { + shared_token_metadata::mutate_on_all_shards(token_metadata, [host_id, endpoint = broadcast_addr] (locator::token_metadata2& tm) { // Makes local host id available in topology cfg as soon as possible. // Raft topology discard the endpoint-to-id map, so the local id can // still be found in the config. 
tm.get_topology().set_host_id_cfg(host_id); tm.get_topology().add_or_update_endpoint(endpoint, host_id); - tm.get_new()->get_topology().set_host_id_cfg(host_id); - tm.get_new()->get_topology().add_or_update_endpoint(endpoint, host_id); return make_ready_future<>(); }).get(); diff --git a/replica/database.cc b/replica/database.cc index 8cef7e40c4..a458eeb2f8 100644 --- a/replica/database.cc +++ b/replica/database.cc @@ -998,7 +998,7 @@ future<> database::add_column_family(keyspace& ks, schema_ptr schema, column_fam auto&& rs = ks.get_replication_strategy(); locator::effective_replication_map_ptr erm; if (auto pt_rs = rs.maybe_as_per_table()) { - erm = pt_rs->make_replication_map(schema->id(), _shared_token_metadata.get()->get_new_strong()); + erm = pt_rs->make_replication_map(schema->id(), _shared_token_metadata.get()); } else { erm = ks.get_effective_replication_map(); } @@ -1313,7 +1313,7 @@ keyspace::create_replication_strategy(const locator::shared_token_metadata& stm) rslogger.debug("replication strategy for keyspace {} is {}, opts={}", _metadata->name(), _metadata->strategy_name(), _metadata->strategy_options()); if (!_replication_strategy->is_per_table()) { - auto erm = co_await _erm_factory.create_effective_replication_map(_replication_strategy, stm.get()->get_new_strong()); + auto erm = co_await _erm_factory.create_effective_replication_map(_replication_strategy, stm.get()); update_effective_replication_map(std::move(erm)); } } diff --git a/replica/database.hh b/replica/database.hh index da6827b76a..0d51032835 100644 --- a/replica/database.hh +++ b/replica/database.hh @@ -1561,7 +1561,7 @@ public: } const locator::shared_token_metadata& get_shared_token_metadata() const { return _shared_token_metadata; } - const locator::token_metadata2& get_token_metadata() const { return *_shared_token_metadata.get()->get_new(); } + const locator::token_metadata2& get_token_metadata() const { return *_shared_token_metadata.get(); } wasm::manager& wasm() noexcept { return 
_wasm; } const wasm::manager& wasm() const noexcept { return _wasm; } diff --git a/service/forward_service.cc b/service/forward_service.cc index 91513a1e6e..44ed7e7ae7 100644 --- a/service/forward_service.cc +++ b/service/forward_service.cc @@ -303,7 +303,7 @@ public: } }; -locator::token_metadata_ptr forward_service::get_token_metadata_ptr() const noexcept { +locator::token_metadata2_ptr forward_service::get_token_metadata_ptr() const noexcept { return _shared_token_metadata.get(); } diff --git a/service/forward_service.hh b/service/forward_service.hh index 169deb0b63..8bcec3db97 100644 --- a/service/forward_service.hh +++ b/service/forward_service.hh @@ -159,7 +159,7 @@ private: // Used to execute a `forward_request` on a shard. future execute_on_this_shard(query::forward_request req, std::optional tr_info); - locator::token_metadata_ptr get_token_metadata_ptr() const noexcept; + locator::token_metadata2_ptr get_token_metadata_ptr() const noexcept; void register_metrics(); void init_messaging_service(); diff --git a/service/migration_manager.cc b/service/migration_manager.cc index a19ecce67e..28c8a67f73 100644 --- a/service/migration_manager.cc +++ b/service/migration_manager.cc @@ -1221,7 +1221,8 @@ future<> migration_manager::on_change(gms::inet_address endpoint, gms::applicati mlogger.debug("Ignoring state change for dead or unknown endpoint: {}", endpoint); return make_ready_future(); } - if (_storage_proxy.get_token_metadata_ptr()->is_normal_token_owner(endpoint)) { + const auto host_id = _gossiper.get_host_id(endpoint); + if (_storage_proxy.get_token_metadata_ptr()->is_normal_token_owner(host_id)) { schedule_schema_pull(endpoint, *ep_state); } } diff --git a/service/storage_proxy.cc b/service/storage_proxy.cc index a1469aeff2..9b507a72d0 100644 --- a/service/storage_proxy.cc +++ b/service/storage_proxy.cc @@ -6545,7 +6545,7 @@ storage_proxy::stop() { return make_ready_future<>(); } -locator::token_metadata_ptr storage_proxy::get_token_metadata_ptr() const 
noexcept { +locator::token_metadata2_ptr storage_proxy::get_token_metadata_ptr() const noexcept { return _shared_token_metadata.get(); } diff --git a/service/storage_proxy.hh b/service/storage_proxy.hh index 8d5f517e2b..476e1b713f 100644 --- a/service/storage_proxy.hh +++ b/service/storage_proxy.hh @@ -220,7 +220,7 @@ public: return _erm_factory; } - locator::token_metadata_ptr get_token_metadata_ptr() const noexcept; + locator::token_metadata2_ptr get_token_metadata_ptr() const noexcept; query::max_result_size get_max_result_size(const query::partition_slice& slice) const; query::tombstone_limit get_tombstone_limit() const; diff --git a/service/storage_service.cc b/service/storage_service.cc index 2292166b8f..cf3bd3fc83 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -405,19 +405,15 @@ future<> storage_service::topology_state_load() { co_await _messaging.local().ban_host(locator::host_id{id.uuid()}); } - co_await mutate_token_metadata(seastar::coroutine::lambda([this, &id2ip, &am] (mutable_token_metadata_ptr tmptr) -> future<> { + co_await mutate_token_metadata(seastar::coroutine::lambda([this, &id2ip, &am] (mutable_token_metadata2_ptr tmptr) -> future<> { co_await tmptr->clear_gently(); // drop previous state tmptr->set_version(_topology_state_machine._topology.version); - tmptr->get_new()->set_version(_topology_state_machine._topology.version); auto update_topology = [&] (locator::host_id id, inet_address ip, const replica_state& rs) { - tmptr->update_topology(ip, locator::endpoint_dc_rack{rs.datacenter, rs.rack}, + tmptr->update_topology(id, locator::endpoint_dc_rack{rs.datacenter, rs.rack}, to_topology_node_state(rs.state), rs.shard_count); - tmptr->get_new()->update_topology(id, locator::endpoint_dc_rack{rs.datacenter, rs.rack}, - to_topology_node_state(rs.state), rs.shard_count); tmptr->update_host_id(id, ip); - tmptr->get_new()->update_host_id(id, ip); }; auto add_normal_node = [&] (raft::server_id id, const replica_state& rs) -> 
future<> { @@ -445,8 +441,7 @@ future<> storage_service::topology_state_load() { co_await _gossiper.add_local_application_state({{ gms::application_state::STATUS, gms::versioned_value::normal(rs.ring.value().tokens) }}); } update_topology(host_id, ip, rs); - co_await tmptr->update_normal_tokens(rs.ring.value().tokens, ip); - co_await tmptr->get_new()->update_normal_tokens(rs.ring.value().tokens, host_id); + co_await tmptr->update_normal_tokens(rs.ring.value().tokens, host_id); }; for (const auto& [id, rs]: _topology_state_machine._topology.normal_nodes) { @@ -474,7 +469,6 @@ future<> storage_service::topology_state_load() { } }, _topology_state_machine._topology.tstate); tmptr->set_read_new(read_new); - tmptr->get_new()->set_read_new(read_new); for (const auto& [id, rs]: _topology_state_machine._topology.transition_nodes) { locator::host_id host_id{id.uuid()}; @@ -500,11 +494,9 @@ future<> storage_service::topology_state_load() { // so we can perform writes to regular 'distributed' tables during the bootstrap procedure // (such as the CDC generation write). // It doesn't break anything to set the tokens to normal early in this single-node case. 
- co_await tmptr->update_normal_tokens(rs.ring.value().tokens, ip); - co_await tmptr->get_new()->update_normal_tokens(rs.ring.value().tokens, host_id); + co_await tmptr->update_normal_tokens(rs.ring.value().tokens, host_id); } else { - tmptr->add_bootstrap_tokens(rs.ring.value().tokens, ip); - tmptr->get_new()->add_bootstrap_tokens(rs.ring.value().tokens, host_id); + tmptr->add_bootstrap_tokens(rs.ring.value().tokens, host_id); co_await update_topology_change_info(tmptr, ::format("bootstrapping node {}/{}", id, ip)); } } @@ -512,10 +504,8 @@ future<> storage_service::topology_state_load() { case node_state::decommissioning: case node_state::removing: update_topology(host_id, ip, rs); - co_await tmptr->update_normal_tokens(rs.ring.value().tokens, ip); - co_await tmptr->get_new()->update_normal_tokens(rs.ring.value().tokens, host_id); - tmptr->add_leaving_endpoint(ip); - tmptr->get_new()->add_leaving_endpoint(host_id); + co_await tmptr->update_normal_tokens(rs.ring.value().tokens, host_id); + tmptr->add_leaving_endpoint(host_id); co_await update_topology_change_info(tmptr, ::format("{} {}/{}", rs.state, id, ip)); break; case node_state::replacing: { @@ -529,10 +519,9 @@ future<> storage_service::topology_state_load() { } assert(existing_ip); const auto replaced_host_id = locator::host_id(replaced_id.uuid()); - tmptr->get_new()->update_topology(replaced_host_id, std::nullopt, locator::node::state::being_replaced); + tmptr->update_topology(replaced_host_id, std::nullopt, locator::node::state::being_replaced); update_topology(host_id, ip, rs); - tmptr->add_replacing_endpoint(*existing_ip, ip); - tmptr->get_new()->add_replacing_endpoint(replaced_host_id, host_id); + tmptr->add_replacing_endpoint(replaced_host_id, host_id); co_await update_topology_change_info(tmptr, ::format("replacing {}/{} by {}/{}", replaced_id, *existing_ip, id, ip)); } } @@ -937,11 +926,11 @@ class topology_coordinator { bool _rollback = false; const locator::token_metadata2& get_token_metadata() 
const noexcept { - return *_shared_tm.get()->get_new(); + return *_shared_tm.get(); } locator::token_metadata2_ptr get_token_metadata_ptr() const noexcept { - return _shared_tm.get()->get_new_strong(); + return _shared_tm.get(); } // This is a topology snapshot for a given node. It contains pointers into the topology state machine @@ -3063,13 +3052,10 @@ future<> storage_service::join_token_ring(shardedupdate_topology(*replace_address, std::move(ri->dc_rack), locator::node::state::being_replaced); - co_await tmptr->update_normal_tokens(bootstrap_tokens, *replace_address); - - tmptr->get_new()->update_topology(tmptr->get_my_id(), std::nullopt, locator::node::state::replacing); - tmptr->get_new()->update_topology(ri->host_id, std::move(ri->dc_rack), locator::node::state::being_replaced); - co_await tmptr->get_new()->update_normal_tokens(bootstrap_tokens, ri->host_id); - tmptr->get_new()->update_host_id(ri->host_id, *replace_address); + tmptr->update_topology(tmptr->get_my_id(), std::nullopt, locator::node::state::replacing); + tmptr->update_topology(ri->host_id, std::move(ri->dc_rack), locator::node::state::being_replaced); + co_await tmptr->update_normal_tokens(bootstrap_tokens, ri->host_id); + tmptr->update_host_id(ri->host_id, *replace_address); replaced_host_id = ri->host_id; } @@ -3110,10 +3096,8 @@ future<> storage_service::join_token_ring(shardedupdate_topology(get_broadcast_address(), _snitch.local()->get_location(), locator::node::state::normal); - tmptr->get_new()->update_topology(tmptr->get_new()->get_my_id(), _snitch.local()->get_location(), locator::node::state::normal); - co_await tmptr->update_normal_tokens(my_tokens, get_broadcast_address()); - co_await tmptr->get_new()->update_normal_tokens(my_tokens, tmptr->get_new()->get_my_id()); + tmptr->update_topology(tmptr->get_my_id(), _snitch.local()->get_location(), locator::node::state::normal); + co_await tmptr->update_normal_tokens(my_tokens, tmptr->get_my_id()); cdc_gen_id = co_await 
_sys_ks.local().get_cdc_generation_id(); if (!cdc_gen_id) { @@ -3140,7 +3124,6 @@ future<> storage_service::join_token_ring(shardedupdate_host_id(local_host_id, get_broadcast_address()); - tmptr->get_new()->update_host_id(local_host_id, get_broadcast_address()); } // Replicate the tokens early because once gossip runs other nodes @@ -3431,14 +3414,12 @@ future<> storage_service::join_token_ring(sharded future<> { + co_await mutate_token_metadata([this, &bootstrap_tokens] (mutable_token_metadata2_ptr tmptr) -> future<> { // This node must know about its chosen tokens before other nodes do // since they may start sending writes to this node after it gossips status = NORMAL. // Therefore, in case we haven't updated _token_metadata with our tokens yet, do it now. - tmptr->update_topology(get_broadcast_address(), _snitch.local()->get_location(), locator::node::state::normal); - co_await tmptr->update_normal_tokens(bootstrap_tokens, get_broadcast_address()); - tmptr->get_new()->update_topology(tmptr->get_new()->get_my_id(), _snitch.local()->get_location(), locator::node::state::normal); - co_await tmptr->get_new()->update_normal_tokens(bootstrap_tokens, tmptr->get_new()->get_my_id()); + tmptr->update_topology(tmptr->get_my_id(), _snitch.local()->get_location(), locator::node::state::normal); + co_await tmptr->update_normal_tokens(bootstrap_tokens, tmptr->get_my_id()); }); if (!_sys_ks.local().bootstrap_complete()) { @@ -3574,12 +3555,10 @@ future<> storage_service::bootstrap(std::unordered_set& bootstrap_tokens, // When is_repair_based_node_ops_enabled is true, the bootstrap node // will use node_ops_cmd to bootstrap, node_ops_cmd will update the pending ranges. 
slogger.debug("bootstrap: update pending ranges: endpoint={} bootstrap_tokens={}", get_broadcast_address(), bootstrap_tokens); - mutate_token_metadata([this, &bootstrap_tokens] (mutable_token_metadata_ptr tmptr) { + mutate_token_metadata([this, &bootstrap_tokens] (mutable_token_metadata2_ptr tmptr) { auto endpoint = get_broadcast_address(); - tmptr->update_topology(endpoint, _snitch.local()->get_location(), locator::node::state::bootstrapping); - tmptr->add_bootstrap_tokens(bootstrap_tokens, endpoint); - tmptr->get_new()->update_topology(tmptr->get_new()->get_my_id(), _snitch.local()->get_location(), locator::node::state::bootstrapping); - tmptr->get_new()->add_bootstrap_tokens(bootstrap_tokens, tmptr->get_new()->get_my_id()); + tmptr->update_topology(tmptr->get_my_id(), _snitch.local()->get_location(), locator::node::state::bootstrapping); + tmptr->add_bootstrap_tokens(bootstrap_tokens, tmptr->get_my_id()); return update_topology_change_info(std::move(tmptr), ::format("bootstrapping node {}", endpoint)); }).get(); } @@ -3689,10 +3668,8 @@ future<> storage_service::handle_state_bootstrap(inet_address endpoint, gms::per }; const auto dc_rack = get_dc_rack_for(endpoint); const auto host_id = _gossiper.get_host_id(endpoint); - update_tm(*tmptr, endpoint, dc_rack); - update_tm(*tmptr->get_new(), host_id, dc_rack); + update_tm(*tmptr, host_id, dc_rack); tmptr->update_host_id(host_id, endpoint); - tmptr->get_new()->update_host_id(host_id, endpoint); co_await update_topology_change_info(tmptr, ::format("handle_state_bootstrap {}", endpoint)); co_await replicate_to_all_cores(std::move(tmptr)); @@ -3712,9 +3689,6 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit auto tmlock = std::make_unique(co_await get_token_metadata_lock()); auto tmptr = co_await get_mutable_token_metadata_ptr(); - if (tmptr->is_normal_token_owner(endpoint)) { - slogger.info("Node {} state jump to normal", endpoint); - } std::unordered_set endpoints_to_remove; auto 
do_remove_node = [&] (gms::inet_address node) { @@ -3723,15 +3697,17 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit // 2. new endpoint for the given host_id has bigger generation, we remove the old endpoint; // 3. old endpoint for the given host_id has bigger generation, we remove the new endpoint. // In all of these cases host_id is retained, only the IP addresses are changed. - // That's why we don't need to call remove_endpoint on tmptr->get_new(). - // However, it will be called eventually through the chain storage_service::remove_endpoint -> + // We don't need to call remove_endpoint on tmptr, since it will be called + // indirectly through the chain endpoints_to_remove->storage_service::remove_endpoint -> // _gossiper.remove_endpoint -> storage_service::on_remove. - tmptr->remove_endpoint(node); endpoints_to_remove.insert(node); }; // Order Matters, TM.updateHostID() should be called before TM.updateNormalToken(), (see CASSANDRA-4300). auto host_id = _gossiper.get_host_id(endpoint); + if (tmptr->is_normal_token_owner(host_id)) { + slogger.info("Node {}/{} state jump to normal", endpoint, host_id); + } auto existing = tmptr->get_endpoint_for_host_id_if_known(host_id); // Old node in replace-with-same-IP scenario. @@ -3753,7 +3729,6 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit do_remove_node(*existing); slogger.info("Set host_id={} to be owned by node={}, existing={}", host_id, endpoint, *existing); tmptr->update_host_id(host_id, endpoint); - tmptr->get_new()->update_host_id(host_id, endpoint); } else { // The new IP has smaller generation than the existing one, // we are going to remove it, so we add it to the endpoints_to_remove. @@ -3782,8 +3757,8 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit // token_metadata, since no IPs will be removed in this case. 
// We do this after update_normal_tokens, allowing for tokens to be properly // migrated to the new host_id. - tmptr->del_replacing_endpoint(endpoint); - if (const auto old_host_id = tmptr->get_new()->get_host_id_if_known(endpoint); old_host_id && *old_host_id != host_id) { + + if (const auto old_host_id = tmptr->get_host_id_if_known(endpoint); old_host_id && *old_host_id != host_id) { replaced_id = *old_host_id; } } else { @@ -3792,7 +3767,6 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit // receives the current state of the cluster from the gossiper. // For example, a new node receives this notification for every // existing node in the cluster. - tmptr->del_replacing_endpoint(endpoint); auto nodes = _gossiper.get_nodes_with_host_id(host_id); bool left = std::any_of(nodes.begin(), nodes.end(), [this] (const gms::inet_address& node) { return _gossiper.is_left(node); }); @@ -3803,7 +3777,6 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit } slogger.info("Set host_id={} to be owned by node={}", host_id, endpoint); tmptr->update_host_id(host_id, endpoint); - tmptr->get_new()->update_host_id(host_id, endpoint); } // Tokens owned by the handled endpoint. @@ -3817,8 +3790,8 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit std::unordered_map token_to_endpoint_map = get_token_to_endpoint(get_token_metadata()); std::unordered_set candidates_for_removal; - // Here we convert tokens from gossiper to owned_tokens, which will be assigned as a new - // normal tokens to token_metadata and its new host_id-based version. + // Here we convert endpoint tokens from gossiper to owned_tokens, which will be assigned as a new + // normal tokens to the token_metadata. // This transformation accounts for situations where some tokens // belong to outdated nodes - the ones with smaller generation. 
// We use endpoints instead of host_ids here since gossiper operates @@ -3878,7 +3851,7 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit endpoints_to_remove.insert(ep); } - bool is_normal_token_owner = tmptr->is_normal_token_owner(endpoint); + bool is_normal_token_owner = tmptr->is_normal_token_owner(host_id); bool do_notify_joined = false; if (endpoints_to_remove.contains(endpoint)) [[unlikely]] { @@ -3895,16 +3868,14 @@ future<> storage_service::handle_state_normal(inet_address endpoint, gms::permit } const auto dc_rack = get_dc_rack_for(endpoint); - tmptr->update_topology(endpoint, dc_rack, locator::node::state::normal); - tmptr->get_new()->update_topology(host_id, dc_rack, locator::node::state::normal); - co_await tmptr->update_normal_tokens(owned_tokens, endpoint); - co_await tmptr->get_new()->update_normal_tokens(owned_tokens, host_id); + tmptr->update_topology(host_id, dc_rack, locator::node::state::normal); + co_await tmptr->update_normal_tokens(owned_tokens, host_id); if (replaced_id) { - if (tmptr->get_new()->is_normal_token_owner(*replaced_id)) { + if (tmptr->is_normal_token_owner(*replaced_id)) { on_internal_error(slogger, ::format("replaced endpoint={}/{} still owns tokens {}", - endpoint, *replaced_id, tmptr->get_new()->get_tokens(*replaced_id))); + endpoint, *replaced_id, tmptr->get_tokens(*replaced_id))); } else { - tmptr->get_new()->remove_endpoint(*replaced_id); + tmptr->remove_endpoint(*replaced_id); slogger.info("node {}/{} is removed from token_metadata since it's replaced by {}/{} ", endpoint, *replaced_id, endpoint, host_id); } @@ -4015,10 +3986,9 @@ future<> storage_service::on_alive(gms::inet_address endpoint, gms::endpoint_sta auto tmlock = co_await get_token_metadata_lock(); auto tmptr = co_await get_mutable_token_metadata_ptr(); const auto dc_rack = get_dc_rack_for(endpoint); - tmptr->update_topology(endpoint, dc_rack); const auto host_id = _gossiper.get_host_id(endpoint); - 
tmptr->get_new()->update_host_id(host_id, endpoint); - tmptr->get_new()->update_topology(host_id, dc_rack); + tmptr->update_host_id(host_id, endpoint); + tmptr->update_topology(host_id, dc_rack); co_await replicate_to_all_cores(std::move(tmptr)); } } @@ -4097,13 +4067,12 @@ future<> storage_service::on_remove(gms::inet_address endpoint, gms::permit_id p slogger.debug("endpoint={} on_remove: permit_id={}", endpoint, pid); auto tmlock = co_await get_token_metadata_lock(); auto tmptr = co_await get_mutable_token_metadata_ptr(); - tmptr->remove_endpoint(endpoint); - // We should handle the case when we aren't able to find endpoint -> ip mapping in tm->get_new(). + // We should handle the case when we aren't able to find endpoint -> ip mapping in token_metadata. // This could happen e.g. when the new endpoint has bigger generation in handle_state_normal - the code // in handle_state_normal will remap host_id to the new IP and we won't find // old IP here. We should just skip the remove in that case. 
- if (const auto host_id = tmptr->get_new()->get_host_id_if_known(endpoint); host_id) { - tmptr->get_new()->remove_endpoint(*host_id); + if (const auto host_id = tmptr->get_host_id_if_known(endpoint); host_id) { + tmptr->remove_endpoint(*host_id); } co_await update_topology_change_info(tmptr, ::format("on_remove {}", endpoint)); co_await replicate_to_all_cores(std::move(tmptr)); @@ -4302,16 +4271,13 @@ future<> storage_service::join_cluster(sharded& co_await _sys_ks.local().remove_endpoint(ep); } else { const auto dc_rack = get_dc_rack(ep); - tmptr->update_topology(ep, dc_rack, locator::node::state::normal); - co_await tmptr->update_normal_tokens(tokens, ep); const auto hostIdIt = loaded_host_ids.find(ep); if (hostIdIt == loaded_host_ids.end()) { on_internal_error(slogger, format("can't find host_id for ep {}", ep)); } + tmptr->update_topology(hostIdIt->second, dc_rack, locator::node::state::normal); + co_await tmptr->update_normal_tokens(tokens, hostIdIt->second); tmptr->update_host_id(hostIdIt->second, ep); - tmptr->get_new()->update_topology(hostIdIt->second, dc_rack, locator::node::state::normal); - co_await tmptr->get_new()->update_normal_tokens(tokens, hostIdIt->second); - tmptr->get_new()->update_host_id(hostIdIt->second, ep); loaded_endpoints.insert(ep); co_await _gossiper.add_saved_endpoint(ep); } @@ -4336,13 +4302,13 @@ future<> storage_service::join_cluster(sharded& co_return co_await join_token_ring(sys_dist_ks, proxy, std::move(initial_contact_nodes), std::move(loaded_endpoints), std::move(loaded_peer_features), get_ring_delay()); } -future<> storage_service::replicate_to_all_cores(mutable_token_metadata_ptr tmptr) noexcept { +future<> storage_service::replicate_to_all_cores(mutable_token_metadata2_ptr tmptr) noexcept { assert(this_shard_id() == 0); slogger.debug("Replicating token_metadata to all cores"); std::exception_ptr ex; - std::vector pending_token_metadata_ptr; + std::vector pending_token_metadata_ptr; 
pending_token_metadata_ptr.resize(smp::count); std::vector> pending_effective_replication_maps; pending_effective_replication_maps.resize(smp::count); @@ -4373,7 +4339,7 @@ future<> storage_service::replicate_to_all_cores(mutable_token_metadata_ptr tmpt pending_token_metadata_ptr[base_shard] = tmptr; // clone a local copy of updated token_metadata on all other shards co_await smp::invoke_on_others(base_shard, [&, tmptr] () -> future<> { - pending_token_metadata_ptr[this_shard_id()] = make_token_metadata_ptr(co_await tmptr->clone_async()); + pending_token_metadata_ptr[this_shard_id()] = make_token_metadata2_ptr(co_await tmptr->clone_async()); }); // Precalculate new effective_replication_map for all keyspaces @@ -4389,7 +4355,7 @@ future<> storage_service::replicate_to_all_cores(mutable_token_metadata_ptr tmpt if (rs->is_per_table()) { continue; } - auto erm = co_await get_erm_factory().create_effective_replication_map(rs, tmptr->get_new_strong()); + auto erm = co_await get_erm_factory().create_effective_replication_map(rs, tmptr); pending_effective_replication_maps[base_shard].emplace(ks_name, std::move(erm)); } co_await container().invoke_on_others([&] (storage_service& ss) -> future<> { @@ -4400,7 +4366,7 @@ future<> storage_service::replicate_to_all_cores(mutable_token_metadata_ptr tmpt continue; } auto tmptr = pending_token_metadata_ptr[this_shard_id()]; - auto erm = co_await ss.get_erm_factory().create_effective_replication_map(rs, tmptr->get_new_strong()); + auto erm = co_await ss.get_erm_factory().create_effective_replication_map(rs, tmptr); pending_effective_replication_maps[this_shard_id()].emplace(ks_name, std::move(erm)); } }); @@ -4412,7 +4378,7 @@ future<> storage_service::replicate_to_all_cores(mutable_token_metadata_ptr tmpt auto rs = db.find_keyspace(table->schema()->keypace_name()).get_replication_strategy_ptr(); locator::effective_replication_map_ptr erm; if (auto pt_rs = rs->maybe_as_per_table()) { - erm = pt_rs->make_replication_map(id, 
tmptr->get_new_strong()); + erm = pt_rs->make_replication_map(id, tmptr); } else { erm = pending_effective_replication_maps[this_shard_id()][table->schema()->keypace_name()]; } @@ -5530,20 +5496,18 @@ future storage_service::node_ops_cmd_handler(gms::inet_ad slogger.warn("{}", msg); throw std::runtime_error(msg); } - mutate_token_metadata([coordinator, &req, this] (mutable_token_metadata_ptr tmptr) mutable { + mutate_token_metadata([coordinator, &req, this] (mutable_token_metadata2_ptr tmptr) mutable { for (auto& node : req.leaving_nodes) { slogger.info("removenode[{}]: Added node={} as leaving node, coordinator={}", req.ops_uuid, node, coordinator); - tmptr->add_leaving_endpoint(node); - tmptr->get_new()->add_leaving_endpoint(tmptr->get_new()->get_host_id(node)); + tmptr->add_leaving_endpoint(tmptr->get_host_id(node)); } return update_topology_change_info(tmptr, ::format("removenode {}", req.leaving_nodes)); }).get(); node_ops_insert(ops_uuid, coordinator, std::move(req.ignore_nodes), [this, coordinator, req = std::move(req)] () mutable { - return mutate_token_metadata([this, coordinator, req = std::move(req)] (mutable_token_metadata_ptr tmptr) mutable { + return mutate_token_metadata([this, coordinator, req = std::move(req)] (mutable_token_metadata2_ptr tmptr) mutable { for (auto& node : req.leaving_nodes) { slogger.info("removenode[{}]: Removed node={} as leaving node, coordinator={}", req.ops_uuid, node, coordinator); - tmptr->del_leaving_endpoint(node); - tmptr->get_new()->del_leaving_endpoint(tmptr->get_new()->get_host_id(node)); + tmptr->del_leaving_endpoint(tmptr->get_host_id(node)); } return update_topology_change_info(tmptr, ::format("removenode {}", req.leaving_nodes)); }); @@ -5580,20 +5544,18 @@ future storage_service::node_ops_cmd_handler(gms::inet_ad slogger.warn("{}", msg); throw std::runtime_error(msg); } - mutate_token_metadata([coordinator, &req, this] (mutable_token_metadata_ptr tmptr) mutable { + mutate_token_metadata([coordinator, &req, this] 
(mutable_token_metadata2_ptr tmptr) mutable { for (auto& node : req.leaving_nodes) { slogger.info("decommission[{}]: Added node={} as leaving node, coordinator={}", req.ops_uuid, node, coordinator); - tmptr->add_leaving_endpoint(node); - tmptr->get_new()->add_leaving_endpoint(tmptr->get_new()->get_host_id(node)); + tmptr->add_leaving_endpoint(tmptr->get_host_id(node)); } return update_topology_change_info(tmptr, ::format("decommission {}", req.leaving_nodes)); }).get(); node_ops_insert(ops_uuid, coordinator, std::move(req.ignore_nodes), [this, coordinator, req = std::move(req)] () mutable { - return mutate_token_metadata([this, coordinator, req = std::move(req)] (mutable_token_metadata_ptr tmptr) mutable { + return mutate_token_metadata([this, coordinator, req = std::move(req)] (mutable_token_metadata2_ptr tmptr) mutable { for (auto& node : req.leaving_nodes) { slogger.info("decommission[{}]: Removed node={} as leaving node, coordinator={}", req.ops_uuid, node, coordinator); - tmptr->del_leaving_endpoint(node); - tmptr->get_new()->del_leaving_endpoint(tmptr->get_new()->get_host_id(node)); + tmptr->del_leaving_endpoint(tmptr->get_host_id(node)); } return update_topology_change_info(tmptr, ::format("decommission {}", req.leaving_nodes)); }); @@ -5643,16 +5605,14 @@ future storage_service::node_ops_cmd_handler(gms::inet_ad if (!coordinator_host_id) { throw std::runtime_error("Coordinator host_id not found"); } - mutate_token_metadata([coordinator, coordinator_host_id, &req, this] (mutable_token_metadata_ptr tmptr) mutable { + mutate_token_metadata([coordinator, coordinator_host_id, &req, this] (mutable_token_metadata2_ptr tmptr) mutable { for (auto& x: req.replace_nodes) { auto existing_node = x.first; auto replacing_node = x.second; - const auto existing_node_id = tmptr->get_new()->get_host_id(existing_node); + const auto existing_node_id = tmptr->get_host_id(existing_node); const auto replacing_node_id = *coordinator_host_id; slogger.info("replace[{}]: Added 
replacing_node={}/{} to replace existing_node={}/{}, coordinator={}/{}", req.ops_uuid, replacing_node, replacing_node_id, existing_node, existing_node_id, coordinator, *coordinator_host_id); - tmptr->update_topology(replacing_node, get_dc_rack_for(replacing_node), locator::node::state::replacing); - tmptr->add_replacing_endpoint(existing_node, replacing_node); // In case of replace-with-same-ip we need to map both host_id-s // to the same IP. The locator::topology allows this specifically in case @@ -5663,28 +5623,27 @@ future storage_service::node_ops_cmd_handler(gms::inet_ad // current code. The IP will be placed in pending_endpoints and // excluded from normal_endpoints (maybe_remove_node_being_replaced function). // In handle_state_normal we'll remap the IP to the new host_id. - tmptr->get_new()->update_topology(existing_node_id, std::nullopt, locator::node::state::being_replaced); - tmptr->get_new()->update_topology(replacing_node_id, get_dc_rack_for(replacing_node), locator::node::state::replacing); - tmptr->get_new()->update_host_id(replacing_node_id, replacing_node); - tmptr->get_new()->add_replacing_endpoint(existing_node_id, replacing_node_id); + tmptr->update_topology(existing_node_id, std::nullopt, locator::node::state::being_replaced); + tmptr->update_topology(replacing_node_id, get_dc_rack_for(replacing_node), locator::node::state::replacing); + tmptr->update_host_id(replacing_node_id, replacing_node); + tmptr->add_replacing_endpoint(existing_node_id, replacing_node_id); } return make_ready_future<>(); }).get(); node_ops_insert(ops_uuid, coordinator, std::move(req.ignore_nodes), [this, coordinator, coordinator_host_id, req = std::move(req)] () mutable { - return mutate_token_metadata([this, coordinator, coordinator_host_id, req = std::move(req)] (mutable_token_metadata_ptr tmptr) mutable { + return mutate_token_metadata([this, coordinator, coordinator_host_id, req = std::move(req)] (mutable_token_metadata2_ptr tmptr) mutable { for (auto& x: 
req.replace_nodes) { auto existing_node = x.first; auto replacing_node = x.second; - const auto existing_node_id = tmptr->get_new()->get_host_id(existing_node); + const auto existing_node_id = tmptr->get_host_id(existing_node); const auto replacing_node_id = *coordinator_host_id; slogger.info("replace[{}]: Removed replacing_node={}/{} to replace existing_node={}/{}, coordinator={}/{}", req.ops_uuid, replacing_node, replacing_node_id, existing_node, existing_node_id, coordinator, *coordinator_host_id); - tmptr->del_replacing_endpoint(existing_node); - tmptr->get_new()->del_replacing_endpoint(existing_node_id); + tmptr->del_replacing_endpoint(existing_node_id); const auto dc_rack = get_dc_rack_for(replacing_node); - tmptr->get_new()->update_topology(existing_node_id, dc_rack, locator::node::state::normal); - tmptr->get_new()->remove_endpoint(replacing_node_id); + tmptr->update_topology(existing_node_id, dc_rack, locator::node::state::normal); + tmptr->remove_endpoint(replacing_node_id); } return update_topology_change_info(tmptr, ::format("replace {}", req.replace_nodes)); }); @@ -5723,7 +5682,7 @@ future storage_service::node_ops_cmd_handler(gms::inet_ad if (!coordinator_host_id) { throw std::runtime_error("Coordinator host_id not found"); } - mutate_token_metadata([coordinator, coordinator_host_id, &req, this] (mutable_token_metadata_ptr tmptr) mutable { + mutate_token_metadata([coordinator, coordinator_host_id, &req, this] (mutable_token_metadata2_ptr tmptr) mutable { for (auto& x: req.bootstrap_nodes) { auto& endpoint = x.first; auto tokens = std::unordered_set(x.second.begin(), x.second.end()); @@ -5731,23 +5690,19 @@ future storage_service::node_ops_cmd_handler(gms::inet_ad const auto dc_rack = get_dc_rack_for(endpoint); slogger.info("bootstrap[{}]: Added node={}/{} as bootstrap, coordinator={}/{}", req.ops_uuid, endpoint, host_id, coordinator, *coordinator_host_id); - tmptr->update_topology(endpoint, dc_rack, locator::node::state::bootstrapping); - 
tmptr->add_bootstrap_tokens(tokens, endpoint); - - tmptr->get_new()->update_host_id(host_id, endpoint); - tmptr->get_new()->update_topology(host_id, dc_rack, locator::node::state::bootstrapping); - tmptr->get_new()->add_bootstrap_tokens(tokens, host_id); + tmptr->update_host_id(host_id, endpoint); + tmptr->update_topology(host_id, dc_rack, locator::node::state::bootstrapping); + tmptr->add_bootstrap_tokens(tokens, host_id); } return update_topology_change_info(tmptr, ::format("bootstrap {}", req.bootstrap_nodes)); }).get(); node_ops_insert(ops_uuid, coordinator, std::move(req.ignore_nodes), [this, coordinator, req = std::move(req)] () mutable { - return mutate_token_metadata([this, coordinator, req = std::move(req)] (mutable_token_metadata_ptr tmptr) mutable { + return mutate_token_metadata([this, coordinator, req = std::move(req)] (mutable_token_metadata2_ptr tmptr) mutable { for (auto& x: req.bootstrap_nodes) { auto& endpoint = x.first; auto tokens = std::unordered_set(x.second.begin(), x.second.end()); slogger.info("bootstrap[{}]: Removed node={} as bootstrap, coordinator={}", req.ops_uuid, endpoint, coordinator); tmptr->remove_bootstrap_tokens(tokens); - tmptr->get_new()->remove_bootstrap_tokens(tokens); } return update_topology_change_info(tmptr, ::format("bootstrap {}", req.bootstrap_nodes)); }); @@ -6105,12 +6060,10 @@ future<> storage_service::excise(std::unordered_set tokens, inet_address co_await remove_endpoint(endpoint, pid); auto tmlock = std::make_optional(co_await get_token_metadata_lock()); auto tmptr = co_await get_mutable_token_metadata_ptr(); - tmptr->remove_endpoint(endpoint); - tmptr->remove_bootstrap_tokens(tokens); - if (const auto host_id = tmptr->get_new()->get_host_id_if_known(endpoint); host_id) { - tmptr->get_new()->remove_endpoint(*host_id); + if (const auto host_id = tmptr->get_host_id_if_known(endpoint); host_id) { + tmptr->remove_endpoint(*host_id); } - tmptr->get_new()->remove_bootstrap_tokens(tokens); + 
tmptr->remove_bootstrap_tokens(tokens); co_await update_topology_change_info(tmptr, ::format("excise {}", endpoint)); co_await replicate_to_all_cores(std::move(tmptr)); @@ -6127,11 +6080,11 @@ future<> storage_service::excise(std::unordered_set tokens, inet_address future<> storage_service::leave_ring() { co_await _cdc_gens.local().leave_ring(); co_await _sys_ks.local().set_bootstrap_state(db::system_keyspace::bootstrap_state::NEEDS_BOOTSTRAP); - co_await mutate_token_metadata([this] (mutable_token_metadata_ptr tmptr) { + co_await mutate_token_metadata([this] (mutable_token_metadata2_ptr tmptr) { auto endpoint = get_broadcast_address(); - tmptr->remove_endpoint(endpoint); - tmptr->get_new()->remove_endpoint(tmptr->get_new()->get_my_id()); - return update_topology_change_info(std::move(tmptr), ::format("leave_ring {}", endpoint)); + const auto my_id = tmptr->get_my_id(); + tmptr->remove_endpoint(my_id); + return update_topology_change_info(std::move(tmptr), ::format("leave_ring {}/{}", endpoint, my_id)); }); auto expire_time = _gossiper.compute_expire_time().time_since_epoch().count(); @@ -6289,7 +6242,7 @@ future storage_service::get_token_metadata_lock() // db::schema_tables::do_merge_schema. // // Note: must be called on shard 0. 
-future<> storage_service::mutate_token_metadata(std::function (mutable_token_metadata_ptr)> func, acquire_merge_lock acquire_merge_lock) noexcept { +future<> storage_service::mutate_token_metadata(std::function (mutable_token_metadata2_ptr)> func, acquire_merge_lock acquire_merge_lock) noexcept { assert(this_shard_id() == 0); std::optional tmlock; @@ -6301,14 +6254,11 @@ future<> storage_service::mutate_token_metadata(std::function (mutable_ co_await replicate_to_all_cores(std::move(tmptr)); } -future<> storage_service::update_topology_change_info(mutable_token_metadata_ptr tmptr, sstring reason) { +future<> storage_service::update_topology_change_info(mutable_token_metadata2_ptr tmptr, sstring reason) { assert(this_shard_id() == 0); try { - locator::dc_rack_fn get_dc_rack_from_gossiper([this] (inet_address ep) { return get_dc_rack_for(ep); }); - co_await tmptr->update_topology_change_info(get_dc_rack_from_gossiper); - - locator::dc_rack_fn get_dc_rack_by_host_id([this, &tm = *tmptr->get_new()] (locator::host_id host_id) -> std::optional { + locator::dc_rack_fn get_dc_rack_by_host_id([this, &tm = *tmptr] (locator::host_id host_id) -> std::optional { if (_raft_topology_change_enabled) { const auto server_id = raft::server_id(host_id.uuid()); const auto* node = _topology_state_machine._topology.find(server_id); @@ -6323,7 +6273,7 @@ future<> storage_service::update_topology_change_info(mutable_token_metadata_ptr return get_dc_rack_for(tm.get_endpoint_for_host_id(host_id)); }); - co_await tmptr->get_new()->update_topology_change_info(get_dc_rack_by_host_id); + co_await tmptr->update_topology_change_info(get_dc_rack_by_host_id); } catch (...) 
{ auto ep = std::current_exception(); slogger.error("Failed to update topology change info for {}: {}", reason, ep); @@ -6332,7 +6282,7 @@ future<> storage_service::update_topology_change_info(mutable_token_metadata_ptr } future<> storage_service::update_topology_change_info(sstring reason, acquire_merge_lock acquire_merge_lock) { - return mutate_token_metadata([this, reason = std::move(reason)] (mutable_token_metadata_ptr tmptr) mutable { + return mutate_token_metadata([this, reason = std::move(reason)] (mutable_token_metadata2_ptr tmptr) mutable { return update_topology_change_info(std::move(tmptr), std::move(reason)); }, acquire_merge_lock); } @@ -6374,10 +6324,9 @@ future<> storage_service::load_tablet_metadata() { future<> storage_service::snitch_reconfigured() { assert(this_shard_id() == 0); auto& snitch = _snitch.local(); - co_await mutate_token_metadata([&] (mutable_token_metadata_ptr tmptr) -> future<> { + co_await mutate_token_metadata([&snitch] (mutable_token_metadata2_ptr tmptr) -> future<> { // re-read local rack and DC info - tmptr->update_topology(get_broadcast_address(), snitch->get_location()); - tmptr->get_new()->update_topology(tmptr->get_new()->get_my_id(), snitch->get_location()); + tmptr->update_topology(tmptr->get_my_id(), snitch->get_location()); return make_ready_future<>(); }); diff --git a/service/storage_service.hh b/service/storage_service.hh index ff52d39678..6f9d739e5c 100644 --- a/service/storage_service.hh +++ b/service/storage_service.hh @@ -214,24 +214,24 @@ private: // db::schema_tables::do_merge_schema. // // Note: must be called on shard 0. - future<> mutate_token_metadata(std::function (mutable_token_metadata_ptr)> func, acquire_merge_lock aml = acquire_merge_lock::yes) noexcept; + future<> mutate_token_metadata(std::function (mutable_token_metadata2_ptr)> func, acquire_merge_lock aml = acquire_merge_lock::yes) noexcept; // Update pending ranges locally and then replicate to all cores. 
// Should be serialized under token_metadata_lock. // Must be called on shard 0. - future<> update_topology_change_info(mutable_token_metadata_ptr tmptr, sstring reason); + future<> update_topology_change_info(mutable_token_metadata2_ptr tmptr, sstring reason); future<> update_topology_change_info(sstring reason, acquire_merge_lock aml = acquire_merge_lock::yes); future<> keyspace_changed(const sstring& ks_name); void register_metrics(); future<> snitch_reconfigured(); - future get_mutable_token_metadata_ptr() noexcept { - return _shared_token_metadata.get()->clone_async().then([] (token_metadata tm) { + future get_mutable_token_metadata_ptr() noexcept { + return _shared_token_metadata.get()->clone_async().then([] (token_metadata2 tm) { // bump the token_metadata ring_version // to invalidate cached token/replication mappings // when the modified token_metadata is committed. tm.invalidate_cached_rings(); - return make_ready_future(make_token_metadata_ptr(std::move(tm))); + return make_ready_future(make_token_metadata2_ptr(std::move(tm))); }); } @@ -259,11 +259,11 @@ public: } token_metadata2_ptr get_token_metadata_ptr() const noexcept { - return _shared_token_metadata.get()->get_new_strong(); + return _shared_token_metadata.get(); } const locator::token_metadata2& get_token_metadata() const noexcept { - return *_shared_token_metadata.get()->get_new(); + return *_shared_token_metadata.get(); } private: @@ -480,7 +480,7 @@ private: std::optional get_dc_rack_for(inet_address endpoint); private: // Should be serialized under token_metadata_lock. 
- future<> replicate_to_all_cores(mutable_token_metadata_ptr tmptr) noexcept; + future<> replicate_to_all_cores(mutable_token_metadata2_ptr tmptr) noexcept; sharded& _sys_ks; sharded& _sys_dist_ks; locator::snitch_signal_slot_t _snitch_reconfigure; diff --git a/service/tablet_allocator.cc b/service/tablet_allocator.cc index 89bdac9320..e66f9127c9 100644 --- a/service/tablet_allocator.cc +++ b/service/tablet_allocator.cc @@ -828,7 +828,7 @@ public: auto rs = abstract_replication_strategy::create_replication_strategy(ksm.strategy_name(), ksm.strategy_options()); if (auto&& tablet_rs = rs->maybe_as_tablet_aware()) { auto tm = _db.get_shared_token_metadata().get(); - auto map = tablet_rs->allocate_tablets_for_new_table(s.shared_from_this(), tm->get_new_strong()).get0(); + auto map = tablet_rs->allocate_tablets_for_new_table(s.shared_from_this(), tm).get0(); muts.emplace_back(tablet_map_to_mutation(map, s.id(), s.keypace_name(), s.cf_name(), ts).get0()); } } diff --git a/test/boost/locator_topology_test.cc b/test/boost/locator_topology_test.cc index 730f33b8dd..d6a2a9bc1f 100644 --- a/test/boost/locator_topology_test.cc +++ b/test/boost/locator_topology_test.cc @@ -267,20 +267,20 @@ SEASTAR_THREAD_TEST_CASE(test_load_sketch) { } }); - stm.mutate_token_metadata([&] (token_metadata& tm) { - tm.get_new()->update_host_id(host1, ip1); - tm.get_new()->update_host_id(host2, ip2); - tm.get_new()->update_host_id(host3, ip3); - tm.get_new()->update_topology(host1, locator::endpoint_dc_rack::default_location, std::nullopt, node1_shard_count); - tm.get_new()->update_topology(host2, locator::endpoint_dc_rack::default_location, std::nullopt, node2_shard_count); - tm.get_new()->update_topology(host3, locator::endpoint_dc_rack::default_location, std::nullopt, node3_shard_count); + stm.mutate_token_metadata([&] (token_metadata2& tm) { + tm.update_host_id(host1, ip1); + tm.update_host_id(host2, ip2); + tm.update_host_id(host3, ip3); + tm.update_topology(host1, 
locator::endpoint_dc_rack::default_location, std::nullopt, node1_shard_count); + tm.update_topology(host2, locator::endpoint_dc_rack::default_location, std::nullopt, node2_shard_count); + tm.update_topology(host3, locator::endpoint_dc_rack::default_location, std::nullopt, node3_shard_count); return make_ready_future<>(); }).get(); // Check that allocation is even when starting from empty state { auto tm = stm.get(); - load_sketch load(tm->get_new_strong()); + load_sketch load(tm); load.populate().get(); std::vector node1_shards(node1_shard_count, 0); @@ -312,7 +312,7 @@ SEASTAR_THREAD_TEST_CASE(test_load_sketch) { std::vector node3_shards(node3_shard_count, 0); - stm.mutate_token_metadata([&] (token_metadata& tm) { + stm.mutate_token_metadata([&] (token_metadata2& tm) { tablet_metadata tab_meta; tablet_map tmap(4); @@ -342,13 +342,13 @@ SEASTAR_THREAD_TEST_CASE(test_load_sketch) { auto table = table_id(utils::make_random_uuid()); tab_meta.set_tablet_map(table, tmap); - tm.get_new()->set_tablets(std::move(tab_meta)); + tm.set_tablets(std::move(tab_meta)); return make_ready_future<>(); }).get(); { auto tm = stm.get(); - load_sketch load(tm->get_new_strong()); + load_sketch load(tm); load.populate().get(); // host3 has max shard load of 3 and 3 shards, and 4 tablets allocated. 
diff --git a/test/boost/network_topology_strategy_test.cc b/test/boost/network_topology_strategy_test.cc index 5ab8bab42f..c93ce2d00c 100644 --- a/test/boost/network_topology_strategy_test.cc +++ b/test/boost/network_topology_strategy_test.cc @@ -250,13 +250,13 @@ void simple_test() { }; // Initialize the token_metadata - stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> { - auto& topo = tm.get_new()->get_topology(); + stm.mutate_token_metadata([&] (token_metadata2& tm) -> future<> { + auto& topo = tm.get_topology(); for (const auto& [ring_point, endpoint, id] : ring_points) { std::unordered_set tokens; tokens.insert({dht::token::kind::key, d2t(ring_point / ring_points.size())}); topo.add_node(id, endpoint, make_endpoint_dc_rack(endpoint), locator::node::state::normal); - co_await tm.get_new()->update_normal_tokens(std::move(tokens), id); + co_await tm.update_normal_tokens(std::move(tokens), id); } }).get(); @@ -271,7 +271,7 @@ void simple_test() { auto ars_ptr = abstract_replication_strategy::create_replication_strategy( "NetworkTopologyStrategy", options323); - full_ring_check(ring_points, options323, ars_ptr, stm.get()->get_new_strong()); + full_ring_check(ring_points, options323, ars_ptr, stm.get()); /////////////// // Create the replication strategy @@ -284,7 +284,7 @@ void simple_test() { ars_ptr = abstract_replication_strategy::create_replication_strategy( "NetworkTopologyStrategy", options320); - full_ring_check(ring_points, options320, ars_ptr, stm.get()->get_new_strong()); + full_ring_check(ring_points, options320, ars_ptr, stm.get()); // // Check cache invalidation: invalidate the cache and run a full ring @@ -292,11 +292,11 @@ void simple_test() { // points will be taken from the cache when it shouldn't and the // corresponding check will fail. 
// - stm.mutate_token_metadata([] (token_metadata& tm) { - tm.get_new()->invalidate_cached_rings(); + stm.mutate_token_metadata([] (token_metadata2& tm) { + tm.invalidate_cached_rings(); return make_ready_future<>(); }).get(); - full_ring_check(ring_points, options320, ars_ptr, stm.get()->get_new_strong()); + full_ring_check(ring_points, options320, ars_ptr, stm.get()); } // Run in a seastar thread. @@ -357,18 +357,18 @@ void heavy_origin_test() { } } - stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> { - auto& topo = tm.get_new()->get_topology(); + stm.mutate_token_metadata([&] (token_metadata2& tm) -> future<> { + auto& topo = tm.get_topology(); for (const auto& [ring_point, endpoint, id] : ring_points) { topo.add_node(id, endpoint, make_endpoint_dc_rack(endpoint), locator::node::state::normal); - co_await tm.get_new()->update_normal_tokens(tokens[endpoint], id); + co_await tm.update_normal_tokens(tokens[endpoint], id); } }).get(); auto ars_ptr = abstract_replication_strategy::create_replication_strategy( "NetworkTopologyStrategy", config_options); - full_ring_check(ring_points, config_options, ars_ptr, stm.get()->get_new_strong()); + full_ring_check(ring_points, config_options, ars_ptr, stm.get()); } @@ -413,14 +413,14 @@ SEASTAR_THREAD_TEST_CASE(NetworkTopologyStrategy_tablets_test) { }; // Initialize the token_metadata - stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> { - auto& topo = tm.get_new()->get_topology(); + stm.mutate_token_metadata([&] (token_metadata2& tm) -> future<> { + auto& topo = tm.get_topology(); for (const auto& [ring_point, endpoint, id] : ring_points) { std::unordered_set tokens; tokens.insert({dht::token::kind::key, d2t(ring_point / ring_points.size())}); topo.add_node(id, endpoint, make_endpoint_dc_rack(endpoint), locator::node::state::normal, 1); - tm.get_new()->update_host_id(id, endpoint); - co_await tm.get_new()->update_normal_tokens(std::move(tokens), id); + tm.update_host_id(id, endpoint); + co_await 
tm.update_normal_tokens(std::move(tokens), id); } }).get(); @@ -444,8 +444,8 @@ SEASTAR_THREAD_TEST_CASE(NetworkTopologyStrategy_tablets_test) { .with_column("v", utf8_type) .build(); - auto tmap = tab_awr_ptr->allocate_tablets_for_new_table(s, stm.get()->get_new_strong()).get0(); - full_ring_check(tmap, options323, ars_ptr, stm.get()->get_new_strong()); + auto tmap = tab_awr_ptr->allocate_tablets_for_new_table(s, stm.get()).get0(); + full_ring_check(tmap, options323, ars_ptr, stm.get()); /////////////// // Create the replication strategy @@ -461,8 +461,8 @@ SEASTAR_THREAD_TEST_CASE(NetworkTopologyStrategy_tablets_test) { tab_awr_ptr = ars_ptr->maybe_as_tablet_aware(); BOOST_REQUIRE(tab_awr_ptr); - tmap = tab_awr_ptr->allocate_tablets_for_new_table(s, stm.get()->get_new_strong()).get0(); - full_ring_check(tmap, options320, ars_ptr, stm.get()->get_new_strong()); + tmap = tab_awr_ptr->allocate_tablets_for_new_table(s, stm.get()).get0(); + full_ring_check(tmap, options320, ars_ptr, stm.get()); // Test the case of not enough nodes to meet RF in DC 102 std::map options324 = { @@ -477,8 +477,8 @@ SEASTAR_THREAD_TEST_CASE(NetworkTopologyStrategy_tablets_test) { tab_awr_ptr = ars_ptr->maybe_as_tablet_aware(); BOOST_REQUIRE(tab_awr_ptr); - tmap = tab_awr_ptr->allocate_tablets_for_new_table(s, stm.get()->get_new_strong()).get0(); - full_ring_check(tmap, options324, ars_ptr, stm.get()->get_new_strong()); + tmap = tab_awr_ptr->allocate_tablets_for_new_table(s, stm.get()).get0(); + full_ring_check(tmap, options324, ars_ptr, stm.get()); } /** @@ -650,11 +650,11 @@ static void test_equivalence(const shared_token_metadata& stm, const locator::to return std::make_pair(p.first, to_sstring(p.second)); }))); - const token_metadata2& tm = *stm.get()->get_new(); + const token_metadata2& tm = *stm.get(); for (size_t i = 0; i < 1000; ++i) { auto token = dht::token::get_random_token(); auto expected = calculate_natural_endpoints(token, tm, topo, datacenters); - auto actual = 
nts.calculate_natural_endpoints(token, *stm.get()->get_new()).get0(); + auto actual = nts.calculate_natural_endpoints(token, *stm.get()).get0(); // Because the old algorithm does not put the nodes in the correct order in the case where more replicas // are required than there are racks in a dc, we accept different order as long as the primary @@ -737,13 +737,13 @@ SEASTAR_THREAD_TEST_CASE(testCalculateEndpoints) { } } - stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> { - generate_topology(tm.get_new()->get_topology(), datacenters, nodes); + stm.mutate_token_metadata([&] (token_metadata2& tm) -> future<> { + generate_topology(tm.get_topology(), datacenters, nodes); for (auto&& i : endpoint_tokens) { - co_await tm.get_new()->update_normal_tokens(std::move(i.second), i.first); + co_await tm.update_normal_tokens(std::move(i.second), i.first); } }).get(); - test_equivalence(stm, stm.get()->get_new()->get_topology(), datacenters); + test_equivalence(stm, stm.get()->get_topology(), datacenters); } } @@ -835,13 +835,13 @@ SEASTAR_THREAD_TEST_CASE(test_topology_compare_endpoints) { semaphore sem(1); shared_token_metadata stm([&sem] () noexcept { return get_units(sem, 1); }, tm_cfg); - stm.mutate_token_metadata([&] (token_metadata& tm) { - auto& topo = tm.get_new()->get_topology(); + stm.mutate_token_metadata([&] (token_metadata2& tm) { + auto& topo = tm.get_topology(); generate_topology(topo, datacenters, nodes); - const auto& address = tm.get_new()->get_endpoint_for_host_id(nodes[tests::random::get_int(0, NODES-1)]); - const auto& a1 = tm.get_new()->get_endpoint_for_host_id(nodes[tests::random::get_int(0, NODES-1)]); - const auto& a2 = tm.get_new()->get_endpoint_for_host_id(nodes[tests::random::get_int(0, NODES-1)]); + const auto& address = tm.get_endpoint_for_host_id(nodes[tests::random::get_int(0, NODES-1)]); + const auto& a1 = tm.get_endpoint_for_host_id(nodes[tests::random::get_int(0, NODES-1)]); + const auto& a2 = 
tm.get_endpoint_for_host_id(nodes[tests::random::get_int(0, NODES-1)]); topo.test_compare_endpoints(address, address, address); topo.test_compare_endpoints(address, address, a1); @@ -880,23 +880,23 @@ SEASTAR_THREAD_TEST_CASE(test_topology_tracks_local_node) { // get_location() should work before any node is added - BOOST_REQUIRE(stm.get()->get_new()->get_topology().get_location() == ip1_dc_rack); + BOOST_REQUIRE(stm.get()->get_topology().get_location() == ip1_dc_rack); - stm.mutate_token_metadata([&] (token_metadata& tm) { - tm.get_new()->update_host_id(host2, ip2); - tm.get_new()->update_host_id(host1, ip1); // this_node added last on purpose + stm.mutate_token_metadata([&] (token_metadata2& tm) { + tm.update_host_id(host2, ip2); + tm.update_host_id(host1, ip1); // this_node added last on purpose return make_ready_future<>(); }).get(); - const node* n1 = stm.get()->get_new()->get_topology().find_node(host1); + const node* n1 = stm.get()->get_topology().find_node(host1); BOOST_REQUIRE(n1); BOOST_REQUIRE(bool(n1->is_this_node())); BOOST_REQUIRE_EQUAL(n1->host_id(), host1); BOOST_REQUIRE_EQUAL(n1->endpoint(), ip1); BOOST_REQUIRE(n1->dc_rack() == ip1_dc_rack); - BOOST_REQUIRE(stm.get()->get_new()->get_topology().get_location() == ip1_dc_rack); + BOOST_REQUIRE(stm.get()->get_topology().get_location() == ip1_dc_rack); - const node* n2 = stm.get()->get_new()->get_topology().find_node(host2); + const node* n2 = stm.get()->get_topology().find_node(host2); BOOST_REQUIRE(n2); BOOST_REQUIRE(!bool(n2->is_this_node())); BOOST_REQUIRE_EQUAL(n2->host_id(), host2); @@ -905,46 +905,46 @@ SEASTAR_THREAD_TEST_CASE(test_topology_tracks_local_node) { // Removing local node - stm.mutate_token_metadata([&] (token_metadata& tm) { - tm.get_new()->remove_endpoint(host1); - tm.get_new()->update_host_id(host3, ip3); + stm.mutate_token_metadata([&] (token_metadata2& tm) { + tm.remove_endpoint(host1); + tm.update_host_id(host3, ip3); return make_ready_future<>(); }).get(); - n1 = 
stm.get()->get_new()->get_topology().find_node(host1); + n1 = stm.get()->get_topology().find_node(host1); BOOST_REQUIRE(!n1); - n1 = stm.get()->get_new()->get_topology().find_node(ip1); + n1 = stm.get()->get_topology().find_node(ip1); BOOST_REQUIRE(!n1); // Removing node with no local node - stm.mutate_token_metadata([&] (token_metadata& tm) { - tm.get_new()->remove_endpoint(host2); + stm.mutate_token_metadata([&] (token_metadata2& tm) { + tm.remove_endpoint(host2); return make_ready_future<>(); }).get(); - n2 = stm.get()->get_new()->get_topology().find_node(host2); + n2 = stm.get()->get_topology().find_node(host2); BOOST_REQUIRE(!n2); - n2 = stm.get()->get_new()->get_topology().find_node(ip2); + n2 = stm.get()->get_topology().find_node(ip2); BOOST_REQUIRE(!n2); // Repopulate after clear_gently() - stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> { - co_await tm.get_new()->clear_gently(); - tm.get_new()->update_host_id(host2, ip2); - tm.get_new()->update_host_id(host1, ip1); // this_node added last on purpose + stm.mutate_token_metadata([&] (token_metadata2& tm) -> future<> { + co_await tm.clear_gently(); + tm.update_host_id(host2, ip2); + tm.update_host_id(host1, ip1); // this_node added last on purpose }).get(); - n1 = stm.get()->get_new()->get_topology().find_node(host1); + n1 = stm.get()->get_topology().find_node(host1); BOOST_REQUIRE(n1); BOOST_REQUIRE(bool(n1->is_this_node())); BOOST_REQUIRE_EQUAL(n1->host_id(), host1); BOOST_REQUIRE_EQUAL(n1->endpoint(), ip1); BOOST_REQUIRE(n1->dc_rack() == ip1_dc_rack); - BOOST_REQUIRE(stm.get()->get_new()->get_topology().get_location() == ip1_dc_rack); + BOOST_REQUIRE(stm.get()->get_topology().get_location() == ip1_dc_rack); - n2 = stm.get()->get_new()->get_topology().find_node(host2); + n2 = stm.get()->get_topology().find_node(host2); BOOST_REQUIRE(n2); BOOST_REQUIRE(!bool(n2->is_this_node())); BOOST_REQUIRE_EQUAL(n2->host_id(), host2); @@ -953,16 +953,16 @@ 
SEASTAR_THREAD_TEST_CASE(test_topology_tracks_local_node) { // get_location() should pick up endpoint_dc_rack from node info - stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> { - co_await tm.get_new()->clear_gently(); - tm.get_new()->get_topology().add_or_update_endpoint(ip1, host1, ip1_dc_rack_v2, node::state::being_decommissioned); + stm.mutate_token_metadata([&] (token_metadata2& tm) -> future<> { + co_await tm.clear_gently(); + tm.get_topology().add_or_update_endpoint(ip1, host1, ip1_dc_rack_v2, node::state::being_decommissioned); }).get(); - n1 = stm.get()->get_new()->get_topology().find_node(host1); + n1 = stm.get()->get_topology().find_node(host1); BOOST_REQUIRE(n1); BOOST_REQUIRE(bool(n1->is_this_node())); BOOST_REQUIRE_EQUAL(n1->host_id(), host1); BOOST_REQUIRE_EQUAL(n1->endpoint(), ip1); BOOST_REQUIRE(n1->dc_rack() == ip1_dc_rack_v2); - BOOST_REQUIRE(stm.get()->get_new()->get_topology().get_location() == ip1_dc_rack_v2); + BOOST_REQUIRE(stm.get()->get_topology().get_location() == ip1_dc_rack_v2); } diff --git a/test/boost/tablets_test.cc b/test/boost/tablets_test.cc index 75430f33f5..ba56ce3a68 100644 --- a/test/boost/tablets_test.cc +++ b/test/boost/tablets_test.cc @@ -622,12 +622,12 @@ void apply_plan_as_in_progress(token_metadata2& tm, const migration_plan& plan) static void rebalance_tablets(tablet_allocator& talloc, shared_token_metadata& stm) { while (true) { - auto plan = talloc.balance_tablets(stm.get()->get_new_strong()).get0(); + auto plan = talloc.balance_tablets(stm.get()).get0(); if (plan.empty()) { break; } - stm.mutate_token_metadata([&] (token_metadata& tm) { - apply_plan(*tm.get_new(), plan); + stm.mutate_token_metadata([&] (token_metadata2& tm) { + apply_plan(tm, plan); return make_ready_future<>(); }).get(); } @@ -636,12 +636,12 @@ void rebalance_tablets(tablet_allocator& talloc, shared_token_metadata& stm) { static void rebalance_tablets_as_in_progress(tablet_allocator& talloc, shared_token_metadata& stm) { while (true) 
{ - auto plan = talloc.balance_tablets(stm.get()->get_new_strong()).get0(); + auto plan = talloc.balance_tablets(stm.get()).get0(); if (plan.empty()) { break; } - stm.mutate_token_metadata([&] (token_metadata& tm) { - apply_plan_as_in_progress(*tm.get_new(), plan); + stm.mutate_token_metadata([&] (token_metadata2& tm) { + apply_plan_as_in_progress(tm, plan); return make_ready_future<>(); }).get(); } @@ -650,8 +650,8 @@ void rebalance_tablets_as_in_progress(tablet_allocator& talloc, shared_token_met // Completes any in progress tablet migrations. static void execute_transitions(shared_token_metadata& stm) { - stm.mutate_token_metadata([&] (token_metadata& tm) { - for (auto&& [tablet, tmap_] : tm.get_new()->tablets().all_tables()) { + stm.mutate_token_metadata([&] (token_metadata2& tm) { + for (auto&& [tablet, tmap_] : tm.tablets().all_tables()) { auto& tmap = tmap_; for (auto&& [tablet, trinfo]: tmap.transitions()) { auto ti = tmap.get_tablet_info(tablet); @@ -689,13 +689,13 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_with_empty_node) { } }); - stm.mutate_token_metadata([&] (auto& tm) { - tm.get_new()->update_host_id(host1, ip1); - tm.get_new()->update_host_id(host2, ip2); - tm.get_new()->update_host_id(host3, ip3); - tm.get_new()->update_topology(host1, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); - tm.get_new()->update_topology(host2, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); - tm.get_new()->update_topology(host3, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); + stm.mutate_token_metadata([&] (token_metadata2& tm) { + tm.update_host_id(host1, ip1); + tm.update_host_id(host2, ip2); + tm.update_host_id(host3, ip3); + tm.update_topology(host1, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); + tm.update_topology(host2, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); + tm.update_topology(host3, 
locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); tablet_map tmap(4); auto tid = tmap.first_tablet(); @@ -728,13 +728,13 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_with_empty_node) { }); tablet_metadata tmeta; tmeta.set_tablet_map(table1, std::move(tmap)); - tm.get_new()->set_tablets(std::move(tmeta)); + tm.set_tablets(std::move(tmeta)); return make_ready_future<>(); }).get(); // Sanity check { - load_sketch load(stm.get()->get_new_strong()); + load_sketch load(stm.get()); load.populate().get(); BOOST_REQUIRE_EQUAL(load.get_load(host1), 4); BOOST_REQUIRE_EQUAL(load.get_avg_shard_load(host1), 2); @@ -747,7 +747,7 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_with_empty_node) { rebalance_tablets(e.get_tablet_allocator().local(), stm); { - load_sketch load(stm.get()->template get_new_strong()); + load_sketch load(stm.get()); load.populate().get(); for (auto h : {host1, host2, host3}) { @@ -783,15 +783,15 @@ SEASTAR_THREAD_TEST_CASE(test_decommission_rf_met) { } }); - stm.mutate_token_metadata([&](auto& tm) { + stm.mutate_token_metadata([&](token_metadata2& tm) { const unsigned shard_count = 2; - tm.get_new()->update_host_id(host1, ip1); - tm.get_new()->update_host_id(host2, ip2); - tm.get_new()->update_host_id(host3, ip3); - tm.get_new()->update_topology(host1, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); - tm.get_new()->update_topology(host2, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); - tm.get_new()->update_topology(host3, locator::endpoint_dc_rack::default_location, node::state::being_decommissioned, + tm.update_host_id(host1, ip1); + tm.update_host_id(host2, ip2); + tm.update_host_id(host3, ip3); + tm.update_topology(host1, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); + tm.update_topology(host2, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); + tm.update_topology(host3, locator::endpoint_dc_rack::default_location, 
node::state::being_decommissioned, shard_count); tablet_map tmap(4); @@ -825,29 +825,29 @@ SEASTAR_THREAD_TEST_CASE(test_decommission_rf_met) { }); tablet_metadata tmeta; tmeta.set_tablet_map(table1, std::move(tmap)); - tm.get_new()->set_tablets(std::move(tmeta)); + tm.set_tablets(std::move(tmeta)); return make_ready_future<>(); }).get(); rebalance_tablets(e.get_tablet_allocator().local(), stm); { - load_sketch load(stm.get()->get_new_strong()); + load_sketch load(stm.get()); load.populate().get(); BOOST_REQUIRE(load.get_avg_shard_load(host1) == 2); BOOST_REQUIRE(load.get_avg_shard_load(host2) == 2); BOOST_REQUIRE(load.get_avg_shard_load(host3) == 0); } - stm.mutate_token_metadata([&](auto& tm) { - tm.get_new()->update_topology(host3, locator::endpoint_dc_rack::default_location, node::state::left); + stm.mutate_token_metadata([&](token_metadata2& tm) { + tm.update_topology(host3, locator::endpoint_dc_rack::default_location, node::state::left); return make_ready_future<>(); }).get(); rebalance_tablets(e.get_tablet_allocator().local(), stm); { - load_sketch load(stm.get()->get_new_strong()); + load_sketch load(stm.get()); load.populate().get(); BOOST_REQUIRE(load.get_avg_shard_load(host1) == 2); BOOST_REQUIRE(load.get_avg_shard_load(host2) == 2); @@ -885,17 +885,17 @@ SEASTAR_THREAD_TEST_CASE(test_decommission_two_racks) { } }); - stm.mutate_token_metadata([&](auto& tm) { + stm.mutate_token_metadata([&](token_metadata2& tm) { const unsigned shard_count = 1; - tm.get_new()->update_host_id(host1, ip1); - tm.get_new()->update_host_id(host2, ip2); - tm.get_new()->update_host_id(host3, ip3); - tm.get_new()->update_host_id(host4, ip4); - tm.get_new()->update_topology(host1, racks[0], std::nullopt, shard_count); - tm.get_new()->update_topology(host2, racks[1], std::nullopt, shard_count); - tm.get_new()->update_topology(host3, racks[0], std::nullopt, shard_count); - tm.get_new()->update_topology(host4, racks[1], node::state::being_decommissioned, + tm.update_host_id(host1, 
ip1); + tm.update_host_id(host2, ip2); + tm.update_host_id(host3, ip3); + tm.update_host_id(host4, ip4); + tm.update_topology(host1, racks[0], std::nullopt, shard_count); + tm.update_topology(host2, racks[1], std::nullopt, shard_count); + tm.update_topology(host3, racks[0], std::nullopt, shard_count); + tm.update_topology(host4, racks[1], node::state::being_decommissioned, shard_count); tablet_map tmap(4); @@ -929,14 +929,14 @@ SEASTAR_THREAD_TEST_CASE(test_decommission_two_racks) { }); tablet_metadata tmeta; tmeta.set_tablet_map(table1, std::move(tmap)); - tm.get_new()->set_tablets(std::move(tmeta)); + tm.set_tablets(std::move(tmeta)); return make_ready_future<>(); }).get(); rebalance_tablets(e.get_tablet_allocator().local(), stm); { - load_sketch load(stm.get()->get_new_strong()); + load_sketch load(stm.get()); load.populate().get(); BOOST_REQUIRE(load.get_avg_shard_load(host1) >= 2); BOOST_REQUIRE(load.get_avg_shard_load(host2) >= 2); @@ -947,10 +947,10 @@ SEASTAR_THREAD_TEST_CASE(test_decommission_two_racks) { // Verify replicas are not collocated on racks { auto tm = stm.get(); - auto& tmap = tm->get_new()->tablets().get_tablet_map(table1); + auto& tmap = tm->tablets().get_tablet_map(table1); tmap.for_each_tablet([&](auto tid, auto& tinfo) { - auto rack1 = tm->get_new()->get_topology().get_rack(tinfo.replicas[0].host); - auto rack2 = tm->get_new()->get_topology().get_rack(tinfo.replicas[1].host); + auto rack1 = tm->get_topology().get_rack(tinfo.replicas[0].host); + auto rack2 = tm->get_topology().get_rack(tinfo.replicas[1].host); BOOST_REQUIRE(rack1 != rack2); }).get(); } @@ -986,17 +986,17 @@ SEASTAR_THREAD_TEST_CASE(test_decommission_rack_load_failure) { } }); - stm.mutate_token_metadata([&](auto& tm) { + stm.mutate_token_metadata([&](token_metadata2& tm) { const unsigned shard_count = 1; - tm.get_new()->update_host_id(host1, ip1); - tm.get_new()->update_host_id(host2, ip2); - tm.get_new()->update_host_id(host3, ip3); - tm.get_new()->update_host_id(host4, 
ip4); - tm.get_new()->update_topology(host1, racks[0], std::nullopt, shard_count); - tm.get_new()->update_topology(host2, racks[0], std::nullopt, shard_count); - tm.get_new()->update_topology(host3, racks[0], std::nullopt, shard_count); - tm.get_new()->update_topology(host4, racks[1], node::state::being_decommissioned, + tm.update_host_id(host1, ip1); + tm.update_host_id(host2, ip2); + tm.update_host_id(host3, ip3); + tm.update_host_id(host4, ip4); + tm.update_topology(host1, racks[0], std::nullopt, shard_count); + tm.update_topology(host2, racks[0], std::nullopt, shard_count); + tm.update_topology(host3, racks[0], std::nullopt, shard_count); + tm.update_topology(host4, racks[1], node::state::being_decommissioned, shard_count); tablet_map tmap(4); @@ -1030,7 +1030,7 @@ SEASTAR_THREAD_TEST_CASE(test_decommission_rack_load_failure) { }); tablet_metadata tmeta; tmeta.set_tablet_map(table1, std::move(tmap)); - tm.get_new()->set_tablets(std::move(tmeta)); + tm.set_tablets(std::move(tmeta)); return make_ready_future<>(); }).get(); @@ -1060,15 +1060,15 @@ SEASTAR_THREAD_TEST_CASE(test_decommission_rf_not_met) { } }); - stm.mutate_token_metadata([&](auto& tm) { + stm.mutate_token_metadata([&](token_metadata2& tm) { const unsigned shard_count = 2; - tm.get_new()->update_host_id(host1, ip1); - tm.get_new()->update_host_id(host2, ip2); - tm.get_new()->update_host_id(host3, ip3); - tm.get_new()->update_topology(host1, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); - tm.get_new()->update_topology(host2, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); - tm.get_new()->update_topology(host3, locator::endpoint_dc_rack::default_location, node::state::being_decommissioned, + tm.update_host_id(host1, ip1); + tm.update_host_id(host2, ip2); + tm.update_host_id(host3, ip3); + tm.update_topology(host1, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); + tm.update_topology(host2, 
locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); + tm.update_topology(host3, locator::endpoint_dc_rack::default_location, node::state::being_decommissioned, shard_count); tablet_map tmap(1); @@ -1082,7 +1082,7 @@ SEASTAR_THREAD_TEST_CASE(test_decommission_rf_not_met) { }); tablet_metadata tmeta; tmeta.set_tablet_map(table1, std::move(tmap)); - tm.get_new()->set_tablets(std::move(tmeta)); + tm.set_tablets(std::move(tmeta)); return make_ready_future<>(); }).get(); @@ -1117,13 +1117,13 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_works_with_in_progress_transitions) } }); - stm.mutate_token_metadata([&] (auto& tm) { - tm.get_new()->update_host_id(host1, ip1); - tm.get_new()->update_host_id(host2, ip2); - tm.get_new()->update_host_id(host3, ip3); - tm.get_new()->update_topology(host1, locator::endpoint_dc_rack::default_location, std::nullopt, 1); - tm.get_new()->update_topology(host2, locator::endpoint_dc_rack::default_location, std::nullopt, 1); - tm.get_new()->update_topology(host3, locator::endpoint_dc_rack::default_location, std::nullopt, 2); + stm.mutate_token_metadata([&] (token_metadata2& tm) { + tm.update_host_id(host1, ip1); + tm.update_host_id(host2, ip2); + tm.update_host_id(host3, ip3); + tm.update_topology(host1, locator::endpoint_dc_rack::default_location, std::nullopt, 1); + tm.update_topology(host2, locator::endpoint_dc_rack::default_location, std::nullopt, 1); + tm.update_topology(host3, locator::endpoint_dc_rack::default_location, std::nullopt, 2); tablet_map tmap(4); std::optional tid = tmap.first_tablet(); @@ -1146,7 +1146,7 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_works_with_in_progress_transitions) }); tablet_metadata tmeta; tmeta.set_tablet_map(table1, std::move(tmap)); - tm.get_new()->set_tablets(std::move(tmeta)); + tm.set_tablets(std::move(tmeta)); return make_ready_future<>(); }).get(); @@ -1154,7 +1154,7 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_works_with_in_progress_transitions) 
execute_transitions(stm); { - load_sketch load(stm.get()->get_new_strong()); + load_sketch load(stm.get()); load.populate().get(); for (auto h : {host1, host2, host3}) { @@ -1186,13 +1186,13 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancer_shuffle_mode) { } }); - stm.mutate_token_metadata([&] (auto& tm) { - tm.get_new()->update_host_id(host1, ip1); - tm.get_new()->update_host_id(host2, ip2); - tm.get_new()->update_host_id(host3, ip3); - tm.get_new()->update_topology(host1, locator::endpoint_dc_rack::default_location, std::nullopt, 1); - tm.get_new()->update_topology(host2, locator::endpoint_dc_rack::default_location, std::nullopt, 1); - tm.get_new()->update_topology(host3, locator::endpoint_dc_rack::default_location, std::nullopt, 2); + stm.mutate_token_metadata([&] (token_metadata2& tm) { + tm.update_host_id(host1, ip1); + tm.update_host_id(host2, ip2); + tm.update_host_id(host3, ip3); + tm.update_topology(host1, locator::endpoint_dc_rack::default_location, std::nullopt, 1); + tm.update_topology(host2, locator::endpoint_dc_rack::default_location, std::nullopt, 1); + tm.update_topology(host3, locator::endpoint_dc_rack::default_location, std::nullopt, 2); tablet_map tmap(4); std::optional tid = tmap.first_tablet(); @@ -1207,20 +1207,20 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancer_shuffle_mode) { } tablet_metadata tmeta; tmeta.set_tablet_map(table1, std::move(tmap)); - tm.get_new()->set_tablets(std::move(tmeta)); + tm.set_tablets(std::move(tmeta)); return make_ready_future<>(); }).get(); rebalance_tablets(e.get_tablet_allocator().local(), stm); - BOOST_REQUIRE(e.get_tablet_allocator().local().balance_tablets(stm.get()->get_new_strong()).get0().empty()); + BOOST_REQUIRE(e.get_tablet_allocator().local().balance_tablets(stm.get()).get0().empty()); utils::get_local_injector().enable("tablet_allocator_shuffle"); auto disable_injection = seastar::defer([&] { utils::get_local_injector().disable("tablet_allocator_shuffle"); }); - 
BOOST_REQUIRE(!e.get_tablet_allocator().local().balance_tablets(stm.get()->get_new_strong()).get0().empty()); + BOOST_REQUIRE(!e.get_tablet_allocator().local().balance_tablets(stm.get()).get0().empty()); }).get(); } #endif @@ -1249,15 +1249,15 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_with_two_empty_nodes) { } }); - stm.mutate_token_metadata([&] (auto& tm) { - tm.get_new()->update_host_id(host1, ip1); - tm.get_new()->update_host_id(host2, ip2); - tm.get_new()->update_host_id(host3, ip3); - tm.get_new()->update_host_id(host4, ip4); - tm.get_new()->update_topology(host1, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); - tm.get_new()->update_topology(host2, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); - tm.get_new()->update_topology(host3, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); - tm.get_new()->update_topology(host4, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); + stm.mutate_token_metadata([&] (token_metadata2& tm) { + tm.update_host_id(host1, ip1); + tm.update_host_id(host2, ip2); + tm.update_host_id(host3, ip3); + tm.update_host_id(host4, ip4); + tm.update_topology(host1, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); + tm.update_topology(host2, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); + tm.update_topology(host3, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); + tm.update_topology(host4, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); tablet_map tmap(16); for (auto tid : tmap.tablet_ids()) { @@ -1270,14 +1270,14 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_with_two_empty_nodes) { } tablet_metadata tmeta; tmeta.set_tablet_map(table1, std::move(tmap)); - tm.get_new()->set_tablets(std::move(tmeta)); + tm.set_tablets(std::move(tmeta)); return make_ready_future<>(); }).get(); rebalance_tablets(e.get_tablet_allocator().local(), stm); { - 
load_sketch load(stm.get()->get_new_strong()); + load_sketch load(stm.get()); load.populate().get(); for (auto h : {host1, host2, host3, host4}) { @@ -1405,16 +1405,16 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_with_random_load) { }); size_t total_tablet_count = 0; - stm.mutate_token_metadata([&](auto& tm) { + stm.mutate_token_metadata([&](token_metadata2& tm) { tablet_metadata tmeta; int i = 0; for (auto h : hosts) { auto ip = inet_address(format("192.168.0.{}", ++i)); auto shard_count = 2; - tm.get_new()->update_host_id(h, ip); + tm.update_host_id(h, ip); auto rack = racks[i % racks.size()]; - tm.get_new()->update_topology(h, rack, std::nullopt, shard_count); + tm.update_topology(h, rack, std::nullopt, shard_count); if (h != hosts[0]) { // Leave the first host empty by making it invisible to allocation algorithm. hosts_by_rack[rack.rack].push_back(h); @@ -1445,7 +1445,7 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_with_random_load) { } tablet_replica_set replicas; for (auto h : replica_hosts) { - auto shard_count = tm.get_new()->get_topology().find_node(h)->get_shard_count(); + auto shard_count = tm.get_topology().find_node(h)->get_shard_count(); auto shard = tests::random::get_int(0, shard_count - 1); replicas.push_back(tablet_replica {h, shard}); } @@ -1454,17 +1454,17 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_with_random_load) { total_tablet_count += tmap.tablet_count(); tmeta.set_tablet_map(table, std::move(tmap)); } - tm.get_new()->set_tablets(std::move(tmeta)); + tm.set_tablets(std::move(tmeta)); return make_ready_future<>(); }).get(); - testlog.debug("tablet metadata: {}", stm.get()->get_new()->tablets()); + testlog.debug("tablet metadata: {}", stm.get()->tablets()); testlog.info("Total tablet count: {}, hosts: {}", total_tablet_count, hosts.size()); rebalance_tablets(e.get_tablet_allocator().local(), stm); { - load_sketch load(stm.get()->get_new_strong()); + load_sketch load(stm.get()); load.populate().get(); min_max_tracker min_max_load; @@ 
-1474,7 +1474,7 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_with_random_load) { min_max_load.update(l); } - testlog.debug("tablet metadata: {}", stm.get()->get_new()->tablets()); + testlog.debug("tablet metadata: {}", stm.get()->tablets()); testlog.debug("Min load: {}, max load: {}", min_max_load.min(), min_max_load.max()); // FIXME: The algorithm cannot achieve balance in all cases yet, so we only check that it stops. diff --git a/test/lib/cql_test_env.cc b/test/lib/cql_test_env.cc index 0f6321ef25..13f3038796 100644 --- a/test/lib/cql_test_env.cc +++ b/test/lib/cql_test_env.cc @@ -640,17 +640,10 @@ private: host_id = linfo.host_id; _sys_ks.local().save_local_info(std::move(linfo), _snitch.local()->get_location(), my_address, my_address).get(); } - locator::shared_token_metadata::mutate_on_all_shards(_token_metadata, [hostid = host_id, &cfg_in] (locator::token_metadata& tm) { + locator::shared_token_metadata::mutate_on_all_shards(_token_metadata, [hostid = host_id, &cfg_in] (locator::token_metadata2& tm) { auto& topo = tm.get_topology(); topo.set_host_id_cfg(hostid); topo.add_or_update_endpoint(cfg_in.broadcast_address, - hostid, - std::nullopt, - locator::node::state::normal, - smp::count); - auto& topo_new = tm.get_new()->get_topology(); - topo_new.set_host_id_cfg(hostid); - topo_new.add_or_update_endpoint(utils::fb_utilities::get_broadcast_address(), hostid, std::nullopt, locator::node::state::normal, From 7b55ccbd8e8a82b0f75a1a7aedf6131bec4a7dde Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Tue, 7 Nov 2023 17:02:44 +0400 Subject: [PATCH 46/51] token_metadata: drop the template Replace token_metadata2 ->token_metadata, make token_metadata back non-template. No behavior changes, just compilation fixes. 
--- api/api_init.hh | 5 +- cdc/generation.cc | 8 +- cdc/generation.hh | 4 +- cdc/log.hh | 5 +- cql3/statements/create_keyspace_statement.cc | 2 +- cql3/statements/create_keyspace_statement.hh | 8 +- cql3/statements/ks_prop_defs.cc | 6 +- cql3/statements/ks_prop_defs.hh | 9 +- db/view/view.cc | 4 +- db/view/view_update_checks.hh | 7 +- db/virtual_tables.cc | 2 +- dht/boot_strapper.cc | 6 +- dht/boot_strapper.hh | 14 +- dht/range_streamer.cc | 12 +- dht/range_streamer.hh | 10 +- gms/gossiper.cc | 2 +- gms/gossiper.hh | 2 +- locator/abstract_replication_strategy.cc | 53 +- locator/abstract_replication_strategy.hh | 30 +- locator/everywhere_replication_strategy.cc | 4 +- locator/everywhere_replication_strategy.hh | 4 +- locator/load_sketch.hh | 4 +- locator/local_strategy.cc | 4 +- locator/local_strategy.hh | 4 +- locator/network_topology_strategy.cc | 10 +- locator/network_topology_strategy.hh | 8 +- locator/simple_strategy.cc | 4 +- locator/simple_strategy.hh | 4 +- locator/tablet_metadata_guard.hh | 2 +- locator/tablet_replication_strategy.hh | 4 +- locator/tablet_sharder.hh | 4 +- locator/tablets.cc | 8 +- locator/token_metadata.cc | 545 +++++++------------ locator/token_metadata.hh | 181 ++---- locator/token_metadata_fwd.hh | 8 +- locator/token_range_splitter.hh | 2 +- locator/topology.hh | 1 - locator/util.cc | 2 +- main.cc | 2 +- node_ops/node_ops_ctl.hh | 7 +- repair/repair.cc | 44 +- repair/row_level.cc | 6 +- repair/row_level.hh | 14 +- replica/database.hh | 2 +- service/forward_service.cc | 2 +- service/forward_service.hh | 2 +- service/storage_proxy.cc | 6 +- service/storage_proxy.hh | 2 +- service/storage_service.cc | 89 ++- service/storage_service.hh | 21 +- service/tablet_allocator.cc | 10 +- service/tablet_allocator.hh | 2 +- test/boost/locator_topology_test.cc | 4 +- test/boost/network_topology_strategy_test.cc | 34 +- test/boost/storage_proxy_test.cc | 6 +- test/boost/tablets_test.cc | 32 +- test/boost/token_metadata_test.cc | 6 +- 
test/lib/cql_test_env.cc | 2 +- 58 files changed, 506 insertions(+), 779 deletions(-) diff --git a/api/api_init.hh b/api/api_init.hh index cff8089a8f..c2065dc28b 100644 --- a/api/api_init.hh +++ b/api/api_init.hh @@ -39,10 +39,7 @@ namespace gms { namespace locator { -template -class generic_token_metadata; -using token_metadata = generic_token_metadata; -using token_metadata2 = generic_token_metadata; +class token_metadata; class shared_token_metadata; class snitch_ptr; diff --git a/cdc/generation.cc b/cdc/generation.cc index 2946872a4e..6a06431283 100644 --- a/cdc/generation.cc +++ b/cdc/generation.cc @@ -192,7 +192,7 @@ bool should_propose_first_generation(const gms::inet_address& me, const gms::gos }) == stop_iteration::no; } -bool is_cdc_generation_optimal(const cdc::topology_description& gen, const locator::token_metadata2& tm) { +bool is_cdc_generation_optimal(const cdc::topology_description& gen, const locator::token_metadata& tm) { if (tm.sorted_tokens().size() != gen.entries().size()) { // We probably have garbage streams from old generations cdc_log.info("Generation size does not match the token ring"); @@ -324,7 +324,7 @@ topology_description limit_number_of_streams_if_needed(topology_description&& de } // Compute a set of tokens that split the token ring into vnodes. 
-static auto get_tokens(const std::unordered_set& bootstrap_tokens, const locator::token_metadata2_ptr tmptr) { +static auto get_tokens(const std::unordered_set& bootstrap_tokens, const locator::token_metadata_ptr tmptr) { auto tokens = tmptr->sorted_tokens(); auto it = tokens.insert(tokens.end(), bootstrap_tokens.begin(), bootstrap_tokens.end()); std::sort(it, tokens.end()); @@ -352,7 +352,7 @@ static token_range_description create_token_range_description( cdc::topology_description make_new_generation_description( const std::unordered_set& bootstrap_tokens, const noncopyable_function(dht::token)>& get_sharding_info, - const locator::token_metadata2_ptr tmptr) { + const locator::token_metadata_ptr tmptr) { const auto tokens = get_tokens(bootstrap_tokens, tmptr); utils::chunked_vector vnode_descriptions; @@ -378,7 +378,7 @@ db_clock::time_point new_generation_timestamp(bool add_delay, std::chrono::milli } future generation_service::legacy_make_new_generation(const std::unordered_set& bootstrap_tokens, bool add_delay) { - const locator::token_metadata2_ptr tmptr = _token_metadata.get(); + const locator::token_metadata_ptr tmptr = _token_metadata.get(); // Fetch sharding parameters for a node that owns vnode ending with this token // using gossiped application states. diff --git a/cdc/generation.hh b/cdc/generation.hh index d61a800611..71d5a09637 100644 --- a/cdc/generation.hh +++ b/cdc/generation.hh @@ -137,7 +137,7 @@ bool should_propose_first_generation(const gms::inet_address& me, const gms::gos * Checks if the CDC generation is optimal, which is true if its `topology_description` is consistent * with `token_metadata`. 
*/ -bool is_cdc_generation_optimal(const cdc::topology_description& gen, const locator::token_metadata2& tm); +bool is_cdc_generation_optimal(const cdc::topology_description& gen, const locator::token_metadata& tm); /* * Generate a set of CDC stream identifiers such that for each shard @@ -157,7 +157,7 @@ bool is_cdc_generation_optimal(const cdc::topology_description& gen, const locat cdc::topology_description make_new_generation_description( const std::unordered_set& bootstrap_tokens, const noncopyable_function (dht::token)>& get_sharding_info, - const locator::token_metadata2_ptr); + const locator::token_metadata_ptr); db_clock::time_point new_generation_timestamp(bool add_delay, std::chrono::milliseconds ring_delay); diff --git a/cdc/log.hh b/cdc/log.hh index d81f98ea2f..a796729dcd 100644 --- a/cdc/log.hh +++ b/cdc/log.hh @@ -36,10 +36,7 @@ using schema_ptr = seastar::lw_shared_ptr; namespace locator { -template -class generic_token_metadata; -using token_metadata = generic_token_metadata; -using token_metadata2 = generic_token_metadata; +class token_metadata; } // namespace locator diff --git a/cql3/statements/create_keyspace_statement.cc b/cql3/statements/create_keyspace_statement.cc index eb89dd354e..3f5260cda6 100644 --- a/cql3/statements/create_keyspace_statement.cc +++ b/cql3/statements/create_keyspace_statement.cc @@ -253,7 +253,7 @@ create_keyspace_statement::execute(query_processor& qp, service::query_state& st }); } -lw_shared_ptr create_keyspace_statement::get_keyspace_metadata(const locator::token_metadata2& tm) { +lw_shared_ptr create_keyspace_statement::get_keyspace_metadata(const locator::token_metadata& tm) { _attrs->validate(); return _attrs->as_ks_metadata(_name, tm); } diff --git a/cql3/statements/create_keyspace_statement.hh b/cql3/statements/create_keyspace_statement.hh index 533aeeab2c..509c9cb6e6 100644 --- a/cql3/statements/create_keyspace_statement.hh +++ b/cql3/statements/create_keyspace_statement.hh @@ -17,11 +17,7 @@ namespace locator 
{ -template -class generic_token_metadata; -using token_metadata = generic_token_metadata; -using token_metadata2 = generic_token_metadata; - +class token_metadata; }; namespace data_dictionary { @@ -76,7 +72,7 @@ public: virtual future<::shared_ptr> execute(query_processor& qp, service::query_state& state, const query_options& options, std::optional guard) const override; - lw_shared_ptr get_keyspace_metadata(const locator::token_metadata2& tm); + lw_shared_ptr get_keyspace_metadata(const locator::token_metadata& tm); }; std::vector check_against_restricted_replication_strategies( diff --git a/cql3/statements/ks_prop_defs.cc b/cql3/statements/ks_prop_defs.cc index 7120852a45..0739f7a9f8 100644 --- a/cql3/statements/ks_prop_defs.cc +++ b/cql3/statements/ks_prop_defs.cc @@ -20,7 +20,7 @@ namespace statements { static std::map prepare_options( const sstring& strategy_class, - const locator::token_metadata2& tm, + const locator::token_metadata& tm, std::map options, const std::map& old_options = {}) { options.erase(ks_prop_defs::REPLICATION_STRATEGY_CLASS_KEY); @@ -111,13 +111,13 @@ std::optional ks_prop_defs::get_replication_strategy_class() const { return _strategy_class; } -lw_shared_ptr ks_prop_defs::as_ks_metadata(sstring ks_name, const locator::token_metadata2& tm) { +lw_shared_ptr ks_prop_defs::as_ks_metadata(sstring ks_name, const locator::token_metadata& tm) { auto sc = get_replication_strategy_class().value(); return data_dictionary::keyspace_metadata::new_keyspace(ks_name, sc, prepare_options(sc, tm, get_replication_options()), get_boolean(KW_DURABLE_WRITES, true), std::vector{}, get_storage_options()); } -lw_shared_ptr ks_prop_defs::as_ks_metadata_update(lw_shared_ptr old, const locator::token_metadata2& tm) { +lw_shared_ptr ks_prop_defs::as_ks_metadata_update(lw_shared_ptr old, const locator::token_metadata& tm) { std::map options; const auto& old_options = old->strategy_options(); auto sc = get_replication_strategy_class(); diff --git 
a/cql3/statements/ks_prop_defs.hh b/cql3/statements/ks_prop_defs.hh index 8459997dd0..72b9f86dcd 100644 --- a/cql3/statements/ks_prop_defs.hh +++ b/cql3/statements/ks_prop_defs.hh @@ -26,10 +26,7 @@ namespace gms { } namespace locator { - template - class generic_token_metadata; - using token_metadata = generic_token_metadata; - using token_metadata2 = generic_token_metadata; + class token_metadata; class shared_token_metadata; struct snitch_ptr; class abstract_replication_strategy; @@ -54,8 +51,8 @@ public: std::map get_replication_options() const; std::optional get_replication_strategy_class() const; data_dictionary::storage_options get_storage_options() const; - lw_shared_ptr as_ks_metadata(sstring ks_name, const locator::token_metadata2&); - lw_shared_ptr as_ks_metadata_update(lw_shared_ptr old, const locator::token_metadata2&); + lw_shared_ptr as_ks_metadata(sstring ks_name, const locator::token_metadata&); + lw_shared_ptr as_ks_metadata_update(lw_shared_ptr old, const locator::token_metadata&); #if 0 public KSMetaData asKSMetadataUpdate(KSMetaData old) throws RequestValidationException diff --git a/db/view/view.cc b/db/view/view.cc index f2df589afe..726974fadd 100644 --- a/db/view/view.cc +++ b/db/view/view.cc @@ -2573,7 +2573,7 @@ update_backlog node_update_backlog::add_fetch(unsigned shard, update_backlog bac return std::max(backlog, _max.load(std::memory_order_relaxed)); } -future check_view_build_ongoing(db::system_distributed_keyspace& sys_dist_ks, const locator::token_metadata2& tm, const sstring& ks_name, +future check_view_build_ongoing(db::system_distributed_keyspace& sys_dist_ks, const locator::token_metadata& tm, const sstring& ks_name, const sstring& cf_name) { using view_statuses_type = std::unordered_map; return sys_dist_ks.view_status(ks_name, cf_name).then([&tm] (view_statuses_type&& view_statuses) { @@ -2584,7 +2584,7 @@ future check_view_build_ongoing(db::system_distributed_keyspace& sys_dist_ }); } -future 
check_needs_view_update_path(db::system_distributed_keyspace& sys_dist_ks, const locator::token_metadata2& tm, const replica::table& t, +future check_needs_view_update_path(db::system_distributed_keyspace& sys_dist_ks, const locator::token_metadata& tm, const replica::table& t, streaming::stream_reason reason) { if (is_internal_keyspace(t.schema()->ks_name())) { return make_ready_future(false); diff --git a/db/view/view_update_checks.hh b/db/view/view_update_checks.hh index 6ee8e9b173..45c9f03f5f 100644 --- a/db/view/view_update_checks.hh +++ b/db/view/view_update_checks.hh @@ -24,15 +24,12 @@ class system_distributed_keyspace; } namespace locator { -template -class generic_token_metadata; -using token_metadata = generic_token_metadata; -using token_metadata2 = generic_token_metadata; +class token_metadata; } namespace db::view { -future check_needs_view_update_path(db::system_distributed_keyspace& sys_dist_ks, const locator::token_metadata2& tm, const replica::table& t, +future check_needs_view_update_path(db::system_distributed_keyspace& sys_dist_ks, const locator::token_metadata& tm, const replica::table& t, streaming::stream_reason reason); } diff --git a/db/virtual_tables.cc b/db/virtual_tables.cc index 2089714cef..a1635148fd 100644 --- a/db/virtual_tables.cc +++ b/db/virtual_tables.cc @@ -65,7 +65,7 @@ public: future<> execute(std::function mutation_sink) override { return _ss.get_ownership().then([&, mutation_sink] (std::map ownership) { - const locator::token_metadata2& tm = _ss.get_token_metadata(); + const locator::token_metadata& tm = _ss.get_token_metadata(); _gossiper.for_each_endpoint_state([&] (const gms::inet_address& endpoint, const gms::endpoint_state&) { mutation m(schema(), partition_key::from_single_value(*schema(), data_value(endpoint).serialize_nonnull())); diff --git a/dht/boot_strapper.cc b/dht/boot_strapper.cc index b9fc52a361..0de4e84e0d 100644 --- a/dht/boot_strapper.cc +++ b/dht/boot_strapper.cc @@ -63,7 +63,7 @@ future<> 
boot_strapper::bootstrap(streaming::stream_reason reason, gms::gossiper } } -std::unordered_set boot_strapper::get_random_bootstrap_tokens(const token_metadata2_ptr tmptr, size_t num_tokens, dht::check_token_endpoint check) { +std::unordered_set boot_strapper::get_random_bootstrap_tokens(const token_metadata_ptr tmptr, size_t num_tokens, dht::check_token_endpoint check) { if (num_tokens < 1) { throw std::runtime_error("num_tokens must be >= 1"); } @@ -77,7 +77,7 @@ std::unordered_set boot_strapper::get_random_bootstrap_tokens(const token return tokens; } -std::unordered_set boot_strapper::get_bootstrap_tokens(const token_metadata2_ptr tmptr, const db::config& cfg, dht::check_token_endpoint check) { +std::unordered_set boot_strapper::get_bootstrap_tokens(const token_metadata_ptr tmptr, const db::config& cfg, dht::check_token_endpoint check) { std::unordered_set initial_tokens; sstring tokens_string = cfg.initial_token(); try { @@ -104,7 +104,7 @@ std::unordered_set boot_strapper::get_bootstrap_tokens(const token_metada return get_random_bootstrap_tokens(tmptr, cfg.num_tokens(), check); } -std::unordered_set boot_strapper::get_random_tokens(const token_metadata2_ptr tmptr, size_t num_tokens) { +std::unordered_set boot_strapper::get_random_tokens(const token_metadata_ptr tmptr, size_t num_tokens) { std::unordered_set tokens; while (tokens.size() < num_tokens) { auto token = dht::token::get_random_token(); diff --git a/dht/boot_strapper.hh b/dht/boot_strapper.hh index 5f88257f05..61ec72f136 100644 --- a/dht/boot_strapper.hh +++ b/dht/boot_strapper.hh @@ -29,9 +29,7 @@ using check_token_endpoint = bool_class; class boot_strapper { using inet_address = gms::inet_address; using token_metadata = locator::token_metadata; - using token_metadata2 = locator::token_metadata2; using token_metadata_ptr = locator::token_metadata_ptr; - using token_metadata2_ptr = locator::token_metadata2_ptr; using token = dht::token; distributed& _db; sharded& _stream_manager; @@ -42,10 +40,10 @@ 
class boot_strapper { locator::endpoint_dc_rack _dr; /* token of the node being bootstrapped. */ std::unordered_set _tokens; - const locator::token_metadata2_ptr _token_metadata_ptr; + const locator::token_metadata_ptr _token_metadata_ptr; public: boot_strapper(distributed& db, sharded& sm, abort_source& abort_source, - locator::host_id addr, locator::endpoint_dc_rack dr, std::unordered_set tokens, const token_metadata2_ptr tmptr) + locator::host_id addr, locator::endpoint_dc_rack dr, std::unordered_set tokens, const token_metadata_ptr tmptr) : _db(db) , _stream_manager(sm) , _abort_source(abort_source) @@ -62,14 +60,14 @@ public: * otherwise, if num_tokens == 1, pick a token to assume half the load of the most-loaded node. * else choose num_tokens tokens at random */ - static std::unordered_set get_bootstrap_tokens(const token_metadata2_ptr tmptr, const db::config& cfg, check_token_endpoint check); + static std::unordered_set get_bootstrap_tokens(const token_metadata_ptr tmptr, const db::config& cfg, check_token_endpoint check); /** * Same as above but does not consult initialtoken config */ - static std::unordered_set get_random_bootstrap_tokens(const token_metadata2_ptr tmptr, size_t num_tokens, check_token_endpoint check); + static std::unordered_set get_random_bootstrap_tokens(const token_metadata_ptr tmptr, size_t num_tokens, check_token_endpoint check); - static std::unordered_set get_random_tokens(const token_metadata2_ptr tmptr, size_t num_tokens); + static std::unordered_set get_random_tokens(const token_metadata_ptr tmptr, size_t num_tokens); #if 0 public static class StringSerializer implements IVersionedSerializer { @@ -93,7 +91,7 @@ public: #endif private: - const token_metadata2& get_token_metadata() { + const token_metadata& get_token_metadata() { return *_token_metadata_ptr; } }; diff --git a/dht/range_streamer.cc b/dht/range_streamer.cc index f27d3a48e7..37c2e6135b 100644 --- a/dht/range_streamer.cc +++ b/dht/range_streamer.cc @@ -123,14 +123,14 
@@ range_streamer::get_all_ranges_with_strict_sources_for(const sstring& keyspace_n auto& strat = erm->get_replication_strategy(); //Active ranges - auto metadata_clone = locator::make_token_metadata2_ptr(get_token_metadata().clone_only_token_map().get0()); - auto range_addresses = strat.get_range_addresses(token_metadata(metadata_clone)).get0(); + auto metadata_clone = get_token_metadata().clone_only_token_map().get0(); + auto range_addresses = strat.get_range_addresses(metadata_clone).get0(); //Pending ranges - metadata_clone->update_topology(_address, _dr); - metadata_clone->update_normal_tokens(_tokens, _address).get(); - auto pending_range_addresses = strat.get_range_addresses(token_metadata(metadata_clone)).get0(); - metadata_clone->clear_gently().get(); + metadata_clone.update_topology(_address, _dr); + metadata_clone.update_normal_tokens(_tokens, _address).get(); + auto pending_range_addresses = strat.get_range_addresses(metadata_clone).get0(); + metadata_clone.clear_gently().get(); //Collects the source that will have its range moved to the new node std::unordered_map> range_sources; diff --git a/dht/range_streamer.hh b/dht/range_streamer.hh index 75ceb1b01b..00c6019f5e 100644 --- a/dht/range_streamer.hh +++ b/dht/range_streamer.hh @@ -37,9 +37,7 @@ class range_streamer { public: using inet_address = gms::inet_address; using token_metadata = locator::token_metadata; - using token_metadata2 = locator::token_metadata2; using token_metadata_ptr = locator::token_metadata_ptr; - using token_metadata2_ptr = locator::token_metadata2_ptr; using stream_plan = streaming::stream_plan; using stream_state = streaming::stream_state; public: @@ -79,7 +77,7 @@ public: } }; - range_streamer(distributed& db, sharded& sm, const token_metadata2_ptr tmptr, abort_source& abort_source, std::unordered_set tokens, + range_streamer(distributed& db, sharded& sm, const token_metadata_ptr tmptr, abort_source& abort_source, std::unordered_set tokens, locator::host_id address, 
locator::endpoint_dc_rack dr, sstring description, streaming::stream_reason reason, service::frozen_topology_guard topo_guard, std::vector tables = {}) @@ -98,7 +96,7 @@ public: _abort_source.check(); } - range_streamer(distributed& db, sharded& sm, const token_metadata2_ptr tmptr, abort_source& abort_source, + range_streamer(distributed& db, sharded& sm, const token_metadata_ptr tmptr, abort_source& abort_source, locator::host_id address, locator::endpoint_dc_rack dr, sstring description, streaming::stream_reason reason, service::frozen_topology_guard topo_guard, std::vector tables = {}) : range_streamer(db, sm, std::move(tmptr), abort_source, std::unordered_set(), address, std::move(dr), description, reason, std::move(topo_guard), std::move(tables)) { } @@ -147,7 +145,7 @@ private: #endif // Can be called only before stream_async(). - const token_metadata2& get_token_metadata() { + const token_metadata& get_token_metadata() { return *_token_metadata_ptr; } public: @@ -156,7 +154,7 @@ public: private: distributed& _db; sharded& _stream_manager; - token_metadata2_ptr _token_metadata_ptr; + token_metadata_ptr _token_metadata_ptr; abort_source& _abort_source; std::unordered_set _tokens; locator::host_id _address; diff --git a/gms/gossiper.cc b/gms/gossiper.cc index f404b04d28..db346e4a26 100644 --- a/gms/gossiper.cc +++ b/gms/gossiper.cc @@ -2634,7 +2634,7 @@ void gossiper::append_endpoint_state(std::stringstream& ss, const endpoint_state } } -locator::token_metadata2_ptr gossiper::get_token_metadata_ptr() const noexcept { +locator::token_metadata_ptr gossiper::get_token_metadata_ptr() const noexcept { return _shared_token_metadata.get(); } diff --git a/gms/gossiper.hh b/gms/gossiper.hh index 6d6fedd670..dee8d910e9 100644 --- a/gms/gossiper.hh +++ b/gms/gossiper.hh @@ -674,7 +674,7 @@ private: gossip_config _gcfg; // Get features supported by a particular node std::set get_supported_features(inet_address endpoint) const; - locator::token_metadata2_ptr 
get_token_metadata_ptr() const noexcept; + locator::token_metadata_ptr get_token_metadata_ptr() const noexcept; public: void check_knows_remote_features(std::set& local_features, const std::unordered_map& loaded_peer_features) const; // Get features supported by all the nodes this node knows about diff --git a/locator/abstract_replication_strategy.cc b/locator/abstract_replication_strategy.cc index c173512919..a0ff02d898 100644 --- a/locator/abstract_replication_strategy.cc +++ b/locator/abstract_replication_strategy.cc @@ -19,7 +19,7 @@ namespace locator { -static endpoint_set resolve_endpoints(const host_id_set& host_ids, const token_metadata2& tm) { +static endpoint_set resolve_endpoints(const host_id_set& host_ids, const token_metadata& tm) { endpoint_set result{}; result.reserve(host_ids.size()); for (const auto& host_id: host_ids) { @@ -68,9 +68,9 @@ void abstract_replication_strategy::validate_replication_strategy(const sstring& } } -future abstract_replication_strategy::calculate_natural_ips(const token& search_token, const token_metadata2_ptr& tm) const { - const auto host_ids = co_await calculate_natural_endpoints(search_token, *tm); - co_return resolve_endpoints(host_ids, *tm); +future abstract_replication_strategy::calculate_natural_ips(const token& search_token, const token_metadata& tm) const { + const auto host_ids = co_await calculate_natural_endpoints(search_token, tm); + co_return resolve_endpoints(host_ids, tm); } using strategy_class_registry = class_registry< @@ -87,7 +87,7 @@ inet_address_vector_replica_set vnode_effective_replication_map::get_natural_end return natural_endpoints; } -void maybe_remove_node_being_replaced(const token_metadata2& tm, +void maybe_remove_node_being_replaced(const token_metadata& tm, const abstract_replication_strategy& rs, inet_address_vector_replica_set& natural_endpoints) { if (tm.is_any_node_being_replaced() && @@ -264,10 +264,10 @@ abstract_replication_strategy::get_ranges(locator::host_id ep, token_metadata_pt 
future abstract_replication_strategy::get_ranges(locator::host_id ep, const token_metadata& tm) const { dht::token_range_vector ret; - if (!tm.get_new()->is_normal_token_owner(ep)) { + if (!tm.is_normal_token_owner(ep)) { co_return ret; } - const auto& sorted_tokens = tm.get_new()->sorted_tokens(); + const auto& sorted_tokens = tm.sorted_tokens(); if (sorted_tokens.empty()) { on_internal_error(rslogger, "Token metadata is empty"); } @@ -279,7 +279,7 @@ abstract_replication_strategy::get_ranges(locator::host_id ep, const token_metad // Using the common path would make the function quadratic in the number of endpoints. should_add = true; } else { - auto eps = co_await calculate_natural_endpoints(tok, *tm.get_new()); + auto eps = co_await calculate_natural_endpoints(tok, tm); should_add = eps.contains(ep); } if (should_add) { @@ -327,7 +327,7 @@ vnode_effective_replication_map::get_primary_ranges_within_dc(inet_address ep) c future> vnode_effective_replication_map::get_range_addresses() const { - const token_metadata2& tm = *_tmptr; + const token_metadata& tm = *_tmptr; std::unordered_map ret; for (auto& t : tm.sorted_tokens()) { dht::token_range_vector ranges = tm.get_primary_ranges_for(t); @@ -342,10 +342,9 @@ vnode_effective_replication_map::get_range_addresses() const { future> abstract_replication_strategy::get_range_addresses(const token_metadata& tm) const { std::unordered_map ret; - auto tm_new = tm.get_new_strong(); - for (auto& t : tm_new->sorted_tokens()) { - dht::token_range_vector ranges = tm_new->get_primary_ranges_for(t); - auto eps = co_await calculate_natural_ips(t, tm_new); + for (auto& t : tm.sorted_tokens()) { + dht::token_range_vector ranges = tm.get_primary_ranges_for(t); + auto eps = co_await calculate_natural_ips(t, tm); for (auto& r : ranges) { ret.emplace(r, eps.get_vector()); } @@ -354,26 +353,26 @@ abstract_replication_strategy::get_range_addresses(const token_metadata& tm) con } future 
-abstract_replication_strategy::get_pending_address_ranges(const token_metadata2_ptr tmptr, std::unordered_set pending_tokens, locator::host_id pending_address, locator::endpoint_dc_rack dr) const { +abstract_replication_strategy::get_pending_address_ranges(const token_metadata_ptr tmptr, std::unordered_set pending_tokens, locator::host_id pending_address, locator::endpoint_dc_rack dr) const { dht::token_range_vector ret; - auto temp = make_token_metadata2_ptr(co_await tmptr->clone_only_token_map()); - temp->update_topology(pending_address, std::move(dr)); - co_await temp->update_normal_tokens(pending_tokens, pending_address); - for (const auto& t : temp->sorted_tokens()) { - auto eps = co_await calculate_natural_endpoints(t, *temp); + auto temp = co_await tmptr->clone_only_token_map(); + temp.update_topology(pending_address, std::move(dr)); + co_await temp.update_normal_tokens(pending_tokens, pending_address); + for (const auto& t : temp.sorted_tokens()) { + auto eps = co_await calculate_natural_endpoints(t, temp); if (eps.contains(pending_address)) { - dht::token_range_vector r = temp->get_primary_ranges_for(t); + dht::token_range_vector r = temp.get_primary_ranges_for(t); rslogger.debug("get_pending_address_ranges: token={} primary_range={} endpoint={}", t, r, pending_address); ret.insert(ret.end(), r.begin(), r.end()); } } - co_await temp->clear_gently(); + co_await temp.clear_gently(); co_return ret; } static const auto default_replication_map_key = dht::token::from_int64(0); -future calculate_effective_replication_map(replication_strategy_ptr rs, token_metadata2_ptr tmptr) { +future calculate_effective_replication_map(replication_strategy_ptr rs, token_metadata_ptr tmptr) { replication_map replication_map; ring_mapping pending_endpoints; ring_mapping read_endpoints; @@ -441,11 +440,11 @@ future calculate_effective_replicat } } else if (depend_on_token) { for (const auto &t : sorted_tokens) { - auto eps = co_await rs->calculate_natural_ips(t, tmptr); + auto 
eps = co_await rs->calculate_natural_ips(t, *tmptr); replication_map.emplace(t, std::move(eps).extract_vector()); } } else { - auto eps = co_await rs->calculate_natural_ips(default_replication_map_key, tmptr); + auto eps = co_await rs->calculate_natural_ips(default_replication_map_key, *tmptr); replication_map.emplace(default_replication_map_key, std::move(eps).extract_vector()); } @@ -512,7 +511,7 @@ vnode_effective_replication_map::~vnode_effective_replication_map() { } effective_replication_map::effective_replication_map(replication_strategy_ptr rs, - token_metadata2_ptr tmptr, + token_metadata_ptr tmptr, size_t replication_factor) noexcept : _rs(std::move(rs)) , _tmptr(std::move(tmptr)) @@ -520,11 +519,11 @@ effective_replication_map::effective_replication_map(replication_strategy_ptr rs , _validity_abort_source(std::make_unique()) { } -vnode_effective_replication_map::factory_key vnode_effective_replication_map::make_factory_key(const replication_strategy_ptr& rs, const token_metadata2_ptr& tmptr) { +vnode_effective_replication_map::factory_key vnode_effective_replication_map::make_factory_key(const replication_strategy_ptr& rs, const token_metadata_ptr& tmptr) { return factory_key(rs->get_type(), rs->get_config_options(), tmptr->get_ring_version()); } -future effective_replication_map_factory::create_effective_replication_map(replication_strategy_ptr rs, token_metadata2_ptr tmptr) { +future effective_replication_map_factory::create_effective_replication_map(replication_strategy_ptr rs, token_metadata_ptr tmptr) { // lookup key on local shard auto key = vnode_effective_replication_map::make_factory_key(rs, tmptr); auto erm = find_effective_replication_map(key); diff --git a/locator/abstract_replication_strategy.hh b/locator/abstract_replication_strategy.hh index 99bd927663..dbd35cfe67 100644 --- a/locator/abstract_replication_strategy.hh +++ b/locator/abstract_replication_strategy.hh @@ -103,8 +103,8 @@ public: // is small, that implementation may not yield 
since by itself it won't cause a reactor stall (assuming practical // cluster sizes and number of tokens per node). The caller is responsible for yielding if they call this function // in a loop. - virtual future calculate_natural_endpoints(const token& search_token, const token_metadata2& tm) const = 0; - future calculate_natural_ips(const token& search_token, const token_metadata2_ptr& tm) const; + virtual future calculate_natural_endpoints(const token& search_token, const token_metadata& tm) const = 0; + future calculate_natural_ips(const token& search_token, const token_metadata& tm) const; virtual ~abstract_replication_strategy() {} static ptr_type create_replication_strategy(const sstring& strategy_name, const replication_strategy_config_options& config_options); @@ -119,7 +119,7 @@ public: virtual void validate_options(const gms::feature_service&) const = 0; virtual std::optional> recognized_options(const topology&) const = 0; - virtual size_t get_replication_factor(const token_metadata2& tm) const = 0; + virtual size_t get_replication_factor(const token_metadata& tm) const = 0; // Decide if the replication strategy allow removing the node being // replaced from the natural endpoints when a node is being replaced in the // cluster. LocalStrategy is the not allowed to do so because it always @@ -155,7 +155,7 @@ public: // Caller must ensure that token_metadata will not change throughout the call. 
future> get_range_addresses(const token_metadata& tm) const; - future get_pending_address_ranges(const token_metadata2_ptr tmptr, std::unordered_set pending_tokens, locator::host_id pending_address, locator::endpoint_dc_rack dr) const; + future get_pending_address_ranges(const token_metadata_ptr tmptr, std::unordered_set pending_tokens, locator::host_id pending_address, locator::endpoint_dc_rack dr) const; }; using ring_mapping = boost::icl::interval_map>; @@ -175,17 +175,17 @@ using mutable_replication_strategy_ptr = seastar::shared_ptr _validity_abort_source; public: - effective_replication_map(replication_strategy_ptr, token_metadata2_ptr, size_t replication_factor) noexcept; + effective_replication_map(replication_strategy_ptr, token_metadata_ptr, size_t replication_factor) noexcept; effective_replication_map(effective_replication_map&&) noexcept = default; virtual ~effective_replication_map() = default; const abstract_replication_strategy& get_replication_strategy() const noexcept { return *_rs; } - const token_metadata2& get_token_metadata() const noexcept { return *_tmptr; } - const token_metadata2_ptr& get_token_metadata_ptr() const noexcept { return _tmptr; } + const token_metadata& get_token_metadata() const noexcept { return *_tmptr; } + const token_metadata_ptr& get_token_metadata_ptr() const noexcept { return _tmptr; } const topology& get_topology() const noexcept { return _tmptr->get_topology(); } size_t get_replication_factor() const noexcept { return _replication_factor; } @@ -255,7 +255,7 @@ protected: } public: virtual ~per_table_replication_strategy() = default; - virtual effective_replication_map_ptr make_replication_map(table_id, token_metadata2_ptr) const = 0; + virtual effective_replication_map_ptr make_replication_map(table_id, token_metadata_ptr) const = 0; }; // Holds the full replication_map resulting from applying the @@ -302,7 +302,7 @@ public: // effective_replication_map std::unique_ptr make_splitter() const override; const 
dht::sharder& get_sharder(const schema& s) const override; public: - explicit vnode_effective_replication_map(replication_strategy_ptr rs, token_metadata2_ptr tmptr, replication_map replication_map, + explicit vnode_effective_replication_map(replication_strategy_ptr rs, token_metadata_ptr tmptr, replication_map replication_map, ring_mapping pending_endpoints, ring_mapping read_endpoints, size_t replication_factor) noexcept : effective_replication_map(std::move(rs), std::move(tmptr), replication_factor) , _replication_map(std::move(replication_map)) @@ -357,7 +357,7 @@ private: const inet_address_vector_replica_set& do_get_natural_endpoints(const token& tok, bool is_vnode) const; public: - static factory_key make_factory_key(const replication_strategy_ptr& rs, const token_metadata2_ptr& tmptr); + static factory_key make_factory_key(const replication_strategy_ptr& rs, const token_metadata_ptr& tmptr); const factory_key& get_factory_key() const noexcept { return *_factory_key; @@ -382,7 +382,7 @@ using mutable_vnode_effective_replication_map_ptr = shared_ptr( std::move(rs), std::move(tmptr), std::move(replication_map), @@ -390,7 +390,7 @@ inline mutable_vnode_erm_ptr make_effective_replication_map(replication_strategy } // Apply the replication strategy over the current configuration and the given token_metadata. -future calculate_effective_replication_map(replication_strategy_ptr rs, token_metadata2_ptr tmptr); +future calculate_effective_replication_map(replication_strategy_ptr rs, token_metadata_ptr tmptr); // Class to hold a coherent view of a keyspace // effective replication map on all shards @@ -478,7 +478,7 @@ public: // vnode_effective_replication_map for the local shard. // // Therefore create should be called first on shard 0, then on all other shards. 
- future create_effective_replication_map(replication_strategy_ptr rs, token_metadata2_ptr tmptr); + future create_effective_replication_map(replication_strategy_ptr rs, token_metadata_ptr tmptr); future<> stop() noexcept; @@ -497,7 +497,7 @@ private: friend class vnode_effective_replication_map; }; -void maybe_remove_node_being_replaced(const token_metadata2&, +void maybe_remove_node_being_replaced(const token_metadata&, const abstract_replication_strategy&, inet_address_vector_replica_set& natural_endpoints); diff --git a/locator/everywhere_replication_strategy.cc b/locator/everywhere_replication_strategy.cc index cb25cb5b47..45cb60c27f 100644 --- a/locator/everywhere_replication_strategy.cc +++ b/locator/everywhere_replication_strategy.cc @@ -20,7 +20,7 @@ everywhere_replication_strategy::everywhere_replication_strategy(const replicati _natural_endpoints_depend_on_token = false; } -future everywhere_replication_strategy::calculate_natural_endpoints(const token& search_token, const token_metadata2& tm) const { +future everywhere_replication_strategy::calculate_natural_endpoints(const token& search_token, const token_metadata& tm) const { if (tm.sorted_tokens().empty()) { host_id_set result{host_id_vector_replica_set({host_id{}})}; return make_ready_future(std::move(result)); @@ -29,7 +29,7 @@ future everywhere_replication_strategy::calculate_natural_endpoints return make_ready_future(host_id_set(all_endpoints.begin(), all_endpoints.end())); } -size_t everywhere_replication_strategy::get_replication_factor(const token_metadata2& tm) const { +size_t everywhere_replication_strategy::get_replication_factor(const token_metadata& tm) const { return tm.sorted_tokens().empty() ? 
1 : tm.count_normal_token_owners(); } diff --git a/locator/everywhere_replication_strategy.hh b/locator/everywhere_replication_strategy.hh index f6de1823bc..a67c584d2c 100644 --- a/locator/everywhere_replication_strategy.hh +++ b/locator/everywhere_replication_strategy.hh @@ -18,7 +18,7 @@ class everywhere_replication_strategy : public abstract_replication_strategy { public: everywhere_replication_strategy(const replication_strategy_config_options& config_options); - virtual future calculate_natural_endpoints(const token& search_token, const token_metadata2& tm) const override; + virtual future calculate_natural_endpoints(const token& search_token, const token_metadata& tm) const override; virtual void validate_options(const gms::feature_service&) const override { /* noop */ } @@ -27,7 +27,7 @@ public: return std::nullopt; } - virtual size_t get_replication_factor(const token_metadata2& tm) const override; + virtual size_t get_replication_factor(const token_metadata& tm) const override; virtual bool allow_remove_node_being_replaced_from_natural_endpoints() const override { return true; diff --git a/locator/load_sketch.hh b/locator/load_sketch.hh index fb663ad5aa..df4ac4bbfe 100644 --- a/locator/load_sketch.hh +++ b/locator/load_sketch.hh @@ -56,7 +56,7 @@ class load_sketch { } }; std::unordered_map _nodes; - token_metadata2_ptr _tm; + token_metadata_ptr _tm; private: tablet_replica_set get_replicas_for_tablet_load(const tablet_info& ti, const tablet_transition_info* trinfo) const { // We reflect migrations in the load as if they already happened, @@ -65,7 +65,7 @@ private: } public: - load_sketch(token_metadata2_ptr tm) + load_sketch(token_metadata_ptr tm) : _tm(std::move(tm)) { } diff --git a/locator/local_strategy.cc b/locator/local_strategy.cc index a0539b2888..60409235af 100644 --- a/locator/local_strategy.cc +++ b/locator/local_strategy.cc @@ -18,7 +18,7 @@ local_strategy::local_strategy(const replication_strategy_config_options& config 
_natural_endpoints_depend_on_token = false; } -future local_strategy::calculate_natural_endpoints(const token& t, const token_metadata2& tm) const { +future local_strategy::calculate_natural_endpoints(const token& t, const token_metadata& tm) const { return make_ready_future(host_id_set{host_id{}}); } @@ -30,7 +30,7 @@ std::optional> local_strategy::recognized_options(co return {}; } -size_t local_strategy::get_replication_factor(const token_metadata2&) const { +size_t local_strategy::get_replication_factor(const token_metadata&) const { return 1; } diff --git a/locator/local_strategy.hh b/locator/local_strategy.hh index ffaa18d493..b4b1660e0e 100644 --- a/locator/local_strategy.hh +++ b/locator/local_strategy.hh @@ -25,9 +25,9 @@ class local_strategy : public abstract_replication_strategy { public: local_strategy(const replication_strategy_config_options& config_options); virtual ~local_strategy() {}; - virtual size_t get_replication_factor(const token_metadata2&) const override; + virtual size_t get_replication_factor(const token_metadata&) const override; - virtual future calculate_natural_endpoints(const token& search_token, const token_metadata2& tm) const override; + virtual future calculate_natural_endpoints(const token& search_token, const token_metadata& tm) const override; virtual void validate_options(const gms::feature_service&) const override; diff --git a/locator/network_topology_strategy.cc b/locator/network_topology_strategy.cc index 01e167f048..fc9cd90572 100644 --- a/locator/network_topology_strategy.cc +++ b/locator/network_topology_strategy.cc @@ -160,7 +160,7 @@ class natural_endpoints_tracker { } }; - const token_metadata2& _tm; + const token_metadata& _tm; const topology& _tp; std::unordered_map _dc_rep_factor; @@ -189,7 +189,7 @@ class natural_endpoints_tracker { size_t _dcs_to_fill; public: - natural_endpoints_tracker(const token_metadata2& tm, const std::unordered_map& dc_rep_factor) + natural_endpoints_tracker(const token_metadata& tm, 
const std::unordered_map& dc_rep_factor) : _tm(tm) , _tp(_tm.get_topology()) , _dc_rep_factor(dc_rep_factor) @@ -239,7 +239,7 @@ public: future network_topology_strategy::calculate_natural_endpoints( - const token& search_token, const token_metadata2& tm) const { + const token& search_token, const token_metadata& tm) const { natural_endpoints_tracker tracker(tm, _dc_rep_factor); @@ -281,14 +281,14 @@ std::optional> network_topology_strategy::recognized return opts; } -effective_replication_map_ptr network_topology_strategy::make_replication_map(table_id table, token_metadata2_ptr tm) const { +effective_replication_map_ptr network_topology_strategy::make_replication_map(table_id table, token_metadata_ptr tm) const { if (!uses_tablets()) { on_internal_error(rslogger, format("make_replication_map() called for table {} but replication strategy not configured to use tablets", table)); } return do_make_replication_map(table, shared_from_this(), std::move(tm), _rep_factor); } -future network_topology_strategy::allocate_tablets_for_new_table(schema_ptr s, token_metadata2_ptr tm) const { +future network_topology_strategy::allocate_tablets_for_new_table(schema_ptr s, token_metadata_ptr tm) const { auto tablet_count = get_initial_tablets(); auto aligned_tablet_count = 1ul << log2ceil(tablet_count); if (tablet_count != aligned_tablet_count) { diff --git a/locator/network_topology_strategy.hh b/locator/network_topology_strategy.hh index 3ce065e98b..b5f1d118ff 100644 --- a/locator/network_topology_strategy.hh +++ b/locator/network_topology_strategy.hh @@ -25,7 +25,7 @@ public: network_topology_strategy( const replication_strategy_config_options& config_options); - virtual size_t get_replication_factor(const token_metadata2&) const override { + virtual size_t get_replication_factor(const token_metadata&) const override { return _rep_factor; } @@ -43,15 +43,15 @@ public: } public: // tablet_aware_replication_strategy - virtual effective_replication_map_ptr 
make_replication_map(table_id, token_metadata2_ptr) const override; - virtual future allocate_tablets_for_new_table(schema_ptr, token_metadata2_ptr) const override; + virtual effective_replication_map_ptr make_replication_map(table_id, token_metadata_ptr) const override; + virtual future allocate_tablets_for_new_table(schema_ptr, token_metadata_ptr) const override; protected: /** * calculate endpoints in one pass through the tokens by tracking our * progress in each DC, rack etc. */ virtual future calculate_natural_endpoints( - const token& search_token, const token_metadata2& tm) const override; + const token& search_token, const token_metadata& tm) const override; virtual void validate_options(const gms::feature_service&) const override; diff --git a/locator/simple_strategy.cc b/locator/simple_strategy.cc index 22b9f780a7..17730ed2ef 100644 --- a/locator/simple_strategy.cc +++ b/locator/simple_strategy.cc @@ -33,7 +33,7 @@ simple_strategy::simple_strategy(const replication_strategy_config_options& conf } } -future simple_strategy::calculate_natural_endpoints(const token& t, const token_metadata2& tm) const { +future simple_strategy::calculate_natural_endpoints(const token& t, const token_metadata& tm) const { const std::vector& tokens = tm.sorted_tokens(); if (tokens.empty()) { @@ -63,7 +63,7 @@ future simple_strategy::calculate_natural_endpoints(const token& t, co_return endpoints; } -size_t simple_strategy::get_replication_factor(const token_metadata2&) const { +size_t simple_strategy::get_replication_factor(const token_metadata&) const { return _replication_factor; } diff --git a/locator/simple_strategy.hh b/locator/simple_strategy.hh index cb75372048..a4cacccebf 100644 --- a/locator/simple_strategy.hh +++ b/locator/simple_strategy.hh @@ -19,14 +19,14 @@ class simple_strategy : public abstract_replication_strategy { public: simple_strategy(const replication_strategy_config_options& config_options); virtual ~simple_strategy() {}; - virtual size_t 
get_replication_factor(const token_metadata2& tm) const override; + virtual size_t get_replication_factor(const token_metadata& tm) const override; virtual void validate_options(const gms::feature_service&) const override; virtual std::optional> recognized_options(const topology&) const override; virtual bool allow_remove_node_being_replaced_from_natural_endpoints() const override { return true; } - virtual future calculate_natural_endpoints(const token& search_token, const token_metadata2& tm) const override; + virtual future calculate_natural_endpoints(const token& search_token, const token_metadata& tm) const override; private: size_t _replication_factor = 1; }; diff --git a/locator/tablet_metadata_guard.hh b/locator/tablet_metadata_guard.hh index ca5187d26f..0731ea8faa 100644 --- a/locator/tablet_metadata_guard.hh +++ b/locator/tablet_metadata_guard.hh @@ -45,7 +45,7 @@ public: return _abort_source; } - locator::token_metadata2_ptr get_token_metadata() { + locator::token_metadata_ptr get_token_metadata() { return _erm->get_token_metadata_ptr(); } diff --git a/locator/tablet_replication_strategy.hh b/locator/tablet_replication_strategy.hh index 03257ed0c2..9403cf7c8c 100644 --- a/locator/tablet_replication_strategy.hh +++ b/locator/tablet_replication_strategy.hh @@ -38,13 +38,13 @@ protected: size_t get_initial_tablets() const { return _initial_tablets; } effective_replication_map_ptr do_make_replication_map(table_id, replication_strategy_ptr, - token_metadata2_ptr, + token_metadata_ptr, size_t replication_factor) const; public: /// Generates tablet_map for a new table. /// Runs under group0 guard. 
- virtual future allocate_tablets_for_new_table(schema_ptr, token_metadata2_ptr) const = 0; + virtual future allocate_tablets_for_new_table(schema_ptr, token_metadata_ptr) const = 0; }; } // namespace locator diff --git a/locator/tablet_sharder.hh b/locator/tablet_sharder.hh index 74a5c1fdd5..b133d272db 100644 --- a/locator/tablet_sharder.hh +++ b/locator/tablet_sharder.hh @@ -17,7 +17,7 @@ namespace locator { /// Implements sharder object which reflects assignment of tablets of a given table to local shards. /// Token ranges which don't have local tablets are reported to belong to shard 0. class tablet_sharder : public dht::sharder { - const token_metadata2& _tm; + const token_metadata& _tm; table_id _table; mutable const tablet_map* _tmap = nullptr; private: @@ -29,7 +29,7 @@ private: } } public: - tablet_sharder(const token_metadata2& tm, table_id table) + tablet_sharder(const token_metadata& tm, table_id table) : _tm(tm) , _table(table) { } diff --git a/locator/tablets.cc b/locator/tablets.cc index 2a145d75fe..ef69433402 100644 --- a/locator/tablets.cc +++ b/locator/tablets.cc @@ -348,7 +348,7 @@ private: public: tablet_effective_replication_map(table_id table, replication_strategy_ptr rs, - token_metadata2_ptr tmptr, + token_metadata_ptr tmptr, size_t replication_factor) : effective_replication_map(std::move(rs), std::move(tmptr), replication_factor) , _table(table) @@ -480,11 +480,11 @@ public: virtual std::unique_ptr make_splitter() const override { class splitter : public token_range_splitter { - token_metadata2_ptr _tmptr; // To keep the tablet map alive. + token_metadata_ptr _tmptr; // To keep the tablet map alive. 
const tablet_map& _tmap; std::optional _next; public: - splitter(token_metadata2_ptr tmptr, const tablet_map& tmap) + splitter(token_metadata_ptr tmptr, const tablet_map& tmap) : _tmptr(std::move(tmptr)) , _tmap(tmap) { } @@ -548,7 +548,7 @@ std::unordered_set tablet_aware_replication_strategy::recognized_tablet } effective_replication_map_ptr tablet_aware_replication_strategy::do_make_replication_map( - table_id table, replication_strategy_ptr rs, token_metadata2_ptr tm, size_t replication_factor) const { + table_id table, replication_strategy_ptr rs, token_metadata_ptr tm, size_t replication_factor) const { return seastar::make_shared(table, std::move(rs), std::move(tm), replication_factor); } diff --git a/locator/token_metadata.cc b/locator/token_metadata.cc index 320ca7ae4b..0b2517e83f 100644 --- a/locator/token_metadata.cc +++ b/locator/token_metadata.cc @@ -27,12 +27,6 @@ namespace locator { static logging::logger tlogger("token_metadata"); -template -inline static constexpr const topology::key_kind kind_for_node_id_type - = std::is_same_v - ? topology::key_kind::inet_address - : topology::key_kind::host_id; - template static void remove_by_value(C& container, V value) { for (auto it = container.begin(); it != container.end();) { @@ -44,7 +38,6 @@ static void remove_by_value(C& container, V value) { } } -template class token_metadata_impl final { private: /** @@ -53,17 +46,17 @@ private: * multiple tokens. Hence, the BiMultiValMap collection. 
*/ // FIXME: have to be BiMultiValMap - std::unordered_map _token_to_endpoint_map; + std::unordered_map _token_to_endpoint_map; // Track the unique set of nodes in _token_to_endpoint_map - std::unordered_set _normal_token_owners; + std::unordered_set _normal_token_owners; - std::unordered_map _bootstrap_tokens; - std::unordered_set _leaving_endpoints; + std::unordered_map _bootstrap_tokens; + std::unordered_set _leaving_endpoints; // The map between the existing node to be replaced and the replacing node - std::unordered_map _replacing_endpoints; + std::unordered_map _replacing_endpoints; - std::optional> _topology_change_info; + std::optional _topology_change_info; std::vector _sorted_tokens; @@ -99,26 +92,26 @@ private: struct shallow_copy {}; public: token_metadata_impl(shallow_copy, const token_metadata_impl& o) noexcept - : _topology(topology::config{}, kind_for_node_id_type) + : _topology(topology::config{}, topology::key_kind::host_id) {} - token_metadata_impl(token_metadata::config cfg) noexcept : _topology(std::move(cfg.topo_cfg), kind_for_node_id_type) {}; + token_metadata_impl(token_metadata::config cfg) noexcept : _topology(std::move(cfg.topo_cfg), topology::key_kind::host_id) {}; token_metadata_impl(const token_metadata_impl&) = delete; // it's too huge for direct copy, use clone_async() token_metadata_impl(token_metadata_impl&&) noexcept = default; const std::vector& sorted_tokens() const; - future<> update_normal_tokens(std::unordered_set tokens, NodeId endpoint); + future<> update_normal_tokens(std::unordered_set tokens, host_id endpoint); const token& first_token(const token& start) const; size_t first_token_index(const token& start) const; - std::optional get_endpoint(const token& token) const; - std::vector get_tokens(const NodeId& addr) const; - const std::unordered_map& get_token_to_endpoint() const { + std::optional get_endpoint(const token& token) const; + std::vector get_tokens(const host_id& addr) const; + const std::unordered_map& 
get_token_to_endpoint() const { return _token_to_endpoint_map; } - const std::unordered_set& get_leaving_endpoints() const { + const std::unordered_set& get_leaving_endpoints() const { return _leaving_endpoints; } - const std::unordered_map& get_bootstrap_tokens() const { + const std::unordered_map& get_bootstrap_tokens() const { return _bootstrap_tokens; } @@ -138,9 +131,9 @@ public: * * @return The requested range (see the description above) */ - boost::iterator_range::tokens_iterator> ring_range(const token& start) const; + boost::iterator_range ring_range(const token& start) const; - boost::iterator_range::tokens_iterator> ring_range(dht::ring_position_view pos) const; + boost::iterator_range ring_range(dht::ring_position_view pos) const; topology& get_topology() { return _topology; @@ -176,30 +169,30 @@ public: /** @return a copy of the endpoint-to-id map for read-only operations */ std::unordered_map get_endpoint_to_host_id_map_for_reading() const; - void add_bootstrap_token(token t, NodeId endpoint); + void add_bootstrap_token(token t, host_id endpoint); - void add_bootstrap_tokens(std::unordered_set tokens, NodeId endpoint); + void add_bootstrap_tokens(std::unordered_set tokens, host_id endpoint); void remove_bootstrap_tokens(std::unordered_set tokens); - void add_leaving_endpoint(NodeId endpoint); - void del_leaving_endpoint(NodeId endpoint); + void add_leaving_endpoint(host_id endpoint); + void del_leaving_endpoint(host_id endpoint); public: - void remove_endpoint(NodeId endpoint); + void remove_endpoint(host_id endpoint); - bool is_normal_token_owner(NodeId endpoint) const; + bool is_normal_token_owner(host_id endpoint) const; - bool is_leaving(NodeId endpoint) const; + bool is_leaving(host_id endpoint) const; // Is this node being replaced by another node - bool is_being_replaced(NodeId endpoint) const; + bool is_being_replaced(host_id endpoint) const; // Is any node being replaced by another node bool is_any_node_being_replaced() const; - void 
add_replacing_endpoint(NodeId existing_node, NodeId replacing_node); + void add_replacing_endpoint(host_id existing_node, host_id replacing_node); - void del_replacing_endpoint(NodeId existing_node); + void del_replacing_endpoint(host_id existing_node); public: /** @@ -248,8 +241,8 @@ public: static range interval_to_range(boost::icl::interval::interval_type i); public: - future<> update_topology_change_info(dc_rack_fn& get_dc_rack); - const std::optional>& get_topology_change_info() const { + future<> update_topology_change_info(dc_rack_fn& get_dc_rack); + const std::optional& get_topology_change_info() const { return _topology_change_info; } public: @@ -260,7 +253,7 @@ public: // node that is still joining the cluster, e.g., a node that is still // streaming data before it finishes the bootstrap process and turns into // NORMAL status. - const std::unordered_set& get_all_endpoints() const noexcept { + const std::unordered_set& get_all_endpoints() const noexcept { return _normal_token_owners; } @@ -290,11 +283,11 @@ public: void set_version(token_metadata::version_t version) { if (version <= 0) { on_internal_error(tlogger, - format("token_metadata_impl::set_version: invalid new version {}", version)); + format("token_metadata_impl::set_version: invalid new version {}", version)); } if (version < _version) { on_internal_error(tlogger, - format("token_metadata_impl::set_version: new version can't be smaller than the previous one, " + format("token_metadata_impl::set_version: new version can't be smaller than the previous one, " "new version {}, previous version {}", version, _version)); } _version = version; @@ -303,31 +296,26 @@ public: _version_tracker = std::move(tracker); } - friend class generic_token_metadata; + friend class token_metadata; }; -template -thread_local long token_metadata_impl::_static_ring_version; +thread_local long token_metadata_impl::_static_ring_version; -template -generic_token_metadata::tokens_iterator::tokens_iterator(const token& 
start, const token_metadata_impl* token_metadata) +token_metadata::tokens_iterator::tokens_iterator(const token& start, const token_metadata_impl* token_metadata) : _token_metadata(token_metadata) { _cur_it = _token_metadata->sorted_tokens().begin() + _token_metadata->first_token_index(start); _remaining = _token_metadata->sorted_tokens().size(); } -template -bool generic_token_metadata::tokens_iterator::operator==(const tokens_iterator& it) const { +bool token_metadata::tokens_iterator::operator==(const tokens_iterator& it) const { return _remaining == it._remaining; } -template -const token& generic_token_metadata::tokens_iterator::operator*() const { +const token& token_metadata::tokens_iterator::operator*() const { return *_cur_it; } -template -typename generic_token_metadata::tokens_iterator& generic_token_metadata::tokens_iterator::operator++() { +token_metadata::tokens_iterator& token_metadata::tokens_iterator::operator++() { ++_cur_it; if (_cur_it == _token_metadata->sorted_tokens().end()) { _cur_it = _token_metadata->sorted_tokens().begin(); @@ -336,22 +324,19 @@ typename generic_token_metadata::tokens_iterator& generic_token_metadata return *this; } -template -host_id generic_token_metadata::get_my_id() const { +host_id token_metadata::get_my_id() const { return get_topology().get_config().this_host_id; } -template inline -boost::iterator_range::tokens_iterator> -token_metadata_impl::ring_range(const token& start) const { - auto begin = typename generic_token_metadata::tokens_iterator(start, this); - auto end = typename generic_token_metadata::tokens_iterator(); +boost::iterator_range +token_metadata_impl::ring_range(const token& start) const { + auto begin = token_metadata::tokens_iterator(start, this); + auto end = token_metadata::tokens_iterator(); return boost::make_iterator_range(begin, end); } -template -future>> token_metadata_impl::clone_async() const noexcept { +future> token_metadata_impl::clone_async() const noexcept { auto ret = co_await 
clone_only_token_map(); ret->_bootstrap_tokens.reserve(_bootstrap_tokens.size()); for (const auto& p : _bootstrap_tokens) { @@ -365,8 +350,7 @@ future>> token_metadata_impl co_return ret; } -template -future>> token_metadata_impl::clone_only_token_map(bool clone_sorted_tokens) const noexcept { +future> token_metadata_impl::clone_only_token_map(bool clone_sorted_tokens) const noexcept { auto ret = std::make_unique(shallow_copy{}, *this); ret->_token_to_endpoint_map.reserve(_token_to_endpoint_map.size()); for (const auto& p : _token_to_endpoint_map) { @@ -384,8 +368,7 @@ future>> token_metadata_impl co_return ret; } -template -future<> token_metadata_impl::clear_gently() noexcept { +future<> token_metadata_impl::clear_gently() noexcept { co_await utils::clear_gently(_token_to_endpoint_map); co_await utils::clear_gently(_normal_token_owners); co_await utils::clear_gently(_bootstrap_tokens); @@ -397,8 +380,7 @@ future<> token_metadata_impl::clear_gently() noexcept { co_return; } -template -void token_metadata_impl::sort_tokens() { +void token_metadata_impl::sort_tokens() { std::vector sorted; sorted.reserve(_token_to_endpoint_map.size()); @@ -411,28 +393,23 @@ void token_metadata_impl::sort_tokens() { _sorted_tokens = std::move(sorted); } -template -const tablet_metadata& generic_token_metadata::tablets() const { +const tablet_metadata& token_metadata::tablets() const { return _impl->tablets(); } -template -tablet_metadata& generic_token_metadata::tablets() { +tablet_metadata& token_metadata::tablets() { return _impl->tablets(); } -template -void generic_token_metadata::set_tablets(tablet_metadata tm) { +void token_metadata::set_tablets(tablet_metadata tm) { _impl->set_tablets(std::move(tm)); } -template -const std::vector& token_metadata_impl::sorted_tokens() const { +const std::vector& token_metadata_impl::sorted_tokens() const { return _sorted_tokens; } -template -std::vector token_metadata_impl::get_tokens(const NodeId& addr) const { +std::vector 
token_metadata_impl::get_tokens(const host_id& addr) const { std::vector res; for (auto&& i : _token_to_endpoint_map) { if (i.second == addr) { @@ -443,8 +420,7 @@ std::vector token_metadata_impl::get_tokens(const NodeId& addr) c return res; } -template -future<> token_metadata_impl::update_normal_tokens(std::unordered_set tokens, NodeId endpoint) { +future<> token_metadata_impl::update_normal_tokens(std::unordered_set tokens, host_id endpoint) { if (tokens.empty()) { co_return; } @@ -483,7 +459,7 @@ future<> token_metadata_impl::update_normal_tokens(std::unordered_set(t, endpoint)); + auto prev = _token_to_endpoint_map.insert(std::pair(t, endpoint)); should_sort_tokens |= prev.second; // new token inserted -> sort if (prev.first->second != endpoint) { tlogger.debug("Token {} changing ownership from {} to {}", t, prev.first->second, endpoint); @@ -501,8 +477,7 @@ future<> token_metadata_impl::update_normal_tokens(std::unordered_set -size_t token_metadata_impl::first_token_index(const token& start) const { +size_t token_metadata_impl::first_token_index(const token& start) const { if (_sorted_tokens.empty()) { auto msg = format("sorted_tokens is empty in first_token_index!"); tlogger.error("{}", msg); @@ -516,13 +491,11 @@ size_t token_metadata_impl::first_token_index(const token& start) const } } -template -const token& token_metadata_impl::first_token(const token& start) const { +const token& token_metadata_impl::first_token(const token& start) const { return _sorted_tokens[first_token_index(start)]; } -template -std::optional token_metadata_impl::get_endpoint(const token& token) const { +std::optional token_metadata_impl::get_endpoint(const token& token) const { auto it = _token_to_endpoint_map.find(token); if (it == _token_to_endpoint_map.end()) { return std::nullopt; @@ -531,8 +504,7 @@ std::optional token_metadata_impl::get_endpoint(const token& tok } } -template -void token_metadata_impl::debug_show() const { +void token_metadata_impl::debug_show() const { 
auto reporter = std::make_shared>(); reporter->set_callback ([reporter, this] { fmt::print("Endpoint -> Token\n"); @@ -547,13 +519,11 @@ void token_metadata_impl::debug_show() const { reporter->arm_periodic(std::chrono::seconds(1)); } -template -void token_metadata_impl::update_host_id(const host_id& host_id, inet_address endpoint) { +void token_metadata_impl::update_host_id(const host_id& host_id, inet_address endpoint) { _topology.add_or_update_endpoint(endpoint, host_id); } -template -host_id token_metadata_impl::get_host_id(inet_address endpoint) const { +host_id token_metadata_impl::get_host_id(inet_address endpoint) const { if (const auto* node = _topology.find_node(endpoint)) [[likely]] { return node->host_id(); } else { @@ -561,8 +531,7 @@ host_id token_metadata_impl::get_host_id(inet_address endpoint) const { } } -template -std::optional token_metadata_impl::get_host_id_if_known(inet_address endpoint) const { +std::optional token_metadata_impl::get_host_id_if_known(inet_address endpoint) const { if (const auto* node = _topology.find_node(endpoint)) [[likely]] { return node->host_id(); } else { @@ -570,8 +539,7 @@ std::optional token_metadata_impl::get_host_id_if_known(inet_ad } } -template -std::optional token_metadata_impl::get_endpoint_for_host_id_if_known(host_id host_id) const { +std::optional token_metadata_impl::get_endpoint_for_host_id_if_known(host_id host_id) const { if (const auto* node = _topology.find_node(host_id)) [[likely]] { return node->endpoint(); } else { @@ -579,8 +547,7 @@ std::optional token_metadata_impl::get_endpoint_for_host_i } } -template -inet_address token_metadata_impl::get_endpoint_for_host_id(host_id host_id) const { +inet_address token_metadata_impl::get_endpoint_for_host_id(host_id host_id) const { if (const auto* node = _topology.find_node(host_id)) [[likely]] { return node->endpoint(); } else { @@ -588,8 +555,7 @@ inet_address token_metadata_impl::get_endpoint_for_host_id(host_id host_ } } -template -std::unordered_map 
token_metadata_impl::get_endpoint_to_host_id_map_for_reading() const { +std::unordered_map token_metadata_impl::get_endpoint_to_host_id_map_for_reading() const { const auto& nodes = _topology.get_nodes_by_endpoint(); std::unordered_map map; map.reserve(nodes.size()); @@ -607,25 +573,21 @@ std::unordered_map token_metadata_impl::get_endpo return map; } -template -bool token_metadata_impl::is_normal_token_owner(NodeId endpoint) const { +bool token_metadata_impl::is_normal_token_owner(host_id endpoint) const { return _normal_token_owners.contains(endpoint); } -template -void token_metadata_impl::add_bootstrap_token(token t, NodeId endpoint) { +void token_metadata_impl::add_bootstrap_token(token t, host_id endpoint) { std::unordered_set tokens{t}; add_bootstrap_tokens(tokens, endpoint); } -template -boost::iterator_range::tokens_iterator> -token_metadata_impl::ring_range(const dht::ring_position_view start) const { +boost::iterator_range +token_metadata_impl::ring_range(const dht::ring_position_view start) const { return ring_range(start.token()); } -template -void token_metadata_impl::add_bootstrap_tokens(std::unordered_set tokens, NodeId endpoint) { +void token_metadata_impl::add_bootstrap_tokens(std::unordered_set tokens, host_id endpoint) { for (auto t : tokens) { auto old_endpoint = _bootstrap_tokens.find(t); if (old_endpoint != _bootstrap_tokens.end() && (*old_endpoint).second != endpoint) { @@ -640,15 +602,14 @@ void token_metadata_impl::add_bootstrap_tokens(std::unordered_set } } - std::erase_if(_bootstrap_tokens, [endpoint] (const std::pair& n) { return n.second == endpoint; }); + std::erase_if(_bootstrap_tokens, [endpoint] (const std::pair& n) { return n.second == endpoint; }); for (auto t : tokens) { _bootstrap_tokens[t] = endpoint; } } -template -void token_metadata_impl::remove_bootstrap_tokens(std::unordered_set tokens) { +void token_metadata_impl::remove_bootstrap_tokens(std::unordered_set tokens) { if (tokens.empty()) { tlogger.warn("tokens is empty in 
remove_bootstrap_tokens!"); return; @@ -658,23 +619,19 @@ void token_metadata_impl::remove_bootstrap_tokens(std::unordered_set -bool token_metadata_impl::is_leaving(NodeId endpoint) const { +bool token_metadata_impl::is_leaving(host_id endpoint) const { return _leaving_endpoints.contains(endpoint); } -template -bool token_metadata_impl::is_being_replaced(NodeId endpoint) const { +bool token_metadata_impl::is_being_replaced(host_id endpoint) const { return _replacing_endpoints.contains(endpoint); } -template -bool token_metadata_impl::is_any_node_being_replaced() const { +bool token_metadata_impl::is_any_node_being_replaced() const { return !_replacing_endpoints.empty(); } -template -void token_metadata_impl::remove_endpoint(NodeId endpoint) { +void token_metadata_impl::remove_endpoint(host_id endpoint) { remove_by_value(_bootstrap_tokens, endpoint); remove_by_value(_token_to_endpoint_map, endpoint); _normal_token_owners.erase(endpoint); @@ -684,8 +641,7 @@ void token_metadata_impl::remove_endpoint(NodeId endpoint) { invalidate_cached_rings(); } -template -token token_metadata_impl::get_predecessor(token t) const { +token token_metadata_impl::get_predecessor(token t) const { auto& tokens = sorted_tokens(); auto it = std::lower_bound(tokens.begin(), tokens.end(), t); if (it == tokens.end() || *it != t) { @@ -701,8 +657,7 @@ token token_metadata_impl::get_predecessor(token t) const { } } -template -dht::token_range_vector token_metadata_impl::get_primary_ranges_for(std::unordered_set tokens) const { +dht::token_range_vector token_metadata_impl::get_primary_ranges_for(std::unordered_set tokens) const { dht::token_range_vector ranges; ranges.reserve(tokens.size() + 1); // one of the ranges will wrap for (auto right : tokens) { @@ -715,14 +670,12 @@ dht::token_range_vector token_metadata_impl::get_primary_ranges_for(std: return ranges; } -template -dht::token_range_vector token_metadata_impl::get_primary_ranges_for(token right) const { +dht::token_range_vector 
token_metadata_impl::get_primary_ranges_for(token right) const { return get_primary_ranges_for(std::unordered_set{right}); } -template boost::icl::interval::interval_type -token_metadata_impl::range_to_interval(range r) { +token_metadata_impl::range_to_interval(range r) { bool start_inclusive = false; bool end_inclusive = false; token start = dht::minimum_token(); @@ -749,9 +702,8 @@ token_metadata_impl::range_to_interval(range r) { } } -template range -token_metadata_impl::interval_to_range(boost::icl::interval::interval_type i) { +token_metadata_impl::interval_to_range(boost::icl::interval::interval_type i) { bool start_inclusive; bool end_inclusive; auto bounds = i.bounds().bits(); @@ -773,8 +725,7 @@ token_metadata_impl::interval_to_range(boost::icl::interval::inte return range({{i.lower(), start_inclusive}}, {{i.upper(), end_inclusive}}); } -template -future<> token_metadata_impl::update_topology_change_info(dc_rack_fn& get_dc_rack) { +future<> token_metadata_impl::update_topology_change_info(dc_rack_fn& get_dc_rack) { if (_bootstrap_tokens.empty() && _leaving_endpoints.empty() && _replacing_endpoints.empty()) { co_await utils::clear_gently(_topology_change_info); _topology_change_info.reset(); @@ -787,7 +738,7 @@ future<> token_metadata_impl::update_topology_change_info(dc_rack_fn> new_normal_tokens; + std::unordered_map> new_normal_tokens; if (!_replacing_endpoints.empty()) { for (const auto& [token, inet_address]: _token_to_endpoint_map) { const auto it = _replacing_endpoints.find(inet_address); @@ -847,21 +798,19 @@ future<> token_metadata_impl::update_topology_change_info(dc_rack_fn>(std::move(target_token_metadata)), - base_token_metadata ? make_lw_shared>(std::move(base_token_metadata)): nullptr, + _topology_change_info.emplace(make_lw_shared(std::move(target_token_metadata)), + base_token_metadata ? 
make_lw_shared(std::move(base_token_metadata)): nullptr, std::move(all_tokens), _read_new); co_await utils::clear_gently(prev_value); } -template -size_t token_metadata_impl::count_normal_token_owners() const { +size_t token_metadata_impl::count_normal_token_owners() const { return _normal_token_owners.size(); } -template -future<> token_metadata_impl::update_normal_token_owners() { - std::unordered_set eps; +future<> token_metadata_impl::update_normal_token_owners() { + std::unordered_set eps; for (auto [t, ep]: _token_to_endpoint_map) { eps.insert(ep); co_await coroutine::maybe_yield(); @@ -869,25 +818,21 @@ future<> token_metadata_impl::update_normal_token_owners() { _normal_token_owners = std::move(eps); } -template -void token_metadata_impl::add_leaving_endpoint(NodeId endpoint) { +void token_metadata_impl::add_leaving_endpoint(host_id endpoint) { _leaving_endpoints.emplace(endpoint); } -template -void token_metadata_impl::del_leaving_endpoint(NodeId endpoint) { +void token_metadata_impl::del_leaving_endpoint(host_id endpoint) { _leaving_endpoints.erase(endpoint); } -template -void token_metadata_impl::add_replacing_endpoint(NodeId existing_node, NodeId replacing_node) { +void token_metadata_impl::add_replacing_endpoint(host_id existing_node, host_id replacing_node) { tlogger.info("Added node {} as pending replacing endpoint which replaces existing node {}", replacing_node, existing_node); _replacing_endpoints[existing_node] = replacing_node; } -template -void token_metadata_impl::del_replacing_endpoint(NodeId existing_node) { +void token_metadata_impl::del_replacing_endpoint(host_id existing_node) { if (_replacing_endpoints.contains(existing_node)) { tlogger.info("Removed node {} as pending replacing endpoint which replaces existing node {}", _replacing_endpoints[existing_node], existing_node); @@ -895,9 +840,8 @@ void token_metadata_impl::del_replacing_endpoint(NodeId existing_node) { _replacing_endpoints.erase(existing_node); } -template 
-topology_change_info::topology_change_info(lw_shared_ptr> target_token_metadata_, - lw_shared_ptr> base_token_metadata_, +topology_change_info::topology_change_info(lw_shared_ptr target_token_metadata_, + lw_shared_ptr base_token_metadata_, std::vector all_tokens_, token_metadata::read_new_t read_new_) : target_token_metadata(std::move(target_token_metadata_)) @@ -907,136 +851,96 @@ topology_change_info::topology_change_info(lw_shared_ptr -future<> topology_change_info::clear_gently() { +future<> topology_change_info::clear_gently() { co_await utils::clear_gently(target_token_metadata); co_await utils::clear_gently(base_token_metadata); co_await utils::clear_gently(all_tokens); } -template -generic_token_metadata::generic_token_metadata(std::unique_ptr> impl) +token_metadata::token_metadata(std::unique_ptr impl) : _impl(std::move(impl)) { } -template -template -requires std::is_same_v -generic_token_metadata::generic_token_metadata(std::unique_ptr> impl, - token_metadata2 new_value) - : _impl(std::move(impl)) - , _new_value(make_token_metadata2_ptr(std::move(new_value))) +token_metadata::token_metadata(config cfg) + : _impl(std::make_unique(cfg)) { } -template -template -requires std::is_same_v -generic_token_metadata::generic_token_metadata(token_metadata2_ptr new_value) - : _impl(nullptr) - , _new_value(std::move(new_value)) -{ -} +token_metadata::~token_metadata() = default; -template -generic_token_metadata::generic_token_metadata(config cfg) - : _impl(std::make_unique>(cfg)) -{ - if constexpr (std::is_same_v) { - _new_value = make_token_metadata2_ptr(std::move(cfg)); - } -} +token_metadata::token_metadata(token_metadata&&) noexcept = default; -template -generic_token_metadata::~generic_token_metadata() = default; +token_metadata& token_metadata::token_metadata::operator=(token_metadata&&) noexcept = default; -template -generic_token_metadata::generic_token_metadata(generic_token_metadata&&) noexcept = default; - -template -generic_token_metadata& 
generic_token_metadata::generic_token_metadata::operator=(generic_token_metadata&&) noexcept = default; - -template const std::vector& -generic_token_metadata::sorted_tokens() const { +token_metadata::sorted_tokens() const { return _impl->sorted_tokens(); } -template future<> -generic_token_metadata::update_normal_tokens(std::unordered_set tokens, NodeId endpoint) { +token_metadata::update_normal_tokens(std::unordered_set tokens, host_id endpoint) { return _impl->update_normal_tokens(std::move(tokens), endpoint); } -template const token& -generic_token_metadata::first_token(const token& start) const { +token_metadata::first_token(const token& start) const { return _impl->first_token(start); } -template size_t -generic_token_metadata::first_token_index(const token& start) const { +token_metadata::first_token_index(const token& start) const { return _impl->first_token_index(start); } -template -std::optional -generic_token_metadata::get_endpoint(const token& token) const { +std::optional +token_metadata::get_endpoint(const token& token) const { return _impl->get_endpoint(token); } -template std::vector -generic_token_metadata::get_tokens(const NodeId& addr) const { +token_metadata::get_tokens(const host_id& addr) const { return _impl->get_tokens(addr); } -template -const std::unordered_map& -generic_token_metadata::get_token_to_endpoint() const { +const std::unordered_map& +token_metadata::get_token_to_endpoint() const { return _impl->get_token_to_endpoint(); } -template -const std::unordered_set& -generic_token_metadata::get_leaving_endpoints() const { +const std::unordered_set& +token_metadata::get_leaving_endpoints() const { return _impl->get_leaving_endpoints(); } -template -const std::unordered_map& -generic_token_metadata::get_bootstrap_tokens() const { +const std::unordered_map& +token_metadata::get_bootstrap_tokens() const { return _impl->get_bootstrap_tokens(); } -template void -generic_token_metadata::update_topology(NodeId ep, std::optional opt_dr, 
std::optional opt_st, std::optional shard_count) { +token_metadata::update_topology(host_id ep, std::optional opt_dr, std::optional opt_st, std::optional shard_count) { _impl->update_topology(ep, std::move(opt_dr), std::move(opt_st), std::move(shard_count)); } -template -boost::iterator_range::tokens_iterator> -generic_token_metadata::ring_range(const token& start) const { +boost::iterator_range +token_metadata::ring_range(const token& start) const { return _impl->ring_range(start); } -template -boost::iterator_range::tokens_iterator> -generic_token_metadata::ring_range(dht::ring_position_view start) const { +boost::iterator_range +token_metadata::ring_range(dht::ring_position_view start) const { return _impl->ring_range(start); } class token_metadata_ring_splitter : public locator::token_range_splitter { - token_metadata2_ptr _tmptr; - boost::iterator_range _range; + token_metadata_ptr _tmptr; + boost::iterator_range _range; public: - token_metadata_ring_splitter(token_metadata2_ptr tmptr) + token_metadata_ring_splitter(token_metadata_ptr tmptr) : _tmptr(std::move(tmptr)) , _range(_tmptr->sorted_tokens().empty() // ring_range() throws if the ring is empty - ? boost::make_iterator_range(token_metadata2::tokens_iterator(), token_metadata2::tokens_iterator()) + ? 
boost::make_iterator_range(token_metadata::tokens_iterator(), token_metadata::tokens_iterator()) : _tmptr->ring_range(dht::minimum_token())) { } @@ -1054,239 +958,179 @@ public: } }; -std::unique_ptr make_splitter(token_metadata2_ptr tmptr) { +std::unique_ptr make_splitter(token_metadata_ptr tmptr) { return std::make_unique(std::move(tmptr)); } -template topology& -generic_token_metadata::get_topology() { +token_metadata::get_topology() { return _impl->get_topology(); } -template const topology& -generic_token_metadata::get_topology() const { +token_metadata::get_topology() const { return _impl->get_topology(); } -template void -generic_token_metadata::debug_show() const { +token_metadata::debug_show() const { _impl->debug_show(); } -template void -generic_token_metadata::update_host_id(const host_id& host_id, inet_address endpoint) { +token_metadata::update_host_id(const host_id& host_id, inet_address endpoint) { _impl->update_host_id(host_id, endpoint); } -template host_id -generic_token_metadata::get_host_id(inet_address endpoint) const { +token_metadata::get_host_id(inet_address endpoint) const { return _impl->get_host_id(endpoint); } -template std::optional -generic_token_metadata::get_host_id_if_known(inet_address endpoint) const { +token_metadata::get_host_id_if_known(inet_address endpoint) const { return _impl->get_host_id_if_known(endpoint); } -template -std::optional::inet_address> -generic_token_metadata::get_endpoint_for_host_id_if_known(host_id host_id) const { +std::optional +token_metadata::get_endpoint_for_host_id_if_known(host_id host_id) const { return _impl->get_endpoint_for_host_id_if_known(host_id); } -template -typename generic_token_metadata::inet_address -generic_token_metadata::get_endpoint_for_host_id(host_id host_id) const { +token_metadata::inet_address +token_metadata::get_endpoint_for_host_id(host_id host_id) const { return _impl->get_endpoint_for_host_id(host_id); } -template -host_id_or_endpoint 
generic_token_metadata::parse_host_id_and_endpoint(const sstring& host_id_string) const { +host_id_or_endpoint token_metadata::parse_host_id_and_endpoint(const sstring& host_id_string) const { auto res = host_id_or_endpoint(host_id_string); res.resolve(*this); return res; } -template std::unordered_map -generic_token_metadata::get_endpoint_to_host_id_map_for_reading() const { +token_metadata::get_endpoint_to_host_id_map_for_reading() const { return _impl->get_endpoint_to_host_id_map_for_reading(); } -template void -generic_token_metadata::add_bootstrap_token(token t, NodeId endpoint) { +token_metadata::add_bootstrap_token(token t, host_id endpoint) { _impl->add_bootstrap_token(t, endpoint); } -template void -generic_token_metadata::add_bootstrap_tokens(std::unordered_set tokens, NodeId endpoint) { +token_metadata::add_bootstrap_tokens(std::unordered_set tokens, host_id endpoint) { _impl->add_bootstrap_tokens(std::move(tokens), endpoint); } -template void -generic_token_metadata::remove_bootstrap_tokens(std::unordered_set tokens) { +token_metadata::remove_bootstrap_tokens(std::unordered_set tokens) { _impl->remove_bootstrap_tokens(std::move(tokens)); } -template void -generic_token_metadata::add_leaving_endpoint(NodeId endpoint) { +token_metadata::add_leaving_endpoint(host_id endpoint) { _impl->add_leaving_endpoint(endpoint); } -template void -generic_token_metadata::del_leaving_endpoint(NodeId endpoint) { +token_metadata::del_leaving_endpoint(host_id endpoint) { _impl->del_leaving_endpoint(endpoint); } -template void -generic_token_metadata::remove_endpoint(NodeId endpoint) { +token_metadata::remove_endpoint(host_id endpoint) { _impl->remove_endpoint(endpoint); _impl->sort_tokens(); } -template bool -generic_token_metadata::is_normal_token_owner(NodeId endpoint) const { +token_metadata::is_normal_token_owner(host_id endpoint) const { return _impl->is_normal_token_owner(endpoint); } -template bool -generic_token_metadata::is_leaving(NodeId endpoint) const { 
+token_metadata::is_leaving(host_id endpoint) const { return _impl->is_leaving(endpoint); } -template bool -generic_token_metadata::is_being_replaced(NodeId endpoint) const { +token_metadata::is_being_replaced(host_id endpoint) const { return _impl->is_being_replaced(endpoint); } -template bool -generic_token_metadata::is_any_node_being_replaced() const { +token_metadata::is_any_node_being_replaced() const { return _impl->is_any_node_being_replaced(); } -template -void generic_token_metadata::add_replacing_endpoint(NodeId existing_node, NodeId replacing_node) { +void token_metadata::add_replacing_endpoint(host_id existing_node, host_id replacing_node) { _impl->add_replacing_endpoint(existing_node, replacing_node); } -template -void generic_token_metadata::del_replacing_endpoint(NodeId existing_node) { +void token_metadata::del_replacing_endpoint(host_id existing_node) { _impl->del_replacing_endpoint(existing_node); } -template -future> generic_token_metadata::clone_async() const noexcept { - if constexpr (std::is_same_v) { - co_return !holds_alternative(_new_value) - ? generic_token_metadata(co_await _impl->clone_async(), co_await get_new()->clone_async()) - : generic_token_metadata(co_await _impl->clone_async()); - } else { - co_return generic_token_metadata(co_await _impl->clone_async()); - } +future token_metadata::clone_async() const noexcept { + co_return token_metadata(co_await _impl->clone_async()); } -template -future> -generic_token_metadata::clone_only_token_map() const noexcept { - if constexpr (std::is_same_v) { - co_return !holds_alternative(_new_value) - ? 
generic_token_metadata(co_await _impl->clone_only_token_map(), co_await get_new()->clone_only_token_map()) - : generic_token_metadata(co_await _impl->clone_only_token_map()); - } else { - co_return generic_token_metadata(co_await _impl->clone_only_token_map()); - } +future +token_metadata::clone_only_token_map() const noexcept { + co_return token_metadata(co_await _impl->clone_only_token_map()); } -template -future> -generic_token_metadata::clone_after_all_left() const noexcept { - if constexpr (std::is_same_v) { - co_return !holds_alternative(_new_value) - ? generic_token_metadata(co_await _impl->clone_after_all_left(), co_await get_new()->clone_after_all_left()) - : generic_token_metadata(co_await _impl->clone_after_all_left()); - } else { - co_return generic_token_metadata(co_await _impl->clone_after_all_left()); - } +future +token_metadata::clone_after_all_left() const noexcept { + co_return token_metadata(co_await _impl->clone_after_all_left()); } -template -future<> generic_token_metadata::clear_gently() noexcept { - co_await _impl->clear_gently(); - if constexpr (std::is_same_v) { - if (holds_alternative>(_new_value)) { - co_await get_new()->clear_gently(); - } - } +future<> token_metadata::clear_gently() noexcept { + return _impl->clear_gently(); } -template dht::token_range_vector -generic_token_metadata::get_primary_ranges_for(std::unordered_set tokens) const { +token_metadata::get_primary_ranges_for(std::unordered_set tokens) const { return _impl->get_primary_ranges_for(std::move(tokens)); } -template dht::token_range_vector -generic_token_metadata::get_primary_ranges_for(token right) const { +token_metadata::get_primary_ranges_for(token right) const { return _impl->get_primary_ranges_for(right); } -template boost::icl::interval::interval_type -generic_token_metadata::range_to_interval(range r) { - return token_metadata_impl::range_to_interval(std::move(r)); +token_metadata::range_to_interval(range r) { + return 
token_metadata_impl::range_to_interval(std::move(r)); } -template range -generic_token_metadata::interval_to_range(boost::icl::interval::interval_type i) { - return token_metadata_impl::interval_to_range(std::move(i)); +token_metadata::interval_to_range(boost::icl::interval::interval_type i) { + return token_metadata_impl::interval_to_range(std::move(i)); } -template future<> -generic_token_metadata::update_topology_change_info(dc_rack_fn& get_dc_rack) { +token_metadata::update_topology_change_info(dc_rack_fn& get_dc_rack) { return _impl->update_topology_change_info(get_dc_rack); } -template -const std::optional>& -generic_token_metadata::get_topology_change_info() const { +const std::optional& +token_metadata::get_topology_change_info() const { return _impl->get_topology_change_info(); } -template token -generic_token_metadata::get_predecessor(token t) const { +token_metadata::get_predecessor(token t) const { return _impl->get_predecessor(t); } -template -const std::unordered_set& -generic_token_metadata::get_all_endpoints() const { +const std::unordered_set& +token_metadata::get_all_endpoints() const { return _impl->get_all_endpoints(); } -template -template -requires std::is_same_v -std::unordered_set generic_token_metadata::get_all_ips() const { +std::unordered_set token_metadata::get_all_ips() const { const auto& host_ids = _impl->get_all_endpoints(); std::unordered_set result; result.reserve(host_ids.size()); @@ -1296,47 +1140,40 @@ std::unordered_set generic_token_metadata::get_all_ip return result; } -template size_t -generic_token_metadata::count_normal_token_owners() const { +token_metadata::count_normal_token_owners() const { return _impl->count_normal_token_owners(); } -template void -generic_token_metadata::set_read_new(read_new_t read_new) { +token_metadata::set_read_new(read_new_t read_new) { _impl->set_read_new(read_new); } -template long -generic_token_metadata::get_ring_version() const { +token_metadata::get_ring_version() const { return 
_impl->get_ring_version(); } -template void -generic_token_metadata::invalidate_cached_rings() { +token_metadata::invalidate_cached_rings() { _impl->invalidate_cached_rings(); } -template auto -generic_token_metadata::get_version() const -> version_t { +token_metadata::get_version() const -> version_t { return _impl->get_version(); } -template void -generic_token_metadata::set_version(version_t version) { +token_metadata::set_version(version_t version) { _impl->set_version(version); } -template void -generic_token_metadata::set_version_tracker(version_tracker_t tracker) { +token_metadata::set_version_tracker(version_tracker_t tracker) { _impl->set_version_tracker(std::move(tracker)); } -void shared_token_metadata::set(mutable_token_metadata2_ptr tmptr) noexcept { +void shared_token_metadata::set(mutable_token_metadata_ptr tmptr) noexcept { if (_shared->get_ring_version() >= tmptr->get_ring_version()) { on_internal_error(tlogger, format("shared_token_metadata: must not set non-increasing ring_version: {} -> {}", _shared->get_ring_version(), tmptr->get_ring_version())); } @@ -1354,7 +1191,7 @@ void shared_token_metadata::set(mutable_token_metadata2_ptr tmptr) noexcept { void shared_token_metadata::update_fence_version(token_metadata::version_t version) { if (const auto current_version = _shared->get_version(); version > current_version) { - // The generic_token_metadata::version under no circumstance can go backwards. + // The token_metadata::version under no circumstance can go backwards. // Even in case of topology change coordinator moving to another node // this condition must hold, that is why we treat its violation // as an internal error. 
@@ -1375,7 +1212,7 @@ void shared_token_metadata::update_fence_version(token_metadata::version_t versi tlogger.debug("new fence_version is set, version {}", _fence_version); } -future<> shared_token_metadata::mutate_token_metadata(seastar::noncopyable_function (token_metadata2&)> func) { +future<> shared_token_metadata::mutate_token_metadata(seastar::noncopyable_function (token_metadata&)> func) { auto lk = co_await get_lock(); auto tm = co_await _shared->clone_async(); // bump the token_metadata ring_version @@ -1383,17 +1220,17 @@ future<> shared_token_metadata::mutate_token_metadata(seastar::noncopyable_funct // when the modified token_metadata is committed. tm.invalidate_cached_rings(); co_await func(tm); - set(make_token_metadata2_ptr(std::move(tm))); + set(make_token_metadata_ptr(std::move(tm))); } -future<> shared_token_metadata::mutate_on_all_shards(sharded& stm, seastar::noncopyable_function (token_metadata2&)> func) { +future<> shared_token_metadata::mutate_on_all_shards(sharded& stm, seastar::noncopyable_function (token_metadata&)> func) { auto base_shard = this_shard_id(); assert(base_shard == 0); auto lk = co_await stm.local().get_lock(); - std::vector pending_token_metadata_ptr; + std::vector pending_token_metadata_ptr; pending_token_metadata_ptr.resize(smp::count); - auto tmptr = make_token_metadata2_ptr(co_await stm.local().get()->clone_async()); + auto tmptr = make_token_metadata_ptr(co_await stm.local().get()->clone_async()); auto& tm = *tmptr; // bump the token_metadata ring_version // to invalidate cached token/replication mappings @@ -1404,7 +1241,7 @@ future<> shared_token_metadata::mutate_on_all_shards(sharded future<> { - pending_token_metadata_ptr[this_shard_id()] = make_token_metadata2_ptr(co_await tm.clone_async()); + pending_token_metadata_ptr[this_shard_id()] = make_token_metadata_ptr(co_await tm.clone_async()); }); co_await stm.invoke_on_all([&] (shared_token_metadata& stm) { @@ -1441,8 +1278,7 @@ 
host_id_or_endpoint::host_id_or_endpoint(const sstring& s, param_type restrict) } } -template -void host_id_or_endpoint::resolve(const generic_token_metadata& tm) { +void host_id_or_endpoint::resolve(const token_metadata& tm) { if (id) { auto endpoint_opt = tm.get_endpoint_for_host_id_if_known(id); if (!endpoint_opt) { @@ -1458,15 +1294,4 @@ void host_id_or_endpoint::resolve(const generic_token_metadata& tm) { } } -template class generic_token_metadata; -template class generic_token_metadata; -template void host_id_or_endpoint::resolve(const token_metadata& tm); -template void host_id_or_endpoint::resolve(const token_metadata2& tm); -template token_metadata2* generic_token_metadata::get_new<>(); -template const token_metadata2* generic_token_metadata::get_new<>() const; -template lw_shared_ptr generic_token_metadata::get_new_strong<>() const; -template generic_token_metadata::generic_token_metadata(std::unique_ptr>, token_metadata2); -template generic_token_metadata::generic_token_metadata(token_metadata2_ptr); -template std::unordered_set generic_token_metadata::get_all_ips<>() const; - } // namespace locator diff --git a/locator/token_metadata.hh b/locator/token_metadata.hh index 45752a6f21..e77df66a2d 100644 --- a/locator/token_metadata.hh +++ b/locator/token_metadata.hh @@ -43,10 +43,7 @@ class abstract_replication_strategy; using token = dht::token; -template -class generic_token_metadata; -using token_metadata = generic_token_metadata; -using token_metadata2 = generic_token_metadata; +class token_metadata; class tablet_metadata; struct host_id_or_endpoint { @@ -71,29 +68,14 @@ struct host_id_or_endpoint { // Map the host_id to endpoint based on whichever of them is set, // using the token_metadata - template - void resolve(const generic_token_metadata& tm); + void resolve(const token_metadata& tm); }; -template class token_metadata_impl; -template struct topology_change_info; -class generic_token_metadata_base { -public: - struct config { - topology::config 
topo_cfg; - }; - using inet_address = gms::inet_address; - using version_t = service::topology::version_t; - using version_tracker_t = utils::phased_barrier::operation; -}; - -template -class generic_token_metadata final: public generic_token_metadata_base { - std::unique_ptr> _impl; - std::variant, lw_shared_ptr> _new_value; +class token_metadata final { + std::unique_ptr _impl; private: friend class token_metadata_ring_splitter; class tokens_iterator { @@ -105,30 +87,31 @@ private: using reference = token&; public: tokens_iterator() = default; - tokens_iterator(const token& start, const token_metadata_impl* token_metadata); + tokens_iterator(const token& start, const token_metadata_impl* token_metadata); bool operator==(const tokens_iterator& it) const; const token& operator*() const; tokens_iterator& operator++(); private: std::vector::const_iterator _cur_it; size_t _remaining = 0; - const token_metadata_impl* _token_metadata = nullptr; + const token_metadata_impl* _token_metadata = nullptr; - friend class token_metadata_impl; + friend class token_metadata_impl; }; public: - generic_token_metadata(config cfg); - explicit generic_token_metadata(std::unique_ptr> impl); - template - requires std::is_same_v - generic_token_metadata(std::unique_ptr> impl, token_metadata2 new_value); - template - requires std::is_same_v - generic_token_metadata(lw_shared_ptr new_value); - generic_token_metadata(generic_token_metadata&&) noexcept; // Can't use "= default;" - hits some static_assert in unique_ptr - generic_token_metadata& operator=(generic_token_metadata&&) noexcept; - ~generic_token_metadata(); + struct config { + topology::config topo_cfg; + }; + using inet_address = gms::inet_address; + using version_t = service::topology::version_t; + using version_tracker_t = utils::phased_barrier::operation; + + token_metadata(config cfg); + explicit token_metadata(std::unique_ptr impl); + token_metadata(token_metadata&&) noexcept; // Can't use "= default;" - hits some 
static_assert in unique_ptr + token_metadata& operator=(token_metadata&&) noexcept; + ~token_metadata(); const std::vector& sorted_tokens() const; const tablet_metadata& tablets() const; tablet_metadata& tablets(); @@ -138,52 +121,21 @@ public: // // Note: the function is not exception safe! // It must be called only on a temporary copy of the token_metadata - future<> update_normal_tokens(std::unordered_set tokens, NodeId endpoint); + future<> update_normal_tokens(std::unordered_set tokens, host_id endpoint); const token& first_token(const token& start) const; size_t first_token_index(const token& start) const; - std::optional get_endpoint(const token& token) const; - std::vector get_tokens(const NodeId& addr) const; - const std::unordered_map& get_token_to_endpoint() const; - const std::unordered_set& get_leaving_endpoints() const; - const std::unordered_map& get_bootstrap_tokens() const; - - template - requires std::is_same_v - token_metadata2* get_new() { - if (holds_alternative>(_new_value)) { - return get>(_new_value).get(); - } - throw_with_backtrace("no mutable new value"); - } - - template - requires std::is_same_v - const token_metadata2* get_new() const { - if (holds_alternative>(_new_value)) { - return get>(_new_value).get(); - } - if (holds_alternative>(_new_value)) { - return get>(_new_value).get(); - } - throw_with_backtrace("no new value"); - } - - template - requires std::is_same_v - lw_shared_ptr get_new_strong() const { - if (holds_alternative>(_new_value)) { - return get>(_new_value); - } - if (holds_alternative>(_new_value)) { - return get>(_new_value); - } - throw_with_backtrace("no new value"); - } + std::optional get_endpoint(const token& token) const; + std::vector get_tokens(const host_id& addr) const; + const std::unordered_map& get_token_to_endpoint() const; + const std::unordered_set& get_leaving_endpoints() const; + const std::unordered_map& get_bootstrap_tokens() const; /** - * Update or add endpoint given its inet_address and 
endpoint_dc_rack. + * Update or add a node for a given host_id. + * The other arguments (dc, state, shard_count) are optional, i.e. the corresponding node + * fields won't be updated if std::nullopt is passed. */ - void update_topology(NodeId ep, std::optional opt_dr, std::optional opt_st = std::nullopt, + void update_topology(host_id ep, std::optional opt_dr, std::optional opt_st = std::nullopt, std::optional shard_count = std::nullopt); /** * Creates an iterable range of the sorted tokens starting at the token t @@ -235,39 +187,39 @@ public: /// Returns host_id of the local node. host_id get_my_id() const; - void add_bootstrap_token(token t, NodeId endpoint); + void add_bootstrap_token(token t, host_id endpoint); - void add_bootstrap_tokens(std::unordered_set tokens, NodeId endpoint); + void add_bootstrap_tokens(std::unordered_set tokens, host_id endpoint); void remove_bootstrap_tokens(std::unordered_set tokens); - void add_leaving_endpoint(NodeId endpoint); - void del_leaving_endpoint(NodeId endpoint); + void add_leaving_endpoint(host_id endpoint); + void del_leaving_endpoint(host_id endpoint); - void remove_endpoint(NodeId endpoint); + void remove_endpoint(host_id endpoint); // Checks if the node is part of the token ring. If yes, the node is one of // the nodes that owns the tokens and inside the set _normal_token_owners. 
- bool is_normal_token_owner(NodeId endpoint) const; + bool is_normal_token_owner(host_id endpoint) const; - bool is_leaving(NodeId endpoint) const; + bool is_leaving(host_id endpoint) const; // Is this node being replaced by another node - bool is_being_replaced(NodeId endpoint) const; + bool is_being_replaced(host_id endpoint) const; // Is any node being replaced by another node bool is_any_node_being_replaced() const; - void add_replacing_endpoint(NodeId existing_node, NodeId replacing_node); + void add_replacing_endpoint(host_id existing_node, host_id replacing_node); - void del_replacing_endpoint(NodeId existing_node); + void del_replacing_endpoint(host_id existing_node); /** * Create a full copy of token_metadata using asynchronous continuations. * The caller must ensure that the cloned object will not change if * the function yields. */ - future clone_async() const noexcept; + future clone_async() const noexcept; /** * Create a copy of TokenMetadata with only tokenToEndpointMap. That is, pending ranges, @@ -275,7 +227,7 @@ public: * The caller must ensure that the cloned object will not change if * the function yields. */ - future clone_only_token_map() const noexcept; + future clone_only_token_map() const noexcept; /** * Create a copy of TokenMetadata with tokenToEndpointMap reflecting situation after all * current leave operations have finished. @@ -284,7 +236,7 @@ public: * * @return a future holding a new token metadata */ - future clone_after_all_left() const noexcept; + future clone_after_all_left() const noexcept; /** * Gently clear the token_metadata members. 
@@ -304,16 +256,14 @@ public: static boost::icl::interval::interval_type range_to_interval(range r); static range interval_to_range(boost::icl::interval::interval_type i); - future<> update_topology_change_info(dc_rack_fn& get_dc_rack); + future<> update_topology_change_info(dc_rack_fn& get_dc_rack); - const std::optional>& get_topology_change_info() const; + const std::optional& get_topology_change_info() const; token get_predecessor(token t) const; - const std::unordered_set& get_all_endpoints() const; + const std::unordered_set& get_all_endpoints() const; - template - requires std::is_same_v std::unordered_set get_all_ips() const; /* Returns the number of different endpoints that own tokens in the ring. @@ -334,26 +284,20 @@ public: version_t get_version() const; void set_version(version_t version); - friend class token_metadata_impl; + friend class token_metadata_impl; friend class shared_token_metadata; private: void set_version_tracker(version_tracker_t tracker); }; -extern template class generic_token_metadata; -extern template class generic_token_metadata; -extern template void host_id_or_endpoint::resolve(const token_metadata& tm); -extern template void host_id_or_endpoint::resolve(const token_metadata2& tm); - -template struct topology_change_info { - lw_shared_ptr> target_token_metadata; - lw_shared_ptr> base_token_metadata; + lw_shared_ptr target_token_metadata; + lw_shared_ptr base_token_metadata; std::vector all_tokens; token_metadata::read_new_t read_new; - topology_change_info(lw_shared_ptr> target_token_metadata_, - lw_shared_ptr> base_token_metadata_, + topology_change_info(lw_shared_ptr target_token_metadata_, + lw_shared_ptr base_token_metadata_, std::vector all_tokens_, token_metadata::read_new_t read_new_); future<> clear_gently(); @@ -367,13 +311,8 @@ mutable_token_metadata_ptr make_token_metadata_ptr(Args... args) { return make_lw_shared(std::forward(args)...); } -template -mutable_token_metadata2_ptr make_token_metadata2_ptr(Args... 
args) { - return make_lw_shared(std::forward(args)...); -} - class shared_token_metadata { - mutable_token_metadata2_ptr _shared; + mutable_token_metadata_ptr _shared; token_metadata_lock_func _lock_func; // We use this barrier during the transition to a new token_metadata version to ensure that the @@ -392,13 +331,13 @@ class shared_token_metadata { // includes its own invocation as an operation in the new phase. utils::phased_barrier _versions_barrier; shared_future<> _stale_versions_in_use{make_ready_future<>()}; - token_metadata2::version_t _fence_version = 0; + token_metadata::version_t _fence_version = 0; public: // used to construct the shared object as a sharded<> instance // lock_func returns semaphore_units<> - explicit shared_token_metadata(token_metadata_lock_func lock_func, token_metadata2::config cfg) - : _shared(make_token_metadata2_ptr(std::move(cfg))) + explicit shared_token_metadata(token_metadata_lock_func lock_func, token_metadata::config cfg) + : _shared(make_token_metadata_ptr(std::move(cfg))) , _lock_func(std::move(lock_func)) { _shared->set_version_tracker(_versions_barrier.start()); @@ -407,18 +346,18 @@ public: shared_token_metadata(const shared_token_metadata& x) = delete; shared_token_metadata(shared_token_metadata&& x) = default; - token_metadata2_ptr get() const noexcept { + token_metadata_ptr get() const noexcept { return _shared; } - void set(mutable_token_metadata2_ptr tmptr) noexcept; + void set(mutable_token_metadata_ptr tmptr) noexcept; future<> stale_versions_in_use() const { return _stale_versions_in_use.get_future(); } - void update_fence_version(token_metadata2::version_t version); - token_metadata2::version_t get_fence_version() const noexcept { + void update_fence_version(token_metadata::version_t version); + token_metadata::version_t get_fence_version() const noexcept { return _fence_version; } @@ -438,7 +377,7 @@ public: // If the functor is successful, the mutated clone // is set back to to the shared_token_metadata, // 
otherwise, the clone is destroyed. - future<> mutate_token_metadata(seastar::noncopyable_function (token_metadata2&)> func); + future<> mutate_token_metadata(seastar::noncopyable_function (token_metadata&)> func); // mutate_token_metadata_on_all_shards acquires the shared_token_metadata lock, // clones the token_metadata (using clone_async) @@ -450,7 +389,7 @@ public: // otherwise, the clone is destroyed. // // Must be called on shard 0. - static future<> mutate_on_all_shards(sharded& stm, seastar::noncopyable_function (token_metadata2&)> func); + static future<> mutate_on_all_shards(sharded& stm, seastar::noncopyable_function (token_metadata&)> func); }; } diff --git a/locator/token_metadata_fwd.hh b/locator/token_metadata_fwd.hh index 5e67605e3b..07f470bca7 100644 --- a/locator/token_metadata_fwd.hh +++ b/locator/token_metadata_fwd.hh @@ -11,13 +11,9 @@ namespace locator { -template -class generic_token_metadata; -using token_metadata = generic_token_metadata; +class token_metadata; + using token_metadata_ptr = lw_shared_ptr; using mutable_token_metadata_ptr = lw_shared_ptr; -using token_metadata2 = generic_token_metadata; -using token_metadata2_ptr = lw_shared_ptr; -using mutable_token_metadata2_ptr = lw_shared_ptr; } // namespace locator diff --git a/locator/token_range_splitter.hh b/locator/token_range_splitter.hh index 0222f14b0b..ac55d65fd1 100644 --- a/locator/token_range_splitter.hh +++ b/locator/token_range_splitter.hh @@ -40,6 +40,6 @@ public: virtual std::optional next_token() = 0; }; -std::unique_ptr make_splitter(token_metadata2_ptr); +std::unique_ptr make_splitter(token_metadata_ptr); } \ No newline at end of file diff --git a/locator/topology.hh b/locator/topology.hh index 034ac92c44..d9adaf69d7 100644 --- a/locator/topology.hh +++ b/locator/topology.hh @@ -424,7 +424,6 @@ private: return _nodes_by_endpoint; }; - template friend class token_metadata_impl; public: void test_compare_endpoints(const inet_address& address, const inet_address& a1, const 
inet_address& a2) const; diff --git a/locator/util.cc b/locator/util.cc index 8283cee2ad..1a16fd1313 100644 --- a/locator/util.cc +++ b/locator/util.cc @@ -54,7 +54,7 @@ get_range_to_address_map(locator::effective_replication_map_ptr erm, // Caller is responsible to hold token_metadata valid until the returned future is resolved static future> -get_tokens_in_local_dc(const locator::token_metadata2& tm) { +get_tokens_in_local_dc(const locator::token_metadata& tm) { std::vector filtered_tokens; auto local_dc_filter = tm.get_topology().get_local_dc_filter(); for (auto token : tm.sorted_tokens()) { diff --git a/main.cc b/main.cc index 4ce8097e55..1a01486031 100644 --- a/main.cc +++ b/main.cc @@ -1206,7 +1206,7 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl const auto listen_address = utils::resolve(cfg->listen_address, family).get0(); const auto host_id = initialize_local_info_thread(sys_ks, snitch, listen_address, *cfg, broadcast_addr, broadcast_rpc_addr); - shared_token_metadata::mutate_on_all_shards(token_metadata, [host_id, endpoint = broadcast_addr] (locator::token_metadata2& tm) { + shared_token_metadata::mutate_on_all_shards(token_metadata, [host_id, endpoint = broadcast_addr] (locator::token_metadata& tm) { // Makes local host id available in topology cfg as soon as possible. // Raft topology discard the endpoint-to-id map, so the local id can // still be found in the config. diff --git a/node_ops/node_ops_ctl.hh b/node_ops/node_ops_ctl.hh index e452b90172..6cba6b8cc0 100644 --- a/node_ops/node_ops_ctl.hh +++ b/node_ops/node_ops_ctl.hh @@ -25,10 +25,7 @@ class storage_service; } namespace locator { -template -class generic_token_metadata; -using token_metadata = generic_token_metadata; -using token_metadata2 = generic_token_metadata; +class token_metadata; } class node_ops_info { @@ -139,7 +136,7 @@ public: sstring desc; locator::host_id host_id; // Host ID of the node operand (i.e. 
added, replaced, or leaving node) gms::inet_address endpoint; // IP address of the node operand (i.e. added, replaced, or leaving node) - lw_shared_ptr tmptr; + lw_shared_ptr tmptr; std::unordered_set sync_nodes; std::unordered_set ignore_nodes; node_ops_cmd_request req; diff --git a/repair/repair.cc b/repair/repair.cc index f59eb4f90d..f64f0feb5f 100644 --- a/repair/repair.cc +++ b/repair/repair.cc @@ -1492,7 +1492,7 @@ std::optional repair::data_sync_repair_task_impl::expected_children_numb return smp::count; } -future<> repair_service::bootstrap_with_repair(locator::token_metadata2_ptr tmptr, std::unordered_set bootstrap_tokens) { +future<> repair_service::bootstrap_with_repair(locator::token_metadata_ptr tmptr, std::unordered_set bootstrap_tokens) { assert(this_shard_id() == 0); using inet_address = gms::inet_address; return seastar::async([this, tmptr = std::move(tmptr), tokens = std::move(bootstrap_tokens)] () mutable { @@ -1531,14 +1531,14 @@ future<> repair_service::bootstrap_with_repair(locator::token_metadata2_ptr tmpt auto replication_factor = erm->get_replication_factor(); //Active ranges - auto metadata_clone = locator::make_token_metadata2_ptr(tmptr->clone_only_token_map().get0()); - auto range_addresses = strat.get_range_addresses(locator::token_metadata(metadata_clone)).get0(); + auto metadata_clone = tmptr->clone_only_token_map().get0(); + auto range_addresses = strat.get_range_addresses(metadata_clone).get0(); //Pending ranges - metadata_clone->update_topology(myid, myloc, locator::node::state::bootstrapping); - metadata_clone->update_normal_tokens(tokens, myid).get(); - auto pending_range_addresses = strat.get_range_addresses(locator::token_metadata(metadata_clone)).get0(); - metadata_clone->clear_gently().get(); + metadata_clone.update_topology(myid, myloc, locator::node::state::bootstrapping); + metadata_clone.update_normal_tokens(tokens, myid).get(); + auto pending_range_addresses = strat.get_range_addresses(metadata_clone).get0(); + 
metadata_clone.clear_gently().get(); //Collects the source that will have its range moved to the new node std::unordered_map range_sources; @@ -1669,7 +1669,7 @@ future<> repair_service::bootstrap_with_repair(locator::token_metadata2_ptr tmpt }); } -future<> repair_service::do_decommission_removenode_with_repair(locator::token_metadata2_ptr tmptr, gms::inet_address leaving_node, shared_ptr ops) { +future<> repair_service::do_decommission_removenode_with_repair(locator::token_metadata_ptr tmptr, gms::inet_address leaving_node, shared_ptr ops) { assert(this_shard_id() == 0); using inet_address = gms::inet_address; return seastar::async([this, tmptr = std::move(tmptr), leaving_node = std::move(leaving_node), ops] () mutable { @@ -1720,15 +1720,15 @@ future<> repair_service::do_decommission_removenode_with_repair(locator::token_m // Find (for each range) all nodes that store replicas for these ranges as well for (auto& r : ranges) { auto end_token = r.end() ? r.end()->value() : dht::maximum_token(); - auto eps = strat.calculate_natural_ips(end_token, tmptr).get0(); + auto eps = strat.calculate_natural_ips(end_token, *tmptr).get0(); current_replica_endpoints.emplace(r, std::move(eps)); seastar::thread::maybe_yield(); } - auto temp = locator::make_token_metadata2_ptr(tmptr->clone_after_all_left().get0()); + auto temp = tmptr->clone_after_all_left().get0(); // leaving_node might or might not be 'leaving'. If it was not leaving (that is, removenode // command was used), it is still present in temp and must be removed. 
- if (temp->is_normal_token_owner(leaving_node_id)) { - temp->remove_endpoint(leaving_node_id); + if (temp.is_normal_token_owner(leaving_node_id)) { + temp.remove_endpoint(leaving_node_id); } std::unordered_map range_sources; dht::token_range_vector ranges_for_removenode; @@ -1843,7 +1843,7 @@ future<> repair_service::do_decommission_removenode_with_repair(locator::token_m } } } - temp->clear_gently().get(); + temp.clear_gently().get(); if (reason == streaming::stream_reason::decommission) { container().invoke_on_all([nr_ranges_skipped] (repair_service& rs) { rs.get_metrics().decommission_finished_ranges += nr_ranges_skipped; @@ -1865,13 +1865,13 @@ future<> repair_service::do_decommission_removenode_with_repair(locator::token_m }); } -future<> repair_service::decommission_with_repair(locator::token_metadata2_ptr tmptr) { +future<> repair_service::decommission_with_repair(locator::token_metadata_ptr tmptr) { assert(this_shard_id() == 0); auto my_address = tmptr->get_topology().my_address(); return do_decommission_removenode_with_repair(std::move(tmptr), my_address, {}); } -future<> repair_service::removenode_with_repair(locator::token_metadata2_ptr tmptr, gms::inet_address leaving_node, shared_ptr ops) { +future<> repair_service::removenode_with_repair(locator::token_metadata_ptr tmptr, gms::inet_address leaving_node, shared_ptr ops) { assert(this_shard_id() == 0); return do_decommission_removenode_with_repair(std::move(tmptr), std::move(leaving_node), std::move(ops)).then([this] { rlogger.debug("Triggering off-strategy compaction for all non-system tables on removenode completion"); @@ -1884,7 +1884,7 @@ future<> repair_service::removenode_with_repair(locator::token_metadata2_ptr tmp }); } -future<> repair_service::do_rebuild_replace_with_repair(locator::token_metadata2_ptr tmptr, sstring op, sstring source_dc, streaming::stream_reason reason, std::unordered_set ignore_nodes) { +future<> repair_service::do_rebuild_replace_with_repair(locator::token_metadata_ptr 
tmptr, sstring op, sstring source_dc, streaming::stream_reason reason, std::unordered_set ignore_nodes) { assert(this_shard_id() == 0); return seastar::async([this, tmptr = std::move(tmptr), source_dc = std::move(source_dc), op = std::move(op), reason, ignore_nodes = std::move(ignore_nodes)] () mutable { auto& db = get_db().local(); @@ -1898,7 +1898,7 @@ future<> repair_service::do_rebuild_replace_with_repair(locator::token_metadata2 } auto& strat = erm->get_replication_strategy(); // Okay to yield since tm is immutable - dht::token_range_vector ranges = strat.get_ranges(myid, locator::token_metadata(tmptr)).get0(); + dht::token_range_vector ranges = strat.get_ranges(myid, tmptr).get0(); auto nr_tables = get_nr_tables(db, keyspace_name); nr_ranges_total += ranges.size() * nr_tables; @@ -1922,7 +1922,7 @@ future<> repair_service::do_rebuild_replace_with_repair(locator::token_metadata2 continue; } auto& strat = erm->get_replication_strategy(); - dht::token_range_vector ranges = strat.get_ranges(myid, locator::token_metadata(tmptr)).get0(); + dht::token_range_vector ranges = strat.get_ranges(myid, *tmptr).get0(); auto& topology = erm->get_token_metadata().get_topology(); std::unordered_map range_sources; auto nr_tables = get_nr_tables(db, keyspace_name); @@ -1931,7 +1931,7 @@ future<> repair_service::do_rebuild_replace_with_repair(locator::token_metadata2 auto& r = *it; seastar::thread::maybe_yield(); auto end_token = r.end() ? 
r.end()->value() : dht::maximum_token(); - auto neighbors = boost::copy_range>(strat.calculate_natural_ips(end_token, tmptr).get0() | + auto neighbors = boost::copy_range>(strat.calculate_natural_ips(end_token, *tmptr).get0() | boost::adaptors::filtered([myip, &source_dc, &topology, &ignore_nodes] (const gms::inet_address& node) { if (node == myip) { return false; @@ -1969,7 +1969,7 @@ future<> repair_service::do_rebuild_replace_with_repair(locator::token_metadata2 }); } -future<> repair_service::rebuild_with_repair(locator::token_metadata2_ptr tmptr, sstring source_dc) { +future<> repair_service::rebuild_with_repair(locator::token_metadata_ptr tmptr, sstring source_dc) { assert(this_shard_id() == 0); auto op = sstring("rebuild_with_repair"); if (source_dc.empty()) { @@ -1985,7 +1985,7 @@ future<> repair_service::rebuild_with_repair(locator::token_metadata2_ptr tmptr, }); } -future<> repair_service::replace_with_repair(locator::token_metadata2_ptr tmptr, std::unordered_set replacing_tokens, std::unordered_set ignore_nodes) { +future<> repair_service::replace_with_repair(locator::token_metadata_ptr tmptr, std::unordered_set replacing_tokens, std::unordered_set ignore_nodes) { assert(this_shard_id() == 0); auto cloned_tm = co_await tmptr->clone_async(); auto op = sstring("replace_with_repair"); @@ -1994,7 +1994,7 @@ future<> repair_service::replace_with_repair(locator::token_metadata2_ptr tmptr, auto reason = streaming::stream_reason::replace; // update a cloned version of tmptr // no need to set the original version - auto cloned_tmptr = make_token_metadata2_ptr(std::move(cloned_tm)); + auto cloned_tmptr = make_token_metadata_ptr(std::move(cloned_tm)); cloned_tmptr->update_topology(tmptr->get_my_id(), myloc, locator::node::state::replacing); co_await cloned_tmptr->update_normal_tokens(replacing_tokens, tmptr->get_my_id()); co_return co_await do_rebuild_replace_with_repair(std::move(cloned_tmptr), std::move(op), myloc.dc, reason, std::move(ignore_nodes)); diff --git 
a/repair/row_level.cc b/repair/row_level.cc index 3b3a621edc..63615e6b40 100644 --- a/repair/row_level.cc +++ b/repair/row_level.cc @@ -669,7 +669,7 @@ void flush_rows(schema_ptr s, std::list& rows, lw_shared_ptr last_dk; bool do_small_table_optimization = erm && small_table_optimization; auto* strat = do_small_table_optimization ? &erm->get_replication_strategy() : nullptr; - auto tm = do_small_table_optimization ? erm->get_token_metadata_ptr() : nullptr; + auto* tm = do_small_table_optimization ? &erm->get_token_metadata() : nullptr; auto myip = do_small_table_optimization ? erm->get_topology().my_address() : gms::inet_address(); for (auto& r : rows) { thread::maybe_yield(); @@ -679,7 +679,7 @@ void flush_rows(schema_ptr s, std::list& rows, lw_shared_ptrdk; if (do_small_table_optimization) { // Check if the token is owned by the node - auto eps = strat->calculate_natural_ips(dk.token(), tm).get0(); + auto eps = strat->calculate_natural_ips(dk.token(), *tm).get0(); if (!eps.contains(myip)) { rlogger.trace("master: ignore row, token={}", dk.token()); continue; @@ -1900,7 +1900,7 @@ public: } if (small_table_optimization) { auto& strat = erm.get_replication_strategy(); - auto& tm = erm.get_token_metadata_ptr(); + const auto& tm = erm.get_token_metadata(); std::list tmp; for (auto& row : row_diff) { repair_row r = std::move(row); diff --git a/repair/row_level.hh b/repair/row_level.hh index d82ac83f9b..e21f4df2d2 100644 --- a/repair/row_level.hh +++ b/repair/row_level.hh @@ -138,14 +138,14 @@ public: // The tokens are the tokens assigned to the bootstrap node. 
// all repair-based node operation entry points must be called on shard 0 - future<> bootstrap_with_repair(locator::token_metadata2_ptr tmptr, std::unordered_set bootstrap_tokens); - future<> decommission_with_repair(locator::token_metadata2_ptr tmptr); - future<> removenode_with_repair(locator::token_metadata2_ptr tmptr, gms::inet_address leaving_node, shared_ptr ops); - future<> rebuild_with_repair(locator::token_metadata2_ptr tmptr, sstring source_dc); - future<> replace_with_repair(locator::token_metadata2_ptr tmptr, std::unordered_set replacing_tokens, std::unordered_set ignore_nodes); + future<> bootstrap_with_repair(locator::token_metadata_ptr tmptr, std::unordered_set bootstrap_tokens); + future<> decommission_with_repair(locator::token_metadata_ptr tmptr); + future<> removenode_with_repair(locator::token_metadata_ptr tmptr, gms::inet_address leaving_node, shared_ptr ops); + future<> rebuild_with_repair(locator::token_metadata_ptr tmptr, sstring source_dc); + future<> replace_with_repair(locator::token_metadata_ptr tmptr, std::unordered_set replacing_tokens, std::unordered_set ignore_nodes); private: - future<> do_decommission_removenode_with_repair(locator::token_metadata2_ptr tmptr, gms::inet_address leaving_node, shared_ptr ops); - future<> do_rebuild_replace_with_repair(locator::token_metadata2_ptr tmptr, sstring op, sstring source_dc, streaming::stream_reason reason, std::unordered_set ignore_nodes); + future<> do_decommission_removenode_with_repair(locator::token_metadata_ptr tmptr, gms::inet_address leaving_node, shared_ptr ops); + future<> do_rebuild_replace_with_repair(locator::token_metadata_ptr tmptr, sstring op, sstring source_dc, streaming::stream_reason reason, std::unordered_set ignore_nodes); // Must be called on shard 0 future<> sync_data_using_repair(sstring keyspace, diff --git a/replica/database.hh b/replica/database.hh index 0d51032835..bff06cbd54 100644 --- a/replica/database.hh +++ b/replica/database.hh @@ -1561,7 +1561,7 @@ public: } 
const locator::shared_token_metadata& get_shared_token_metadata() const { return _shared_token_metadata; } - const locator::token_metadata2& get_token_metadata() const { return *_shared_token_metadata.get(); } + const locator::token_metadata& get_token_metadata() const { return *_shared_token_metadata.get(); } wasm::manager& wasm() noexcept { return _wasm; } const wasm::manager& wasm() const noexcept { return _wasm; } diff --git a/service/forward_service.cc b/service/forward_service.cc index 44ed7e7ae7..91513a1e6e 100644 --- a/service/forward_service.cc +++ b/service/forward_service.cc @@ -303,7 +303,7 @@ public: } }; -locator::token_metadata2_ptr forward_service::get_token_metadata_ptr() const noexcept { +locator::token_metadata_ptr forward_service::get_token_metadata_ptr() const noexcept { return _shared_token_metadata.get(); } diff --git a/service/forward_service.hh b/service/forward_service.hh index 8bcec3db97..169deb0b63 100644 --- a/service/forward_service.hh +++ b/service/forward_service.hh @@ -159,7 +159,7 @@ private: // Used to execute a `forward_request` on a shard. 
future execute_on_this_shard(query::forward_request req, std::optional tr_info); - locator::token_metadata2_ptr get_token_metadata_ptr() const noexcept; + locator::token_metadata_ptr get_token_metadata_ptr() const noexcept; void register_metrics(); void init_messaging_service(); diff --git a/service/storage_proxy.cc b/service/storage_proxy.cc index 9b507a72d0..ee038d3b3d 100644 --- a/service/storage_proxy.cc +++ b/service/storage_proxy.cc @@ -2286,7 +2286,7 @@ bool paxos_response_handler::learned(gms::inet_address ep) { } static inet_address_vector_replica_set -replica_ids_to_endpoints(const locator::token_metadata2& tm, const std::vector& replica_ids) { +replica_ids_to_endpoints(const locator::token_metadata& tm, const std::vector& replica_ids) { inet_address_vector_replica_set endpoints; endpoints.reserve(replica_ids.size()); @@ -2300,7 +2300,7 @@ replica_ids_to_endpoints(const locator::token_metadata2& tm, const std::vector -endpoints_to_replica_ids(const locator::token_metadata2& tm, const inet_address_vector_replica_set& endpoints) { +endpoints_to_replica_ids(const locator::token_metadata& tm, const inet_address_vector_replica_set& endpoints) { std::vector replica_ids; replica_ids.reserve(endpoints.size()); @@ -6545,7 +6545,7 @@ storage_proxy::stop() { return make_ready_future<>(); } -locator::token_metadata2_ptr storage_proxy::get_token_metadata_ptr() const noexcept { +locator::token_metadata_ptr storage_proxy::get_token_metadata_ptr() const noexcept { return _shared_token_metadata.get(); } diff --git a/service/storage_proxy.hh b/service/storage_proxy.hh index 476e1b713f..8d5f517e2b 100644 --- a/service/storage_proxy.hh +++ b/service/storage_proxy.hh @@ -220,7 +220,7 @@ public: return _erm_factory; } - locator::token_metadata2_ptr get_token_metadata_ptr() const noexcept; + locator::token_metadata_ptr get_token_metadata_ptr() const noexcept; query::max_result_size get_max_result_size(const query::partition_slice& slice) const; query::tombstone_limit 
get_tombstone_limit() const; diff --git a/service/storage_service.cc b/service/storage_service.cc index cf3bd3fc83..d4d46f4aad 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -283,7 +283,7 @@ static future<> set_gossip_tokens(gms::gossiper& g, }); } -static std::unordered_map get_token_to_endpoint(const locator::token_metadata2& tm) { +static std::unordered_map get_token_to_endpoint(const locator::token_metadata& tm) { const auto& map = tm.get_token_to_endpoint(); std::unordered_map result; result.reserve(map.size()); @@ -405,7 +405,7 @@ future<> storage_service::topology_state_load() { co_await _messaging.local().ban_host(locator::host_id{id.uuid()}); } - co_await mutate_token_metadata(seastar::coroutine::lambda([this, &id2ip, &am] (mutable_token_metadata2_ptr tmptr) -> future<> { + co_await mutate_token_metadata(seastar::coroutine::lambda([this, &id2ip, &am] (mutable_token_metadata_ptr tmptr) -> future<> { co_await tmptr->clear_gently(); // drop previous state tmptr->set_version(_topology_state_machine._topology.version); @@ -925,11 +925,11 @@ class topology_coordinator { // True if an ongoing topology change should be rolled back bool _rollback = false; - const locator::token_metadata2& get_token_metadata() const noexcept { + const locator::token_metadata& get_token_metadata() const noexcept { return *_shared_tm.get(); } - locator::token_metadata2_ptr get_token_metadata_ptr() const noexcept { + locator::token_metadata_ptr get_token_metadata_ptr() const noexcept { return _shared_tm.get(); } @@ -1207,7 +1207,7 @@ class topology_coordinator { // If there's a bootstrapping node, its tokens should be included in the new generation. // Pass them and a reference to the bootstrapping node's replica_state through `binfo`. 
future>> prepare_new_cdc_generation_data( - locator::token_metadata2_ptr tmptr, const group0_guard& guard, std::optional binfo) { + locator::token_metadata_ptr tmptr, const group0_guard& guard, std::optional binfo) { auto get_sharding_info = [&] (dht::token end) -> std::pair { if (binfo && binfo->bootstrap_tokens.contains(end)) { return {binfo->rs.shard_count, binfo->rs.ignore_msb}; @@ -1269,7 +1269,7 @@ class topology_coordinator { // (bootstrapping is quick if there is no data in the cluster, but usually if one has 100 nodes they // have tons of data, so indeed streaming/repair will take much longer (hours/days)). future> prepare_and_broadcast_cdc_generation_data( - locator::token_metadata2_ptr tmptr, group0_guard guard, std::optional binfo) { + locator::token_metadata_ptr tmptr, group0_guard guard, std::optional binfo) { auto [gen_uuid, gen_mutations] = co_await prepare_new_cdc_generation_data(tmptr, guard, binfo); if (gen_mutations.empty()) { @@ -3414,7 +3414,7 @@ future<> storage_service::join_token_ring(sharded future<> { + co_await mutate_token_metadata([this, &bootstrap_tokens] (mutable_token_metadata_ptr tmptr) -> future<> { // This node must know about its chosen tokens before other nodes do // since they may start sending writes to this node after it gossips status = NORMAL. // Therefore, in case we haven't updated _token_metadata with our tokens yet, do it now. 
@@ -3493,7 +3493,7 @@ future<> storage_service::mark_existing_views_as_built() { }); } -std::unordered_set storage_service::parse_node_list(sstring comma_separated_list, const token_metadata2& tm) { +std::unordered_set storage_service::parse_node_list(sstring comma_separated_list, const token_metadata& tm) { std::vector ignore_nodes_strs = utils::split_comma_separated_list(std::move(comma_separated_list)); std::unordered_set ignore_nodes; for (const sstring& n : ignore_nodes_strs) { @@ -3555,7 +3555,7 @@ future<> storage_service::bootstrap(std::unordered_set& bootstrap_tokens, // When is_repair_based_node_ops_enabled is true, the bootstrap node // will use node_ops_cmd to bootstrap, node_ops_cmd will update the pending ranges. slogger.debug("bootstrap: update pending ranges: endpoint={} bootstrap_tokens={}", get_broadcast_address(), bootstrap_tokens); - mutate_token_metadata([this, &bootstrap_tokens] (mutable_token_metadata2_ptr tmptr) { + mutate_token_metadata([this, &bootstrap_tokens] (mutable_token_metadata_ptr tmptr) { auto endpoint = get_broadcast_address(); tmptr->update_topology(tmptr->get_my_id(), _snitch.local()->get_location(), locator::node::state::bootstrapping); tmptr->add_bootstrap_tokens(bootstrap_tokens, tmptr->get_my_id()); @@ -3650,25 +3650,20 @@ future<> storage_service::handle_state_bootstrap(inet_address endpoint, gms::per // continue. auto tmlock = co_await get_token_metadata_lock(); auto tmptr = co_await get_mutable_token_metadata_ptr(); - auto update_tm = [&](locator::generic_token_metadata& tm, NodeId n, std::optional dc_rack) { - if (tm.is_normal_token_owner(n)) { - // If isLeaving is false, we have missed both LEAVING and LEFT. However, if - // isLeaving is true, we have only missed LEFT. Waiting time between completing - // leave operation and rebootstrapping is relatively short, so the latter is quite - // common (not enough time for gossip to spread). Therefore we report only the - // former in the log. 
- if (!tm.is_leaving(n)) { - slogger.info("Node {} state jump to bootstrap", n); - } - tm.remove_endpoint(n); - } - - tm.update_topology(n, dc_rack, locator::node::state::bootstrapping); - tm.add_bootstrap_tokens(tokens, n); - }; - const auto dc_rack = get_dc_rack_for(endpoint); const auto host_id = _gossiper.get_host_id(endpoint); - update_tm(*tmptr, host_id, dc_rack); + if (tmptr->is_normal_token_owner(host_id)) { + // If isLeaving is false, we have missed both LEAVING and LEFT. However, if + // isLeaving is true, we have only missed LEFT. Waiting time between completing + // leave operation and rebootstrapping is relatively short, so the latter is quite + // common (not enough time for gossip to spread). Therefore we report only the + // former in the log. + if (!tmptr->is_leaving(host_id)) { + slogger.info("Node {} state jump to bootstrap", host_id); + } + tmptr->remove_endpoint(host_id); + } + tmptr->update_topology(host_id, get_dc_rack_for(endpoint), locator::node::state::bootstrapping); + tmptr->add_bootstrap_tokens(tokens, host_id); tmptr->update_host_id(host_id, endpoint); co_await update_topology_change_info(tmptr, ::format("handle_state_bootstrap {}", endpoint)); @@ -4302,13 +4297,13 @@ future<> storage_service::join_cluster(sharded& co_return co_await join_token_ring(sys_dist_ks, proxy, std::move(initial_contact_nodes), std::move(loaded_endpoints), std::move(loaded_peer_features), get_ring_delay()); } -future<> storage_service::replicate_to_all_cores(mutable_token_metadata2_ptr tmptr) noexcept { +future<> storage_service::replicate_to_all_cores(mutable_token_metadata_ptr tmptr) noexcept { assert(this_shard_id() == 0); slogger.debug("Replicating token_metadata to all cores"); std::exception_ptr ex; - std::vector pending_token_metadata_ptr; + std::vector pending_token_metadata_ptr; pending_token_metadata_ptr.resize(smp::count); std::vector> pending_effective_replication_maps; pending_effective_replication_maps.resize(smp::count); @@ -4339,7 +4334,7 @@ 
future<> storage_service::replicate_to_all_cores(mutable_token_metadata2_ptr tmp pending_token_metadata_ptr[base_shard] = tmptr; // clone a local copy of updated token_metadata on all other shards co_await smp::invoke_on_others(base_shard, [&, tmptr] () -> future<> { - pending_token_metadata_ptr[this_shard_id()] = make_token_metadata2_ptr(co_await tmptr->clone_async()); + pending_token_metadata_ptr[this_shard_id()] = make_token_metadata_ptr(co_await tmptr->clone_async()); }); // Precalculate new effective_replication_map for all keyspaces @@ -5496,7 +5491,7 @@ future storage_service::node_ops_cmd_handler(gms::inet_ad slogger.warn("{}", msg); throw std::runtime_error(msg); } - mutate_token_metadata([coordinator, &req, this] (mutable_token_metadata2_ptr tmptr) mutable { + mutate_token_metadata([coordinator, &req, this] (mutable_token_metadata_ptr tmptr) mutable { for (auto& node : req.leaving_nodes) { slogger.info("removenode[{}]: Added node={} as leaving node, coordinator={}", req.ops_uuid, node, coordinator); tmptr->add_leaving_endpoint(tmptr->get_host_id(node)); @@ -5504,7 +5499,7 @@ future storage_service::node_ops_cmd_handler(gms::inet_ad return update_topology_change_info(tmptr, ::format("removenode {}", req.leaving_nodes)); }).get(); node_ops_insert(ops_uuid, coordinator, std::move(req.ignore_nodes), [this, coordinator, req = std::move(req)] () mutable { - return mutate_token_metadata([this, coordinator, req = std::move(req)] (mutable_token_metadata2_ptr tmptr) mutable { + return mutate_token_metadata([this, coordinator, req = std::move(req)] (mutable_token_metadata_ptr tmptr) mutable { for (auto& node : req.leaving_nodes) { slogger.info("removenode[{}]: Removed node={} as leaving node, coordinator={}", req.ops_uuid, node, coordinator); tmptr->del_leaving_endpoint(tmptr->get_host_id(node)); @@ -5544,7 +5539,7 @@ future storage_service::node_ops_cmd_handler(gms::inet_ad slogger.warn("{}", msg); throw std::runtime_error(msg); } - 
mutate_token_metadata([coordinator, &req, this] (mutable_token_metadata2_ptr tmptr) mutable { + mutate_token_metadata([coordinator, &req, this] (mutable_token_metadata_ptr tmptr) mutable { for (auto& node : req.leaving_nodes) { slogger.info("decommission[{}]: Added node={} as leaving node, coordinator={}", req.ops_uuid, node, coordinator); tmptr->add_leaving_endpoint(tmptr->get_host_id(node)); @@ -5552,7 +5547,7 @@ future storage_service::node_ops_cmd_handler(gms::inet_ad return update_topology_change_info(tmptr, ::format("decommission {}", req.leaving_nodes)); }).get(); node_ops_insert(ops_uuid, coordinator, std::move(req.ignore_nodes), [this, coordinator, req = std::move(req)] () mutable { - return mutate_token_metadata([this, coordinator, req = std::move(req)] (mutable_token_metadata2_ptr tmptr) mutable { + return mutate_token_metadata([this, coordinator, req = std::move(req)] (mutable_token_metadata_ptr tmptr) mutable { for (auto& node : req.leaving_nodes) { slogger.info("decommission[{}]: Removed node={} as leaving node, coordinator={}", req.ops_uuid, node, coordinator); tmptr->del_leaving_endpoint(tmptr->get_host_id(node)); @@ -5605,7 +5600,7 @@ future storage_service::node_ops_cmd_handler(gms::inet_ad if (!coordinator_host_id) { throw std::runtime_error("Coordinator host_id not found"); } - mutate_token_metadata([coordinator, coordinator_host_id, &req, this] (mutable_token_metadata2_ptr tmptr) mutable { + mutate_token_metadata([coordinator, coordinator_host_id, &req, this] (mutable_token_metadata_ptr tmptr) mutable { for (auto& x: req.replace_nodes) { auto existing_node = x.first; auto replacing_node = x.second; @@ -5631,7 +5626,7 @@ future storage_service::node_ops_cmd_handler(gms::inet_ad return make_ready_future<>(); }).get(); node_ops_insert(ops_uuid, coordinator, std::move(req.ignore_nodes), [this, coordinator, coordinator_host_id, req = std::move(req)] () mutable { - return mutate_token_metadata([this, coordinator, coordinator_host_id, req = 
std::move(req)] (mutable_token_metadata2_ptr tmptr) mutable { + return mutate_token_metadata([this, coordinator, coordinator_host_id, req = std::move(req)] (mutable_token_metadata_ptr tmptr) mutable { for (auto& x: req.replace_nodes) { auto existing_node = x.first; auto replacing_node = x.second; @@ -5682,7 +5677,7 @@ future storage_service::node_ops_cmd_handler(gms::inet_ad if (!coordinator_host_id) { throw std::runtime_error("Coordinator host_id not found"); } - mutate_token_metadata([coordinator, coordinator_host_id, &req, this] (mutable_token_metadata2_ptr tmptr) mutable { + mutate_token_metadata([coordinator, coordinator_host_id, &req, this] (mutable_token_metadata_ptr tmptr) mutable { for (auto& x: req.bootstrap_nodes) { auto& endpoint = x.first; auto tokens = std::unordered_set(x.second.begin(), x.second.end()); @@ -5697,7 +5692,7 @@ future storage_service::node_ops_cmd_handler(gms::inet_ad return update_topology_change_info(tmptr, ::format("bootstrap {}", req.bootstrap_nodes)); }).get(); node_ops_insert(ops_uuid, coordinator, std::move(req.ignore_nodes), [this, coordinator, req = std::move(req)] () mutable { - return mutate_token_metadata([this, coordinator, req = std::move(req)] (mutable_token_metadata2_ptr tmptr) mutable { + return mutate_token_metadata([this, coordinator, req = std::move(req)] (mutable_token_metadata_ptr tmptr) mutable { for (auto& x: req.bootstrap_nodes) { auto& endpoint = x.first; auto tokens = std::unordered_set(x.second.begin(), x.second.end()); @@ -5914,12 +5909,12 @@ storage_service::get_changed_ranges_for_leaving(locator::vnode_effective_replica co_await coroutine::maybe_yield(); } - auto temp = locator::make_token_metadata2_ptr(co_await get_token_metadata_ptr()->clone_after_all_left()); + auto temp = co_await get_token_metadata_ptr()->clone_after_all_left(); // endpoint might or might not be 'leaving'. If it was not leaving (that is, removenode // command was used), it is still present in temp and must be removed. 
- if (const auto host_id = temp->get_host_id_if_known(endpoint); host_id && temp->is_normal_token_owner(*host_id)) { - temp->remove_endpoint(*host_id); + if (const auto host_id = temp.get_host_id_if_known(endpoint); host_id && temp.is_normal_token_owner(*host_id)) { + temp.remove_endpoint(*host_id); } std::unordered_multimap changed_ranges; @@ -5960,7 +5955,7 @@ storage_service::get_changed_ranges_for_leaving(locator::vnode_effective_replica // E.g. everywhere_replication_strategy co_await coroutine::maybe_yield(); } - co_await temp->clear_gently(); + co_await temp.clear_gently(); co_return changed_ranges; } @@ -6080,7 +6075,7 @@ future<> storage_service::excise(std::unordered_set tokens, inet_address future<> storage_service::leave_ring() { co_await _cdc_gens.local().leave_ring(); co_await _sys_ks.local().set_bootstrap_state(db::system_keyspace::bootstrap_state::NEEDS_BOOTSTRAP); - co_await mutate_token_metadata([this] (mutable_token_metadata2_ptr tmptr) { + co_await mutate_token_metadata([this] (mutable_token_metadata_ptr tmptr) { auto endpoint = get_broadcast_address(); const auto my_id = tmptr->get_my_id(); tmptr->remove_endpoint(my_id); @@ -6242,7 +6237,7 @@ future storage_service::get_token_metadata_lock() // db::schema_tables::do_merge_schema. // // Note: must be called on shard 0. 
-future<> storage_service::mutate_token_metadata(std::function (mutable_token_metadata2_ptr)> func, acquire_merge_lock acquire_merge_lock) noexcept { +future<> storage_service::mutate_token_metadata(std::function (mutable_token_metadata_ptr)> func, acquire_merge_lock acquire_merge_lock) noexcept { assert(this_shard_id() == 0); std::optional tmlock; @@ -6254,7 +6249,7 @@ future<> storage_service::mutate_token_metadata(std::function (mutable_ co_await replicate_to_all_cores(std::move(tmptr)); } -future<> storage_service::update_topology_change_info(mutable_token_metadata2_ptr tmptr, sstring reason) { +future<> storage_service::update_topology_change_info(mutable_token_metadata_ptr tmptr, sstring reason) { assert(this_shard_id() == 0); try { @@ -6282,7 +6277,7 @@ future<> storage_service::update_topology_change_info(mutable_token_metadata2_pt } future<> storage_service::update_topology_change_info(sstring reason, acquire_merge_lock acquire_merge_lock) { - return mutate_token_metadata([this, reason = std::move(reason)] (mutable_token_metadata2_ptr tmptr) mutable { + return mutate_token_metadata([this, reason = std::move(reason)] (mutable_token_metadata_ptr tmptr) mutable { return update_topology_change_info(std::move(tmptr), std::move(reason)); }, acquire_merge_lock); } @@ -6324,7 +6319,7 @@ future<> storage_service::load_tablet_metadata() { future<> storage_service::snitch_reconfigured() { assert(this_shard_id() == 0); auto& snitch = _snitch.local(); - co_await mutate_token_metadata([&snitch] (mutable_token_metadata2_ptr tmptr) -> future<> { + co_await mutate_token_metadata([&snitch] (mutable_token_metadata_ptr tmptr) -> future<> { // re-read local rack and DC info tmptr->update_topology(tmptr->get_my_id(), snitch->get_location()); return make_ready_future<>(); diff --git a/service/storage_service.hh b/service/storage_service.hh index 6f9d739e5c..5d21f241e6 100644 --- a/service/storage_service.hh +++ b/service/storage_service.hh @@ -113,12 +113,9 @@ private: using 
endpoint_details = dht::endpoint_details; using boot_strapper = dht::boot_strapper; using token_metadata = locator::token_metadata; - using token_metadata2 = locator::token_metadata2; using shared_token_metadata = locator::shared_token_metadata; using token_metadata_ptr = locator::token_metadata_ptr; - using token_metadata2_ptr = locator::token_metadata2_ptr; using mutable_token_metadata_ptr = locator::mutable_token_metadata_ptr; - using mutable_token_metadata2_ptr = locator::mutable_token_metadata2_ptr; using token_metadata_lock = locator::token_metadata_lock; using application_state = gms::application_state; using inet_address = gms::inet_address; @@ -214,24 +211,24 @@ private: // db::schema_tables::do_merge_schema. // // Note: must be called on shard 0. - future<> mutate_token_metadata(std::function (mutable_token_metadata2_ptr)> func, acquire_merge_lock aml = acquire_merge_lock::yes) noexcept; + future<> mutate_token_metadata(std::function (mutable_token_metadata_ptr)> func, acquire_merge_lock aml = acquire_merge_lock::yes) noexcept; // Update pending ranges locally and then replicate to all cores. // Should be serialized under token_metadata_lock. // Must be called on shard 0. 
- future<> update_topology_change_info(mutable_token_metadata2_ptr tmptr, sstring reason); + future<> update_topology_change_info(mutable_token_metadata_ptr tmptr, sstring reason); future<> update_topology_change_info(sstring reason, acquire_merge_lock aml = acquire_merge_lock::yes); future<> keyspace_changed(const sstring& ks_name); void register_metrics(); future<> snitch_reconfigured(); - future get_mutable_token_metadata_ptr() noexcept { - return _shared_token_metadata.get()->clone_async().then([] (token_metadata2 tm) { + future get_mutable_token_metadata_ptr() noexcept { + return _shared_token_metadata.get()->clone_async().then([] (token_metadata tm) { // bump the token_metadata ring_version // to invalidate cached token/replication mappings // when the modified token_metadata is committed. tm.invalidate_cached_rings(); - return make_ready_future(make_token_metadata2_ptr(std::move(tm))); + return make_ready_future(make_token_metadata_ptr(std::move(tm))); }); } @@ -258,11 +255,11 @@ public: return _erm_factory; } - token_metadata2_ptr get_token_metadata_ptr() const noexcept { + token_metadata_ptr get_token_metadata_ptr() const noexcept { return _shared_token_metadata.get(); } - const locator::token_metadata2& get_token_metadata() const noexcept { + const locator::token_metadata& get_token_metadata() const noexcept { return *_shared_token_metadata.get(); } @@ -328,7 +325,7 @@ private: public: - static std::unordered_set parse_node_list(sstring comma_separated_list, const locator::token_metadata2& tm); + static std::unordered_set parse_node_list(sstring comma_separated_list, const locator::token_metadata& tm); future<> check_for_endpoint_collision(std::unordered_set initial_contact_nodes, const std::unordered_map& loaded_peer_features); @@ -480,7 +477,7 @@ private: std::optional get_dc_rack_for(inet_address endpoint); private: // Should be serialized under token_metadata_lock. 
- future<> replicate_to_all_cores(mutable_token_metadata2_ptr tmptr) noexcept; + future<> replicate_to_all_cores(mutable_token_metadata_ptr tmptr) noexcept; sharded& _sys_ks; sharded& _sys_dist_ks; locator::snitch_signal_slot_t _snitch_reconfigure; diff --git a/service/tablet_allocator.cc b/service/tablet_allocator.cc index e66f9127c9..b29e825ede 100644 --- a/service/tablet_allocator.cc +++ b/service/tablet_allocator.cc @@ -199,7 +199,7 @@ class load_balancer { std::optional target_load_sketch; - future get_load_sketch(const token_metadata2_ptr& tm) { + future get_load_sketch(const token_metadata_ptr& tm) { if (!target_load_sketch) { target_load_sketch.emplace(tm); co_await target_load_sketch->populate(id); @@ -255,7 +255,7 @@ class load_balancer { const size_t max_write_streaming_load = 2; const size_t max_read_streaming_load = 4; - token_metadata2_ptr _tm; + token_metadata_ptr _tm; load_balancer_stats_manager& _stats; private: tablet_replica_set get_replicas_for_tablet_load(const tablet_info& ti, const tablet_transition_info* trinfo) const { @@ -290,7 +290,7 @@ private: } public: - load_balancer(token_metadata2_ptr tm, load_balancer_stats_manager& stats) + load_balancer(token_metadata_ptr tm, load_balancer_stats_manager& stats) : _tm(std::move(tm)) , _stats(stats) { } @@ -819,7 +819,7 @@ public: _stopped = true; } - future balance_tablets(token_metadata2_ptr tm) { + future balance_tablets(token_metadata_ptr tm) { load_balancer lb(tm, _load_balancer_stats); co_return co_await lb.make_plan(); } @@ -869,7 +869,7 @@ future<> tablet_allocator::stop() { return impl().stop(); } -future tablet_allocator::balance_tablets(locator::token_metadata2_ptr tm) { +future tablet_allocator::balance_tablets(locator::token_metadata_ptr tm) { return impl().balance_tablets(tm); } diff --git a/service/tablet_allocator.hh b/service/tablet_allocator.hh index 3b90e6d208..4402f40cb1 100644 --- a/service/tablet_allocator.hh +++ b/service/tablet_allocator.hh @@ -90,7 +90,7 @@ public: /// /// 
The algorithm takes care of limiting the streaming load on the system, also by taking active migrations into account. /// - future balance_tablets(locator::token_metadata2_ptr); + future balance_tablets(locator::token_metadata_ptr); /// Should be called when the node is no longer a leader. void on_leadership_lost(); diff --git a/test/boost/locator_topology_test.cc b/test/boost/locator_topology_test.cc index d6a2a9bc1f..65065087e4 100644 --- a/test/boost/locator_topology_test.cc +++ b/test/boost/locator_topology_test.cc @@ -267,7 +267,7 @@ SEASTAR_THREAD_TEST_CASE(test_load_sketch) { } }); - stm.mutate_token_metadata([&] (token_metadata2& tm) { + stm.mutate_token_metadata([&] (token_metadata& tm) { tm.update_host_id(host1, ip1); tm.update_host_id(host2, ip2); tm.update_host_id(host3, ip3); @@ -312,7 +312,7 @@ SEASTAR_THREAD_TEST_CASE(test_load_sketch) { std::vector node3_shards(node3_shard_count, 0); - stm.mutate_token_metadata([&] (token_metadata2& tm) { + stm.mutate_token_metadata([&] (token_metadata& tm) { tablet_metadata tab_meta; tablet_map tmap(4); diff --git a/test/boost/network_topology_strategy_test.cc b/test/boost/network_topology_strategy_test.cc index c93ce2d00c..128d1078ff 100644 --- a/test/boost/network_topology_strategy_test.cc +++ b/test/boost/network_topology_strategy_test.cc @@ -72,7 +72,7 @@ static void check_ranges_are_sorted(vnode_effective_replication_map_ptr erm, gms void strategy_sanity_check( replication_strategy_ptr ars_ptr, - const token_metadata2_ptr& tm, + const token_metadata_ptr& tm, const std::map& options) { const network_topology_strategy* nts_ptr = @@ -95,7 +95,7 @@ void strategy_sanity_check( void endpoints_check( replication_strategy_ptr ars_ptr, - const token_metadata2_ptr& tm, + const token_metadata_ptr& tm, const inet_address_vector_replica_set& endpoints, const locator::topology& topo) { @@ -156,7 +156,7 @@ auto d2t = [](double d) -> int64_t { void full_ring_check(const std::vector& ring_points, const std::map& options, 
replication_strategy_ptr ars_ptr, - locator::token_metadata2_ptr tmptr) { + locator::token_metadata_ptr tmptr) { auto& tm = *tmptr; const auto& topo = tm.get_topology(); strategy_sanity_check(ars_ptr, tmptr, options); @@ -190,7 +190,7 @@ void full_ring_check(const std::vector& ring_points, void full_ring_check(const tablet_map& tmap, const std::map& options, replication_strategy_ptr rs_ptr, - locator::token_metadata2_ptr tmptr) { + locator::token_metadata_ptr tmptr) { auto& tm = *tmptr; const auto& topo = tm.get_topology(); @@ -250,7 +250,7 @@ void simple_test() { }; // Initialize the token_metadata - stm.mutate_token_metadata([&] (token_metadata2& tm) -> future<> { + stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> { auto& topo = tm.get_topology(); for (const auto& [ring_point, endpoint, id] : ring_points) { std::unordered_set tokens; @@ -292,7 +292,7 @@ void simple_test() { // points will be taken from the cache when it shouldn't and the // corresponding check will fail. 
// - stm.mutate_token_metadata([] (token_metadata2& tm) { + stm.mutate_token_metadata([] (token_metadata& tm) { tm.invalidate_cached_rings(); return make_ready_future<>(); }).get(); @@ -357,7 +357,7 @@ void heavy_origin_test() { } } - stm.mutate_token_metadata([&] (token_metadata2& tm) -> future<> { + stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> { auto& topo = tm.get_topology(); for (const auto& [ring_point, endpoint, id] : ring_points) { topo.add_node(id, endpoint, make_endpoint_dc_rack(endpoint), locator::node::state::normal); @@ -413,7 +413,7 @@ SEASTAR_THREAD_TEST_CASE(NetworkTopologyStrategy_tablets_test) { }; // Initialize the token_metadata - stm.mutate_token_metadata([&] (token_metadata2& tm) -> future<> { + stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> { auto& topo = tm.get_topology(); for (const auto& [ring_point, endpoint, id] : ring_points) { std::unordered_set tokens; @@ -524,7 +524,7 @@ static bool has_sufficient_replicas( } static locator::host_id_set calculate_natural_endpoints( - const token& search_token, const token_metadata2& tm, + const token& search_token, const token_metadata& tm, const locator::topology& topo, const std::unordered_map& datacenters) { // @@ -650,7 +650,7 @@ static void test_equivalence(const shared_token_metadata& stm, const locator::to return std::make_pair(p.first, to_sstring(p.second)); }))); - const token_metadata2& tm = *stm.get(); + const token_metadata& tm = *stm.get(); for (size_t i = 0; i < 1000; ++i) { auto token = dht::token::get_random_token(); auto expected = calculate_natural_endpoints(token, tm, topo, datacenters); @@ -737,7 +737,7 @@ SEASTAR_THREAD_TEST_CASE(testCalculateEndpoints) { } } - stm.mutate_token_metadata([&] (token_metadata2& tm) -> future<> { + stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> { generate_topology(tm.get_topology(), datacenters, nodes); for (auto&& i : endpoint_tokens) { co_await tm.update_normal_tokens(std::move(i.second), i.first); 
@@ -835,7 +835,7 @@ SEASTAR_THREAD_TEST_CASE(test_topology_compare_endpoints) { semaphore sem(1); shared_token_metadata stm([&sem] () noexcept { return get_units(sem, 1); }, tm_cfg); - stm.mutate_token_metadata([&] (token_metadata2& tm) { + stm.mutate_token_metadata([&] (token_metadata& tm) { auto& topo = tm.get_topology(); generate_topology(topo, datacenters, nodes); @@ -882,7 +882,7 @@ SEASTAR_THREAD_TEST_CASE(test_topology_tracks_local_node) { BOOST_REQUIRE(stm.get()->get_topology().get_location() == ip1_dc_rack); - stm.mutate_token_metadata([&] (token_metadata2& tm) { + stm.mutate_token_metadata([&] (token_metadata& tm) { tm.update_host_id(host2, ip2); tm.update_host_id(host1, ip1); // this_node added last on purpose return make_ready_future<>(); @@ -905,7 +905,7 @@ SEASTAR_THREAD_TEST_CASE(test_topology_tracks_local_node) { // Removing local node - stm.mutate_token_metadata([&] (token_metadata2& tm) { + stm.mutate_token_metadata([&] (token_metadata& tm) { tm.remove_endpoint(host1); tm.update_host_id(host3, ip3); return make_ready_future<>(); @@ -918,7 +918,7 @@ SEASTAR_THREAD_TEST_CASE(test_topology_tracks_local_node) { // Removing node with no local node - stm.mutate_token_metadata([&] (token_metadata2& tm) { + stm.mutate_token_metadata([&] (token_metadata& tm) { tm.remove_endpoint(host2); return make_ready_future<>(); }).get(); @@ -930,7 +930,7 @@ SEASTAR_THREAD_TEST_CASE(test_topology_tracks_local_node) { // Repopulate after clear_gently() - stm.mutate_token_metadata([&] (token_metadata2& tm) -> future<> { + stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> { co_await tm.clear_gently(); tm.update_host_id(host2, ip2); tm.update_host_id(host1, ip1); // this_node added last on purpose @@ -953,7 +953,7 @@ SEASTAR_THREAD_TEST_CASE(test_topology_tracks_local_node) { // get_location() should pick up endpoint_dc_rack from node info - stm.mutate_token_metadata([&] (token_metadata2& tm) -> future<> { + stm.mutate_token_metadata([&] (token_metadata& tm) 
-> future<> { co_await tm.clear_gently(); tm.get_topology().add_or_update_endpoint(ip1, host1, ip1_dc_rack_v2, node::state::being_decommissioned); }).get(); diff --git a/test/boost/storage_proxy_test.cc b/test/boost/storage_proxy_test.cc index d0d2bb94ad..0006d7604c 100644 --- a/test/boost/storage_proxy_test.cc +++ b/test/boost/storage_proxy_test.cc @@ -41,7 +41,7 @@ SEASTAR_TEST_CASE(test_get_restricted_ranges) { std::vector ring = make_ring(s, 10); - auto check = [&s](locator::token_metadata2_ptr tmptr, dht::partition_range input, + auto check = [&s](locator::token_metadata_ptr tmptr, dht::partition_range input, dht::partition_range_vector expected) { query_ranges_to_vnodes_generator ranges_to_vnodes(locator::make_splitter(tmptr), s, {input}); auto actual = ranges_to_vnodes(1000); @@ -54,7 +54,7 @@ SEASTAR_TEST_CASE(test_get_restricted_ranges) { { // Ring with minimum token - auto tmptr = locator::make_token_metadata2_ptr(locator::token_metadata::config{}); + auto tmptr = locator::make_token_metadata_ptr(locator::token_metadata::config{}); const auto host_id = locator::host_id{utils::UUID(0, 1)}; tmptr->update_topology(host_id, locator::endpoint_dc_rack{"dc1", "rack1"}); tmptr->update_normal_tokens(std::unordered_set({dht::minimum_token()}), host_id).get(); @@ -69,7 +69,7 @@ SEASTAR_TEST_CASE(test_get_restricted_ranges) { } { - auto tmptr = locator::make_token_metadata2_ptr(locator::token_metadata::config{}); + auto tmptr = locator::make_token_metadata_ptr(locator::token_metadata::config{}); const auto id1 = locator::host_id{utils::UUID(0, 1)}; const auto id2 = locator::host_id{utils::UUID(0, 2)}; tmptr->update_topology(id1, locator::endpoint_dc_rack{"dc1", "rack1"}); diff --git a/test/boost/tablets_test.cc b/test/boost/tablets_test.cc index ba56ce3a68..6537e204dd 100644 --- a/test/boost/tablets_test.cc +++ b/test/boost/tablets_test.cc @@ -433,7 +433,7 @@ SEASTAR_TEST_CASE(test_sharder) { auto table1 = table_id(utils::UUID_gen::get_time_UUID()); - token_metadata2 
tokm(token_metadata::config{ .topo_cfg{ .this_host_id = h1 } }); + token_metadata tokm(token_metadata::config{ .topo_cfg{ .this_host_id = h1 } }); tokm.get_topology().add_or_update_endpoint(tokm.get_topology().my_address(), h1); std::vector tablet_ids; @@ -591,7 +591,7 @@ SEASTAR_THREAD_TEST_CASE(test_token_ownership_splitting) { // Reflects the plan in a given token metadata as if the migrations were fully executed. static -void apply_plan(token_metadata2& tm, const migration_plan& plan) { +void apply_plan(token_metadata& tm, const migration_plan& plan) { for (auto&& mig : plan.migrations()) { tablet_map& tmap = tm.tablets().get_tablet_map(mig.tablet.table); auto tinfo = tmap.get_tablet_info(mig.tablet.tablet); @@ -611,7 +611,7 @@ tablet_transition_info migration_to_transition_info(const tablet_migration_info& // Reflects the plan in a given token metadata as if the migrations were started but not yet executed. static -void apply_plan_as_in_progress(token_metadata2& tm, const migration_plan& plan) { +void apply_plan_as_in_progress(token_metadata& tm, const migration_plan& plan) { for (auto&& mig : plan.migrations()) { tablet_map& tmap = tm.tablets().get_tablet_map(mig.tablet.table); auto tinfo = tmap.get_tablet_info(mig.tablet.tablet); @@ -626,7 +626,7 @@ void rebalance_tablets(tablet_allocator& talloc, shared_token_metadata& stm) { if (plan.empty()) { break; } - stm.mutate_token_metadata([&] (token_metadata2& tm) { + stm.mutate_token_metadata([&] (token_metadata& tm) { apply_plan(tm, plan); return make_ready_future<>(); }).get(); @@ -640,7 +640,7 @@ void rebalance_tablets_as_in_progress(tablet_allocator& talloc, shared_token_met if (plan.empty()) { break; } - stm.mutate_token_metadata([&] (token_metadata2& tm) { + stm.mutate_token_metadata([&] (token_metadata& tm) { apply_plan_as_in_progress(tm, plan); return make_ready_future<>(); }).get(); @@ -650,7 +650,7 @@ void rebalance_tablets_as_in_progress(tablet_allocator& talloc, shared_token_met // Completes any in 
progress tablet migrations. static void execute_transitions(shared_token_metadata& stm) { - stm.mutate_token_metadata([&] (token_metadata2& tm) { + stm.mutate_token_metadata([&] (token_metadata& tm) { for (auto&& [tablet, tmap_] : tm.tablets().all_tables()) { auto& tmap = tmap_; for (auto&& [tablet, trinfo]: tmap.transitions()) { @@ -689,7 +689,7 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_with_empty_node) { } }); - stm.mutate_token_metadata([&] (token_metadata2& tm) { + stm.mutate_token_metadata([&] (token_metadata& tm) { tm.update_host_id(host1, ip1); tm.update_host_id(host2, ip2); tm.update_host_id(host3, ip3); @@ -783,7 +783,7 @@ SEASTAR_THREAD_TEST_CASE(test_decommission_rf_met) { } }); - stm.mutate_token_metadata([&](token_metadata2& tm) { + stm.mutate_token_metadata([&](token_metadata& tm) { const unsigned shard_count = 2; tm.update_host_id(host1, ip1); @@ -839,7 +839,7 @@ SEASTAR_THREAD_TEST_CASE(test_decommission_rf_met) { BOOST_REQUIRE(load.get_avg_shard_load(host3) == 0); } - stm.mutate_token_metadata([&](token_metadata2& tm) { + stm.mutate_token_metadata([&](token_metadata& tm) { tm.update_topology(host3, locator::endpoint_dc_rack::default_location, node::state::left); return make_ready_future<>(); }).get(); @@ -885,7 +885,7 @@ SEASTAR_THREAD_TEST_CASE(test_decommission_two_racks) { } }); - stm.mutate_token_metadata([&](token_metadata2& tm) { + stm.mutate_token_metadata([&](token_metadata& tm) { const unsigned shard_count = 1; tm.update_host_id(host1, ip1); @@ -986,7 +986,7 @@ SEASTAR_THREAD_TEST_CASE(test_decommission_rack_load_failure) { } }); - stm.mutate_token_metadata([&](token_metadata2& tm) { + stm.mutate_token_metadata([&](token_metadata& tm) { const unsigned shard_count = 1; tm.update_host_id(host1, ip1); @@ -1060,7 +1060,7 @@ SEASTAR_THREAD_TEST_CASE(test_decommission_rf_not_met) { } }); - stm.mutate_token_metadata([&](token_metadata2& tm) { + stm.mutate_token_metadata([&](token_metadata& tm) { const unsigned shard_count = 2; 
tm.update_host_id(host1, ip1); @@ -1117,7 +1117,7 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_works_with_in_progress_transitions) } }); - stm.mutate_token_metadata([&] (token_metadata2& tm) { + stm.mutate_token_metadata([&] (token_metadata& tm) { tm.update_host_id(host1, ip1); tm.update_host_id(host2, ip2); tm.update_host_id(host3, ip3); @@ -1186,7 +1186,7 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancer_shuffle_mode) { } }); - stm.mutate_token_metadata([&] (token_metadata2& tm) { + stm.mutate_token_metadata([&] (token_metadata& tm) { tm.update_host_id(host1, ip1); tm.update_host_id(host2, ip2); tm.update_host_id(host3, ip3); @@ -1249,7 +1249,7 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_with_two_empty_nodes) { } }); - stm.mutate_token_metadata([&] (token_metadata2& tm) { + stm.mutate_token_metadata([&] (token_metadata& tm) { tm.update_host_id(host1, ip1); tm.update_host_id(host2, ip2); tm.update_host_id(host3, ip3); @@ -1405,7 +1405,7 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_with_random_load) { }); size_t total_tablet_count = 0; - stm.mutate_token_metadata([&](token_metadata2& tm) { + stm.mutate_token_metadata([&](token_metadata& tm) { tablet_metadata tmeta; int i = 0; diff --git a/test/boost/token_metadata_test.cc b/test/boost/token_metadata_test.cc index 5f5454abf1..d7e497f4a2 100644 --- a/test/boost/token_metadata_test.cc +++ b/test/boost/token_metadata_test.cc @@ -28,8 +28,8 @@ namespace { }; } - mutable_token_metadata2_ptr create_token_metadata(host_id this_host_id) { - return make_lw_shared(token_metadata::config { + mutable_token_metadata_ptr create_token_metadata(host_id this_host_id) { + return make_lw_shared(token_metadata::config { topology::config { .this_host_id = this_host_id, .local_dc_rack = get_dc_rack(this_host_id) @@ -38,7 +38,7 @@ namespace { } template - mutable_vnode_erm_ptr create_erm(mutable_token_metadata2_ptr tmptr, replication_strategy_config_options opts = {}) { + mutable_vnode_erm_ptr create_erm(mutable_token_metadata_ptr 
tmptr, replication_strategy_config_options opts = {}) { dc_rack_fn get_dc_rack_fn = get_dc_rack; tmptr->update_topology_change_info(get_dc_rack_fn).get(); auto strategy = seastar::make_shared(std::move(opts)); diff --git a/test/lib/cql_test_env.cc b/test/lib/cql_test_env.cc index 13f3038796..fe6709eb4d 100644 --- a/test/lib/cql_test_env.cc +++ b/test/lib/cql_test_env.cc @@ -640,7 +640,7 @@ private: host_id = linfo.host_id; _sys_ks.local().save_local_info(std::move(linfo), _snitch.local()->get_location(), my_address, my_address).get(); } - locator::shared_token_metadata::mutate_on_all_shards(_token_metadata, [hostid = host_id, &cfg_in] (locator::token_metadata2& tm) { + locator::shared_token_metadata::mutate_on_all_shards(_token_metadata, [hostid = host_id, &cfg_in] (locator::token_metadata& tm) { auto& topo = tm.get_topology(); topo.set_host_id_cfg(hostid); topo.add_or_update_endpoint(cfg_in.broadcast_address, From 8c551f9104fbfc90fb6f99aafca63e9b7d8e713d Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Tue, 7 Nov 2023 17:39:06 +0400 Subject: [PATCH 47/51] dc_rack_fn: make it non-template --- locator/token_metadata.cc | 6 +++--- locator/token_metadata.hh | 2 +- locator/types.hh | 3 +-- service/storage_service.cc | 2 +- test/boost/token_metadata_test.cc | 2 +- 5 files changed, 7 insertions(+), 8 deletions(-) diff --git a/locator/token_metadata.cc b/locator/token_metadata.cc index 0b2517e83f..1e3ecda75f 100644 --- a/locator/token_metadata.cc +++ b/locator/token_metadata.cc @@ -241,7 +241,7 @@ public: static range interval_to_range(boost::icl::interval::interval_type i); public: - future<> update_topology_change_info(dc_rack_fn& get_dc_rack); + future<> update_topology_change_info(dc_rack_fn& get_dc_rack); const std::optional& get_topology_change_info() const { return _topology_change_info; } @@ -725,7 +725,7 @@ token_metadata_impl::interval_to_range(boost::icl::interval::interval_typ return range({{i.lower(), start_inclusive}}, {{i.upper(), end_inclusive}}); } 
-future<> token_metadata_impl::update_topology_change_info(dc_rack_fn& get_dc_rack) { +future<> token_metadata_impl::update_topology_change_info(dc_rack_fn& get_dc_rack) { if (_bootstrap_tokens.empty() && _leaving_endpoints.empty() && _replacing_endpoints.empty()) { co_await utils::clear_gently(_topology_change_info); _topology_change_info.reset(); @@ -1111,7 +1111,7 @@ token_metadata::interval_to_range(boost::icl::interval::interval_type i) } future<> -token_metadata::update_topology_change_info(dc_rack_fn& get_dc_rack) { +token_metadata::update_topology_change_info(dc_rack_fn& get_dc_rack) { return _impl->update_topology_change_info(get_dc_rack); } diff --git a/locator/token_metadata.hh b/locator/token_metadata.hh index e77df66a2d..f79d0ee8c5 100644 --- a/locator/token_metadata.hh +++ b/locator/token_metadata.hh @@ -256,7 +256,7 @@ public: static boost::icl::interval::interval_type range_to_interval(range r); static range interval_to_range(boost::icl::interval::interval_type i); - future<> update_topology_change_info(dc_rack_fn& get_dc_rack); + future<> update_topology_change_info(dc_rack_fn& get_dc_rack); const std::optional& get_topology_change_info() const; diff --git a/locator/types.hh b/locator/types.hh index 96b491dafc..3f2783f3fe 100644 --- a/locator/types.hh +++ b/locator/types.hh @@ -31,7 +31,6 @@ struct endpoint_dc_rack { bool operator==(const endpoint_dc_rack&) const = default; }; -template -using dc_rack_fn = seastar::noncopyable_function(NodeId)>; +using dc_rack_fn = seastar::noncopyable_function(host_id)>; } // namespace locator diff --git a/service/storage_service.cc b/service/storage_service.cc index d4d46f4aad..a53264c052 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -6253,7 +6253,7 @@ future<> storage_service::update_topology_change_info(mutable_token_metadata_ptr assert(this_shard_id() == 0); try { - locator::dc_rack_fn get_dc_rack_by_host_id([this, &tm = *tmptr] (locator::host_id host_id) -> std::optional { + 
locator::dc_rack_fn get_dc_rack_by_host_id([this, &tm = *tmptr] (locator::host_id host_id) -> std::optional { if (_raft_topology_change_enabled) { const auto server_id = raft::server_id(host_id.uuid()); const auto* node = _topology_state_machine._topology.find(server_id); diff --git a/test/boost/token_metadata_test.cc b/test/boost/token_metadata_test.cc index d7e497f4a2..94b8f5c1d4 100644 --- a/test/boost/token_metadata_test.cc +++ b/test/boost/token_metadata_test.cc @@ -39,7 +39,7 @@ namespace { template mutable_vnode_erm_ptr create_erm(mutable_token_metadata_ptr tmptr, replication_strategy_config_options opts = {}) { - dc_rack_fn get_dc_rack_fn = get_dc_rack; + dc_rack_fn get_dc_rack_fn = get_dc_rack; tmptr->update_topology_change_info(get_dc_rack_fn).get(); auto strategy = seastar::make_shared(std::move(opts)); return calculate_effective_replication_map(std::move(strategy), tmptr).get0(); From 3b59919a9c5fe23bd138bbb6cda5df4b5bcb377d Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Tue, 7 Nov 2023 19:14:11 +0400 Subject: [PATCH 48/51] topology: drop key_kind, host_id is now the primary key --- locator/token_metadata.cc | 4 +-- locator/topology.cc | 38 ++++++++--------------------- locator/topology.hh | 9 +------ test/boost/locator_topology_test.cc | 15 ++++++------ 4 files changed, 21 insertions(+), 45 deletions(-) diff --git a/locator/token_metadata.cc b/locator/token_metadata.cc index 1e3ecda75f..3447cb00d0 100644 --- a/locator/token_metadata.cc +++ b/locator/token_metadata.cc @@ -92,9 +92,9 @@ private: struct shallow_copy {}; public: token_metadata_impl(shallow_copy, const token_metadata_impl& o) noexcept - : _topology(topology::config{}, topology::key_kind::host_id) + : _topology(topology::config{}) {} - token_metadata_impl(token_metadata::config cfg) noexcept : _topology(std::move(cfg.topo_cfg), topology::key_kind::host_id) {}; + token_metadata_impl(token_metadata::config cfg) noexcept : _topology(std::move(cfg.topo_cfg)) {}; token_metadata_impl(const 
token_metadata_impl&) = delete; // it's too huge for direct copy, use clone_async() token_metadata_impl(token_metadata_impl&&) noexcept = default; const std::vector& sorted_tokens() const; diff --git a/locator/topology.cc b/locator/topology.cc index 0b10606414..ff237952a5 100644 --- a/locator/topology.cc +++ b/locator/topology.cc @@ -70,11 +70,10 @@ future<> topology::clear_gently() noexcept { co_await utils::clear_gently(_nodes); } -topology::topology(config cfg, key_kind k) +topology::topology(config cfg) : _shard(this_shard_id()) , _cfg(cfg) , _sort_by_proximity(!cfg.disable_proximity_sorting) - , _key_kind(k) { tlogger.trace("topology[{}]: constructing using config: endpoint={} dc={} rack={}", fmt::ptr(this), cfg.this_endpoint, cfg.local_dc_rack.dc, cfg.local_dc_rack.rack); @@ -93,7 +92,6 @@ topology::topology(topology&& o) noexcept , _dc_racks(std::move(o._dc_racks)) , _sort_by_proximity(o._sort_by_proximity) , _datacenters(std::move(o._datacenters)) - , _key_kind(o._key_kind) { assert(_shard == this_shard_id()); tlogger.trace("topology[{}]: move from [{}]", fmt::ptr(this), fmt::ptr(&o)); @@ -114,7 +112,7 @@ topology& topology::operator=(topology&& o) noexcept { } future topology::clone_gently() const { - topology ret(_cfg, _key_kind); + topology ret(_cfg); tlogger.debug("topology[{}]: clone_gently to {} from shard {}", fmt::ptr(this), fmt::ptr(&ret), _shard); for (const auto& nptr : _nodes) { if (nptr) { @@ -452,30 +450,14 @@ const node* topology::add_or_update_endpoint(std::optional opt_ep, current_backtrace()); } - const node* n; - switch (_key_kind) { - case topology::key_kind::host_id: - if (!opt_id) { - on_internal_error(tlogger, format("topology: host_id is not set, ep={}", opt_ep)); - } - n = find_node(*opt_id); - if (n) { - return update_node(make_mutable(n), std::nullopt, opt_ep, std::move(opt_dr), std::move(opt_st), std::move(shard_count)); - } else if (opt_ep && (n = find_node(*opt_ep))) { - return update_node(make_mutable(n), opt_id, std::nullopt, 
std::move(opt_dr), std::move(opt_st), std::move(shard_count)); - } - break; - case topology::key_kind::inet_address: - if (!opt_ep) { - on_internal_error(tlogger, format("topology: endpoint is not set, id={}", opt_id)); - } - n = find_node(*opt_ep); - if (n) { - return update_node(make_mutable(n), opt_id, std::nullopt, std::move(opt_dr), std::move(opt_st), std::move(shard_count)); - } else if (opt_id && (n = find_node(*opt_id))) { - return update_node(make_mutable(n), std::nullopt, opt_ep, std::move(opt_dr), std::move(opt_st), std::move(shard_count)); - } - break; + if (!opt_id) { + on_internal_error(tlogger, format("topology: host_id is not set, ep={}", opt_ep)); + } + const auto* n = find_node(*opt_id); + if (n) { + return update_node(make_mutable(n), std::nullopt, opt_ep, std::move(opt_dr), std::move(opt_st), std::move(shard_count)); + } else if (opt_ep && (n = find_node(*opt_ep))) { + return update_node(make_mutable(n), opt_id, std::nullopt, std::move(opt_dr), std::move(opt_st), std::move(shard_count)); } return add_node(opt_id.value_or(host_id::create_null_id()), diff --git a/locator/topology.hh b/locator/topology.hh index d9adaf69d7..a73e665ca2 100644 --- a/locator/topology.hh +++ b/locator/topology.hh @@ -159,11 +159,6 @@ private: class topology { public: - enum class key_kind { - inet_address, - host_id, - }; - struct config { inet_address this_endpoint; inet_address this_cql_address; // corresponds to broadcast_rpc_address @@ -173,7 +168,7 @@ public: bool operator==(const config&) const = default; }; - topology(config cfg, key_kind k); + topology(config cfg); topology(topology&&) noexcept; topology& operator=(topology&&) noexcept; @@ -416,8 +411,6 @@ private: // pre-calculated std::unordered_set _datacenters; - key_kind _key_kind; - void calculate_datacenters(); const std::unordered_map& get_nodes_by_endpoint() const noexcept { diff --git a/test/boost/locator_topology_test.cc b/test/boost/locator_topology_test.cc index 65065087e4..b6d704278b 100644 --- 
a/test/boost/locator_topology_test.cc +++ b/test/boost/locator_topology_test.cc @@ -36,7 +36,7 @@ SEASTAR_THREAD_TEST_CASE(test_add_node) { .local_dc_rack = endpoint_dc_rack::default_location, }; - auto topo = topology(cfg, topology::key_kind::inet_address); + auto topo = topology(cfg); set_abort_on_internal_error(false); auto reset_on_internal_abort = seastar::defer([] { @@ -73,7 +73,7 @@ SEASTAR_THREAD_TEST_CASE(test_moving) { .local_dc_rack = endpoint_dc_rack::default_location, }; - auto topo = topology(cfg, topology::key_kind::inet_address); + auto topo = topology(cfg); topo.add_node(id1, ep1, endpoint_dc_rack::default_location, node::state::normal); @@ -99,22 +99,23 @@ SEASTAR_THREAD_TEST_CASE(test_update_node) { topology::config cfg = { .this_endpoint = ep1, + .this_host_id = id1, .local_dc_rack = endpoint_dc_rack::default_location, }; - auto topo = topology(cfg, topology::key_kind::inet_address); + auto topo = topology(cfg); set_abort_on_internal_error(false); auto reset_on_internal_abort = seastar::defer([] { set_abort_on_internal_error(true); }); - topo.add_or_update_endpoint(ep1, endpoint_dc_rack::default_location, node::state::normal); + topo.add_or_update_endpoint(std::nullopt, id1, endpoint_dc_rack::default_location, node::state::normal); auto node = topo.this_node(); auto mutable_node = const_cast(node); - node = topo.update_node(mutable_node, id1, std::nullopt, std::nullopt, std::nullopt); + node = topo.update_node(mutable_node, std::nullopt, ep1, std::nullopt, std::nullopt); BOOST_REQUIRE_EQUAL(topo.find_node(id1), node); mutable_node = const_cast(node); @@ -182,7 +183,7 @@ SEASTAR_THREAD_TEST_CASE(test_add_or_update_by_host_id) { // We need to make the second node 'being_decommissioned', so that // it gets removed from ip index and we don't get the non-unique IP error. 
- auto topo = topology({}, topology::key_kind::host_id); + auto topo = topology({}); //auto topo = topology({}); topo.add_node(id1, gms::inet_address{}, endpoint_dc_rack::default_location, node::state::normal); topo.add_node(id2, ep1, endpoint_dc_rack::default_location, node::state::being_decommissioned); @@ -226,7 +227,7 @@ SEASTAR_THREAD_TEST_CASE(test_remove_endpoint) { .local_dc_rack = dc_rack1 }; - auto topo = topology(cfg, topology::key_kind::inet_address); + auto topo = topology(cfg); topo.add_node(id1, ep1, dc_rack1, node::state::normal); topo.add_node(id2, ep2, dc_rack2, node::state::normal); From 11a4908683d17338a8b30ce119b06be7b2f835b9 Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Tue, 5 Dec 2023 21:06:26 +0400 Subject: [PATCH 49/51] token_metadata: add_replacing_endpoint: forbid replacing node with itself This used to work before in replace-with-same-ip scenario, but with host_id-s it's no longer relevant. base_token_metadata has been removed from topology_change_info because the conditions needed for its creation are no longer met. --- locator/abstract_replication_strategy.cc | 13 ++++------ locator/token_metadata.cc | 31 +++--------------------- locator/token_metadata.hh | 2 -- 3 files changed, 9 insertions(+), 37 deletions(-) diff --git a/locator/abstract_replication_strategy.cc b/locator/abstract_replication_strategy.cc index a0ff02d898..aabb7fe128 100644 --- a/locator/abstract_replication_strategy.cc +++ b/locator/abstract_replication_strategy.cc @@ -381,16 +381,13 @@ future calculate_effective_replicat replication_map.reserve(depend_on_token ? sorted_tokens.size() : 1); if (const auto& topology_changes = tmptr->get_topology_change_info(); topology_changes) { const auto& all_tokens = topology_changes->all_tokens; - const auto& base_token_metadata = topology_changes->base_token_metadata - ? 
topology_changes->base_token_metadata - : tmptr; const auto& current_tokens = tmptr->get_token_to_endpoint(); for (size_t i = 0, size = all_tokens.size(); i < size; ++i) { co_await coroutine::maybe_yield(); const auto token = all_tokens[i]; - auto current_endpoints = co_await rs->calculate_natural_endpoints(token, *base_token_metadata); + auto current_endpoints = co_await rs->calculate_natural_endpoints(token, *tmptr); auto target_endpoints = co_await rs->calculate_natural_endpoints(token, *topology_changes->target_token_metadata); auto add_mapping = [&](ring_mapping& target, std::unordered_set&& endpoints) { @@ -421,21 +418,21 @@ future calculate_effective_replicat } } if (!endpoints_diff.empty()) { - add_mapping(pending_endpoints, resolve_endpoints(endpoints_diff, *base_token_metadata).extract_set()); + add_mapping(pending_endpoints, resolve_endpoints(endpoints_diff, *tmptr).extract_set()); } } // in order not to waste memory, we update read_endpoints only if the // new endpoints differs from the old one if (topology_changes->read_new && target_endpoints.get_vector() != current_endpoints.get_vector()) { - add_mapping(read_endpoints, resolve_endpoints(target_endpoints, *base_token_metadata).extract_set()); + add_mapping(read_endpoints, resolve_endpoints(target_endpoints, *tmptr).extract_set()); } if (!depend_on_token) { - replication_map.emplace(default_replication_map_key, resolve_endpoints(current_endpoints, *base_token_metadata).extract_vector()); + replication_map.emplace(default_replication_map_key, resolve_endpoints(current_endpoints, *tmptr).extract_vector()); break; } else if (current_tokens.contains(token)) { - replication_map.emplace(token, resolve_endpoints(current_endpoints, *base_token_metadata).extract_vector()); + replication_map.emplace(token, resolve_endpoints(current_endpoints, *tmptr).extract_vector()); } } } else if (depend_on_token) { diff --git a/locator/token_metadata.cc b/locator/token_metadata.cc index 3447cb00d0..1d931ec440 100644 --- 
a/locator/token_metadata.cc +++ b/locator/token_metadata.cc @@ -732,8 +732,6 @@ future<> token_metadata_impl::update_topology_change_info(dc_rack_fn& get_dc_rac co_return; } - // true if there is a node replaced with the same IP - bool replace_with_same_endpoint = false; // target_token_metadata incorporates all the changes from leaving, bootstrapping and replacing auto target_token_metadata = co_await clone_only_token_map(false); { @@ -748,11 +746,7 @@ future<> token_metadata_impl::update_topology_change_info(dc_rack_fn& get_dc_rac new_normal_tokens[it->second].insert(token); } for (const auto& [replace_from, replace_to]: _replacing_endpoints) { - if (replace_from == replace_to) { - replace_with_same_endpoint = true; - } else { - target_token_metadata->remove_endpoint(replace_from); - } + target_token_metadata->remove_endpoint(replace_from); } } for (const auto& [token, inet_address]: _bootstrap_tokens) { @@ -770,22 +764,6 @@ future<> token_metadata_impl::update_topology_change_info(dc_rack_fn& get_dc_rac target_token_metadata->sort_tokens(); } - // We require a distinct token_metadata instance when replace_from equals replace_to, - // as it ensures the node is included in pending_ranges. - // Otherwise, the node would be excluded from both pending_ranges and - // get_natural_endpoints_without_node_being_replaced, - // causing the coordinator to overlook it entirely. 
- std::unique_ptr base_token_metadata; - if (replace_with_same_endpoint) { - base_token_metadata = co_await clone_only_token_map(false); - for (const auto& [replace_from, replace_to]: _replacing_endpoints) { - if (replace_from == replace_to) { - base_token_metadata->remove_endpoint(replace_from); - } - } - base_token_metadata->sort_tokens(); - } - // merge tokens from token_to_endpoint and bootstrap_tokens, // preserving tokens of leaving endpoints auto all_tokens = std::vector(); @@ -799,7 +777,6 @@ future<> token_metadata_impl::update_topology_change_info(dc_rack_fn& get_dc_rac auto prev_value = std::move(_topology_change_info); _topology_change_info.emplace(make_lw_shared(std::move(target_token_metadata)), - base_token_metadata ? make_lw_shared(std::move(base_token_metadata)): nullptr, std::move(all_tokens), _read_new); co_await utils::clear_gently(prev_value); @@ -827,6 +804,9 @@ void token_metadata_impl::del_leaving_endpoint(host_id endpoint) { } void token_metadata_impl::add_replacing_endpoint(host_id existing_node, host_id replacing_node) { + if (existing_node == replacing_node) { + on_internal_error(tlogger, format("Can't replace node {} with itself")); + } tlogger.info("Added node {} as pending replacing endpoint which replaces existing node {}", replacing_node, existing_node); _replacing_endpoints[existing_node] = replacing_node; @@ -841,11 +821,9 @@ void token_metadata_impl::del_replacing_endpoint(host_id existing_node) { } topology_change_info::topology_change_info(lw_shared_ptr target_token_metadata_, - lw_shared_ptr base_token_metadata_, std::vector all_tokens_, token_metadata::read_new_t read_new_) : target_token_metadata(std::move(target_token_metadata_)) - , base_token_metadata(std::move(base_token_metadata_)) , all_tokens(std::move(all_tokens_)) , read_new(read_new_) { @@ -853,7 +831,6 @@ topology_change_info::topology_change_info(lw_shared_ptr target_ future<> topology_change_info::clear_gently() { co_await 
utils::clear_gently(target_token_metadata); - co_await utils::clear_gently(base_token_metadata); co_await utils::clear_gently(all_tokens); } diff --git a/locator/token_metadata.hh b/locator/token_metadata.hh index f79d0ee8c5..b798b47ab0 100644 --- a/locator/token_metadata.hh +++ b/locator/token_metadata.hh @@ -292,12 +292,10 @@ private: struct topology_change_info { lw_shared_ptr target_token_metadata; - lw_shared_ptr base_token_metadata; std::vector all_tokens; token_metadata::read_new_t read_new; topology_change_info(lw_shared_ptr target_token_metadata_, - lw_shared_ptr base_token_metadata_, std::vector all_tokens_, token_metadata::read_new_t read_new_); future<> clear_gently(); From fbf507b1ba8626a136c3425134ddd01d71446ba3 Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Thu, 30 Nov 2023 11:21:53 +0400 Subject: [PATCH 50/51] token_metadata: topology: cleanup add_or_update_endpoint Make host_id parameter non-optional and move it to the beginning of the arguments list. Delete unused overloads of add_or_update_endpoint. Delete unused overload of token_metadata::update_topology with inet_address argument. 
--- locator/token_metadata.cc | 10 +++------- locator/topology.cc | 15 ++++++--------- locator/topology.hh | 14 +++----------- main.cc | 2 +- test/boost/locator_topology_test.cc | 4 ++-- test/boost/network_topology_strategy_test.cc | 2 +- test/boost/tablets_test.cc | 2 +- test/lib/cql_test_env.cc | 10 +++++----- 8 files changed, 22 insertions(+), 37 deletions(-) diff --git a/locator/token_metadata.cc b/locator/token_metadata.cc index 1d931ec440..67e1302e0a 100644 --- a/locator/token_metadata.cc +++ b/locator/token_metadata.cc @@ -115,12 +115,8 @@ public: return _bootstrap_tokens; } - void update_topology(inet_address ep, std::optional opt_dr, std::optional opt_st, std::optional shard_count = std::nullopt) { - _topology.add_or_update_endpoint(ep, std::nullopt, std::move(opt_dr), std::move(opt_st), std::move(shard_count)); - } - - void update_topology(host_id ep, std::optional opt_dr, std::optional opt_st, std::optional shard_count = std::nullopt) { - _topology.add_or_update_endpoint(std::nullopt, ep, std::move(opt_dr), std::move(opt_st), std::move(shard_count)); + void update_topology(host_id id, std::optional opt_dr, std::optional opt_st, std::optional shard_count = std::nullopt) { + _topology.add_or_update_endpoint(id, std::nullopt, std::move(opt_dr), std::move(opt_st), std::move(shard_count)); } /** @@ -520,7 +516,7 @@ void token_metadata_impl::debug_show() const { } void token_metadata_impl::update_host_id(const host_id& host_id, inet_address endpoint) { - _topology.add_or_update_endpoint(endpoint, host_id); + _topology.add_or_update_endpoint(host_id, endpoint); } host_id token_metadata_impl::get_host_id(inet_address endpoint) const { diff --git a/locator/topology.cc b/locator/topology.cc index ff237952a5..d8900cf3fb 100644 --- a/locator/topology.cc +++ b/locator/topology.cc @@ -442,25 +442,22 @@ const node* topology::find_node(node::idx_type idx) const noexcept { return _nodes.at(idx).get(); } -const node* topology::add_or_update_endpoint(std::optional opt_ep, 
std::optional opt_id, std::optional opt_dr, std::optional opt_st, std::optional shard_count) +const node* topology::add_or_update_endpoint(host_id id, std::optional opt_ep, std::optional opt_dr, std::optional opt_st, std::optional shard_count) { if (tlogger.is_enabled(log_level::trace)) { - tlogger.trace("topology[{}]: add_or_update_endpoint: ep={} host_id={} dc={} rack={} state={} shards={}, at {}", fmt::ptr(this), - opt_ep, opt_id.value_or(host_id::create_null_id()), opt_dr.value_or(endpoint_dc_rack{}).dc, opt_dr.value_or(endpoint_dc_rack{}).rack, opt_st.value_or(node::state::none), shard_count, + tlogger.trace("topology[{}]: add_or_update_endpoint: host_id={} ep={} dc={} rack={} state={} shards={}, at {}", fmt::ptr(this), + id, opt_ep, opt_dr.value_or(endpoint_dc_rack{}).dc, opt_dr.value_or(endpoint_dc_rack{}).rack, opt_st.value_or(node::state::none), shard_count, current_backtrace()); } - if (!opt_id) { - on_internal_error(tlogger, format("topology: host_id is not set, ep={}", opt_ep)); - } - const auto* n = find_node(*opt_id); + const auto* n = find_node(id); if (n) { return update_node(make_mutable(n), std::nullopt, opt_ep, std::move(opt_dr), std::move(opt_st), std::move(shard_count)); } else if (opt_ep && (n = find_node(*opt_ep))) { - return update_node(make_mutable(n), opt_id, std::nullopt, std::move(opt_dr), std::move(opt_st), std::move(shard_count)); + return update_node(make_mutable(n), id, std::nullopt, std::move(opt_dr), std::move(opt_st), std::move(shard_count)); } - return add_node(opt_id.value_or(host_id::create_null_id()), + return add_node(id, opt_ep.value_or(inet_address{}), opt_dr.value_or(endpoint_dc_rack::default_location), opt_st.value_or(node::state::normal), diff --git a/locator/topology.hh b/locator/topology.hh index a73e665ca2..cb583b2704 100644 --- a/locator/topology.hh +++ b/locator/topology.hh @@ -234,19 +234,11 @@ public: * * Adds or updates a node with given endpoint */ - const node* add_or_update_endpoint(std::optional ep, 
std::optional opt_id, - std::optional opt_dr, - std::optional opt_st, + const node* add_or_update_endpoint(host_id id, std::optional opt_ep, + std::optional opt_dr = std::nullopt, + std::optional opt_st = std::nullopt, std::optional shard_count = std::nullopt); - // Legacy entry point from token_metadata::update_topology - const node* add_or_update_endpoint(inet_address ep, endpoint_dc_rack dr, std::optional opt_st) { - return add_or_update_endpoint(ep, std::nullopt, std::move(dr), std::move(opt_st), std::nullopt); - } - const node* add_or_update_endpoint(inet_address ep, host_id id) { - return add_or_update_endpoint(ep, id, std::nullopt, std::nullopt, std::nullopt); - } - /** * Removes current DC/rack assignment for ep * Returns true if the node was found and removed. diff --git a/main.cc b/main.cc index 1a01486031..adac226708 100644 --- a/main.cc +++ b/main.cc @@ -1211,7 +1211,7 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl // Raft topology discard the endpoint-to-id map, so the local id can // still be found in the config. 
tm.get_topology().set_host_id_cfg(host_id); - tm.get_topology().add_or_update_endpoint(endpoint, host_id); + tm.get_topology().add_or_update_endpoint(host_id, endpoint); return make_ready_future<>(); }).get(); diff --git a/test/boost/locator_topology_test.cc b/test/boost/locator_topology_test.cc index b6d704278b..8400128dd9 100644 --- a/test/boost/locator_topology_test.cc +++ b/test/boost/locator_topology_test.cc @@ -110,7 +110,7 @@ SEASTAR_THREAD_TEST_CASE(test_update_node) { set_abort_on_internal_error(true); }); - topo.add_or_update_endpoint(std::nullopt, id1, endpoint_dc_rack::default_location, node::state::normal); + topo.add_or_update_endpoint(id1, std::nullopt, endpoint_dc_rack::default_location, node::state::normal); auto node = topo.this_node(); auto mutable_node = const_cast(node); @@ -188,7 +188,7 @@ SEASTAR_THREAD_TEST_CASE(test_add_or_update_by_host_id) { topo.add_node(id1, gms::inet_address{}, endpoint_dc_rack::default_location, node::state::normal); topo.add_node(id2, ep1, endpoint_dc_rack::default_location, node::state::being_decommissioned); - topo.add_or_update_endpoint(ep1, id1, std::nullopt, node::state::bootstrapping); + topo.add_or_update_endpoint(id1, ep1, std::nullopt, node::state::bootstrapping); auto* n = topo.find_node(id1); BOOST_REQUIRE_EQUAL(n->get_state(), node::state::bootstrapping); diff --git a/test/boost/network_topology_strategy_test.cc b/test/boost/network_topology_strategy_test.cc index 128d1078ff..159fc3bcb2 100644 --- a/test/boost/network_topology_strategy_test.cc +++ b/test/boost/network_topology_strategy_test.cc @@ -955,7 +955,7 @@ SEASTAR_THREAD_TEST_CASE(test_topology_tracks_local_node) { stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> { co_await tm.clear_gently(); - tm.get_topology().add_or_update_endpoint(ip1, host1, ip1_dc_rack_v2, node::state::being_decommissioned); + tm.get_topology().add_or_update_endpoint(host1, ip1, ip1_dc_rack_v2, node::state::being_decommissioned); }).get(); n1 = 
stm.get()->get_topology().find_node(host1); diff --git a/test/boost/tablets_test.cc b/test/boost/tablets_test.cc index 6537e204dd..8755f1647f 100644 --- a/test/boost/tablets_test.cc +++ b/test/boost/tablets_test.cc @@ -434,7 +434,7 @@ SEASTAR_TEST_CASE(test_sharder) { auto table1 = table_id(utils::UUID_gen::get_time_UUID()); token_metadata tokm(token_metadata::config{ .topo_cfg{ .this_host_id = h1 } }); - tokm.get_topology().add_or_update_endpoint(tokm.get_topology().my_address(), h1); + tokm.get_topology().add_or_update_endpoint(h1, tokm.get_topology().my_address()); std::vector tablet_ids; { diff --git a/test/lib/cql_test_env.cc b/test/lib/cql_test_env.cc index fe6709eb4d..8ead224c20 100644 --- a/test/lib/cql_test_env.cc +++ b/test/lib/cql_test_env.cc @@ -643,11 +643,11 @@ private: locator::shared_token_metadata::mutate_on_all_shards(_token_metadata, [hostid = host_id, &cfg_in] (locator::token_metadata& tm) { auto& topo = tm.get_topology(); topo.set_host_id_cfg(hostid); - topo.add_or_update_endpoint(cfg_in.broadcast_address, - hostid, - std::nullopt, - locator::node::state::normal, - smp::count); + topo.add_or_update_endpoint(hostid, + cfg_in.broadcast_address, + std::nullopt, + locator::node::state::normal, + smp::count); return make_ready_future<>(); }).get(); From 9d93a518ac3c0d57aaa8eede33b0f3652e3ca690 Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Wed, 6 Dec 2023 20:23:28 +0400 Subject: [PATCH 51/51] topology: remove_endpoint: remove inet_address overload The overload was used only in tests. 
--- locator/topology.cc | 11 ----------- locator/topology.hh | 6 ------ test/boost/locator_topology_test.cc | 4 ++-- 3 files changed, 2 insertions(+), 19 deletions(-) diff --git a/locator/topology.cc b/locator/topology.cc index d8900cf3fb..4f8c75cb97 100644 --- a/locator/topology.cc +++ b/locator/topology.cc @@ -464,17 +464,6 @@ const node* topology::add_or_update_endpoint(host_id id, std::optional opt_st = std::nullopt, std::optional shard_count = std::nullopt); - /** - * Removes current DC/rack assignment for ep - * Returns true if the node was found and removed. - */ - bool remove_endpoint(inet_address ep); - bool remove_endpoint(locator::host_id ep); /** diff --git a/test/boost/locator_topology_test.cc b/test/boost/locator_topology_test.cc index 8400128dd9..427e6d26fd 100644 --- a/test/boost/locator_topology_test.cc +++ b/test/boost/locator_topology_test.cc @@ -236,12 +236,12 @@ SEASTAR_THREAD_TEST_CASE(test_remove_endpoint) { BOOST_REQUIRE_EQUAL(topo.get_datacenter_racks(), (dc_racks_t{{"dc1", {{"rack1", {ep1}}, {"rack2", {ep2}}}}})); BOOST_REQUIRE_EQUAL(topo.get_datacenters(), (dcs_t{"dc1"})); - topo.remove_endpoint(ep2); + topo.remove_endpoint(id2); BOOST_REQUIRE_EQUAL(topo.get_datacenter_endpoints(), (dc_endpoints_t{{"dc1", {ep1}}})); BOOST_REQUIRE_EQUAL(topo.get_datacenter_racks(), (dc_racks_t{{"dc1", {{"rack1", {ep1}}}}})); BOOST_REQUIRE_EQUAL(topo.get_datacenters(), (dcs_t{"dc1"})); - topo.remove_endpoint(ep1); + topo.remove_endpoint(id1); BOOST_REQUIRE_EQUAL(topo.get_datacenter_endpoints(), (dc_endpoints_t{})); BOOST_REQUIRE_EQUAL(topo.get_datacenter_racks(), (dc_racks_t{})); BOOST_REQUIRE_EQUAL(topo.get_datacenters(), (dcs_t{}));