From 4ea67940cb7b95d20c2da1feffd2a4b98763c5d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Chojnowski?= Date: Sat, 4 Feb 2023 19:10:45 +0100 Subject: [PATCH] locator: token_metadata: get rid of a quadratic behaviour in get_address_ranges() Some callees of update_pending_ranges use the variant of get_address_ranges() which builds a hashmap of all pairs. For everywhere_topology, the size of this map is quadratic in the number of endpoints, making it big enough to cause contiguous allocations of tens of MiB for clusters of realistic size, potentially causing trouble for the allocator (as seen e.g. in #12724). This deserves a correction. This patch removes the quadratic variant of get_address_ranges() and replaces its uses with its linear counterpart. Refs #10337 Refs #10817 Refs #10836 Refs #10837 Fixes #12724 (cherry picked from commit 9e57b21e0c370be209016bc5aa041d5a557842cf) --- locator/abstract_replication_strategy.cc | 16 ---------------- locator/abstract_replication_strategy.hh | 1 - locator/token_metadata.cc | 15 ++++++--------- 3 files changed, 6 insertions(+), 26 deletions(-) diff --git a/locator/abstract_replication_strategy.cc b/locator/abstract_replication_strategy.cc index 5c72befb7a..d96ef93f73 100644 --- a/locator/abstract_replication_strategy.cc +++ b/locator/abstract_replication_strategy.cc @@ -210,22 +210,6 @@ effective_replication_map::get_primary_ranges_within_dc(inet_address ep) const { }); } -future> -abstract_replication_strategy::get_address_ranges(const token_metadata& tm) const { - std::unordered_multimap ret; - for (auto& t : tm.sorted_tokens()) { - dht::token_range_vector r = tm.get_primary_ranges_for(t); - auto eps = co_await calculate_natural_endpoints(t, tm); - rslogger.debug("token={}, primary_range={}, address={}", t, r, eps); - for (auto ep : eps) { - for (auto&& rng : r) { - ret.emplace(ep, rng); - } - } - } - co_return ret; -} - future> abstract_replication_strategy::get_address_ranges(const token_metadata& tm, inet_address endpoint) const { std::unordered_multimap ret; diff --git a/locator/abstract_replication_strategy.hh b/locator/abstract_replication_strategy.hh index 5a7e7bce3f..6cf1c73f10 100644 --- a/locator/abstract_replication_strategy.hh +++ b/locator/abstract_replication_strategy.hh @@ -118,7 +118,6 @@ public: future get_ranges(inet_address ep, token_metadata_ptr tmptr) const; public: - future> get_address_ranges(const token_metadata& tm) const; future> get_address_ranges(const token_metadata& tm, inet_address endpoint) const; // Caller must ensure that token_metadata will not change throughout the call. diff --git a/locator/token_metadata.cc b/locator/token_metadata.cc index 7f24815c93..661002a631 100644 --- a/locator/token_metadata.cc +++ b/locator/token_metadata.cc @@ -779,13 +779,12 @@ void token_metadata_impl::calculate_pending_ranges_for_leaving( if (_leaving_endpoints.empty()) { return; } - std::unordered_multimap address_ranges = strategy.get_address_ranges(unpimplified_this).get0(); // get all ranges that will be affected by leaving nodes std::unordered_set> affected_ranges; for (auto endpoint : _leaving_endpoints) { - auto r = address_ranges.equal_range(endpoint); - for (auto x = r.first; x != r.second; x++) { - affected_ranges.emplace(x->second); + auto r = strategy.get_address_ranges(unpimplified_this, endpoint).get0(); + for (const auto& x : r) { + affected_ranges.emplace(x.second); } } // for each of those ranges, find what new nodes will be responsible for the range when @@ -816,16 +815,14 @@ void token_metadata_impl::calculate_pending_ranges_for_replacing( if (_replacing_endpoints.empty()) { return; } - auto address_ranges = strategy.get_address_ranges(unpimplified_this).get0(); for (const auto& node : _replacing_endpoints) { auto existing_node = node.first; auto replacing_node = node.second; + auto address_ranges = strategy.get_address_ranges(unpimplified_this, existing_node).get0(); for (const auto& x : address_ranges) { seastar::thread::maybe_yield(); - if (x.first == existing_node) { - tlogger.debug("Node {} replaces {} for range {}", replacing_node, existing_node, x.second); - new_pending_ranges.emplace(x.second, replacing_node); - } + tlogger.debug("Node {} replaces {} for range {}", replacing_node, existing_node, x.second); + new_pending_ranges.emplace(x.second, replacing_node); } } }