From 0fe8bdd0db170f879da918b67bb38f2dc6ca5ba5 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Mon, 16 Dec 2024 14:39:02 +0200 Subject: [PATCH] locator/topology: sort_by_proximity: calculate distance only once And use a temporary vector to use the precalculated distances. A later patch will add some randomization to shuffle nodes at the same distance from the reference node. This improves the function performance by 50% for 3 replicas, from 77.4 ns to 39.2 ns, larger replica sets show greater improvement (over 4X for 15 nodes): Before: test iterations median mad min max allocs tasks inst cycles sort_by_proximity_topology.perf_sort_by_proximity 12808773 77.368ns 0.062ns 77.300ns 77.873ns 0.000 0.000 1194.2 231.6 After: sort_by_proximity_topology.perf_sort_by_proximity 25541973 39.225ns 0.114ns 38.966ns 39.339ns 0.000 0.000 588.5 116.6 Signed-off-by: Benny Halevy --- locator/topology.cc | 28 +++++++++----------- locator/topology.hh | 21 +++++++-------- test/boost/network_topology_strategy_test.cc | 12 +++++++-- 3 files changed, 33 insertions(+), 28 deletions(-) diff --git a/locator/topology.cc b/locator/topology.cc index a46b843f31..afe0a81bb0 100644 --- a/locator/topology.cc +++ b/locator/topology.cc @@ -573,16 +573,20 @@ void topology::sort_by_proximity(locator::host_id address, host_id_vector_replic } void topology::do_sort_by_proximity(locator::host_id address, host_id_vector_replica_set& addresses) const { - std::sort(addresses.begin(), addresses.end(), [this, &address](const locator::host_id& a1, const locator::host_id& a2) { - return compare_endpoints(address, a1, a2) < 0; - }); + const auto& loc = get_location(address); + struct info { + locator::host_id id; + int distance; + }; + auto host_infos = addresses | std::views::transform([&] (locator::host_id id) { + const auto& loc1 = get_location(id); + return info{ id, distance(address, loc, id, loc1) }; + }) | std::ranges::to>(); + std::ranges::sort(host_infos, std::ranges::less{}, std::mem_fn(&info::distance)); + std::ranges::copy(host_infos | std::ranges::views::transform(std::mem_fn(&info::id)), addresses.begin()); } -std::weak_ordering topology::compare_endpoints(const locator::host_id& address, const locator::host_id& a1, const locator::host_id& a2) const { - const auto& loc = get_location(address); - const auto& loc1 = get_location(a1); - const auto& loc2 = get_location(a2); - +int topology::distance(const locator::host_id& address, const endpoint_dc_rack& loc, const locator::host_id& a1, const endpoint_dc_rack& loc1) noexcept { // The farthest nodes from a given node are: // 1. Nodes in other DCs then the reference node // 2. Nodes in the other RACKs in the same DC as the reference node @@ -591,13 +595,7 @@ std::weak_ordering topology::compare_endpoints(const locator::host_id& address, int same_rack1 = same_dc1 & (loc1.rack == loc.rack); int same_node1 = a1 == address; int d1 = ((same_dc1 << 2) | (same_rack1 << 1) | same_node1) ^ 7; - - int same_dc2 = loc2.dc == loc.dc; - int same_rack2 = same_dc2 & (loc2.rack == loc.rack); - int same_node2 = a2 == address; - int d2 = ((same_dc2 << 2) | (same_rack2 << 1) | same_node2) ^ 7; - - return d1 <=> d2; + return d1; } void topology::for_each_node(std::function func) const { diff --git a/locator/topology.hh b/locator/topology.hh index 1521aae481..98f487314b 100644 --- a/locator/topology.hh +++ b/locator/topology.hh @@ -369,6 +369,16 @@ public: */ void do_sort_by_proximity(locator::host_id address, host_id_vector_replica_set& addresses) const; + /** + * Calculates topology-distance between two endpoints. + * + * The closest nodes to a given node are: + * 1. The node itself + * 2. Nodes in the same RACK + * 3. Nodes in the same DC + */ + static int distance(const locator::host_id& address, const endpoint_dc_rack& loc, const locator::host_id& address1, const endpoint_dc_rack& loc1) noexcept; + // Executes a function for each node in a state other than "none" and "left". void for_each_node(std::function func) const; @@ -413,17 +423,6 @@ private: return const_cast(nptr); } - /** - * compares two endpoints in relation to the target endpoint, returning as - * Comparator.compare would - * - * The closest nodes to a given node are: - * 1. The node itself - * 2. Nodes in the same RACK as the reference node - * 3. Nodes in the same DC as the reference node - */ - std::weak_ordering compare_endpoints(const locator::host_id& address, const locator::host_id& a1, const locator::host_id& a2) const; - unsigned _shard; config _cfg; const node* _this_node = nullptr; diff --git a/test/boost/network_topology_strategy_test.cc b/test/boost/network_topology_strategy_test.cc index 8d3df681d1..d80ddf27a8 100644 --- a/test/boost/network_topology_strategy_test.cc +++ b/test/boost/network_topology_strategy_test.cc @@ -946,6 +946,14 @@ SEASTAR_TEST_CASE(test_invalid_dcs) { namespace locator { +std::weak_ordering compare_endpoints(const locator::topology& topo, const locator::host_id& address, const locator::host_id& a1, const locator::host_id& a2) { + const auto& loc = topo.get_location(address); + const auto& loc1 = topo.get_location(a1); + const auto& loc2 = topo.get_location(a2); + + return topo.distance(address, loc, a1, loc1) <=> topo.distance(address, loc, a2, loc2); +} + void topology::test_compare_endpoints(const locator::host_id& address, const locator::host_id& a1, const locator::host_id& a2) const { std::optional expected; const auto& loc = get_location(address); @@ -976,7 +984,7 @@ void topology::test_compare_endpoints(const locator::host_id& address, const loc } } } - auto res = compare_endpoints(address, a1, a2); + auto res = compare_endpoints(*this, address, a1, a2); testlog.debug("compare_endpoint: address={} [{}/{}] a1={} [{}/{}] a2={} [{}/{}]: res={} expected={} expected_value={}", address, loc.dc, loc.rack, a1, loc1.dc, loc1.rack, @@ -1001,7 +1009,7 @@ void topology::test_sort_by_proximity(const locator::host_id& address, const hos } // Test sort monotonicity for (size_t i = 1; i < sorted_nodes.size(); ++i) { - BOOST_REQUIRE(compare_endpoints(address, sorted_nodes[i-1], sorted_nodes[i]) <= 0); + BOOST_REQUIRE(compare_endpoints(*this, address, sorted_nodes[i-1], sorted_nodes[i]) <= 0); } }