Merge '[backport 2025.1] token_range_vector: fragment' from Avi Kivity

token_range_vector is a sequence of intervals of tokens. It is used
to describe vnodes or token ranges owned by shards.

Since tokens are bloated (16 bytes instead of 8), and intervals are bloated
(40 bytes of overhead instead of 8), and since we have plenty of token ranges,
such vectors can exceed our allocation unit of 128 kB and cause allocation stalls.

This series fixes that by first generalizing some helpers and then changing
token_range_vector to use chunked_vector.

Although this touches IDL, there is no compatibility problem since the encodings
of vector and chunked_vector are identical.

There is no performance concern since token_range_vector is never used on
any hot path (hot paths always contain a partition key).

Fixes #3335.
Fixes #24115.

Fixes #24156

Backport notes:

Due to compiler limitations in this toolchain, the template template parameters were replaced
by elaborate template metaprogramming, see patch 'partition_range_compat: generalize wrap/unwrap helpers'.

Closes scylladb/scylladb#25704

* github.com:scylladb/scylladb:
  dht: fragment token_range_vector
  partition_range_compat: generalize wrap/unwrap helpers
  utils: chunked_vector: add swap() method
  utils: chunked_vector: add range insert() overloads
This commit is contained in:
Nadav Har'El
2025-09-01 19:07:54 +03:00
8 changed files with 177 additions and 27 deletions

View File

@@ -10,6 +10,7 @@
#pragma once
#include <vector>
#include "interval.hh"
#include "utils/chunked_vector.hh"
namespace sstables {
@@ -29,7 +30,7 @@ using partition_range = interval<ring_position>;
using token_range = interval<token>;
using partition_range_vector = std::vector<partition_range>;
using token_range_vector = std::vector<token_range>;
using token_range_vector = utils::chunked_vector<token_range>;
class decorated_key;

View File

@@ -37,7 +37,7 @@ class stream_request {
sstring keyspace;
// For compatibility with <= 1.5, we use wrapping ranges
// (though we never send wraparounds; only allow receiving them)
std::vector<wrapping_interval<dht::token>> ranges_compat();
utils::chunked_vector<wrapping_interval<dht::token>> ranges_compat();
std::vector<sstring> column_families;
};

View File

@@ -25,6 +25,7 @@
#include "gms/gossip_address_map.hh"
#include "tasks/types.hh"
#include "utils/advanced_rpc_compressor.hh"
#include "utils/chunked_vector.hh"
#include <list>
#include <vector>
@@ -74,7 +75,7 @@ namespace dht {
class ring_position;
using partition_range = interval<ring_position>;
using token_range = interval<token>;
using token_range_vector = std::vector<token_range>;
using token_range_vector = utils::chunked_vector<token_range>;
}
namespace query {

View File

@@ -20,10 +20,16 @@ using wrapping_partition_range = wrapping_interval<dht::ring_position>;
// unwraps a vector of wrapping ranges into a vector of nonwrapping ranges
// if the vector happens to be sorted by the left bound, it remains sorted
template <typename T, typename Comparator>
std::vector<interval<T>>
unwrap(std::vector<wrapping_interval<T>>&& v, Comparator&& cmp) {
std::vector<interval<T>> ret;
template <template <typename...> class Container, typename T, typename Comparator>
requires std::ranges::range<Container<interval<T>>>
&& requires (Container<interval<T>> c, size_t s, interval<T> i) {
{ c.reserve(s) };
{ c.emplace_back(std::move(i)) };
{ c.insert(c.begin(), std::move(i)) };
}
Container<interval<T>>
unwrap(Container<wrapping_interval<T>>&& v, Comparator&& cmp) {
Container<interval<T>> ret;
ret.reserve(v.size() + 1);
for (auto&& wr : v) {
if (wr.is_wrap_around(cmp)) {
@@ -37,17 +43,86 @@ unwrap(std::vector<wrapping_interval<T>>&& v, Comparator&& cmp) {
return ret;
}
// Type trait: derives from std::true_type exactly when Candidate is a
// specialization of wrapping_interval, and from std::false_type otherwise.
// The match is exact, so cv-qualified or reference types yield false.
template <typename Candidate>
struct is_wrapping_interval : std::bool_constant<false> {};

template <typename Bound>
struct is_wrapping_interval<wrapping_interval<Bound>> : std::bool_constant<true> {};

// Shorthand for is_wrapping_interval<Candidate>::value.
template <typename Candidate>
constexpr bool is_wrapping_interval_v = is_wrapping_interval<Candidate>::value;
// Type trait: derives from std::true_type exactly when Candidate is a
// specialization of interval (the non-wrapping kind), and from
// std::false_type otherwise. The match is exact, so cv-qualified or
// reference types yield false.
template <typename Candidate>
struct is_nonwrapping_interval : std::bool_constant<false> {};

template <typename Bound>
struct is_nonwrapping_interval<interval<Bound>> : std::bool_constant<true> {};

// Shorthand for is_nonwrapping_interval<Candidate>::value.
template <typename Candidate>
constexpr bool is_nonwrapping_interval_v = is_nonwrapping_interval<Candidate>::value;
// Maps wrapping_interval<Bound> to interval<Bound>.
// The primary template is only declared, so naming ::type for anything that
// is not a wrapping_interval specialization does not compile.
template <typename Wrapping>
struct rebind_wrapping_interval_to_nonwrapping_interval;

template <typename Bound>
struct rebind_wrapping_interval_to_nonwrapping_interval<wrapping_interval<Bound>> {
    using type = interval<Bound>;
};

// Shorthand for rebind_wrapping_interval_to_nonwrapping_interval<Wrapping>::type.
template <typename Wrapping>
using rebind_wrapping_interval_to_nonwrapping_interval_t =
        typename rebind_wrapping_interval_to_nonwrapping_interval<Wrapping>::type;
// Maps a container of wrapping_interval<Bound> to the same kind of container
// holding interval<Bound>. Specializations exist for std::vector and
// utils::chunked_vector; the primary template is only declared, so any other
// container type has no ::type.
template <typename Container>
struct rebind_container_wrapping_to_nonwrapping_interval;

template <typename Bound>
struct rebind_container_wrapping_to_nonwrapping_interval<std::vector<wrapping_interval<Bound>>> {
    using type = std::vector<interval<Bound>>;
};

template <typename Bound>
struct rebind_container_wrapping_to_nonwrapping_interval<utils::chunked_vector<wrapping_interval<Bound>>> {
    using type = utils::chunked_vector<interval<Bound>>;
};

// Shorthand for rebind_container_wrapping_to_nonwrapping_interval<Container>::type.
template <typename Container>
using rebind_container_wrapping_to_nonwrapping_interval_t =
        typename rebind_container_wrapping_to_nonwrapping_interval<Container>::type;
// Maps a container of interval<Bound> to the same kind of container holding
// wrapping_interval<Bound>. Specializations exist for std::vector and
// utils::chunked_vector; the primary template is only declared, so any other
// container type has no ::type.
template <typename Container>
struct rebind_container_nonwrapping_to_wrapping_interval;

template <typename Bound>
struct rebind_container_nonwrapping_to_wrapping_interval<std::vector<interval<Bound>>> {
    using type = std::vector<wrapping_interval<Bound>>;
};

template <typename Bound>
struct rebind_container_nonwrapping_to_wrapping_interval<utils::chunked_vector<interval<Bound>>> {
    using type = utils::chunked_vector<wrapping_interval<Bound>>;
};

// Shorthand for rebind_container_nonwrapping_to_wrapping_interval<Container>::type.
template <typename Container>
using rebind_container_nonwrapping_to_wrapping_interval_t =
        typename rebind_container_nonwrapping_to_wrapping_interval<Container>::type;
// unwraps a vector of wrapping ranges into a vector of nonwrapping ranges
// if the vector happens to be sorted by the left bound, it remains sorted
template <typename T, typename Comparator>
std::vector<interval<T>>
unwrap(const std::vector<wrapping_interval<T>>& v, Comparator&& cmp) {
std::vector<interval<T>> ret;
template <typename Container, typename Comparator>
requires std::ranges::range<Container>
&& is_wrapping_interval_v<typename Container::value_type>
&& requires (Container c, size_t s, Container::value_type i) {
{ c.reserve(s) };
{ c.emplace_back(std::move(i)) };
{ c.insert(c.begin(), std::move(i)) };
}
rebind_container_wrapping_to_nonwrapping_interval_t<Container>
unwrap(const Container& v, Comparator&& cmp) {
rebind_container_wrapping_to_nonwrapping_interval_t<Container> ret;
using interval_t = rebind_wrapping_interval_to_nonwrapping_interval_t<typename Container::value_type>;
ret.reserve(v.size() + 1);
for (auto&& wr : v) {
if (wr.is_wrap_around(cmp)) {
auto&& p = wr.unwrap();
ret.insert(ret.begin(), interval<T>(p.first));
ret.insert(ret.begin(), interval_t(p.first));
ret.emplace_back(p.second);
} else {
ret.emplace_back(wr);
@@ -56,43 +131,59 @@ unwrap(const std::vector<wrapping_interval<T>>& v, Comparator&& cmp) {
return ret;
}
template <typename T>
std::vector<wrapping_interval<T>>
wrap(const std::vector<interval<T>>& v) {
template <typename Container>
requires std::ranges::range<Container>
&& is_nonwrapping_interval_v<typename Container::value_type>
&& requires (Container c, size_t s, Container::value_type i) {
{ c.reserve(s) };
{ c.emplace_back(std::move(i)) };
{ c.push_back(std::move(i)) };
}
rebind_container_nonwrapping_to_wrapping_interval_t<Container>
wrap(const Container& v) {
using ret_type = rebind_container_nonwrapping_to_wrapping_interval_t<Container>;
// re-wrap (-inf,x) ... (y, +inf) into (y, x):
if (v.size() >= 2 && !v.front().start() && !v.back().end()) {
auto ret = std::vector<wrapping_interval<T>>();
auto ret = ret_type();
ret.reserve(v.size() - 1);
std::copy(v.begin() + 1, v.end() - 1, std::back_inserter(ret));
ret.emplace_back(v.back().start(), v.front().end());
return ret;
}
return v | std::ranges::to<std::vector<wrapping_interval<T>>>();
return v | std::ranges::to<ret_type>();
}
template <typename T>
std::vector<wrapping_interval<T>>
wrap(std::vector<interval<T>>&& v) {
template <typename Container>
requires std::ranges::range<Container>
&& is_nonwrapping_interval_v<typename Container::value_type>
&& requires (Container c, size_t s, Container::value_type i) {
{ c.reserve(s) };
{ c.emplace_back(std::move(i)) };
{ c.push_back(std::move(i)) };
}
rebind_container_nonwrapping_to_wrapping_interval_t<Container>
wrap(Container&& v) {
using ret_type = rebind_container_nonwrapping_to_wrapping_interval_t<Container>;
// re-wrap (-inf,x) ... (y, +inf) into (y, x):
if (v.size() >= 2 && !v.front().start() && !v.back().end()) {
auto ret = std::vector<wrapping_interval<T>>();
auto ret = ret_type();
ret.reserve(v.size() - 1);
std::move(v.begin() + 1, v.end() - 1, std::back_inserter(ret));
ret.emplace_back(std::move(v.back()).start(), std::move(v.front()).end());
return ret;
}
return std::ranges::owning_view(std::move(v)) | std::ranges::to<std::vector>();
return std::ranges::owning_view(std::move(v)) | std::ranges::to<ret_type>();
}
inline
dht::token_range_vector
unwrap(const std::vector<wrapping_interval<dht::token>>& v) {
unwrap(const utils::chunked_vector<wrapping_interval<dht::token>>& v) {
return unwrap(v, dht::token_comparator());
}
inline
dht::token_range_vector
unwrap(std::vector<wrapping_interval<dht::token>>&& v) {
unwrap(utils::chunked_vector<wrapping_interval<dht::token>>&& v) {
return unwrap(std::move(v), dht::token_comparator());
}

View File

@@ -23,7 +23,7 @@ public:
sstring keyspace;
dht::token_range_vector ranges;
// For compatibility with <= 1.5, we send wrapping ranges (though they will never wrap).
std::vector<wrapping_interval<token>> ranges_compat() const {
utils::chunked_vector<wrapping_interval<token>> ranges_compat() const {
return ::compat::wrap(ranges);
}
std::vector<sstring> column_families;
@@ -33,7 +33,7 @@ public:
, ranges(std::move(_ranges))
, column_families(std::move(_column_families)) {
}
stream_request(sstring _keyspace, std::vector<wrapping_interval<token>> _ranges, std::vector<sstring> _column_families)
stream_request(sstring _keyspace, utils::chunked_vector<wrapping_interval<token>> _ranges, std::vector<sstring> _column_families)
: stream_request(std::move(_keyspace), ::compat::unwrap(std::move(_ranges)), std::move(_column_families)) {
}
};

View File

@@ -13,6 +13,7 @@
#include "streaming/stream_fwd.hh"
#include "streaming/stream_task.hh"
#include "dht/i_partitioner_fwd.hh"
#include "dht/i_partitioner.hh"
#include <seastar/core/semaphore.hh>
namespace streaming {

View File

@@ -32,7 +32,7 @@ using deque = std::deque<int>;
BOOST_AUTO_TEST_CASE(test_random_walk) {
auto rand = std::default_random_engine();
auto op_gen = std::uniform_int_distribution<unsigned>(0, 12);
auto op_gen = std::uniform_int_distribution<unsigned>(0, 13);
auto nr_dist = std::geometric_distribution<size_t>(0.7);
deque d;
disk_array c;
@@ -127,6 +127,15 @@ BOOST_AUTO_TEST_CASE(test_random_walk) {
d.erase(d.begin() + start, d.begin() + end);
break;
}
case 13: {
auto start = std::uniform_int_distribution<size_t>(0, d.size())(rand);
auto nr = std::uniform_int_distribution<size_t>(0, 20)(rand);
auto n = rand();
auto data = std::views::iota(n, n + nr);
c.insert(c.begin() + start, data.begin(), data.end());
d.insert(d.begin() + start, data.begin(), data.end());
break;
}
default:
abort();
}
@@ -552,3 +561,26 @@ BOOST_AUTO_TEST_CASE(test_erase_single) {
BOOST_REQUIRE_EQUAL(vec[0], 2);
BOOST_REQUIRE_EQUAL(vec[1], 8);
}
// Range insert() into the middle of a vector: verifies the resulting
// contents, the iterator handed back by insert(), and that inserting an
// empty range is a no-op.
BOOST_AUTO_TEST_CASE(test_insert_range) {
    auto vec = utils::chunked_vector<int, 8>();
    vec.push_back(1);
    vec.push_back(2);
    vec.push_back(3);
    vec.push_back(4);
    auto data = std::views::iota(8, 12);
    auto it = vec.insert(vec.begin() + 2, data.begin(), data.end());
    BOOST_REQUIRE(std::ranges::equal(vec, std::array{1, 2, 8, 9, 10, 11, 3, 4}));
    // The returned iterator must designate the first inserted element.
    BOOST_REQUIRE(it == vec.begin() + 2);
    BOOST_REQUIRE_EQUAL(*it, 8);

    // An empty range changes nothing and returns the insertion position.
    it = vec.insert(vec.begin() + 3, data.end(), data.end());
    BOOST_REQUIRE(std::ranges::equal(vec, std::array{1, 2, 8, 9, 10, 11, 3, 4}));
    BOOST_REQUIRE(it == vec.begin() + 3);
}
// swap() must exchange the contents of two vectors holding different
// numbers of elements.
BOOST_AUTO_TEST_CASE(test_swap) {
    using vector_type = utils::chunked_vector<int, 8>;
    vector_type lhs{};
    vector_type rhs{};
    lhs.push_back(1);
    rhs.push_back(2);
    rhs.push_back(4);

    lhs.swap(rhs);

    // After the swap each vector holds what the other one held before.
    const auto expected_lhs = std::array{2, 4};
    const auto expected_rhs = std::array{1};
    BOOST_REQUIRE(std::ranges::equal(lhs, expected_lhs));
    BOOST_REQUIRE(std::ranges::equal(rhs, expected_rhs));
}

View File

@@ -324,12 +324,15 @@ public:
public:
iterator insert(const_iterator pos, const T& x);
iterator insert(const_iterator pos, T&& x);
template <typename Iterator>
iterator insert(const_iterator post, Iterator first, Iterator last);
template <typename... Args>
iterator emplace(const_iterator pos, Args&&... args);
iterator erase(iterator pos);
iterator erase(const_iterator pos);
iterator erase(iterator first, iterator last);
iterator erase(const_iterator first, const_iterator last);
void swap(chunked_vector& x) noexcept;
};
template<typename T, size_t max_contiguous_allocation>
@@ -622,6 +625,18 @@ chunked_vector<T, max_contiguous_allocation>::insert(const_iterator pos, T&& x)
return begin() + insert_idx;
}
// Inserts copies of the elements of [first, last) before pos.
//
// The new elements are appended at the end and then rotated into place.
// Returns an iterator to the first inserted element, or to the element at
// pos's index when the range is empty.
//
// The range is measured (to reserve capacity up front) only when Iterator is
// at least a forward iterator. Measuring a single-pass input range would
// consume it and leave nothing for the copy, so for those the vector simply
// grows while appending.
//
// NOTE(review): [first, last) is read while *this is being grown, so it
// presumably must not refer to elements of *this - confirm against callers.
template <typename T, size_t max_contiguous_allocation>
template <typename Iterator>
typename chunked_vector<T, max_contiguous_allocation>::iterator
chunked_vector<T, max_contiguous_allocation>::insert(const_iterator pos, Iterator first, Iterator last) {
    // Work with indexes: reserve() and the appends below may invalidate iterators.
    auto insert_idx = pos - begin();
    auto old_size = end() - begin();
    using category = typename std::iterator_traits<Iterator>::iterator_category;
    if constexpr (std::is_base_of_v<std::forward_iterator_tag, category>) {
        reserve(size() + std::distance(first, last));
    }
    std::copy(first, last, std::back_inserter(*this));
    // [old_size, end) now holds the new elements; move them in front of the
    // elements that used to start at insert_idx.
    std::rotate(begin() + insert_idx, begin() + old_size, end());
    return begin() + insert_idx;
}
template <typename T, size_t max_contiguous_allocation>
template <typename... Args>
typename chunked_vector<T, max_contiguous_allocation>::iterator
@@ -660,6 +675,15 @@ chunked_vector<T, max_contiguous_allocation>::erase(iterator pos) {
return erase(const_iterator(pos));
}
// Exchanges the state of this vector with x by swapping the _chunks, _size
// and _capacity members pairwise.
template <typename T, size_t max_contiguous_allocation>
void
chunked_vector<T, max_contiguous_allocation>::swap(chunked_vector& x) noexcept {
    // Unqualified calls: a swap overload found through ADL for the member's
    // type is preferred, with std::swap as the fallback.
    using std::swap;
    swap(_capacity, x._capacity);
    swap(_size, x._size);
    swap(_chunks, x._chunks);
}
template <typename T, size_t max_contiguous_allocation>
std::ostream& operator<<(std::ostream& os, const chunked_vector<T, max_contiguous_allocation>& v) {
fmt::print(os, "{}", v);