Files
scylladb/dht/i_partitioner.cc

379 lines
13 KiB
C++

/*
* Copyright (C) 2015 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#include "i_partitioner.hh"
#include "sharder.hh"
#include <seastar/core/seastar.hh>
#include "dht/token-sharding.hh"
#include "utils/class_registrator.hh"
#include "types.hh"
#include "utils/murmur_hash.hh"
#include "utils/div_ceil.hh"
#include <deque>
#include <boost/range/adaptor/map.hpp>
#include <boost/range/irange.hpp>
#include <boost/range/adaptor/transformed.hpp>
#include "sstables/key.hh"
#include <seastar/core/thread.hh>
namespace dht {
// Builds a sharder for `shard_count` shards.
//
// With at most one shard the requested `sharding_ignore_msb_bits` is replaced
// by zero: the ignored bits would only cause needless range breaks.
sharder::sharder(unsigned shard_count, unsigned sharding_ignore_msb_bits)
    : _shard_count(shard_count)
    , _sharding_ignore_msb_bits(shard_count <= 1 ? 0 : sharding_ignore_msb_bits)
    // Computed from the members (not the raw arguments) so the adjusted
    // msb-bits value is the one used.
    // NOTE(review): relies on _shard_start being declared after the two
    // members above -- confirm in sharder.hh.
    , _shard_start(init_zero_based_shard_start(_shard_count, _sharding_ignore_msb_bits))
{}
// Returns the shard for token `t`, as computed by dht::shard_of() from this
// sharder's shard count and ignored-MSB-bits setting.
unsigned
sharder::shard_of(const token& t) const {
    const auto owner = dht::shard_of(_shard_count, _sharding_ignore_msb_bits, t);
    return owner;
}
// Forwards to dht::token_for_next_shard() with this sharder's precomputed
// shard starts, shard count and ignored-MSB-bits setting; `t`, `shard` and
// `spans` are passed through unchanged.
token
sharder::token_for_next_shard(const token& t, shard_id shard, unsigned spans) const {
    return dht::token_for_next_shard(
            _shard_start,
            _shard_count,
            _sharding_ignore_msb_bits,
            t,
            shard,
            spans);
}
// Prints a decorated key as "{key: <key>, token:<token>}".
std::ostream& operator<<(std::ostream& out, const decorated_key& dk) {
    out << "{key: " << dk._key;
    out << ", token:" << dk._token;
    out << "}";
    return out;
}
std::ostream& operator<<(std::ostream& out, partition_ranges_view v) {
out << "{";
if (v.empty()) {
out << " }";
return out;
}
auto it = v.begin();
out << *it;
++it;
for (;it != v.end(); ++it) {
out << ", " << *it;
}
out << "}";
return out;
}
// Instantiates the partitioner registered under `partitioner_name`.
//
// Any std::exception raised by create_object() is translated into a
// std::runtime_error whose message lists the registered partitioner names
// and appends the original error text.
std::unique_ptr<dht::i_partitioner> make_partitioner(sstring partitioner_name) {
    try {
        return create_object<i_partitioner>(partitioner_name);
    } catch (const std::exception& e) {
        // The handler only reads the exception, so bind it by const reference.
        auto supported_partitioners = ::join(", ", class_registry<i_partitioner>::classes() |
                boost::adaptors::map_keys);
        throw std::runtime_error(format("Partitioner {} is not supported, supported partitioners = {{ {} }} : {}",
                partitioner_name, supported_partitioners, e.what()));
    }
}
// Two decorated keys are equal when their tokens match and their partition
// keys compare equal under legacy_equal() for schema `s`. The key comparison
// is skipped when the tokens already differ.
bool
decorated_key::equal(const schema& s, const decorated_key& other) const {
    return _token == other._token && _key.legacy_equal(s, other._key);
}
// Three-way comparison of two decorated keys: tokens are compared first and
// the partition keys (legacy order, schema `s`) only break token ties.
std::strong_ordering
decorated_key::tri_compare(const schema& s, const decorated_key& other) const {
    const auto by_token = dht::tri_compare(_token, other._token);
    if (by_token == 0) {
        return _key.legacy_tri_compare(s, other._key) <=> 0;
    }
    return by_token;
}
// Three-way comparison of this decorated key against a ring position.
//
// Tokens decide first. On a token tie, a ring position that carries a key is
// compared by key (legacy order); a keyless one is ordered by the reverse of
// its relation_to_keys() value.
std::strong_ordering
decorated_key::tri_compare(const schema& s, const ring_position& other) const {
    const auto by_token = dht::tri_compare(_token, other.token());
    if (by_token != 0) {
        return by_token;
    }
    if (!other.has_key()) {
        return 0 <=> other.relation_to_keys();
    }
    return _key.legacy_tri_compare(s, *other.key()) <=> 0;
}
// True when this key orders strictly before ring position `other`
// according to tri_compare().
bool
decorated_key::less_compare(const schema& s, const ring_position& other) const {
    const auto order = tri_compare(s, other);
    return order < 0;
}
// True when this key orders strictly before `other` according to
// tri_compare().
bool
decorated_key::less_compare(const schema& s, const decorated_key& other) const {
    const auto order = tri_compare(s, other);
    return order < 0;
}
// Builds a comparator bound to schema `s`. The schema pointer is moved into
// the member of the same name; the call operators dereference it for every
// comparison.
decorated_key::less_comparator::less_comparator(schema_ptr s)
: s(std::move(s))
{ }
// Strict-weak "less than" between two decorated keys under the bound schema.
bool
decorated_key::less_comparator::operator()(const decorated_key& lhs, const decorated_key& rhs) const {
    return lhs.tri_compare(*s, rhs) < 0;
}
// "Less than" with a ring position on the left. Only the key-side comparison
// exists, so the test is expressed as "rhs is greater than lhs".
bool
decorated_key::less_comparator::operator()(const ring_position& lhs, const decorated_key& rhs) const {
    const auto rhs_vs_lhs = rhs.tri_compare(*s, lhs);
    return rhs_vs_lhs > 0;
}
// "Less than" with a decorated key on the left and a ring position on the
// right, under the bound schema.
bool
decorated_key::less_comparator::operator()(const decorated_key& lhs, const ring_position& rhs) const {
    return lhs.less_compare(*s, rhs);
}
std::ostream& operator<<(std::ostream& out, const ring_position_ext& pos) {
return out << (ring_position_view)pos;
}
// Prints a ring position as "{<token>, <key>}" when it carries a key, and as
// "{<token>, start}" or "{<token>, end}" otherwise ("start" when
// relation_to_keys() is negative).
std::ostream& operator<<(std::ostream& out, const ring_position& pos) {
    out << "{" << pos.token() << ", ";
    if (pos.has_key()) {
        out << *pos.key();
    } else if (pos.relation_to_keys() < 0) {
        out << "start";
    } else {
        out << "end";
    }
    return out << "}";
}
// Prints a ring position view as "{<token>[, <key>], w=<weight>}"; the key
// part appears only when the view has a key. The weight is printed as an
// integer rather than as a character.
std::ostream& operator<<(std::ostream& out, ring_position_view pos) {
    out << "{" << *pos._token;
    if (pos._key) {
        out << ", " << *pos._key;
    }
    return out << ", w=" << static_cast<int>(pos._weight) << "}";
}
// Prints a partitioner as "{partitioner name = <name>}".
std::ostream& operator<<(std::ostream& out, const i_partitioner& p) {
    return out << "{partitioner name = " << p.name() << "}";
}
// Convenience overload: looks up the shard of token `t` through the sharder
// attached to schema `s`.
unsigned shard_of(const schema& s, const token& t) {
    const auto& schema_sharder = s.get_sharder();
    return schema_sharder.shard_of(t);
}
std::optional<dht::token_range>
selective_token_range_sharder::next() {
if (_done) {
return {};
}
while (_range.overlaps(dht::token_range(_start_boundary, {}), dht::token_comparator())
&& !(_start_boundary && _start_boundary->value() == maximum_token())) {
auto end_token = _sharder.token_for_next_shard(_start_token, _next_shard);
auto candidate = dht::token_range(std::move(_start_boundary), range_bound<dht::token>(end_token, false));
auto intersection = _range.intersection(std::move(candidate), dht::token_comparator());
_start_token = _sharder.token_for_next_shard(end_token, _shard);
_start_boundary = range_bound<dht::token>(_start_token);
if (intersection) {
return *intersection;
}
}
_done = true;
return {};
}
std::optional<ring_position_range_and_shard>
ring_position_range_sharder::next(const schema& s) {
if (_done) {
return {};
}
auto shard = _range.start() ? _sharder.shard_of(_range.start()->value().token()) : token::shard_of_minimum_token();
auto next_shard = shard + 1 < _sharder.shard_count() ? shard + 1 : 0;
auto shard_boundary_token = _sharder.token_for_next_shard(_range.start() ? _range.start()->value().token() : minimum_token(), next_shard);
auto shard_boundary = ring_position::starting_at(shard_boundary_token);
if ((!_range.end() || shard_boundary.less_compare(s, _range.end()->value()))
&& shard_boundary_token != maximum_token()) {
// split the range at end_of_shard
auto start = _range.start();
auto end = range_bound<ring_position>(shard_boundary, false);
_range = dht::partition_range(
range_bound<ring_position>(std::move(shard_boundary), true),
std::move(_range.end()));
return ring_position_range_and_shard{dht::partition_range(std::move(start), std::move(end)), shard};
}
_done = true;
return ring_position_range_and_shard{std::move(_range), shard};
}
// Takes ownership of `ranges`, keeps a reference-style handle to `sharder`,
// and positions the cursor at the first stored range.
// NOTE(review): _current_range is initialised from the member _ranges, so it
// relies on _ranges being declared before it -- confirm in the header.
ring_position_range_vector_sharder::ring_position_range_vector_sharder(const sharder& sharder, dht::partition_range_vector ranges)
: _ranges(std::move(ranges))
, _sharder(sharder)
, _current_range(_ranges.begin()) {
// next_range() is defined outside this file section; presumably it sets up
// the per-range sharder for the first range and advances the cursor -- confirm.
next_range();
}
std::optional<ring_position_range_and_shard_and_element>
ring_position_range_vector_sharder::next(const schema& s) {
if (!_current_sharder) {
return std::nullopt;
}
auto range_and_shard = _current_sharder->next(s);
while (!range_and_shard && _current_range != _ranges.end()) {
next_range();
range_and_shard = _current_sharder->next(s);
}
auto ret = std::optional<ring_position_range_and_shard_and_element>();
if (range_and_shard) {
ret.emplace(std::move(*range_and_shard), _current_range - _ranges.begin() - 1);
}
return ret;
}
// Collects the pieces of `pr` that lie between successive visits of `shard`
// on the token ring and returns them, in order, through a future.
//
// The walk starts at pr.start() when the start token already maps to `shard`,
// otherwise at the token returned by token_for_next_shard(start_token, shard).
// Each iteration intersects `pr` with [start_boundary, T), where T is the
// token at which the following shard (`next_shard`) begins.
//
// NOTE(review): the lambda captures `pr` and the schema's sharder by
// reference, and `cmp` is built from `s`; presumably callers must keep `s`
// and `pr` alive until the returned future resolves -- confirm.
future<utils::chunked_vector<partition_range>>
split_range_to_single_shard(const schema& s, const partition_range& pr, shard_id shard) {
const sharder& sharder = s.get_sharder();
// Shard ids wrap around to 0 after the last shard.
auto next_shard = shard + 1 == sharder.shard_count() ? 0 : shard + 1;
auto start_token = pr.start() ? pr.start()->value().token() : minimum_token();
auto start_shard = sharder.shard_of(start_token);
auto start_boundary = start_shard == shard ? pr.start() : range_bound<ring_position>(ring_position::starting_at(sharder.token_for_next_shard(start_token, shard)));
// One piece is examined per invocation of the lambda; returning an empty
// optional asks repeat_until_value() to call it again.
return repeat_until_value([&sharder,
&pr,
cmp = ring_position_comparator(s),
ret = utils::chunked_vector<partition_range>(),
start_token,
start_boundary,
shard,
next_shard] () mutable {
// Continue while part of `pr` lies at or after the current start and the
// start has not reached the maximum token.
if (pr.overlaps(partition_range(start_boundary, {}), cmp)
&& !(start_boundary && start_boundary->value().token() == maximum_token())) {
auto end_token = sharder.token_for_next_shard(start_token, next_shard);
auto candidate = partition_range(std::move(start_boundary), range_bound<ring_position>(ring_position::starting_at(end_token), false));
auto intersection = pr.intersection(std::move(candidate), cmp);
if (intersection) {
ret.push_back(std::move(*intersection));
}
// Jump to the next token handled by `shard`; start_boundary was moved
// from above and is reassigned here.
start_token = sharder.token_for_next_shard(end_token, shard);
start_boundary = range_bound<ring_position>(ring_position::starting_at(start_token));
return make_ready_future<std::optional<utils::chunked_vector<partition_range>>>();
}
// Done: hand the accumulated pieces to the caller.
return make_ready_future<std::optional<utils::chunked_vector<partition_range>>>(std::move(ret));
});
}
// Three-way comparison of two ring positions, delegated to a
// ring_position_comparator built for schema `s`.
std::strong_ordering ring_position::tri_compare(const schema& s, const ring_position& o) const {
    ring_position_comparator cmp(s);
    return cmp(*this, o);
}
// Function-object wrapper around tri_compare() for tokens.
std::strong_ordering token_comparator::operator()(const token& t1, const token& t2) const {
    auto order = tri_compare(t1, t2);
    return order;
}
// True when tri_compare() reports the two ring positions as equivalent.
bool ring_position::equal(const schema& s, const ring_position& other) const {
    const auto order = tri_compare(s, other);
    return order == 0;
}
// True when this ring position orders strictly before `other` according to
// tri_compare().
bool ring_position::less_compare(const schema& s, const ring_position& other) const {
    const auto order = tri_compare(s, other);
    return order < 0;
}
// Three-way comparison of two ring position views under schema `s`.
//
// Order of decisions:
//  1. tokens;
//  2. if both sides carry a key: the keys (legacy order), then the weights;
//  3. if neither side carries a key: the weights;
//  4. if exactly one side carries a key: the keyless side sorts after the
//     keyed side when its weight is positive, and before it otherwise.
std::strong_ordering ring_position_tri_compare(const schema& s, ring_position_view lh, ring_position_view rh) {
    auto token_cmp = tri_compare(*lh._token, *rh._token);
    if (token_cmp != 0) {
        return token_cmp;
    }
    if (lh._key && rh._key) {
        auto c = lh._key->legacy_tri_compare(s, *rh._key);
        if (c != 0) {
            // Normalise with `<=> 0`, exactly as decorated_key::tri_compare()
            // does for the same call, instead of returning the raw result as
            // a std::strong_ordering.
            return c <=> 0;
        }
        return (lh._weight - rh._weight) <=> 0;
    }
    if (!lh._key && !rh._key) {
        return (lh._weight - rh._weight) <=> 0;
    } else if (!lh._key) {
        // Only the left side is keyless: its weight alone decides.
        return lh._weight > 0 ? std::strong_ordering::greater : std::strong_ordering::less;
    } else {
        // Only the right side is keyless: mirror image of the case above.
        return rh._weight > 0 ? std::strong_ordering::less : std::strong_ordering::greater;
    }
}
// Compares a ring position view against an sstable decorated key view.
//
// Tokens decide first. If the ring position has a key, the keys are compared
// next; the sstable key is the left operand of that comparison, so the sign
// is flipped. If still tied (or the position is keyless), the position's
// weight relative to zero decides.
std::strong_ordering ring_position_comparator_for_sstables::operator()(ring_position_view lh, sstables::decorated_key_view rh) const {
    const auto by_token = tri_compare(*lh._token, rh.token());
    if (by_token != 0) {
        return by_token;
    }
    if (!lh._key) {
        return lh._weight <=> 0;
    }
    auto key_vs_position = rh.key().tri_compare(s, *lh._key);
    if (key_vs_position) {
        return 0 <=> key_vs_position;
    }
    return lh._weight <=> 0;
}
// Argument-swapped overload: evaluates the (ring position, sstable key)
// comparison and reverses its result.
std::strong_ordering ring_position_comparator_for_sstables::operator()(sstables::decorated_key_view a, ring_position_view b) const {
    const auto swapped = (*this)(b, a);
    return 0 <=> swapped;
}
// Converts a token range into the partition range with the same bounds.
//
// A start bound maps to the `start` side of its token when inclusive and to
// the `end` side when exclusive; an end bound maps to the `end` side when
// inclusive and to the `start` side when exclusive. Missing bounds stay
// missing.
dht::partition_range
to_partition_range(dht::token_range r) {
    using bound_opt = std::optional<dht::partition_range::bound>;
    using token_bound = dht::ring_position::token_bound;

    bound_opt start;
    if (r.start()) {
        const auto side = r.start()->is_inclusive() ? token_bound::start : token_bound::end;
        start.emplace(dht::ring_position(r.start()->value(), side));
    }

    bound_opt end;
    if (r.end()) {
        const auto side = r.end()->is_inclusive() ? token_bound::end : token_bound::start;
        end.emplace(dht::ring_position(r.end()->value(), side));
    }

    return dht::partition_range(std::move(start), std::move(end));
}
// Converts every token range in `ranges` with to_partition_range(), keeping
// the input order. utils::maybe_yield(can_yield) is invoked after each
// element.
dht::partition_range_vector to_partition_ranges(const dht::token_range_vector& ranges, utils::can_yield can_yield) {
    dht::partition_range_vector converted;
    converted.reserve(ranges.size());
    for (auto it = ranges.begin(); it != ranges.end(); ++it) {
        converted.push_back(dht::to_partition_range(*it));
        utils::maybe_yield(can_yield);
    }
    return converted;
}
// Splits `pr` with a ring_position_range_sharder built from the schema's
// sharder and groups the resulting sub-ranges by the shard reported for each.
//
// Returns a map from shard id to that shard's sub-ranges, in the order the
// sharder produced them. Shards that receive no sub-range have no entry.
std::map<unsigned, dht::partition_range_vector>
split_range_to_shards(dht::partition_range pr, const schema& s) {
    std::map<unsigned, dht::partition_range_vector> ret;
    auto sharder = dht::ring_position_range_sharder(s.get_sharder(), std::move(pr));
    for (auto rprs = sharder.next(s); rprs; rprs = sharder.next(s)) {
        // `rprs` is overwritten by the loop step right after this statement,
        // so its range can be moved into the result instead of copied.
        ret[rprs->shard].emplace_back(std::move(rprs->ring_range));
    }
    return ret;
}
}