Files
scylladb/dht/sharder.hh
Avi Kivity 0ae22a09d4 LICENSE: Update to version 1.1
Updated terms of non-commercial use (must be a never-customer).
2026-04-12 19:46:33 +03:00

150 lines
7.0 KiB
C++

/*
* Copyright (C) 2015-present ScyllaDB
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
*/
#pragma once
#include "dht/ring_position.hh"
#include "dht/token-sharding.hh"
#include "utils/interval.hh"
#include "utils/chunked_vector.hh"
#include <vector>
namespace dht {
// Utilities for sharding ring partition_range:s
// A ring_position range's data is divided into sub-ranges, where each sub-range's data
// is owned by a single shard. Note that multiple non-overlapping sub-ranges may map to a
// single shard, and some shards may not receive any sub-range.
//
// This module provides utilities for determining the sub-ranges to shard mapping. The utilities
// generate optimal mappings: each range that you get is the largest possible, so you
// get the minimum number of ranges possible. You can get many ranges, so operate on them
// one (or a few) at a time, rather than accumulating them.
// A mapping between a partition_range and a shard. All positions within `ring_range` are
// owned by `shard`.
//
// The classes that return ring_position_range_and_shard make `ring_range` as large as
// possible (maximizing the number of tokens), so the total number of such ranges is minimized.
// Successive ranges therefore always have a different `shard` than the previous return.
// (classes that return ring_position_range_and_shard_and_element can have the same `shard`
// in successive returns, if `element` is different).
struct ring_position_range_and_shard {
// Sub-range of the ring; every position inside it is owned by `shard`.
dht::partition_range ring_range;
// The shard that owns all positions within `ring_range`.
unsigned shard;
};
// Incrementally divides a `partition_range` into sub-ranges wholly owned by a single shard.
// During tablet migration, shard ownership is taken from the shard routing view used for reads.
class ring_position_range_sharder {
// Sharder consulted for shard ownership. Held by reference, so the referenced
// sharder must outlive this object.
const sharder& _sharder;
// The input range handed to the constructor.
// NOTE(review): presumably narrowed by next() as sub-ranges are returned; next() is
// defined outside this header, so confirm there.
dht::partition_range _range;
// NOTE(review): presumably set by next() once the input range is exhausted; confirm
// against the out-of-line definition of next().
bool _done = false;
public:
// Initializes the ring_position_range_sharder with a given range to subdivide.
// `rrp` is taken by value and moved into the object; `sharder` is only referenced,
// not copied.
ring_position_range_sharder(const sharder& sharder, interval<ring_position> rrp)
: _sharder(sharder), _range(std::move(rrp)) {}
// Fetches the next range-shard mapping. When the input range is exhausted, std::nullopt is
// returned. The returned ranges are contiguous and non-overlapping, and together span the
// entire input range.
std::optional<ring_position_range_and_shard> next(const schema& s);
};
// A mapping between a partition_range and a shard (like ring_position_range_and_shard) extended
// by having a reference to input range index. See ring_position_range_vector_sharder for use.
//
// The classes that return ring_position_range_and_shard_and_element make `ring_range` as large as
// possible (maximizing the number of tokens), so the total number of such ranges is minimized.
// Within a single input element, successive ranges therefore always have a different `shard`
// than the previous return; the same `shard` can appear in successive returns only when
// `element` differs.
struct ring_position_range_and_shard_and_element : ring_position_range_and_shard {
    // Index of the input range that this result belongs to.
    unsigned element;

    // Extends an existing range-to-shard mapping with the index of the input range it was
    // derived from. `rpras` is consumed: it is moved into the base sub-object.
    ring_position_range_and_shard_and_element(ring_position_range_and_shard&& rpras, unsigned element)
        : ring_position_range_and_shard(std::move(rpras))
        , element(element)
    {}
};
// Incrementally divides several non-overlapping `partition_range`:s into sub-ranges wholly owned by
// a single shard.
//
// Similar to ring_position_range_sharder, but instead of stopping when the input range is exhausted,
// moves on to the next input range (input ranges are supplied in a vector).
//
// This has two use cases:
// 1. vnodes. A vnode cannot be described by a single range, since
// one vnode wraps around from the largest token back to the smallest token. Hence it must be
// described as a vector of two ranges, (largest_token, +inf) and (-inf, smallest_token].
// 2. sstable shard mappings. An sstable has metadata describing which ranges it owns, and this is
// used to see what shards these ranges map to (and therefore to see if the sstable is shared or
// not, and which shards share it).
//
// During migration, shard ownership is taken from the shard routing view used for reads.
class ring_position_range_vector_sharder {
    using vec_type = utils::chunked_vector<dht::partition_range>;

    // The input ranges. Elements are moved out one at a time by next_range(), which
    // leaves moved-from entries behind.
    vec_type _ranges;
    // Sharder passed on to each per-range sharder. Held by reference, so the referenced
    // sharder must outlive this object.
    const sharder& _sharder;
    // Next input range that has not yet been handed to a per-range sharder.
    // NOTE(review): this iterator points into `_ranges`, a member of the same object. The
    // implicitly generated copy/move constructors copy it memberwise, so verify that no
    // caller copies or moves an instance and then keeps using it.
    vec_type::iterator _current_range;
    // Sharder for the input range currently being subdivided; replaced by next_range()
    // each time a new input range is started.
    std::optional<ring_position_range_sharder> _current_sharder;

private:
    // Starts subdividing the next pending input range. Does nothing when every input
    // range has already been consumed.
    void next_range() {
        if (_current_range == _ranges.end()) {
            return;
        }
        // Advance past the range before constructing its sharder, so the position has
        // already moved on even if construction throws.
        auto&& pending = *_current_range;
        ++_current_range;
        _current_sharder.emplace(_sharder, std::move(pending));
    }

public:
    // Initializes the `ring_position_range_vector_sharder` with the ranges to be processed.
    // Input ranges should be non-overlapping (although nothing bad will happen if they do
    // overlap).
    ring_position_range_vector_sharder(const sharder& sharder, utils::chunked_vector<dht::partition_range> ranges);

    // Fetches the next range-shard mapping, or std::nullopt once all input ranges are
    // exhausted. Within one input range the results are contiguous and non-overlapping;
    // because input ranges are usually discontiguous, the results as a whole are not
    // contiguous. Together, the results span the input ranges.
    //
    // Each result carries an `element` field holding the index, within the input vector,
    // of the range the result belongs to.
    //
    // Results are ordered by index within the input vector first, then by position within
    // each input range.
    std::optional<ring_position_range_and_shard_and_element> next(const schema& s);
};
// Incrementally divides a `partition_range` into sub-ranges wholly owned by a single shard.
// Unlike ring_position_range_sharder, it only returns results for the shard number provided by the caller.
// During topology changes, it reflects the shard assignment used for reads.
class selective_token_range_sharder {
// NOTE: the constructor's initializer list relies on the declaration order below
// (`_next_shard` reads `_shard` and `_sharder`; `_start_token` reads `_range`;
// `_start_boundary` reads `_sharder` and `_start_token`). Do not reorder these members.

// Sharder consulted for shard ownership. Held by reference, so the referenced
// sharder must outlive this object.
const sharder& _sharder;
// The input token range.
dht::token_range _range;
// The shard of interest supplied by the caller.
shard_id _shard;
// NOTE(review): presumably set by next() once the input range is exhausted; next() is
// defined outside this header, so confirm there.
bool _done = false;
// The shard following `_shard`, wrapping around to 0 after the last shard.
shard_id _next_shard;
// The start token of `_range`, or minimum_token() when the range has no start bound.
dht::token _start_token;
// Start bound for the first candidate sub-range: the input range's own start bound when
// `_start_token` already maps to the requested shard for reads, otherwise a bound built
// from token_for_next_shard_for_reads(_start_token, shard).
std::optional<interval_bound<dht::token>> _start_boundary;
public:
// Initializes the selective_token_range_sharder with a token range and shard_id of interest.
// `range` is taken by value and moved into the object; `sharder` is only referenced.
selective_token_range_sharder(const sharder& sharder, dht::token_range range, shard_id shard)
: _sharder(sharder)
, _range(std::move(range))
, _shard(shard)
, _next_shard(_shard + 1 == _sharder.shard_count() ? 0 : _shard + 1)
, _start_token(_range.start() ? _range.start()->value() : minimum_token())
, _start_boundary(_sharder.shard_for_reads(_start_token) == shard ?
_range.start_copy() : interval_bound<dht::token>(_sharder.token_for_next_shard_for_reads(_start_token, shard))) {
}
// Returns the next token_range that is both wholly contained within the input range and also
// wholly owned by the input shard_id. When the input range is exhausted, std::nullopt is returned.
// Note if the range does not intersect the shard at all, std::nullopt will be returned immediately.
std::optional<dht::token_range> next();
};
} // dht