mirror of
https://github.com/scylladb/scylladb.git
synced 2026-04-20 00:20:47 +00:00
157 lines
5.6 KiB
C++
157 lines
5.6 KiB
C++
/*
 * Copyright (C) 2026-present ScyllaDB
 */
|
|
|
|
/*
 * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
 */
|
|
|
|
#include <seastar/core/on_internal_error.hh>
|
|
|
|
#include "dht/fixed_shard.hh"
|
|
#include "dht/token.hh"
|
|
#include "schema/schema.hh"
|
|
#include "sstables/key.hh"
|
|
#include "utils/class_registrator.hh"
|
|
#include "keys/keys.hh"
|
|
#include "keys/compound_compat.hh"
|
|
#include "utils/murmur_hash.hh"
|
|
#include "utils/log.hh"
|
|
|
|
namespace dht {
|
|
|
|
// File-local logger; messages are emitted under the "fixed_shard" logger name.
static logging::logger fslog("fixed_shard");
|
|
|
|
// Fully qualified class name of the partitioner; returned by name() and used as
// one of the keys under which the partitioner is registered below.
const sstring fixed_shard_partitioner::classname = "com.scylladb.dht.FixedShardPartitioner";
|
|
|
|
// Returns the partitioner's fully qualified class name (a copy of `classname`).
const sstring fixed_shard_partitioner::name() const {
    return fixed_shard_partitioner::classname;
}
|
|
|
|
// Builds a token from a shard number and hash bits.
//
// The shard number is placed in the high bits (shifted left by `shard_shift`);
// the low bits are taken from `hash_bits` after masking with `hash_mask`.
dht::token fixed_shard_partitioner::token_for_shard(uint16_t shard, uint64_t hash_bits) {
    const auto shard_part = static_cast<int64_t>(shard) << shard_shift;
    const auto hash_part = static_cast<int64_t>(hash_bits & hash_mask);
    return dht::token(shard_part | hash_part);
}
|
|
|
|
// Extracts the shard number stored in the high bits of a token.
//
// The raw token value is reinterpreted as unsigned so that the right shift by
// `shard_shift` is a logical shift, not an arithmetic one.
unsigned fixed_shard_partitioner::shard_of(dht::token token) {
    const auto raw_bits = static_cast<uint64_t>(token.raw());
    const uint64_t high_bits = raw_bits >> shard_shift;
    return static_cast<unsigned>(high_bits);
}
|
|
|
|
// Called with the bytes of the first partition key component, representing the shard.
|
|
static uint16_t compute_shard(managed_bytes_view mb) {
|
|
if (mb.size() != sizeof(uint16_t)) {
|
|
on_internal_error(fslog, format("Invalid shard value size: expected {}, got {}", sizeof(uint16_t), mb.size()));
|
|
}
|
|
|
|
// No need to linearize, 2 bytes are represented as a single fragment
|
|
auto shard_bytes = mb.current_fragment();
|
|
uint16_t shard_value = net::ntoh(read_unaligned<uint16_t>(shard_bytes.begin()));
|
|
|
|
if (shard_value > fixed_shard_partitioner::max_shard) {
|
|
on_internal_error(fslog, format("Shard value {} exceeds maximum allowed shard {}", shard_value, fixed_shard_partitioner::max_shard));
|
|
}
|
|
|
|
return shard_value;
|
|
}
|
|
|
|
// Computes the token of a partition key.
//
// The shard is decoded from the first key component, and the remaining token
// bits come from the first 64-bit word of the 128-bit murmur hash of the key's
// legacy form.
dht::token fixed_shard_partitioner::get_token(const schema& s, partition_key_view key) const {
    const uint16_t shard = compute_shard(*key.begin());

    auto&& legacy = key.legacy_form(s);
    std::array<uint64_t, 2> digest;
    utils::murmur_hash::hash3_x64_128(legacy.begin(), legacy.size(), 0, digest);

    auto result = token_for_shard(shard, digest[0]);
    fslog.trace("get_token: shard={}, token={}", shard, result);
    return result;
}
|
|
|
|
// Computes the token of an sstable key.
//
// The key is linearized and parsed as a composite; the shard is decoded from the
// first composite component, and the remaining token bits come from the first
// 64-bit word of the 128-bit murmur hash of the whole linearized key.
//
// NOTE(review): the `true` passed to composite_view presumably selects compound
// parsing, which would not fit a single-component key — confirm against the
// tables that use this partitioner.
dht::token fixed_shard_partitioner::get_token(const sstables::key_view& key) const {
    return key.with_linearized([](bytes_view v) {
        auto components = composite_view(v, true);
        const uint16_t shard = compute_shard(components.begin()->first);

        std::array<uint64_t, 2> digest;
        utils::murmur_hash::hash3_x64_128(v, 0, digest);

        auto result = fixed_shard_partitioner::token_for_shard(shard, digest[0]);
        fslog.trace("get_token: shard={}, token={}", shard, result);
        return result;
    });
}
|
|
|
|
using registry = class_registrator<dht::i_partitioner, fixed_shard_partitioner>;
|
|
static registry registrator(fixed_shard_partitioner::classname);
|
|
static registry registrator_short_name("FixedShardPartitioner");
|
|
|
|
// Returns the calling thread's fixed_shard_sharder, constructing it on first use.
// Each thread gets its own instance (thread_local storage).
fixed_shard_sharder& fixed_shard_sharder::instance() {
    thread_local static fixed_shard_sharder per_thread_sharder;
    return per_thread_sharder;
}
|
|
|
|
// Constructs the sharder on top of static_sharder, sized to smp::count shards.
// NOTE(review): the second base argument (0) is presumably the number of ignored
// most-significant token bits — confirm against static_sharder's constructor.
fixed_shard_sharder::fixed_shard_sharder() : static_sharder(smp::count, 0) {}
|
|
|
|
// Maps a token to the shard that owns it.
//
// - The minimum token maps to dht::token::shard_of_minimum_token().
// - The maximum token maps to the last shard.
// - Any other token maps to the shard encoded in its high bits, clamped to the
//   last shard when the encoded value is not below shard_count().
unsigned fixed_shard_sharder::shard_of(const dht::token& t) const {
    if (t.is_minimum()) {
        return dht::token::shard_of_minimum_token();
    }

    const unsigned last_shard = shard_count() - 1;
    if (t.is_maximum()) {
        return last_shard;
    }

    const unsigned owner = std::min(fixed_shard_partitioner::shard_of(t), last_shard);
    fslog.trace("shard_of({}) = {}", t, owner);
    return owner;
}
|
|
|
|
// Returns the shard to read `t` from. The result is always engaged: it is
// whatever shard_of() returns for the token.
std::optional<unsigned> fixed_shard_sharder::try_get_shard_for_reads(const dht::token& t) const {
    return std::optional<unsigned>(shard_of(t));
}
|
|
|
|
// Returns the set of shards to write `t` to: the single shard used for reads,
// or an empty set if no read shard is available. The selector argument is ignored.
//
// Data migration is not supported for raft tables of strongly consistent
// tables. Migrating a strongly consistent tablet will require explicitly moving
// its metadata to the new shard together with its raft group data.
dht::shard_replica_set fixed_shard_sharder::shard_for_writes(const dht::token& t, std::optional<dht::write_replica_set_selector>) const {
    if (const auto read_shard = try_get_shard_for_reads(t)) {
        return { *read_shard };
    }
    return {};
}
|
|
|
|
// Forwards to token_for_next_shard_for_reads() with the same arguments.
dht::token fixed_shard_sharder::token_for_next_shard(const dht::token& t, shard_id shard, unsigned spans) const {
    return this->token_for_next_shard_for_reads(t, shard, spans);
}
|
|
|
|
// Returns the first token of `shard`'s range if that range starts after `t`,
// otherwise the maximum token.
//
// Each shard owns a single token range under the fixed_shard_partitioner, so
// asking for more than one span always yields the maximum token. The maximum
// token is also returned when `shard` is not below shard_count() or when `t` is
// itself the maximum token. The minimum token is compared as if its value were 0.
dht::token fixed_shard_sharder::token_for_next_shard_for_reads(const dht::token& t, shard_id shard, unsigned spans) const {
    const bool may_have_next = spans <= 1 && shard < shard_count() && !t.is_maximum();
    if (may_have_next) {
        const int64_t current = t.is_minimum() ? 0 : t.raw();
        const int64_t range_start = static_cast<int64_t>(shard) << fixed_shard_partitioner::shard_shift;
        if (current < range_start) {
            return dht::token(range_start);
        }
    }
    return dht::maximum_token();
}
|
|
|
|
// Returns the shard following the owner of `t`, together with the first token of
// that shard's range. Returns std::nullopt when the owner cannot be determined,
// when it is the last shard, or when the following shard has no range start
// after `t`.
std::optional<dht::shard_and_token> fixed_shard_sharder::next_shard(const dht::token& t) const {
    const auto current = try_get_shard_for_reads(t);
    if (!current) {
        return std::nullopt;
    }

    const auto following = *current + 1;
    if (following >= shard_count()) {
        return std::nullopt;
    }

    auto following_start = token_for_next_shard_for_reads(t, following);
    if (following_start.is_maximum()) {
        return std::nullopt;
    }

    return dht::shard_and_token{following, following_start};
}
|
|
|
|
// Forwards to next_shard() with the same token.
std::optional<dht::shard_and_token> fixed_shard_sharder::next_shard_for_reads(const dht::token& t) const {
    return this->next_shard(t);
}
|
|
|
|
} // namespace dht
|