mirror of
https://github.com/scylladb/scylladb.git
synced 2026-04-19 16:15:07 +00:00
storage_service: Support vnodes->tablets migrations w/ arbitrary tokens
The vnodes-to-tablets migration creates tablet maps that mirror the
vnode layout: one tablet per vnode, preserving token boundaries and
replica placement. However, due to tablet restrictions, the migration
requires vnode tokens to be a power of two and uniformly distributed
across the token ring.
In practice, this restriction is too limiting. Real clusters use
randomly generated tokens and a node's token assignment is immutable.
To solve this problem, prior work (01fb97ee78) has been done to relax
the tablet constraints by allowing arbitrary tablet boundaries, removing
the requirement for power-of-two sizing and uniform distribution.
This patch leverages the relaxed tablet constraints to enable tablet map
creation from arbitrary vnode tokens:
* Removes all token-related constraints.
* Handles wrap-around vnodes. If a vnode wraps (i.e., the highest vnode
token is not `dht::token::last()`), it is split into two tablets:
- (last_vnode_token, dht::token::last()]
- [dht::token::first(), first_vnode_token]
The migration ops guide has been updated to remove the power-of-two
constraint.
Signed-off-by: Nikos Dragazis <nikolaos.dragazis@scylladb.com>
This commit is contained in:
@@ -53,9 +53,6 @@ Limitations

 The current migration procedure has the following limitations:

-* The total number of **vnode tokens** in the cluster must be a **power of two**
-  and the tokens must be **evenly spaced** across the token ring. This is
-  verified automatically when starting the migration.
 * **No schema changes** during the migration. Do not create, alter, or drop
   tables in the migrating keyspace until the migration is finished.
 * **No topology changes** during the migration. Do not add, remove, decommission,
@@ -3912,13 +3912,6 @@ future<> storage_service::prepare_for_tablets_migration(const sstring& ks_name)
         throw std::runtime_error(fmt::format("Keyspace {} has no tables to migrate. To use tablets, recreate the keyspace with tablets enabled", ks_name));
     }

-    const auto& tm = get_token_metadata();
-    const auto& sorted_tokens = tm.sorted_tokens();
-    size_t tablet_count = sorted_tokens.size();
-    if (!std::has_single_bit(tablet_count)) {
-        throw std::runtime_error(fmt::format("Table migration requires vnodes to be a power of two. Current value: {}", tablet_count));
-    }
-
     auto topology = co_await get_system_keyspace().load_topology_state({});
     for (const auto& [server_id, replica_state]: topology.normal_nodes) {
         if (replica_state.storage_mode) {
@@ -3948,16 +3941,33 @@ future<> storage_service::prepare_for_tablets_migration(const sstring& ks_name)

     // Build a tablet_map from vnode token boundaries.
     //
-    // The map contains one tablet per vnode. The replicas of each tablet are
-    // the same as the replicas of the corresponding vnode. Shards are assigned
-    // in round-robin fashion per node so that tablets are evenly distributed
-    // within each node.
+    // The map contains one tablet per vnode, plus one extra tablet for the
+    // wrap-around range (last_vnode_token, MAX_TOKEN] when
+    // last_vnode_token != MAX_TOKEN. Each tablet has the same replicas as
+    // the corresponding vnode. Shards are assigned in round-robin fashion
+    // per node so that tablets are evenly distributed within each node.
     // (FIXME: we should consider tablet sizes as well)
     //
     // This map will serve as a template for per-table tablet map mutations.
     // Each table in the keyspace receives its own tablet map, but all maps
     // have identical tablet boundaries and replica placement.

+    const auto& tm = get_token_metadata();
+    const auto& sorted_tokens = tm.sorted_tokens();
+
+    utils::chunked_vector<dht::raw_token> last_tokens;
     size_t tablet_count = sorted_tokens.size();
+    last_tokens.reserve(tablet_count + 1); // +1 for possible wrapping tablet
+    for (const auto& t : sorted_tokens) {
+        last_tokens.emplace_back(t);
+    }
+    // Add an extra tablet for the wrapping range if needed.
+    auto needs_wrapping_tablet = sorted_tokens.back() != dht::token::last();
+    if (needs_wrapping_tablet) {
+        last_tokens.emplace_back(dht::token::last());
+        tablet_count++;
+    }
+
     slogger.info("Building tablet maps for tables in keyspace {} with {} tablet(s)", ks_name, tablet_count);

     // Stateful lambdas for round-robin shard assignment per node.
@@ -3971,18 +3981,16 @@ future<> storage_service::prepare_for_tablets_migration(const sstring& ks_name)

         auto erm = ks.get_static_effective_replication_map();

-        locator::tablet_map tmap(tablet_count);
+        locator::tablet_map tmap(std::move(last_tokens));
         for (size_t i = 0; i < tablet_count; ++i) {
             auto tablet = locator::tablet_id(i);
-            auto vnode_replica_hosts = erm->get_natural_replicas(sorted_tokens[i], true);
+            auto vnode_token = needs_wrapping_tablet && i == tablet_count - 1 ? tmap.get_last_token(locator::tablet_id{0}) : tmap.get_last_token(tablet);
+            auto vnode_replica_hosts = erm->get_natural_replicas(vnode_token, true);
             locator::tablet_replica_set tablet_replicas;
             for (auto host : vnode_replica_hosts) {
                 tablet_replicas.push_back(locator::tablet_replica{host, next_shard_for[host]()});
             }
             tmap.set_tablet(tablet, locator::tablet_info(std::move(tablet_replicas)));
-            if (tmap.get_last_token(tablet) != sorted_tokens[i]) {
-                throw std::runtime_error(fmt::format("vnode token {} is not aligned; cannot be used as tablet boundary (expected: {})", sorted_tokens[i], tmap.get_last_token(tablet)));
-            }
         }

         // Build tablet map mutations for all tables and persist them to group0 (system.tablets)
Reference in New Issue
Block a user