From 098732ff7606aa6aaa6413b76839cc9136af4a56 Mon Sep 17 00:00:00 2001 From: Nikos Dragazis Date: Fri, 27 Mar 2026 14:45:33 +0200 Subject: [PATCH] storage_service: Support vnodes->tablets migrations w/ arbitrary tokens The vnodes-to-tablets migration creates tablet maps that mirror the vnode layout: one tablet per vnode, preserving token boundaries and replica placement. However, due to tablet restrictions, the migration requires the number of vnode tokens to be a power of two and the tokens to be uniformly distributed across the token ring. In practice, this restriction is too limiting. Real clusters use randomly generated tokens, and a node's token assignment is immutable. To solve this problem, prior work (01fb97ee78) has been done to relax the tablet constraints by allowing arbitrary tablet boundaries, removing the requirement for power-of-two sizing and uniform distribution. This patch leverages the relaxed tablet constraints to enable tablet map creation from arbitrary vnode tokens: * Removes all token-related constraints. * Handles wrap-around vnodes. If the last vnode's range wraps around the ring (i.e., the highest vnode token is not `dht::token::last()`), it is split into two tablets: - (last_vnode_token, dht::token::last()] - [dht::token::first(), first_vnode_token] The migration ops guide has been updated to remove the power-of-two and even-spacing constraint. 
Signed-off-by: Nikos Dragazis --- .../migrate-vnodes-to-tablets.rst | 3 -- service/storage_service.cc | 40 +++++++++++-------- 2 files changed, 24 insertions(+), 19 deletions(-) diff --git a/docs/operating-scylla/procedures/config-change/migrate-vnodes-to-tablets.rst b/docs/operating-scylla/procedures/config-change/migrate-vnodes-to-tablets.rst index a9226e28a1..b549768bf6 100644 --- a/docs/operating-scylla/procedures/config-change/migrate-vnodes-to-tablets.rst +++ b/docs/operating-scylla/procedures/config-change/migrate-vnodes-to-tablets.rst @@ -53,9 +53,6 @@ Limitations The current migration procedure has the following limitations: -* The total number of **vnode tokens** in the cluster must be a **power of two** - and the tokens must be **evenly spaced** across the token ring. This is - verified automatically when starting the migration. * **No schema changes** during the migration. Do not create, alter, or drop tables in the migrating keyspace until the migration is finished. * **No topology changes** during the migration. Do not add, remove, decommission, diff --git a/service/storage_service.cc b/service/storage_service.cc index 49490b6464..bc13b01773 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -3912,13 +3912,6 @@ future<> storage_service::prepare_for_tablets_migration(const sstring& ks_name) throw std::runtime_error(fmt::format("Keyspace {} has no tables to migrate. To use tablets, recreate the keyspace with tablets enabled", ks_name)); } - const auto& tm = get_token_metadata(); - const auto& sorted_tokens = tm.sorted_tokens(); - size_t tablet_count = sorted_tokens.size(); - if (!std::has_single_bit(tablet_count)) { - throw std::runtime_error(fmt::format("Table migration requires vnodes to be a power of two. 
Current value: {}", tablet_count)); - } - auto topology = co_await get_system_keyspace().load_topology_state({}); for (const auto& [server_id, replica_state]: topology.normal_nodes) { if (replica_state.storage_mode) { @@ -3948,16 +3941,33 @@ future<> storage_service::prepare_for_tablets_migration(const sstring& ks_name) // Build a tablet_map from vnode token boundaries. // - // The map contains one tablet per vnode. The replicas of each tablet are - // the same as the replicas of the corresponding vnode. Shards are assigned - // in round-robin fashion per node so that tablets are evenly distributed - // within each node. + // The map contains one tablet per vnode, plus one extra tablet for the + // wrap-around range (last_vnode_token, MAX_TOKEN] when + // last_vnode_token != MAX_TOKEN. Each tablet has the same replicas as + // the corresponding vnode. Shards are assigned in round-robin fashion + // per node so that tablets are evenly distributed within each node. // (FIXME: we should consider tablet sizes as well) // // This map will serve as a template for per-table tablet map mutations. // Each table in the keyspace receives its own tablet map, but all maps // have identical tablet boundaries and replica placement. + const auto& tm = get_token_metadata(); + const auto& sorted_tokens = tm.sorted_tokens(); + + utils::chunked_vector last_tokens; + size_t tablet_count = sorted_tokens.size(); + last_tokens.reserve(tablet_count + 1); // +1 for possible wrapping tablet + for (const auto& t : sorted_tokens) { + last_tokens.emplace_back(t); + } + // Add an extra tablet for the wrapping range if needed. + auto needs_wrapping_tablet = sorted_tokens.back() != dht::token::last(); + if (needs_wrapping_tablet) { + last_tokens.emplace_back(dht::token::last()); + tablet_count++; + } + slogger.info("Building tablet maps for tables in keyspace {} with {} tablet(s)", ks_name, tablet_count); // Stateful lambdas for round-robin shard assignment per node. 
@@ -3971,18 +3981,16 @@ future<> storage_service::prepare_for_tablets_migration(const sstring& ks_name) auto erm = ks.get_static_effective_replication_map(); - locator::tablet_map tmap(tablet_count); + locator::tablet_map tmap(std::move(last_tokens)); for (size_t i = 0; i < tablet_count; ++i) { auto tablet = locator::tablet_id(i); - auto vnode_replica_hosts = erm->get_natural_replicas(sorted_tokens[i], true); + auto vnode_token = needs_wrapping_tablet && i == tablet_count - 1 ? tmap.get_last_token(locator::tablet_id{0}) : tmap.get_last_token(tablet); + auto vnode_replica_hosts = erm->get_natural_replicas(vnode_token, true); locator::tablet_replica_set tablet_replicas; for (auto host : vnode_replica_hosts) { tablet_replicas.push_back(locator::tablet_replica{host, next_shard_for[host]()}); } tmap.set_tablet(tablet, locator::tablet_info(std::move(tablet_replicas))); - if (tmap.get_last_token(tablet) != sorted_tokens[i]) { - throw std::runtime_error(fmt::format("vnode token {} is not aligned; cannot be used as tablet boundary (expected: {})", sorted_tokens[i], tmap.get_last_token(tablet))); - } } // Build tablet map mutations for all tables and persist them to group0 (system.tablets)