From 2f93ab281bf566e6dfd73aa5af0d0ea4c6219bb5 Mon Sep 17 00:00:00 2001 From: Nikos Dragazis Date: Sat, 21 Feb 2026 00:14:52 +0200 Subject: [PATCH] api: Add REST endpoint for upgrading nodes to tablets The endpoint is the following: PUT /storage_service/vnode_tablet_migrations/node/storage_mode?intended_mode={tablets,vnodes} This endpoint is part of the vnodes-to-tablets migration process and controls a node's intended_storage_mode in system.topology. The storage mode represents the node-local data distribution model, i.e., how data are organized across shards. The node will apply the intended storage mode to migrating tables upon next restart by resharding their SSTables (either on vnode boundaries if intended_mode=tablets, or with the static sharder if intended_mode=vnodes). Note that this endpoint controls the intended_storage_mode of the local node only. This has the nice benefit that once the API call returns, the change has not only been committed to group0 but also applied to the local node's state machine. This guarantees that the change is part of the node's local copy upon next restart; no additional read barrier is needed. 
Signed-off-by: Nikos Dragazis --- api/api-doc/storage_service.json | 20 +++++++++ api/storage_service.cc | 16 +++++++ service/storage_service.cc | 75 ++++++++++++++++++++++++++++++++ service/storage_service.hh | 1 + test/pylib/rest_client.py | 8 ++++ 5 files changed, 120 insertions(+) diff --git a/api/api-doc/storage_service.json b/api/api-doc/storage_service.json index 519df14f69..45cf2011f2 100644 --- a/api/api-doc/storage_service.json +++ b/api/api-doc/storage_service.json @@ -3186,6 +3186,26 @@ ] }] }, + { + "path":"/storage_service/vnode_tablet_migrations/node/storage_mode", + "operations":[{ + "method":"PUT", + "summary":"Set the intended storage mode for this node during vnodes-to-tablets migration", + "type":"void", + "nickname":"set_vnode_tablet_migration_node_storage_mode", + "produces":["application/json"], + "parameters":[ + { + "name":"intended_mode", + "description":"Intended storage mode (tablets or vnodes)", + "required":true, + "allowMultiple":false, + "type":"string", + "paramType":"query" + } + ] + }] + }, { "path":"/storage_service/quiesce_topology", "operations":[ diff --git a/api/storage_service.cc b/api/storage_service.cc index 0bd2e49709..37f6d33436 100644 --- a/api/storage_service.cc +++ b/api/storage_service.cc @@ -30,6 +30,7 @@ #include #include "service/raft/raft_group0_client.hh" #include "service/storage_service.hh" +#include "service/topology_state_machine.hh" #include "service/load_meter.hh" #include "gms/feature_service.hh" #include "gms/gossiper.hh" @@ -1739,6 +1740,19 @@ rest_create_vnode_tablet_migration(http_context& ctx, sharded +rest_set_vnode_tablet_migration_node_storage_mode(http_context& ctx, sharded& ss, std::unique_ptr req) { + if (!ss.local().get_feature_service().vnodes_to_tablets_migrations) { + apilog.warn("set_vnode_tablet_migration_node_storage_mode: called before the cluster feature was enabled"); + throw std::runtime_error("vnodes-to-tablets migration requires all nodes to support the VNODES_TO_TABLETS_MIGRATIONS 
cluster feature"); + } + auto mode_str = req->get_query_param("intended_mode"); + auto mode = service::intended_storage_mode_from_string(mode_str); + co_await ss.local().set_node_intended_storage_mode(mode); + co_return json_void(); +} + static future rest_quiesce_topology(sharded& ss, std::unique_ptr req) { @@ -1890,6 +1904,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded storage_service::prepare_for_tablets_migration(const sstring& ks_name) } } +future<> storage_service::set_node_intended_storage_mode(intended_storage_mode mode) { + if (this_shard_id() != 0) { + co_return co_await container().invoke_on(0, [mode] (auto& ss) { + return ss.set_node_intended_storage_mode(mode); + }); + } + + auto& raft_server = _group0->group0_server(); + auto holder = _group0->hold_group0_gate(); + + slogger.info("Setting intended storage mode for node {} to {}", raft_server.id(), mode); + + while (true) { + auto guard = co_await _group0->client().start_operation(_group0_as, raft_timeout{}); + + // Make sure that a migration has been started, i.e., + // prepare_for_tablets_migration() has been called for at least one + // keyspace. prepare_for_tablets_migration() will fail if + // intended_storage_mode is already set for any node. + const auto& tablet_metadata = get_token_metadata().tablets(); + bool has_any_migrating_table = false; + for (const auto& ks : _db.local().get_non_system_keyspaces()) { + auto& keyspace = _db.local().find_keyspace(ks); + if (!keyspace.uses_tablets()) { + for (const auto& schema : keyspace.metadata()->tables()) { + if (tablet_metadata.has_tablet_map(schema->id())) { + has_any_migrating_table = true; + break; + } + } + } + if (has_any_migrating_table) { + break; + } + } + if (!has_any_migrating_table) { + throw std::runtime_error(::format("Cannot set intended storage mode to {}: no migration is in progress. 
You need to start a migration first.", mode)); + } + + auto it = _topology_state_machine._topology.find(raft_server.id()); + if (!it) { + throw std::runtime_error(::format("Node {} is not a member of the cluster", raft_server.id())); + } + + const auto& rs = it->second; + + if (rs.state != node_state::normal) { + throw std::runtime_error(::format("Node {} is not in the normal state (current state: {})", raft_server.id(), rs.state)); + } + + if (rs.storage_mode == mode) { + slogger.info("Node {} already has intended storage mode set to {}, skipping", raft_server.id(), mode); + co_return; + } + + topology_mutation_builder builder(guard.write_timestamp()); + builder.with_node(raft_server.id()) + .set("intended_storage_mode", mode); + + topology_change change{{builder.build()}}; + group0_command g0_cmd = _group0->client().prepare_command(std::move(change), guard, + ::format("set intended storage mode for node {} to {}", raft_server.id(), mode)); + + try { + co_await _group0->client().add_entry(std::move(g0_cmd), std::move(guard), _group0_as); + } catch (group0_concurrent_modification&) { + slogger.info("set_node_intended_storage_mode: concurrent modification, retrying"); + continue; + } + break; + } + + slogger.info("Successfully set intended storage mode for node {} to {}", raft_server.id(), mode); +} + future<> storage_service::process_tablet_split_candidate(table_id table) noexcept { tasks::task_info tablet_split_task_info; diff --git a/service/storage_service.hh b/service/storage_service.hh index 5ed8d5ade3..789334cf6d 100644 --- a/service/storage_service.hh +++ b/service/storage_service.hh @@ -288,6 +288,7 @@ public: // Builds tablet maps from vnode token boundaries for all tables and // persists them to group0. 
future<> prepare_for_tablets_migration(const sstring& ks_name); + future<> set_node_intended_storage_mode(intended_storage_mode mode); void start_tablet_split_monitor(); private: diff --git a/test/pylib/rest_client.py b/test/pylib/rest_client.py index 4bd581a419..2e75d5d2b9 100644 --- a/test/pylib/rest_client.py +++ b/test/pylib/rest_client.py @@ -328,6 +328,14 @@ class ScyllaRESTAPIClient: """Start vnodes-to-tablets migration for all tables in a keyspace""" await self.client.post(f"/storage_service/vnode_tablet_migrations/keyspaces/{ks}", host=node_ip) + async def upgrade_node_to_tablets(self, node_ip: str) -> None: + """Set the node's intended storage mode to tablets""" + await self.client.put_json(f"/storage_service/vnode_tablet_migrations/node/storage_mode?intended_mode=tablets", host=node_ip) + + async def downgrade_node_to_vnodes(self, node_ip: str) -> None: + """Set the node's intended storage mode to vnodes""" + await self.client.put_json(f"/storage_service/vnode_tablet_migrations/node/storage_mode?intended_mode=vnodes", host=node_ip) + async def keyspace_upgrade_sstables(self, node_ip: str, ks: str) -> None: await self.client.get(f"/storage_service/keyspace_upgrade_sstables/{ks}", host=node_ip)