api: Add REST endpoint for upgrading nodes to tablets

The endpoint is the following:

    PUT /storage_service/vnode_tablet_migrations/node/storage_mode?intended_mode={tablets,vnodes}

This endpoint is part of the vnodes-to-tablets migration process and
controls a node's intended_storage_mode in system.topology. The storage
mode represents the node-local data distribution model, i.e., how data
are organized across shards. The node will apply the intended storage
mode to migrating tables upon next restart by resharding their SSTables
(either on vnode boundaries if intended_mode=tablets, or with the static
sharder if intended_mode=vnodes).

Note that this endpoint controls the intended_storage_mode of the local
node only. This has the nice benefit that once the API call returns, the
change has not only been committed to group0 but also applied to the
local node's state machine. This guarantees that the change is part of
the node's local copy upon next restart; no additional read barrier is
needed.

Signed-off-by: Nikos Dragazis <nikolaos.dragazis@scylladb.com>
This commit is contained in:
Nikos Dragazis
2026-02-21 00:14:52 +02:00
parent c4c3a95863
commit 2f93ab281b
5 changed files with 120 additions and 0 deletions

View File

@@ -3186,6 +3186,26 @@
]
}]
},
{
"path":"/storage_service/vnode_tablet_migrations/node/storage_mode",
"operations":[{
"method":"PUT",
"summary":"Set the intended storage mode for this node during vnodes-to-tablets migration",
"type":"void",
"nickname":"set_vnode_tablet_migration_node_storage_mode",
"produces":["application/json"],
"parameters":[
{
"name":"intended_mode",
"description":"Intended storage mode (tablets or vnodes)",
"required":true,
"allowMultiple":false,
"type":"string",
"paramType":"query"
}
]
}]
},
{
"path":"/storage_service/quiesce_topology",
"operations":[

View File

@@ -30,6 +30,7 @@
#include <fmt/ranges.h>
#include "service/raft/raft_group0_client.hh"
#include "service/storage_service.hh"
#include "service/topology_state_machine.hh"
#include "service/load_meter.hh"
#include "gms/feature_service.hh"
#include "gms/gossiper.hh"
@@ -1739,6 +1740,19 @@ rest_create_vnode_tablet_migration(http_context& ctx, sharded<service::storage_s
co_return json_void();
}
static
future<json::json_return_type>
rest_set_vnode_tablet_migration_node_storage_mode(http_context& ctx, sharded<service::storage_service>& ss, std::unique_ptr<http::request> req) {
    // REST handler: reads the "intended_mode" query parameter, converts it
    // to an intended_storage_mode and asks the local storage_service to
    // record it for this node. Replies with an empty JSON body.
    auto& storage = ss.local();

    // Refuse to proceed until the cluster feature is enabled.
    if (!storage.get_feature_service().vnodes_to_tablets_migrations) {
        apilog.warn("set_vnode_tablet_migration_node_storage_mode: called before the cluster feature was enabled");
        throw std::runtime_error("vnodes-to-tablets migration requires all nodes to support the VNODES_TO_TABLETS_MIGRATIONS cluster feature");
    }

    const auto requested_mode = service::intended_storage_mode_from_string(req->get_query_param("intended_mode"));
    co_await storage.set_node_intended_storage_mode(requested_mode);
    co_return json_void();
}
static
future<json::json_return_type>
rest_quiesce_topology(sharded<service::storage_service>& ss, std::unique_ptr<http::request> req) {
@@ -1890,6 +1904,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
ss::repair_tablet.set(r, rest_bind(rest_repair_tablet, ctx, ss));
ss::tablet_balancing_enable.set(r, rest_bind(rest_tablet_balancing_enable, ss));
ss::create_vnode_tablet_migration.set(r, rest_bind(rest_create_vnode_tablet_migration, ctx, ss));
ss::set_vnode_tablet_migration_node_storage_mode.set(r, rest_bind(rest_set_vnode_tablet_migration_node_storage_mode, ctx, ss));
ss::quiesce_topology.set(r, rest_bind(rest_quiesce_topology, ss));
sp::get_schema_versions.set(r, rest_bind(rest_get_schema_versions, ss));
ss::drop_quarantined_sstables.set(r, rest_bind(rest_drop_quarantined_sstables, ctx, ss));
@@ -1970,6 +1985,7 @@ void unset_storage_service(http_context& ctx, routes& r) {
ss::repair_tablet.unset(r);
ss::tablet_balancing_enable.unset(r);
ss::create_vnode_tablet_migration.unset(r);
ss::set_vnode_tablet_migration_node_storage_mode.unset(r);
ss::quiesce_topology.unset(r);
sp::get_schema_versions.unset(r);
ss::drop_quarantined_sstables.unset(r);

View File

@@ -4124,6 +4124,81 @@ future<> storage_service::prepare_for_tablets_migration(const sstring& ks_name)
}
}
// Records `mode` as the intended storage mode of the local node by
// committing a topology change through group0.
//
// The work is done on shard 0; calls arriving on other shards are
// re-dispatched there. The operation is retried for as long as group0
// reports a concurrent modification.
//
// Throws std::runtime_error when no migration is in progress, when the
// local node is not found in the topology, or when it is not in the
// normal state. Returns without committing anything if the node already
// has the requested mode.
future<> storage_service::set_node_intended_storage_mode(intended_storage_mode mode) {
    if (this_shard_id() != 0) {
        co_await container().invoke_on(0, [mode] (auto& shard0_ss) {
            return shard0_ss.set_node_intended_storage_mode(mode);
        });
        co_return;
    }

    auto& group0_server = _group0->group0_server();
    auto gate_holder = _group0->hold_group0_gate();
    const auto node_id = group0_server.id();

    slogger.info("Setting intended storage mode for node {} to {}", node_id, mode);

    // A migration counts as started once prepare_for_tablets_migration()
    // has been called for at least one keyspace, i.e. some keyspace that
    // does not use tablets owns a table which already has a tablet map.
    // prepare_for_tablets_migration() will fail if intended_storage_mode
    // is already set for any node.
    auto migration_in_progress = [this] {
        const auto& tablets = get_token_metadata().tablets();
        for (const auto& ks_name : _db.local().get_non_system_keyspaces()) {
            auto& ks = _db.local().find_keyspace(ks_name);
            if (ks.uses_tablets()) {
                continue;
            }
            for (const auto& table_schema : ks.metadata()->tables()) {
                if (tablets.has_tablet_map(table_schema->id())) {
                    return true;
                }
            }
        }
        return false;
    };

    for (bool committed = false; !committed; ) {
        auto guard = co_await _group0->client().start_operation(_group0_as, raft_timeout{});

        if (!migration_in_progress()) {
            throw std::runtime_error(::format("Cannot set intended storage mode to {}: no migration is in progress. You need to start a migration first.", mode));
        }

        auto node_entry = _topology_state_machine._topology.find(node_id);
        if (!node_entry) {
            throw std::runtime_error(::format("Node {} is not a member of the cluster", node_id));
        }

        const auto& replica = node_entry->second;
        if (replica.state != node_state::normal) {
            throw std::runtime_error(::format("Node {} is not in the normal state (current state: {})", node_id, replica.state));
        }

        // Nothing to commit if the requested mode is already recorded.
        if (replica.storage_mode == mode) {
            slogger.info("Node {} already has intended storage mode set to {}, skipping", node_id, mode);
            co_return;
        }

        topology_mutation_builder builder(guard.write_timestamp());
        builder.with_node(node_id).set("intended_storage_mode", mode);
        topology_change change{{builder.build()}};
        group0_command cmd = _group0->client().prepare_command(std::move(change), guard,
                ::format("set intended storage mode for node {} to {}", node_id, mode));

        try {
            co_await _group0->client().add_entry(std::move(cmd), std::move(guard), _group0_as);
            committed = true;
        } catch (group0_concurrent_modification&) {
            // Another group0 change was committed first; start over with a fresh guard.
            slogger.info("set_node_intended_storage_mode: concurrent modification, retrying");
        }
    }

    slogger.info("Successfully set intended storage mode for node {} to {}", node_id, mode);
}
future<> storage_service::process_tablet_split_candidate(table_id table) noexcept {
tasks::task_info tablet_split_task_info;

View File

@@ -288,6 +288,7 @@ public:
// Builds tablet maps from vnode token boundaries for all tables and
// persists them to group0.
future<> prepare_for_tablets_migration(const sstring& ks_name);
// Sets the intended storage mode of the local node in the topology
// state by committing a change to group0. Executes on shard 0 and
// retries on concurrent group0 modifications.
// Throws std::runtime_error if no vnodes-to-tablets migration is in
// progress, if the local node is not found in the topology, or if it
// is not in the normal state. Does nothing if the node already has
// the requested mode.
future<> set_node_intended_storage_mode(intended_storage_mode mode);
void start_tablet_split_monitor();
private:

View File

@@ -328,6 +328,14 @@ class ScyllaRESTAPIClient:
"""Start vnodes-to-tablets migration for all tables in a keyspace"""
await self.client.post(f"/storage_service/vnode_tablet_migrations/keyspaces/{ks}", host=node_ip)
async def upgrade_node_to_tablets(self, node_ip: str) -> None:
    """Request tablets as the intended storage mode of the node at ``node_ip``.

    Sends the request through the client's ``put_json`` to the
    vnodes-to-tablets migration ``storage_mode`` endpoint.
    """
    endpoint = "/storage_service/vnode_tablet_migrations/node/storage_mode?intended_mode=tablets"
    await self.client.put_json(endpoint, host=node_ip)
async def downgrade_node_to_vnodes(self, node_ip: str) -> None:
    """Request vnodes as the intended storage mode of the node at ``node_ip``.

    Sends the request through the client's ``put_json`` to the
    vnodes-to-tablets migration ``storage_mode`` endpoint.
    """
    endpoint = "/storage_service/vnode_tablet_migrations/node/storage_mode?intended_mode=vnodes"
    await self.client.put_json(endpoint, host=node_ip)
async def keyspace_upgrade_sstables(self, node_ip: str, ks: str) -> None:
await self.client.get(f"/storage_service/keyspace_upgrade_sstables/{ks}", host=node_ip)