Add support for nodetool refresh --skip-reshape

This patch adds the new option in nodetool, patches the
load_new_ss_tables REST request with a new parameter and
skips the reshape step in refresh if this flag is passed.

Signed-off-by: Robert Bindar <robert.bindar@scylladb.com>

Closes scylladb/scylladb#24409
Fixes: #24365
This commit is contained in:
Robert Bindar
2025-06-05 16:42:41 +03:00
committed by Pavel Emelyanov
parent 62fdebfe78
commit ca1a9c8d01
9 changed files with 59 additions and 10 deletions

View File

@@ -2153,6 +2153,14 @@
"type":"string",
"paramType":"query"
},
{
"name":"skip_reshape",
"description":"Don't reshape the loaded sstables. Invalid if load_and_stream is true",
"required":false,
"allowMultiple":false,
"type":"string",
"paramType":"query"
},
{
"name":"scope",
"description":"Defines the set of nodes to which mutations can be streamed",

View File

@@ -466,6 +466,8 @@ void set_sstables_loader(http_context& ctx, routes& r, sharded<sstables_loader>&
bool primary_replica_only = primary_replica == "true" || primary_replica == "1";
bool skip_cleanup = skip_cleanup_p == "true" || skip_cleanup_p == "1";
auto scope = parse_stream_scope(req->get_query_param("scope"));
auto skip_reshape_p = req->get_query_param("skip_reshape");
auto skip_reshape = skip_reshape_p == "true" || skip_reshape_p == "1";
if (scope != sstables_loader::stream_scope::all && !load_and_stream) {
throw httpd::bad_param_exception("scope takes no effect without load-and-stream");
@@ -476,8 +478,8 @@ void set_sstables_loader(http_context& ctx, routes& r, sharded<sstables_loader>&
auto coordinator = std::hash<sstring>()(cf) % smp::count;
return sst_loader.invoke_on(coordinator,
[ks = std::move(ks), cf = std::move(cf),
load_and_stream, primary_replica_only, skip_cleanup, scope] (sstables_loader& loader) {
return loader.load_new_sstables(ks, cf, load_and_stream, primary_replica_only, skip_cleanup, scope);
load_and_stream, primary_replica_only, skip_cleanup, skip_reshape, scope] (sstables_loader& loader) {
return loader.load_new_sstables(ks, cf, load_and_stream, primary_replica_only, skip_cleanup, skip_reshape, scope);
}).then_wrapped([] (auto&& f) {
if (f.failed()) {
auto msg = fmt::format("Failed to load new sstables: {}", f.get_exception());

View File

@@ -67,4 +67,14 @@ Skip cleanup
When loading an SSTable, Scylla will clean it of keys that the node is not responsible for. To skip this step, use the `--skip-cleanup` option.
See :ref:`nodetool cleanup <nodetool-cleanup-cmd>`.
Skip reshape
---------------
.. code::
nodetool refresh <my_keyspace> <my_table> [--skip-reshape]
When refreshing, the SSTables to load might be out of shape; if so, Scylla will attempt to reshape them. To skip this step, use the `--skip-reshape` option.
.. include:: nodetool-index.rst

View File

@@ -167,13 +167,13 @@ distributed_loader::make_sstables_available(sstables::sstable_directory& dir, sh
}
future<>
distributed_loader::process_upload_dir(distributed<replica::database>& db, sharded<db::view::view_builder>& vb, sstring ks, sstring cf, bool skip_cleanup) {
distributed_loader::process_upload_dir(distributed<replica::database>& db, sharded<db::view::view_builder>& vb, sstring ks, sstring cf, bool skip_cleanup, bool skip_reshape) {
const auto& rs = db.local().find_keyspace(ks).get_replication_strategy();
if (rs.is_per_table()) {
on_internal_error(dblog, "process_upload_dir is not supported with tablets");
}
return seastar::async([&db, &vb, ks = std::move(ks), cf = std::move(cf), skip_cleanup] {
return seastar::async([&db, &vb, ks = std::move(ks), cf = std::move(cf), skip_cleanup, skip_reshape] {
auto global_table = get_table_on_all_shards(db, ks, cf).get();
sharded<sstables::sstable_directory> directory;
@@ -219,8 +219,10 @@ distributed_loader::process_upload_dir(distributed<replica::database>& db, shard
const auto& erm = db.local().find_keyspace(ks).get_vnode_effective_replication_map();
auto owned_ranges_ptr = skip_cleanup ? lw_shared_ptr<dht::token_range_vector>(nullptr) : compaction::make_owned_ranges_ptr(db.local().get_keyspace_local_ranges(erm).get());
reshard(directory, db, ks, cf, make_sstable, owned_ranges_ptr).get();
reshape(directory, db, sstables::reshape_mode::strict, ks, cf, make_sstable,
[] (const sstables::shared_sstable&) { return true; }).get();
if (!skip_reshape) {
reshape(directory, db, sstables::reshape_mode::strict, ks, cf, make_sstable,
[] (const sstables::shared_sstable&) { return true; }).get();
}
// Move to staging directory to avoid clashes with future uploads. Unique generation number ensures no collisions.
const bool use_view_update_path = db::view::check_needs_view_update_path(vb.local(), erm->get_token_metadata_ptr(), *global_table, streaming::stream_reason::repair).get();

View File

@@ -91,7 +91,7 @@ public:
get_sstables_from_upload_dir(distributed<replica::database>& db, sstring ks, sstring cf, sstables::sstable_open_config cfg);
static future<std::tuple<table_id, std::vector<std::vector<sstables::shared_sstable>>>>
get_sstables_from_object_store(distributed<replica::database>& db, sstring ks, sstring cf, std::vector<sstring> sstables, sstring endpoint, sstring bucket, sstring prefix, sstables::sstable_open_config cfg, std::function<seastar::abort_source*()> = {});
static future<> process_upload_dir(distributed<replica::database>& db, sharded<db::view::view_builder>& vb, sstring ks_name, sstring cf_name, bool skip_cleanup);
static future<> process_upload_dir(distributed<replica::database>& db, sharded<db::view::view_builder>& vb, sstring ks_name, sstring cf_name, bool skip_cleanup, bool skip_reshape);
};
future<sstables::generation_type> highest_generation_seen(sharded<sstables::sstable_directory>& directory);

View File

@@ -542,7 +542,7 @@ future<> sstables_loader::load_and_stream(sstring ks_name, sstring cf_name,
// All the global operations are going to happen here, and just the reloading happens
// in there.
future<> sstables_loader::load_new_sstables(sstring ks_name, sstring cf_name,
bool load_and_stream, bool primary_replica_only, bool skip_cleanup, stream_scope scope) {
bool load_and_stream, bool primary_replica_only, bool skip_cleanup, bool skip_reshape, stream_scope scope) {
if (_loading_new_sstables) {
throw std::runtime_error("Already loading SSTables. Try again later");
} else {
@@ -562,6 +562,10 @@ future<> sstables_loader::load_new_sstables(sstring ks_name, sstring cf_name,
throw std::runtime_error("Skipping cleanup is not possible when doing load-and-stream");
}
if (load_and_stream && skip_reshape) {
throw std::runtime_error("Skipping reshape is not possible when doing load-and-stream");
}
llog.info("Loading new SSTables for keyspace={}, table={}, load_and_stream={}, primary_replica_only={}, skip_cleanup={}",
ks_name, cf_name, load_and_stream_desc, primary_replica_only, skip_cleanup);
try {
@@ -578,7 +582,7 @@ future<> sstables_loader::load_new_sstables(sstring ks_name, sstring cf_name,
co_await loader.load_and_stream(ks_name, cf_name, table_id, std::move(sstables_on_shards[this_shard_id()]), primary_replica_only, true, scope, {});
});
} else {
co_await replica::distributed_loader::process_upload_dir(_db, _view_builder, ks_name, cf_name, skip_cleanup);
co_await replica::distributed_loader::process_upload_dir(_db, _view_builder, ks_name, cf_name, skip_cleanup, skip_reshape);
}
} catch (...) {
llog.warn("Done loading new SSTables for keyspace={}, table={}, load_and_stream={}, primary_replica_only={}, status=failed: {}",

View File

@@ -104,10 +104,14 @@ public:
*
* @param ks_name the keyspace in which to search for new SSTables.
* @param cf_name the column family in which to search for new SSTables.
* @param load_and_stream load SSTables that do not belong to this node and stream them to the appropriate nodes.
* @param primary_replica_only whether to stream only to the primary replica that owns the data.
* @param skip_cleanup whether to skip the cleanup step when loading SSTables.
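* @param skip_reshape whether to skip the reshape step when loading SSTables.
* @param scope the set of nodes to which data can be streamed; takes effect only together with load_and_stream.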
* @return a future<> when the operation finishes.
*/
future<> load_new_sstables(sstring ks_name, sstring cf_name,
bool load_and_stream, bool primary_replica_only, bool skip_cleanup, stream_scope scope);
bool load_and_stream, bool primary_replica_only, bool skip_cleanup, bool skip_reshape, stream_scope scope);
/**
* Download new SSTables not currently tracked by the system from object store

View File

@@ -95,3 +95,15 @@ def test_refresh_load_and_stream_scope(nodetool, load_and_stream_opt, scope_val)
expected_request("POST", "/storage_service/sstables/ks",
params={"cf": "tbl", "load_and_stream": "true", "scope": f"{scope_val}"})])
def test_refresh_skip_reshape(nodetool, scylla_only):
nodetool("refresh", "ks", "tbl", "--skip-reshape", expected_requests=[
expected_request("POST", "/storage_service/sstables/ks", params={"cf": "tbl", "skip_reshape": "true"})])
def test_refresh_skip_reshape_load_and_stream(nodetool, scylla_only):
check_nodetool_fails_with(
nodetool,
("refresh", "ks", "tbl", "--load-and-stream", "--skip-reshape"),
{"expected_requests": []},
["error processing arguments: --skip-reshape takes no effect with --load-and-stream|-las"])

View File

@@ -1577,6 +1577,12 @@ void refresh_operation(scylla_rest_client& client, const bpo::variables_map& vm)
}
params["skip_cleanup"] = "true";
}
if (vm.contains("skip-reshape")) {
if (vm.contains("load-and-stream")) {
throw std::invalid_argument("--skip-reshape takes no effect with --load-and-stream|-las");
}
params["skip_reshape"] = "true";
}
if (vm.contains("scope")) {
if (vm.contains("primary-replica-only")) {
throw std::invalid_argument("Scoped streaming of primary replica only is not supported yet");
@@ -4152,6 +4158,7 @@ For more information, see: {}"
typed_option<>("load-and-stream", "Allows loading sstables that do not belong to this node, in which case they are automatically streamed to the owning nodes"),
typed_option<>("primary-replica-only", "Load the sstables and stream to primary replica node that owns the data. Repair is needed after the load and stream process"),
typed_option<>("skip-cleanup", "Do not perform keys cleanup when loading sstables."),
typed_option<>("skip-reshape", "Do not perform sstable reshape when loading sstables."),
typed_option<sstring>("scope", "Load-and-stream scope (node, rack or dc)"),
},
{