From 8064d17f788fe6b3561f0e32d8c59ea93948f86a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Botond=20D=C3=A9nes?= Date: Tue, 12 Dec 2023 09:31:40 -0500 Subject: [PATCH 1/3] api: extract scrub_status into its own header So it can be shared with scylla-nodetool code. --- api/scrub_status.hh | 18 ++++++++++++++++++ api/storage_service.cc | 8 +------- 2 files changed, 19 insertions(+), 7 deletions(-) create mode 100644 api/scrub_status.hh diff --git a/api/scrub_status.hh b/api/scrub_status.hh new file mode 100644 index 0000000000..6cef20e8f8 --- /dev/null +++ b/api/scrub_status.hh @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2023-present ScyllaDB + */ + +/* + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +namespace api { + +enum class scrub_status { + successful = 0, + aborted, + unable_to_cancel, // Not used in Scylla, included to ensure compatibility with nodetool api. + validation_errors, +}; + +} // namespace api diff --git a/api/storage_service.cc b/api/storage_service.cc index a879477495..64d39a5f7b 100644 --- a/api/storage_service.cc +++ b/api/storage_service.cc @@ -9,6 +9,7 @@ #include "storage_service.hh" #include "api/api-doc/storage_service.json.hh" #include "api/api-doc/storage_proxy.json.hh" +#include "api/scrub_status.hh" #include "db/config.hh" #include "db/schema_tables.hh" #include "utils/hash.hh" @@ -1480,13 +1481,6 @@ void unset_storage_service(http_context& ctx, routes& r) { sp::get_schema_versions.unset(r); } -enum class scrub_status { - successful = 0, - aborted, - unable_to_cancel, // Not used in Scylla, included to ensure compatibility with nodetool api. - validation_errors, -}; - void set_snapshot(http_context& ctx, routes& r, sharded& snap_ctl) { ss::get_snapshot_details.set(r, [&snap_ctl](std::unique_ptr req) { return snap_ctl.local().get_snapshot_details().then([] (std::unordered_map>&& result) { From 892683cacead140f99f32068a5bd48c4373489e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Botond=20D=C3=A9nes?= Date: Tue, 12 Dec 2023 09:32:16 -0500 Subject: [PATCH 2/3] test/nodetool: rest_api_mock.py: add missing "f" to error message f string --- test/nodetool/rest_api_mock.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/nodetool/rest_api_mock.py b/test/nodetool/rest_api_mock.py index c807db2576..416eda5ea0 100644 --- a/test/nodetool/rest_api_mock.py +++ b/test/nodetool/rest_api_mock.py @@ -178,7 +178,7 @@ class rest_server(aiohttp.abc.AbstractRouter): continue logger.error(f"unexpected request\nexpected {expected_req}\ngot {this_req}") - return aiohttp.web.Response(status=500, text="Expected {expected_req}, got {this_req}") + return aiohttp.web.Response(status=500, text=f"Expected {expected_req}, got {this_req}") if expected_req.multiple == expected_request.ONE: del self.expected_requests[0] From 47450ae4db0c6daf255681efa61cf0e7dfe0cebe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Botond=20D=C3=A9nes?= Date: Fri, 8 Dec 2023 09:26:45 -0500 Subject: [PATCH 3/3] tools/scylla-nodetool: implement the scrub command On top of the capabilities of the java-nodetool command, the following additional functionalit is implemented: * Expose quarantine-mode option of the scrub_keyspace REST API * Exit with error and print a message, when scrub finishes with abort or validation_errors return code --- test/nodetool/test_scrub.py | 169 ++++++++++++++++++++++++++++++++++++ tools/scylla-nodetool.cc | 75 ++++++++++++++++ 2 files changed, 244 insertions(+) create mode 100644 test/nodetool/test_scrub.py diff --git a/test/nodetool/test_scrub.py b/test/nodetool/test_scrub.py new file mode 100644 index 0000000000..8e1a63c9ba --- /dev/null +++ b/test/nodetool/test_scrub.py @@ -0,0 +1,169 @@ +# +# Copyright 2023-present ScyllaDB +# +# SPDX-License-Identifier: AGPL-3.0-or-later +# + +import enum +import pytest +from rest_api_mock import expected_request +import utils + + +class scrub_status(enum.Enum): + successful = 0 + aborted = 1 + unable_to_cancel = 2 # not used by ScyllaDB + validation_errors = 3 + + +def test_scrub_keyspace(nodetool): + nodetool("scrub", "ks", expected_requests=[ + expected_request("GET", "/storage_service/keyspaces", response=["ks"]), + expected_request("GET", "/storage_service/keyspace_scrub/ks", response=scrub_status.successful.value)]) + + +def test_scrub_one_table(nodetool): + nodetool("scrub", "ks", "tbl1", expected_requests=[ + expected_request("GET", "/storage_service/keyspaces", response=["ks"]), + expected_request("GET", "/storage_service/keyspace_scrub/ks", params={"cf": "tbl1"}, + response=scrub_status.successful.value)]) + + +def test_scrub_two_tables(nodetool): + nodetool("scrub", "ks", "tbl1", "tbl2", expected_requests=[ + expected_request("GET", "/storage_service/keyspaces", response=["ks"]), + expected_request("GET", "/storage_service/keyspace_scrub/ks", params={"cf": "tbl1,tbl2"}, + response=scrub_status.successful.value)]) + + +# Cassandra parser completely borks when positional args are missing +def test_scrub_nokeyspace(nodetool, scylla_only): + utils.check_nodetool_fails_with( + nodetool, + ("scrub",), + {}, + ["error processing arguments: missing mandatory positional argument: keyspace"]) + + +def test_scrub_non_existent_keyspace(nodetool): + utils.check_nodetool_fails_with( + nodetool, + ("scrub", "non_existent_ks"), + {"expected_requests": [expected_request("GET", "/storage_service/keyspaces", response=["ks"])]}, + ["nodetool: Keyspace [non_existent_ks] does not exist.", + "error processing arguments: keyspace non_existent_ks does not exist"]) + + +# We don't test all values for --mode and --qurantine-mode, they are passed as-is +# to the REST API, so their value make no difference when testing nodetool itself. +@pytest.mark.parametrize("table", [[], ["tbl1"], ["tbl1", "tbl2"]]) +@pytest.mark.parametrize("mode", [None, ("-m", "ABORT"), ("--mode", "ABORT"), "ABORT"]) +@pytest.mark.parametrize("quarantine_mode", [None, ("--quarantine-mode", "INCLUDE"), ("-q", "ONLY")]) +@pytest.mark.parametrize("disable_snapshot", [None, "--no-snapshot", "-ns"]) +def test_scrub_options(request, nodetool, table, mode, quarantine_mode, disable_snapshot): + args = ["scrub", "ks"] + table + expected_params = {} + + if table: + expected_params["cf"] = ",".join(table) + + if mode is not None: + if type(mode) is tuple: + args += list(mode) + expected_params["scrub_mode"] = mode[1] + else: + args += ["--mode", mode] + expected_params["scrub_mode"] = mode + + if quarantine_mode is not None: + if request.config.getoption("nodetool") == "scylla": + args += list(quarantine_mode) + expected_params["quarantine_mode"] = quarantine_mode[1] + else: + pytest.skip("--quarantine-mode only supported by scylla-nodetool") + + if disable_snapshot: + args.append(disable_snapshot) + expected_params["disable_snapshot"] = "true" + + nodetool(*args, expected_requests=[ + expected_request("GET", "/storage_service/keyspaces", response=["ks"]), + expected_request("GET", "/storage_service/keyspace_scrub/ks", params=expected_params, + response=scrub_status.successful.value)]) + + +def test_scrub_skip_corrupted(nodetool): + nodetool("scrub", "ks", "tbl1", "tbl2", "--skip-corrupted", expected_requests=[ + expected_request("GET", "/storage_service/keyspaces", response=["ks"]), + expected_request("GET", "/storage_service/keyspace_scrub/ks", params={"cf": "tbl1,tbl2", "scrub_mode": "SKIP"}, + response=scrub_status.successful.value)]) + + nodetool("scrub", "ks", "tbl1", "tbl2", "-s", expected_requests=[ + expected_request("GET", "/storage_service/keyspaces", response=["ks"]), + expected_request("GET", "/storage_service/keyspace_scrub/ks", params={"cf": "tbl1,tbl2", "scrub_mode": "SKIP"}, + response=scrub_status.successful.value)]) + + +def test_scrub_skip_corrupted_with_mode(nodetool): + utils.check_nodetool_fails_with( + nodetool, + ("scrub", "ks", "--skip-corrupted", "--mode", "ABORT"), + {"expected_requests": [expected_request("GET", "/storage_service/keyspaces", response=["ks"])]}, + ["nodetool: skipCorrupted and scrubMode must not be specified together", + "error processing arguments: cannot use --skip-corrupted when --mode is used"]) + + +@pytest.mark.parametrize("ignored_opt", ["--reinsert-overflowed-ttl", "-r", ("--jobs", "2"), "-j2", "--no-validate", + "-n"]) +def test_scrub_ignored_options(nodetool, ignored_opt): + args = ["scrub", "ks"] + if type(ignored_opt) is tuple: + args += list(ignored_opt) + else: + args.append(ignored_opt) + + nodetool(*args, expected_requests=[ + expected_request("GET", "/storage_service/keyspaces", response=["ks"]), + expected_request("GET", "/storage_service/keyspace_scrub/ks", response=scrub_status.successful.value)]) + + +# Cassandra nodetool ignores the returned status +@pytest.mark.parametrize("status", [scrub_status.successful, scrub_status.aborted, scrub_status.validation_errors]) +def test_scrub_return_status(nodetool, status, cassandra_only): + nodetool("scrub", "ks", "--mode=VALIDATE", expected_requests=[ + expected_request("GET", "/storage_service/keyspaces", response=["ks"]), + expected_request("GET", "/storage_service/keyspace_scrub/ks", params={"scrub_mode": "VALIDATE"}, + response=status.value)]) + + +def test_scrub_validation_errors_exit_code(nodetool, scylla_only): + nodetool("scrub", "ks", "--mode=VALIDATE", expected_requests=[ + expected_request("GET", "/storage_service/keyspaces", response=["ks"]), + expected_request("GET", "/storage_service/keyspace_scrub/ks", params={"scrub_mode": "VALIDATE"}, + response=scrub_status.successful.value)]) + + utils.check_nodetool_fails_with( + nodetool, + ("scrub", "ks", "--mode=VALIDATE"), + {"expected_requests": [ + expected_request("GET", "/storage_service/keyspaces", response=["ks"]), + expected_request("GET", "/storage_service/keyspace_scrub/ks", params={"scrub_mode": "VALIDATE"}, + response=scrub_status.validation_errors.value)]}, + ["scrub failed: there are invalid sstables"]) + + +def test_scrub_abort_exit_code(nodetool, scylla_only): + nodetool("scrub", "ks", "--mode=ABORT", expected_requests=[ + expected_request("GET", "/storage_service/keyspaces", response=["ks"]), + expected_request("GET", "/storage_service/keyspace_scrub/ks", params={"scrub_mode": "ABORT"}, + response=scrub_status.successful.value)]) + + utils.check_nodetool_fails_with( + nodetool, + ("scrub", "ks", "--mode=ABORT"), + {"expected_requests": [ + expected_request("GET", "/storage_service/keyspaces", response=["ks"]), + expected_request("GET", "/storage_service/keyspace_scrub/ks", params={"scrub_mode": "ABORT"}, + response=scrub_status.aborted.value)]}, + ["scrub failed: aborted"]) diff --git a/tools/scylla-nodetool.cc b/tools/scylla-nodetool.cc index 106a64816c..0a94a742db 100644 --- a/tools/scylla-nodetool.cc +++ b/tools/scylla-nodetool.cc @@ -18,6 +18,7 @@ #include #include +#include "api/scrub_status.hh" #include "db_clock.hh" #include "log.hh" #include "tools/utils.hh" @@ -36,6 +37,10 @@ const auto app_name = "scylla-nodetool"; logging::logger nlog(app_name); +struct operation_failed_on_scylladb : public std::runtime_error { + using std::runtime_error::runtime_error; +}; + class scylla_rest_client { sstring _host; uint16_t _port; @@ -606,6 +611,48 @@ void removenode_operation(scylla_rest_client& client, const bpo::variables_map& } } +void scrub_operation(scylla_rest_client& client, const bpo::variables_map& vm) { + const auto [keyspace, tables] = parse_keyspace_and_tables(client, vm, false); + if (keyspace.empty()) { + throw std::invalid_argument("missing mandatory positional argument: keyspace"); + } + if (vm.count("skip-corrupted") && vm.count("mode")) { + throw std::invalid_argument("cannot use --skip-corrupted when --mode is used"); + } + + std::unordered_map params; + + if (!tables.empty()) { + params["cf"] = fmt::to_string(fmt::join(tables.begin(), tables.end(), ",")); + } + + if (vm.count("mode")) { + params["scrub_mode"] = vm["mode"].as(); + } else if (vm.count("skip-corrupted")) { + params["scrub_mode"] = "SKIP"; + } + + if (vm.count("quarantine-mode")) { + params["quarantine_mode"] = vm["quarantine-mode"].as(); + } + + if (vm.count("no-snapshot")) { + params["disable_snapshot"] = "true"; + } + + auto res = client.get(format("/storage_service/keyspace_scrub/{}", keyspace), std::move(params)).GetInt(); + + switch (api::scrub_status(res)) { + case api::scrub_status::successful: + case api::scrub_status::unable_to_cancel: + return; + case api::scrub_status::aborted: + throw operation_failed_on_scylladb("scrub failed: aborted"); + case api::scrub_status::validation_errors: + throw operation_failed_on_scylladb("scrub failed: there are invalid sstables"); + } +} + void setlogginglevel_operation(scylla_rest_client& client, const bpo::variables_map& vm) { if (!vm.count("logger") || !vm.count("level")) { throw std::invalid_argument("resetting logger(s) is not supported yet, the logger and level parameters are required"); @@ -764,6 +811,8 @@ const std::map option_substitutions{ {"--kc.list", "--keyspace-table-list"}, {"-las", "--load-and-stream"}, {"-pro", "--primary-replica-only"}, + {"-ns", "--no-snapshot"}, + {"-m", "--mode"}, // FIXME: this clashes with seastar option for memory }; std::map get_operations_with_func() { @@ -1120,6 +1169,29 @@ Fore more information, see: https://opensource.docs.scylladb.com/stable/operatin }, removenode_operation }, + { + { + "scrub", + "Scrub the SSTable files in the specified keyspace or table(s)", +R"( +Fore more information, see: https://opensource.docs.scylladb.com/stable/operating-scylla/nodetool-commands/scrub.html +)", + { + typed_option<>("no-snapshot", "Do not take a snapshot of scrubbed tables before starting scrub (default false)"), + typed_option<>("skip-corrupted,s", "Skip corrupted rows or partitions, even when scrubbing counter tables (deprecated, use ‘–-mode’ instead, default false)"), + typed_option("mode,m", "How to handle corrupt data (one of: ABORT|SKIP|SEGREGATE|VALIDATE, default ABORT; overrides ‘–-skip-corrupted’)"), + typed_option("quarantine-mode,q", "How to handle quarantined sstables (one of: INCLUDE|EXCLUDE|ONLY, default INCLUDE)"), + typed_option<>("no-validate,n", "Do not validate columns using column validator (unused)"), + typed_option<>("reinsert-overflowed-ttl,r", "Rewrites rows with overflowed expiration date (unused)"), + typed_option("jobs,j", "The number of sstables to be scrubbed concurrently (unused)"), + }, + { + typed_option("keyspace", "The keyspace to scrub", 1), + typed_option>("table", "The table(s) to scrub (if unspecified, all tables in the keyspace are scrubbed)", -1), + }, + }, + scrub_operation + }, { { "setlogginglevel", @@ -1353,6 +1425,9 @@ For more information, see: https://opensource.docs.scylladb.com/stable/operating } catch (std::invalid_argument& e) { fmt::print(std::cerr, "error processing arguments: {}\n", e.what()); return 1; + } catch (operation_failed_on_scylladb& e) { + fmt::print(std::cerr, "{}\n", e.what()); + return 3; } catch (...) { fmt::print(std::cerr, "error running operation: {}\n", std::current_exception()); return 2;