s3/test: collect log on exit

The temporary directory holding the log file that collects the scylla subprocess's output is specified by the test itself: it is `test_tempdir`. Unfortunately, cql-pytest/run.py is not aware of this, so `cleanup_all()` is not able to print out the logging messages at exit. Note that cql-pytest/run.py always collects the "log" file under the directory created using `pid_to_dir()`, where pid is the pid of the spawned subprocess, but `object_store/run` uses the main process's pid for its reusable tempdir. So, with this change, we also register a cleanup func to print out the logging messages when the test exits. Signed-off-by: Kefu Chai <kefu.chai@scylladb.com> Closes #13647
2023-04-24 13:53:25 +03:00
1476 changed files with 51424 additions and 70803 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -26,6 +26,8 @@ tags
 testlog
 test/*/*.reject
 .vscode
+docs/_build
+docs/poetry.lock
 compile_commands.json
 .ccls-cache/
 .mypy_cache
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.27)
+cmake_minimum_required(VERSION 3.18)

 project(scylla)

@@ -8,19 +8,11 @@ list(APPEND CMAKE_MODULE_PATH
  ${CMAKE_CURRENT_SOURCE_DIR}/cmake
  ${CMAKE_CURRENT_SOURCE_DIR}/seastar/cmake)

+set(CMAKE_BUILD_TYPE "${CMAKE_BUILD_TYPE}" CACHE
+    STRING "Choose the type of build." FORCE)
 # Set the possible values of build type for cmake-gui
-set(scylla_build_types
-    "Debug" "Release" "Dev" "Sanitize" "Coverage")
 set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS
-  ${scylla_build_types})
-if(NOT CMAKE_BUILD_TYPE)
-    set(CMAKE_BUILD_TYPE "Release" CACHE
-        STRING "Choose the type of build." FORCE)
-    message(WARNING "CMAKE_BUILD_TYPE not specified, Using 'Release'")
-elseif(NOT CMAKE_BUILD_TYPE IN_LIST scylla_build_types)
-    message(FATAL_ERROR "Unknown CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}. "
-        "Following types are supported: ${scylla_build_types}")
-endif()
+  "Debug" "Release" "Dev" "Sanitize")
 string(TOUPPER "${CMAKE_BUILD_TYPE}" build_mode)
 include(mode.${build_mode})
 include(mode.common)
@@ -34,9 +26,6 @@ set(CMAKE_CXX_EXTENSIONS ON CACHE INTERNAL "")
 set(CMAKE_CXX_VISIBILITY_PRESET hidden)

 set(Seastar_TESTING ON CACHE BOOL "" FORCE)
-set(Seastar_API_LEVEL 7 CACHE STRING "" FORCE)
-set(Seastar_APPS ON CACHE BOOL "" FORCE)
-set(Seastar_EXCLUDE_APPS_FROM_ALL ON CACHE BOOL "" FORCE)
 add_subdirectory(seastar)

 # System libraries dependencies
@@ -56,8 +45,6 @@ find_package(xxHash REQUIRED)
 set(scylla_gen_build_dir "${CMAKE_BINARY_DIR}/gen")
 file(MAKE_DIRECTORY "${scylla_gen_build_dir}")

-include(add_version_library)
-generate_scylla_version()

 add_library(scylla-main STATIC)
 target_sources(scylla-main
@@ -78,6 +65,7 @@ target_sources(scylla-main
    debug.cc
    init.cc
    keys.cc
+    message/messaging_service.cc
    multishard_mutation_query.cc
    mutation_query.cc
    partition_slice_builder.cc
@@ -123,10 +111,8 @@ add_subdirectory(index)
 add_subdirectory(interface)
 add_subdirectory(lang)
 add_subdirectory(locator)
-add_subdirectory(message)
 add_subdirectory(mutation)
 add_subdirectory(mutation_writer)
-add_subdirectory(node_ops)
 add_subdirectory(readers)
 add_subdirectory(redis)
 add_subdirectory(replica)
@@ -144,6 +130,7 @@ add_subdirectory(tracing)
 add_subdirectory(transport)
 add_subdirectory(types)
 add_subdirectory(utils)
+include(add_version_library)
 add_version_library(scylla_version
    release.cc)

@@ -165,7 +152,6 @@ target_link_libraries(scylla PRIVATE
    index
    lang
    locator
-    message
    mutation
    mutation_writer
    raft
@@ -194,8 +180,22 @@ target_link_libraries(scylla PRIVATE
    seastar
    Boost::program_options)

+# Force SHA1 build-id generation
+set(default_linker_flags "-Wl,--build-id=sha1")
+include(CheckLinkerFlag)
+foreach(linker "lld" "gold")
+    set(linker_flag "-fuse-ld=${linker}")
+    check_linker_flag(CXX ${linker_flag} "CXX_LINKER_HAVE_${linker}")
+    if(CXX_LINKER_HAVE_${linker})
+        string(APPEND default_linker_flags " ${linker_flag}")
+        break()
+    endif()
+endforeach()
+
+set(CMAKE_EXE_LINKER_FLAGS "${default_linker_flags}" CACHE INTERNAL "")
+
+# TODO: patch dynamic linker to match configure.py behavior
+
 target_include_directories(scylla PRIVATE
    "${CMAKE_CURRENT_SOURCE_DIR}"
    "${scylla_gen_build_dir}")
-
-add_subdirectory(dist)
--- a/12
+++ b/12
@@ -7,7 +7,6 @@ Options:
  -h|--help show this help message.
  -o|--output-dir PATH specify destination path at which the version files are to be created.
  -d|--date-stamp DATE manually set date for release parameter
-  -v|--verbose also print out the version number

 By default, the script will attempt to parse 'version' file
 in the current directory, which should contain a string of
@@ -34,7 +33,6 @@ END
 )

 DATE=""
-PRINT_VERSION=false

 while [ $# -gt 0 ]; do
 	opt="$1"
@@ -53,10 +51,6 @@ while [ $# -gt 0 ]; do
 			shift
 			shift
 			;;
-		-v|--verbose)
-			PRINT_VERSION=true
-			shift
-			;;
 		*)
 			echo "Unexpected argument found: $1"
 			echo
@@ -78,7 +72,7 @@ fi

 # Default scylla product/version tags
 PRODUCT=scylla
-VERSION=5.5.0-dev
+VERSION=5.3.0-dev

 if test -f version
 then
@@ -108,9 +102,7 @@ if [ -f "$OUTPUT_DIR/SCYLLA-RELEASE-FILE" ]; then
 	fi
 fi

-if $PRINT_VERSION; then
-	echo "$SCYLLA_VERSION-$SCYLLA_RELEASE"
-fi
+echo "$SCYLLA_VERSION-$SCYLLA_RELEASE"
 mkdir -p "$OUTPUT_DIR"
 echo "$SCYLLA_VERSION" > "$OUTPUT_DIR/SCYLLA-VERSION-FILE"
 echo "$SCYLLA_RELEASE" > "$OUTPUT_DIR/SCYLLA-RELEASE-FILE"
--- a/alternator/auth.cc
+++ b/alternator/auth.cc
@@ -53,7 +53,7 @@ future<std::string> get_key_from_roles(service::storage_proxy& proxy, std::strin
    if (result_set->empty()) {
        co_await coroutine::return_exception(api_error::unrecognized_client(format("User not found: {}", username)));
    }
-    const managed_bytes_opt& salted_hash = result_set->rows().front().front(); // We only asked for 1 row and 1 column
+    const bytes_opt& salted_hash = result_set->rows().front().front(); // We only asked for 1 row and 1 column
    if (!salted_hash) {
        co_await coroutine::return_exception(api_error::unrecognized_client(format("No password found for user: {}", username)));
    }
--- a/alternator/controller.cc
+++ b/alternator/controller.cc
@@ -76,16 +76,13 @@ future<> controller::start_server() {
        _ssg = create_smp_service_group(c).get0();

        rmw_operation::set_default_write_isolation(_config.alternator_write_isolation());
+        executor::set_default_timeout(std::chrono::milliseconds(_config.alternator_timeout_in_ms()));

        net::inet_address addr = utils::resolve(_config.alternator_address, family).get0();

        auto get_cdc_metadata = [] (cdc::generation_service& svc) { return std::ref(svc.get_cdc_metadata()); };
-        auto get_timeout_in_ms = [] (const db::config& cfg) -> utils::updateable_value<uint32_t> {
-            return cfg.alternator_timeout_in_ms;
-        };
-        _executor.start(std::ref(_gossiper), std::ref(_proxy), std::ref(_mm), std::ref(_sys_dist_ks),
-                        sharded_parameter(get_cdc_metadata, std::ref(_cdc_gen_svc)), _ssg.value(),
-                        sharded_parameter(get_timeout_in_ms, std::ref(_config))).get();
+
+        _executor.start(std::ref(_gossiper), std::ref(_proxy), std::ref(_mm), std::ref(_sys_dist_ks), sharded_parameter(get_cdc_metadata, std::ref(_cdc_gen_svc)), _ssg.value()).get();
        _server.start(std::ref(_executor), std::ref(_proxy), std::ref(_gossiper), std::ref(_auth_service), std::ref(_sl_controller)).get();
        // Note: from this point on, if start_server() throws for any reason,
        // it must first call stop_server() to stop the executor and server
--- a/alternator/executor.cc
+++ b/alternator/executor.cc
@@ -6,6 +6,8 @@
 * SPDX-License-Identifier: AGPL-3.0-or-later
 */

+#include <regex>
+
 #include "utils/base64.hh"

 #include <seastar/core/sleep.hh>
@@ -38,6 +40,7 @@
 #include <seastar/json/json_elements.hh>
 #include <boost/algorithm/cxx11/any_of.hpp>
 #include "collection_mutation.hh"
+#include "db/query_context.hh"
 #include "schema/schema.hh"
 #include "db/tags/extension.hh"
 #include "db/tags/utils.hh"
@@ -59,28 +62,7 @@ logging::logger elogger("alternator-executor");

 namespace alternator {

-enum class table_status {
-    active = 0,
-    creating,
-    updating,
-    deleting
-};
-
-static sstring_view table_status_to_sstring(table_status tbl_status) {
-    switch(tbl_status) {
-        case table_status::active:
-            return "ACTIVE";
-        case table_status::creating:
-            return "CREATING";
-        case table_status::updating:
-            return "UPDATING";
-        case table_status::deleting:
-            return "DELETING";
-    }
-    return "UKNOWN";
-}
-
-static lw_shared_ptr<keyspace_metadata> create_keyspace_metadata(std::string_view keyspace_name, service::storage_proxy& sp, gms::gossiper& gossiper, api::timestamp_type);
+static future<std::vector<mutation>> create_keyspace(std::string_view keyspace_name, service::storage_proxy& sp, service::migration_manager& mm, gms::gossiper& gossiper, api::timestamp_type);

 static map_type attrs_type() {
    static thread_local auto t = map_type_impl::get_instance(utf8_type, bytes_type, true);
@@ -108,20 +90,17 @@ json::json_return_type make_streamed(rjson::value&& value) {
        // move objects to coroutine frame.
        auto los = std::move(os);
        auto lrs = std::move(rs);
-        std::exception_ptr ex;
        try {
            co_await rjson::print(*lrs, los);
+            co_await los.flush();
+            co_await los.close();
        } catch (...) {
            // at this point, we cannot really do anything. HTTP headers and return code are
            // already written, and quite potentially a portion of the content data.
            // just log + rethrow. It is probably better the HTTP server closes connection
            // abruptly or something...
-            ex = std::current_exception();
-            elogger.error("Exception during streaming HTTP response: {}", ex);
-        }
-        co_await los.close();
-        if (ex) {
-            co_await coroutine::return_exception_ptr(std::move(ex));
+            elogger.error("Unhandled exception in data streaming: {}", std::current_exception());
+            throw;
        }
        co_return;
    };
@@ -211,8 +190,9 @@ static std::string lsi_name(const std::string& table_name, std::string_view inde

 /** Extract table name from a request.
 *  Most requests expect the table's name to be listed in a "TableName" field.
- *  This convenience function returns the name or api_error in case the
- *  table name is missing or not a string.
+ *  This convenience function returns the name, with appropriate validation
+ *  and api_error in case the table name is missing or not a string, or
+ *  doesn't pass validate_table_name().
 */
 static std::optional<std::string> find_table_name(const rjson::value& request) {
    const rjson::value* table_name_value = rjson::find(request, "TableName");
@@ -223,6 +203,7 @@ static std::optional<std::string> find_table_name(const rjson::value& request) {
        throw api_error::validation("Non-string TableName field in request");
    }
    std::string table_name = table_name_value->GetString();
+    validate_table_name(table_name);
    return table_name;
 }

@@ -249,10 +230,6 @@ schema_ptr executor::find_table(service::storage_proxy& proxy, const rjson::valu
    try {
        return proxy.data_dictionary().find_schema(sstring(executor::KEYSPACE_NAME_PREFIX) + sstring(*table_name), *table_name);
    } catch(data_dictionary::no_such_column_family&) {
-        // DynamoDB returns validation error even when table does not exist
-        // and the table name is invalid.
-        validate_table_name(table_name.value());
-
        throw api_error::resource_not_found(
                format("Requested resource not found: Table: {} not found", *table_name));
    }
@@ -303,10 +280,6 @@ get_table_or_view(service::storage_proxy& proxy, const rjson::value& request) {
        try {
            return { proxy.data_dictionary().find_schema(sstring(internal_ks_name), sstring(internal_table_name)), type };
        } catch (data_dictionary::no_such_column_family&) {
-            // DynamoDB returns validation error even when table does not exist
-            // and the table name is invalid.
-            validate_table_name(table_name);
-
            throw api_error::resource_not_found(
                format("Requested resource not found: Internal table: {}.{} not found", internal_ks_name, internal_table_name));
        }
@@ -442,91 +415,6 @@ static rjson::value generate_arn_for_index(const schema& schema, std::string_vie
        schema.ks_name(), schema.cf_name(), index_name));
 }

-static rjson::value fill_table_description(schema_ptr schema, table_status tbl_status, service::storage_proxy const& proxy)
-{
-    rjson::value table_description = rjson::empty_object();
-    rjson::add(table_description, "TableName", rjson::from_string(schema->cf_name()));
-    // FIXME: take the tables creation time, not the current time!
-    size_t creation_date_seconds = std::chrono::duration_cast<std::chrono::seconds>(gc_clock::now().time_since_epoch()).count();
-    // FIXME: In DynamoDB the CreateTable implementation is asynchronous, and
-    // the table may be in "Creating" state until creating is finished.
-    // We don't currently do this in Alternator - instead CreateTable waits
-    // until the table is really available. So/ DescribeTable returns either
-    // ACTIVE or doesn't exist at all (and DescribeTable returns an error).
-    // The states CREATING and UPDATING are not currently returned.
-    rjson::add(table_description, "TableStatus", rjson::from_string(table_status_to_sstring(tbl_status)));
-    rjson::add(table_description, "TableArn", generate_arn_for_table(*schema));
-    rjson::add(table_description, "TableId", rjson::from_string(schema->id().to_sstring()));
-    // FIXME: Instead of hardcoding, we should take into account which mode was chosen
-    // when the table was created. But, Spark jobs expect something to be returned
-    // and PAY_PER_REQUEST seems closer to reality than PROVISIONED.
-    rjson::add(table_description, "BillingModeSummary", rjson::empty_object());
-    rjson::add(table_description["BillingModeSummary"], "BillingMode", "PAY_PER_REQUEST");
-    rjson::add(table_description["BillingModeSummary"], "LastUpdateToPayPerRequestDateTime", rjson::value(creation_date_seconds));
-    // In PAY_PER_REQUEST billing mode, provisioned capacity should return 0
-    rjson::add(table_description, "ProvisionedThroughput", rjson::empty_object());
-    rjson::add(table_description["ProvisionedThroughput"], "ReadCapacityUnits", 0);
-    rjson::add(table_description["ProvisionedThroughput"], "WriteCapacityUnits", 0);
-    rjson::add(table_description["ProvisionedThroughput"], "NumberOfDecreasesToday", 0);
-
-   
-
-    data_dictionary::table t = proxy.data_dictionary().find_column_family(schema);
-    
-    if (tbl_status != table_status::deleting) {
-        rjson::add(table_description, "CreationDateTime", rjson::value(creation_date_seconds));
-        std::unordered_map<std::string,std::string> key_attribute_types;
-        // Add base table's KeySchema and collect types for AttributeDefinitions:
-        executor::describe_key_schema(table_description, *schema, key_attribute_types);
-        if (!t.views().empty()) {
-            rjson::value gsi_array = rjson::empty_array();
-            rjson::value lsi_array = rjson::empty_array();
-            for (const view_ptr& vptr : t.views()) {
-                rjson::value view_entry = rjson::empty_object();
-                const sstring& cf_name = vptr->cf_name();
-                size_t delim_it = cf_name.find(':');
-                if (delim_it == sstring::npos) {
-                    elogger.error("Invalid internal index table name: {}", cf_name);
-                    continue;
-                }
-                sstring index_name = cf_name.substr(delim_it + 1);
-                rjson::add(view_entry, "IndexName", rjson::from_string(index_name));
-                rjson::add(view_entry, "IndexArn", generate_arn_for_index(*schema, index_name));
-                // Add indexes's KeySchema and collect types for AttributeDefinitions:
-                executor::describe_key_schema(view_entry, *vptr, key_attribute_types);
-                // Add projection type
-                rjson::value projection = rjson::empty_object();
-                rjson::add(projection, "ProjectionType", "ALL");
-                // FIXME: we have to get ProjectionType from the schema when it is added
-                rjson::add(view_entry, "Projection", std::move(projection));
-                // Local secondary indexes are marked by an extra '!' sign occurring before the ':' delimiter
-                rjson::value& index_array = (delim_it > 1 && cf_name[delim_it-1] == '!') ? lsi_array : gsi_array;
-                rjson::push_back(index_array, std::move(view_entry));
-            }
-            if (!lsi_array.Empty()) {
-                rjson::add(table_description, "LocalSecondaryIndexes", std::move(lsi_array));
-            }
-            if (!gsi_array.Empty()) {
-                rjson::add(table_description, "GlobalSecondaryIndexes", std::move(gsi_array));
-            }
-        }
-        // Use map built by describe_key_schema() for base and indexes to produce
-        // AttributeDefinitions for all key columns:
-        rjson::value attribute_definitions = rjson::empty_array();
-        for (auto& type : key_attribute_types) {
-            rjson::value key = rjson::empty_object();
-            rjson::add(key, "AttributeName", rjson::from_string(type.first));
-            rjson::add(key, "AttributeType", rjson::from_string(type.second));
-            rjson::push_back(attribute_definitions, std::move(key));
-        }
-        rjson::add(table_description, "AttributeDefinitions", std::move(attribute_definitions));
-    }
-    executor::supplement_table_stream_info(table_description, *schema, proxy);
-
-    // FIXME: still missing some response fields (issue #5026)
-    return table_description;
-}
-
 bool is_alternator_keyspace(const sstring& ks_name) {
    return ks_name.find(executor::KEYSPACE_NAME_PREFIX) == 0;
 }
@@ -543,7 +431,85 @@ future<executor::request_return_type> executor::describe_table(client_state& cli

    tracing::add_table_name(trace_state, schema->ks_name(), schema->cf_name());

-    rjson::value table_description = fill_table_description(schema, table_status::active, _proxy);
+    rjson::value table_description = rjson::empty_object();
+    rjson::add(table_description, "TableName", rjson::from_string(schema->cf_name()));
+    // FIXME: take the tables creation time, not the current time!
+    size_t creation_date_seconds = std::chrono::duration_cast<std::chrono::seconds>(gc_clock::now().time_since_epoch()).count();
+    rjson::add(table_description, "CreationDateTime", rjson::value(creation_date_seconds));
+    // FIXME: In DynamoDB the CreateTable implementation is asynchronous, and
+    // the table may be in "Creating" state until creating is finished.
+    // We don't currently do this in Alternator - instead CreateTable waits
+    // until the table is really available. So/ DescribeTable returns either
+    // ACTIVE or doesn't exist at all (and DescribeTable returns an error).
+    // The other states (CREATING, UPDATING, DELETING) are not currently
+    // returned.
+    rjson::add(table_description, "TableStatus", "ACTIVE");
+    rjson::add(table_description, "TableArn", generate_arn_for_table(*schema));
+    rjson::add(table_description, "TableId", rjson::from_string(schema->id().to_sstring()));
+    // FIXME: Instead of hardcoding, we should take into account which mode was chosen
+    // when the table was created. But, Spark jobs expect something to be returned
+    // and PAY_PER_REQUEST seems closer to reality than PROVISIONED.
+    rjson::add(table_description, "BillingModeSummary", rjson::empty_object());
+    rjson::add(table_description["BillingModeSummary"], "BillingMode", "PAY_PER_REQUEST");
+    rjson::add(table_description["BillingModeSummary"], "LastUpdateToPayPerRequestDateTime", rjson::value(creation_date_seconds));
+    // In PAY_PER_REQUEST billing mode, provisioned capacity should return 0
+    rjson::add(table_description, "ProvisionedThroughput", rjson::empty_object());
+    rjson::add(table_description["ProvisionedThroughput"], "ReadCapacityUnits", 0);
+    rjson::add(table_description["ProvisionedThroughput"], "WriteCapacityUnits", 0);
+    rjson::add(table_description["ProvisionedThroughput"], "NumberOfDecreasesToday", 0);
+
+    std::unordered_map<std::string,std::string> key_attribute_types;
+    // Add base table's KeySchema and collect types for AttributeDefinitions:
+    describe_key_schema(table_description, *schema, key_attribute_types);
+
+    data_dictionary::table t = _proxy.data_dictionary().find_column_family(schema);
+    if (!t.views().empty()) {
+        rjson::value gsi_array = rjson::empty_array();
+        rjson::value lsi_array = rjson::empty_array();
+        for (const view_ptr& vptr : t.views()) {
+            rjson::value view_entry = rjson::empty_object();
+            const sstring& cf_name = vptr->cf_name();
+            size_t delim_it = cf_name.find(':');
+            if (delim_it == sstring::npos) {
+                elogger.error("Invalid internal index table name: {}", cf_name);
+                continue;
+            }
+            sstring index_name = cf_name.substr(delim_it + 1);
+            rjson::add(view_entry, "IndexName", rjson::from_string(index_name));
+            rjson::add(view_entry, "IndexArn", generate_arn_for_index(*schema, index_name));
+            // Add indexes's KeySchema and collect types for AttributeDefinitions:
+            describe_key_schema(view_entry, *vptr, key_attribute_types);
+            // Add projection type
+            rjson::value projection = rjson::empty_object();
+            rjson::add(projection, "ProjectionType", "ALL");
+            // FIXME: we have to get ProjectionType from the schema when it is added
+            rjson::add(view_entry, "Projection", std::move(projection));
+            // Local secondary indexes are marked by an extra '!' sign occurring before the ':' delimiter
+            rjson::value& index_array = (delim_it > 1 && cf_name[delim_it-1] == '!') ? lsi_array : gsi_array;
+            rjson::push_back(index_array, std::move(view_entry));
+        }
+        if (!lsi_array.Empty()) {
+            rjson::add(table_description, "LocalSecondaryIndexes", std::move(lsi_array));
+        }
+        if (!gsi_array.Empty()) {
+            rjson::add(table_description, "GlobalSecondaryIndexes", std::move(gsi_array));
+        }
+    }
+    // Use map built by describe_key_schema() for base and indexes to produce
+    // AttributeDefinitions for all key columns:
+    rjson::value attribute_definitions = rjson::empty_array();
+    for (auto& type : key_attribute_types) {
+        rjson::value key = rjson::empty_object();
+        rjson::add(key, "AttributeName", rjson::from_string(type.first));
+        rjson::add(key, "AttributeType", rjson::from_string(type.second));
+        rjson::push_back(attribute_definitions, std::move(key));
+    }
+    rjson::add(table_description, "AttributeDefinitions", std::move(attribute_definitions));
+
+    supplement_table_stream_info(table_description, *schema, _proxy);
+    
+    // FIXME: still missing some response fields (issue #5026)
+
    rjson::value response = rjson::empty_object();
    rjson::add(response, "Table", std::move(table_description));
    elogger.trace("returning {}", response);
@@ -555,17 +521,10 @@ future<executor::request_return_type> executor::delete_table(client_state& clien
    elogger.trace("Deleting table {}", request);

    std::string table_name = get_table_name(request);
-    // DynamoDB returns validation error even when table does not exist
-    // and the table name is invalid.
-    validate_table_name(table_name);
-
    std::string keyspace_name = executor::KEYSPACE_NAME_PREFIX + table_name;
    tracing::add_table_name(trace_state, keyspace_name, table_name);
    auto& p = _proxy.container();

-    schema_ptr schema = get_table(_proxy, request);
-    rjson::value table_description = fill_table_description(schema, table_status::deleting, _proxy);
-
    co_await _mm.container().invoke_on(0, [&] (service::migration_manager& mm) -> future<> {
        // FIXME: the following needs to be in a loop. If mm.announce() below
        // fails, we need to retry the whole thing.
@@ -575,14 +534,18 @@ future<executor::request_return_type> executor::delete_table(client_state& clien
            throw api_error::resource_not_found(format("Requested resource not found: Table: {} not found", table_name));
        }

-        auto m = co_await service::prepare_column_family_drop_announcement(_proxy, keyspace_name, table_name, group0_guard.write_timestamp(), service::drop_views::yes);
-        auto m2 = co_await service::prepare_keyspace_drop_announcement(_proxy.local_db(), keyspace_name, group0_guard.write_timestamp());
+        auto m = co_await mm.prepare_column_family_drop_announcement(keyspace_name, table_name, group0_guard.write_timestamp(), service::migration_manager::drop_views::yes);
+        auto m2 = mm.prepare_keyspace_drop_announcement(keyspace_name, group0_guard.write_timestamp());

        std::move(m2.begin(), m2.end(), std::back_inserter(m));

-        co_await mm.announce(std::move(m), std::move(group0_guard), format("alternator-executor: delete {} table", table_name));
+        co_await mm.announce(std::move(m), std::move(group0_guard));
    });

+    // FIXME: need more attributes?
+    rjson::value table_description = rjson::empty_object();
+    rjson::add(table_description, "TableName", rjson::from_string(table_name));
+    rjson::add(table_description, "TableStatus", "DELETING");
    rjson::value response = rjson::empty_object();
    rjson::add(response, "TableDescription", std::move(table_description));
    elogger.trace("returning {}", response);
@@ -867,6 +830,17 @@ future<executor::request_return_type> executor::list_tags_of_resource(client_sta
    return make_ready_future<executor::request_return_type>(make_jsonable(std::move(ret)));
 }

+static future<> wait_for_schema_agreement(service::migration_manager& mm, db::timeout_clock::time_point deadline) {
+    return do_until([&mm, deadline] {
+        if (db::timeout_clock::now() > deadline) {
+            throw std::runtime_error("Unable to reach schema agreement");
+        }
+        return mm.have_schema_agreement();
+    }, [] {
+        return seastar::sleep(500ms);
+    });
+}
+
 static void verify_billing_mode(const rjson::value& request) {
        // Alternator does not yet support billing or throughput limitations, but
    // let's verify that BillingMode is at least legal.
@@ -884,38 +858,6 @@ static void verify_billing_mode(const rjson::value& request) {
    }
 }

-// Validate that a AttributeDefinitions parameter in CreateTable is valid, and
-// throws user-facing api_error::validation if it's not.
-// In particular, verify that the same AttributeName doesn't appear more than
-// once (Issue #13870).
-static void validate_attribute_definitions(const rjson::value& attribute_definitions){
-    if (!attribute_definitions.IsArray()) {
-        throw api_error::validation("AttributeDefinitions must be an array");
-    }
-    std::unordered_set<std::string> seen_attribute_names;
-    for (auto it = attribute_definitions.Begin(); it != attribute_definitions.End(); ++it) {
-        const rjson::value* attribute_name = rjson::find(*it, "AttributeName");
-        if (!attribute_name) {
-            throw api_error::validation("AttributeName missing in AttributeDefinitions");
-        }
-        if (!attribute_name->IsString()) {
-            throw api_error::validation("AttributeName in AttributeDefinitions must be a string");
-        }
-        auto [it2, added] = seen_attribute_names.emplace(rjson::to_string_view(*attribute_name));
-        if (!added) {
-            throw api_error::validation(format("Duplicate AttributeName={} in AttributeDefinitions",
-                rjson::to_string_view(*attribute_name)));
-        }
-        const rjson::value* attribute_type = rjson::find(*it, "AttributeType");
-        if (!attribute_type) {
-            throw api_error::validation("AttributeType missing in AttributeDefinitions");
-        }
-        if (!attribute_type->IsString()) {
-            throw api_error::validation("AttributeType in AttributeDefinitions must be a string");
-        }
-    }
-}
-
 static future<executor::request_return_type> create_table_on_shard0(tracing::trace_state_ptr trace_state, rjson::value request, service::storage_proxy& sp, service::migration_manager& mm, gms::gossiper& gossiper) {
    assert(this_shard_id() == 0);

@@ -924,14 +866,11 @@ static future<executor::request_return_type> create_table_on_shard0(tracing::tra
    // (e.g., verify that this table doesn't already exist) - we can only
    // do this further down - after taking group0_guard.
    std::string table_name = get_table_name(request);
-    validate_table_name(table_name);
-
    if (table_name.find(executor::INTERNAL_TABLE_PREFIX) == 0) {
        co_return api_error::validation(format("Prefix {} is reserved for accessing internal tables", executor::INTERNAL_TABLE_PREFIX));
    }
    std::string keyspace_name = executor::KEYSPACE_NAME_PREFIX + table_name;
    const rjson::value& attribute_definitions = request["AttributeDefinitions"];
-    validate_attribute_definitions(attribute_definitions);

    tracing::add_table_name(trace_state, keyspace_name, table_name);

@@ -1121,9 +1060,8 @@ static future<executor::request_return_type> create_table_on_shard0(tracing::tra
    auto group0_guard = co_await mm.start_group0_operation();
    auto ts = group0_guard.write_timestamp();
    std::vector<mutation> schema_mutations;
-    auto ksm = create_keyspace_metadata(keyspace_name, sp, gossiper, ts);
    try {
-        schema_mutations = service::prepare_new_keyspace_announcement(sp.local_db(), ksm, ts);
+        schema_mutations = co_await create_keyspace(keyspace_name, sp, mm, gossiper, ts);
    } catch (exceptions::already_exists_exception&) {
        if (sp.data_dictionary().has_schema(keyspace_name, table_name)) {
            co_return api_error::resource_in_use(format("Table {} already exists", table_name));
@@ -1133,14 +1071,22 @@ static future<executor::request_return_type> create_table_on_shard0(tracing::tra
        // This should never happen, the ID is supposed to be unique
        co_return api_error::internal(format("Table with ID {} already exists", schema->id()));
    }
-    co_await service::prepare_new_column_family_announcement(schema_mutations, sp, *ksm, schema, ts);
+    db::schema_tables::add_table_or_view_to_schema_mutation(schema, ts, true, schema_mutations);
+    // we must call before_create_column_family callbacks - which allow
+    // listeners to modify our schema_mutations. For example, CDC may add
+    // another table (the CDC log table) to the same keyspace.
+    // Unfortunately the convention is that this callback must be run in
+    // a Seastar thread.
+    co_await seastar::async([&] {
+        mm.get_notifier().before_create_column_family(*schema, schema_mutations, ts);
+    });
    for (schema_builder& view_builder : view_builders) {
        db::schema_tables::add_table_or_view_to_schema_mutation(
            view_ptr(view_builder.build()), ts, true, schema_mutations);
    }
-    co_await mm.announce(std::move(schema_mutations), std::move(group0_guard), format("alternator-executor: create {} table", table_name));
+    co_await mm.announce(std::move(schema_mutations), std::move(group0_guard));

-    co_await mm.wait_for_schema_agreement(sp.local_db(), db::timeout_clock::now() + 10s, nullptr);
+    co_await wait_for_schema_agreement(mm, db::timeout_clock::now() + 10s);
    rjson::value status = rjson::empty_object();
    executor::supplement_table_info(request, *schema, sp);
    rjson::add(status, "TableDescription", std::move(request));
@@ -1203,11 +1149,11 @@ future<executor::request_return_type> executor::update_table(client_state& clien

        auto schema = builder.build();

-        auto m = co_await service::prepare_column_family_update_announcement(p.local(), schema, false,  std::vector<view_ptr>(), group0_guard.write_timestamp());
+        auto m = co_await mm.prepare_column_family_update_announcement(schema, false,  std::vector<view_ptr>(), group0_guard.write_timestamp());

-        co_await mm.announce(std::move(m), std::move(group0_guard), format("alternator-executor: update {} table", tab->cf_name()));
+        co_await mm.announce(std::move(m), std::move(group0_guard));

-        co_await mm.wait_for_schema_agreement(p.local().local_db(), db::timeout_clock::now() + 10s, nullptr);
+        co_await wait_for_schema_agreement(mm, db::timeout_clock::now() + 10s);

        rjson::value status = rjson::empty_object();
        supplement_table_info(request, *schema, p.local());
@@ -1419,11 +1365,14 @@ mutation put_or_delete_item::build(schema_ptr schema, api::timestamp_type ts) co

 // The DynamoDB API doesn't let the client control the server's timeout, so
 // we have a global default_timeout() for Alternator requests. The value of
-// s_default_timeout_ms is overwritten in alternator::controller::start_server()
+// s_default_timeout is overwritten in alternator::controller::start_server()
 // based on the "alternator_timeout_in_ms" configuration parameter.
-thread_local utils::updateable_value<uint32_t> executor::s_default_timeout_in_ms{10'000};
+db::timeout_clock::duration executor::s_default_timeout = 10s;
+void executor::set_default_timeout(db::timeout_clock::duration timeout) {
+    s_default_timeout = timeout;
+}
 db::timeout_clock::time_point executor::default_timeout() {
-    return db::timeout_clock::now() + std::chrono::milliseconds(s_default_timeout_in_ms);
+    return db::timeout_clock::now() + s_default_timeout;
 }
        
 static future<std::unique_ptr<rjson::value>> get_previous_item(
@@ -1643,7 +1592,7 @@ static parsed::condition_expression get_parsed_condition_expression(rjson::value
        throw api_error::validation("ConditionExpression must not be empty");
    }
    try {
-        return parse_condition_expression(rjson::to_string_view(*condition_expression), "ConditionExpression");
+        return parse_condition_expression(rjson::to_string_view(*condition_expression));
    } catch(expressions_syntax_error& e) {
        throw api_error::validation(e.what());
    }
@@ -1658,16 +1607,17 @@ static bool check_needs_read_before_write(const parsed::condition_expression& co

 // Fail the expression if it has unused attribute names or values. This is
 // how DynamoDB behaves, so we do too.
-static void verify_all_are_used(const rjson::value* field,
-        const std::unordered_set<std::string>& used, const char* field_name, const char* operation) {
-    if (!field) {
+static void verify_all_are_used(const rjson::value& req, const char* field,
+        const std::unordered_set<std::string>& used, const char* operation) {
+    const rjson::value* attribute_names = rjson::find(req, field);
+    if (!attribute_names) {
        return;
    }
-    for (auto it = field->MemberBegin(); it != field->MemberEnd(); ++it) {
+    for (auto it = attribute_names->MemberBegin(); it != attribute_names->MemberEnd(); ++it) {
        if (!used.contains(it->name.GetString())) {
            throw api_error::validation(
                format("{} has spurious '{}', not used in {}",
-                    field_name, it->name.GetString(), operation));
+                       field, it->name.GetString(), operation));
        }
    }
 }
@@ -1694,8 +1644,8 @@ public:
            resolve_condition_expression(_condition_expression,
                    expression_attribute_names, expression_attribute_values,
                    used_attribute_names, used_attribute_values);
-            verify_all_are_used(expression_attribute_names, used_attribute_names,"ExpressionAttributeNames", "PutItem");
-            verify_all_are_used(expression_attribute_values, used_attribute_values,"ExpressionAttributeValues", "PutItem");
+            verify_all_are_used(_request, "ExpressionAttributeNames", used_attribute_names, "PutItem");
+            verify_all_are_used(_request, "ExpressionAttributeValues", used_attribute_values, "PutItem");
        } else {
            if (expression_attribute_names) {
                throw api_error::validation("ExpressionAttributeNames cannot be used without ConditionExpression");
@@ -1779,8 +1729,8 @@ public:
            resolve_condition_expression(_condition_expression,
                    expression_attribute_names, expression_attribute_values,
                    used_attribute_names, used_attribute_values);
-            verify_all_are_used(expression_attribute_names, used_attribute_names,"ExpressionAttributeNames", "DeleteItem");
-            verify_all_are_used(expression_attribute_values, used_attribute_values, "ExpressionAttributeValues", "DeleteItem");
+            verify_all_are_used(_request, "ExpressionAttributeNames", used_attribute_names, "DeleteItem");
+            verify_all_are_used(_request, "ExpressionAttributeValues", used_attribute_values, "DeleteItem");
        } else {
            if (expression_attribute_names) {
                throw api_error::validation("ExpressionAttributeNames cannot be used without ConditionExpression");
@@ -2350,14 +2300,14 @@ static std::optional<attrs_to_get> calculate_attrs_to_get(const rjson::value& re
 * as before.
 */ 
 void executor::describe_single_item(const cql3::selection::selection& selection,
-    const std::vector<managed_bytes_opt>& result_row,
+    const std::vector<bytes_opt>& result_row,
    const std::optional<attrs_to_get>& attrs_to_get,
    rjson::value& item,
    bool include_all_embedded_attributes) 
 {
    const auto& columns = selection.get_columns();
    auto column_it = columns.begin();
-    for (const managed_bytes_opt& cell : result_row) {
+    for (const bytes_opt& cell : result_row) {
        std::string column_name = (*column_it)->name_as_text();
        if (cell && column_name != executor::ATTRS_COLUMN_NAME) {
            if (!attrs_to_get || attrs_to_get->contains(column_name)) {
@@ -2365,9 +2315,7 @@ void executor::describe_single_item(const cql3::selection::selection& selection,
                // so add() makes sense
                rjson::add_with_string_name(item, column_name, rjson::empty_object());
                rjson::value& field = item[column_name.c_str()];
-                cell->with_linearized([&] (bytes_view linearized_cell) {
-                    rjson::add_with_string_name(field, type_to_string((*column_it)->type), json_key_column_value(linearized_cell, **column_it));
-                });
+                rjson::add_with_string_name(field, type_to_string((*column_it)->type), json_key_column_value(*cell, **column_it));
            }
        } else if (cell) {
            auto deserialized = attrs_type()->deserialize(*cell);
@@ -2423,22 +2371,21 @@ std::optional<rjson::value> executor::describe_single_item(schema_ptr schema,
    return item;
 }

-future<std::vector<rjson::value>> executor::describe_multi_item(schema_ptr schema,
-        const query::partition_slice&& slice,
-        shared_ptr<cql3::selection::selection> selection,
-        foreign_ptr<lw_shared_ptr<query::result>> query_result,
-        shared_ptr<const std::optional<attrs_to_get>> attrs_to_get) {
-    cql3::selection::result_set_builder builder(*selection, gc_clock::now());
-    query::result_view::consume(*query_result, slice, cql3::selection::result_set_builder::visitor(builder, *schema, *selection));
+std::vector<rjson::value> executor::describe_multi_item(schema_ptr schema,
+        const query::partition_slice& slice,
+        const cql3::selection::selection& selection,
+        const query::result& query_result,
+        const std::optional<attrs_to_get>& attrs_to_get) {
+    cql3::selection::result_set_builder builder(selection, gc_clock::now());
+    query::result_view::consume(query_result, slice, cql3::selection::result_set_builder::visitor(builder, *schema, selection));
    auto result_set = builder.build();
    std::vector<rjson::value> ret;
    for (auto& result_row : result_set->rows()) {
        rjson::value item = rjson::empty_object();
-        describe_single_item(*selection, result_row, *attrs_to_get, item);
+        describe_single_item(selection, result_row, attrs_to_get, item);
        ret.push_back(std::move(item));
-        co_await coroutine::maybe_yield();
    }
-    co_return ret;
+    return ret;
 }

 static bool check_needs_read_before_write(const parsed::value& v) {
@@ -2553,8 +2500,8 @@ update_item_operation::update_item_operation(service::storage_proxy& proxy, rjso
            expression_attribute_names, expression_attribute_values,
            used_attribute_names, used_attribute_values);

-    verify_all_are_used(expression_attribute_names, used_attribute_names, "ExpressionAttributeNames", "UpdateItem");
-    verify_all_are_used(expression_attribute_values, used_attribute_values, "ExpressionAttributeValues", "UpdateItem");
+    verify_all_are_used(_request, "ExpressionAttributeNames", used_attribute_names, "UpdateItem");
+    verify_all_are_used(_request, "ExpressionAttributeValues", used_attribute_values, "UpdateItem");

    // DynamoDB forbids having both old-style AttributeUpdates or Expected
    // and new-style UpdateExpression or ConditionExpression in the same request
@@ -3163,8 +3110,7 @@ future<executor::request_return_type> executor::get_item(client_state& client_st

    std::unordered_set<std::string> used_attribute_names;
    auto attrs_to_get = calculate_attrs_to_get(request, used_attribute_names);
-    const rjson::value* expression_attribute_names = rjson::find(request, "ExpressionAttributeNames");
-    verify_all_are_used(expression_attribute_names, used_attribute_names, "ExpressionAttributeNames", "GetItem");
+    verify_all_are_used(request, "ExpressionAttributeNames", used_attribute_names, "GetItem");

    return _proxy.query(schema, std::move(command), std::move(partition_ranges), cl,
            service::storage_proxy::coordinator_query_options(executor::default_timeout(), std::move(permit), client_state, trace_state)).then(
@@ -3275,8 +3221,7 @@ future<executor::request_return_type> executor::batch_get_item(client_state& cli
        rs.cl = get_read_consistency(it->value);
        std::unordered_set<std::string> used_attribute_names;
        rs.attrs_to_get = ::make_shared<const std::optional<attrs_to_get>>(calculate_attrs_to_get(it->value, used_attribute_names));
-        const rjson::value* expression_attribute_names = rjson::find(it->value, "ExpressionAttributeNames");
-        verify_all_are_used(expression_attribute_names, used_attribute_names,"ExpressionAttributeNames", "GetItem");
+        verify_all_are_used(it->value, "ExpressionAttributeNames", used_attribute_names, "GetItem");
        auto& keys = (it->value)["Keys"];
        for (rjson::value& key : keys.GetArray()) {
            rs.add(key);
@@ -3312,7 +3257,8 @@ future<executor::request_return_type> executor::batch_get_item(client_state& cli
                    service::storage_proxy::coordinator_query_options(executor::default_timeout(), permit, client_state, trace_state)).then(
                    [schema = rs.schema, partition_slice = std::move(partition_slice), selection = std::move(selection), attrs_to_get = rs.attrs_to_get] (service::storage_proxy::coordinator_query_result qr) mutable {
                utils::get_local_injector().inject("alternator_batch_get_item", [] { throw std::runtime_error("batch_get_item injection"); });
-                return describe_multi_item(std::move(schema), std::move(partition_slice), std::move(selection), std::move(qr.query_result), std::move(attrs_to_get));
+                std::vector<rjson::value> jsons = describe_multi_item(schema, partition_slice, *selection, *qr.query_result, *attrs_to_get);
+                return make_ready_future<std::vector<rjson::value>>(std::move(jsons));
            });
            response_futures.push_back(std::move(f));
        }
@@ -3445,7 +3391,7 @@ filter::filter(const rjson::value& request, request_type rt,
            throw api_error::validation("Cannot use both old-style and new-style parameters in same request: FilterExpression and AttributesToGet");
        }
        try {
-            auto parsed = parse_condition_expression(rjson::to_string_view(*expression), "FilterExpression");
+            auto parsed = parse_condition_expression(rjson::to_string_view(*expression));
            const rjson::value* expression_attribute_names = rjson::find(request, "ExpressionAttributeNames");
            const rjson::value* expression_attribute_values = rjson::find(request, "ExpressionAttributeValues");
            resolve_condition_expression(parsed,
@@ -3552,7 +3498,7 @@ public:
        _column_it = _columns.begin();
    }

-    void accept_value(managed_bytes_view_opt result_bytes_view) {
+    void accept_value(const std::optional<query::result_bytes_view>& result_bytes_view) {
        if (!result_bytes_view) {
            ++_column_it;
            return;
@@ -3849,10 +3795,8 @@ future<executor::request_return_type> executor::scan(client_state& client_state,
    // optimized the filtering by modifying partition_ranges and/or
    // ck_bounds. We haven't done this optimization yet.

-    const rjson::value* expression_attribute_names = rjson::find(request, "ExpressionAttributeNames");
-    const rjson::value* expression_attribute_values = rjson::find(request, "ExpressionAttributeValues");
-    verify_all_are_used(expression_attribute_names, used_attribute_names, "ExpressionAttributeNames", "Scan");
-    verify_all_are_used(expression_attribute_values, used_attribute_values, "ExpressionAttributeValues", "Scan");
+    verify_all_are_used(request, "ExpressionAttributeNames", used_attribute_names, "Scan");
+    verify_all_are_used(request, "ExpressionAttributeValues", used_attribute_values, "Scan");

    return do_query(_proxy, schema, exclusive_start_key, std::move(partition_ranges), std::move(ck_bounds), std::move(attrs_to_get), limit, cl,
            std::move(filter), query::partition_slice::option_set(), client_state, _stats.cql_stats, trace_state, std::move(permit));
@@ -4073,7 +4017,7 @@ calculate_bounds_condition_expression(schema_ptr schema,
    // sort-key range.
    parsed::condition_expression p;
    try {
-        p = parse_condition_expression(rjson::to_string_view(expression), "KeyConditionExpression");
+        p = parse_condition_expression(rjson::to_string_view(expression));
    } catch(expressions_syntax_error& e) {
        throw api_error::validation(e.what());
    }
@@ -4293,17 +4237,13 @@ future<executor::request_return_type> executor::query(client_state& client_state
        throw api_error::validation("Query must have one of "
                "KeyConditions or KeyConditionExpression");
    }
-
-    const rjson::value* expression_attribute_names = rjson::find(request, "ExpressionAttributeNames");
-    const rjson::value* expression_attribute_values = rjson::find(request, "ExpressionAttributeValues");
-
    // exactly one of key_conditions or key_condition_expression
    auto [partition_ranges, ck_bounds] = key_conditions
                ? calculate_bounds_conditions(schema, *key_conditions)
                : calculate_bounds_condition_expression(schema, *key_condition_expression,
-                        expression_attribute_values,
+                        rjson::find(request, "ExpressionAttributeValues"),
                        used_attribute_values,
-                        expression_attribute_names,
+                        rjson::find(request, "ExpressionAttributeNames"),
                        used_attribute_names);

    filter filter(request, filter::request_type::QUERY,
@@ -4330,8 +4270,8 @@ future<executor::request_return_type> executor::query(client_state& client_state
    select_type select = parse_select(request, table_type);

    auto attrs_to_get = calculate_attrs_to_get(request, used_attribute_names, select);
-    verify_all_are_used(expression_attribute_names, used_attribute_names, "ExpressionAttributeNames", "Query");
-    verify_all_are_used(expression_attribute_values, used_attribute_values, "ExpressionAttributeValues", "Query");
+    verify_all_are_used(request, "ExpressionAttributeValues", used_attribute_values, "Query");
+    verify_all_are_used(request, "ExpressionAttributeNames", used_attribute_names, "Query");
    query::partition_slice::option_set opts;
    opts.set_if<query::partition_slice::option::reversed>(!forward);
    return do_query(_proxy, schema, exclusive_start_key, std::move(partition_ranges), std::move(ck_bounds), std::move(attrs_to_get), limit, cl,
@@ -4392,17 +4332,6 @@ future<executor::request_return_type> executor::list_tables(client_state& client

 future<executor::request_return_type> executor::describe_endpoints(client_state& client_state, service_permit permit, rjson::value request, std::string host_header) {
    _stats.api_operations.describe_endpoints++;
-    // The alternator_describe_endpoints configuration can be used to disable
-    // the DescribeEndpoints operation, or set it to return a fixed string
-    std::string override = _proxy.data_dictionary().get_config().alternator_describe_endpoints();
-    if (!override.empty()) {
-        if (override == "disabled") {
-            _stats.unsupported_operations++;
-            return make_ready_future<request_return_type>(api_error::unknown_operation(
-                "DescribeEndpoints disabled by configuration (alternator_describe_endpoints=disabled)"));
-        }
-        host_header = std::move(override);
-    }
    rjson::value response = rjson::empty_object();
    // Without having any configuration parameter to say otherwise, we tell
    // the user to return to the same endpoint they used to reach us. The only
@@ -4440,10 +4369,6 @@ future<executor::request_return_type> executor::describe_continuous_backups(clie
    try {
        schema = _proxy.data_dictionary().find_schema(sstring(executor::KEYSPACE_NAME_PREFIX) + table_name, table_name);
    } catch(data_dictionary::no_such_column_family&) {
-        // DynamoDB returns validation error even when table does not exist
-        // and the table name is invalid.
-        validate_table_name(table_name);
-
        throw api_error::table_not_found(
                format("Table {} not found", table_name));
    }
@@ -4457,23 +4382,25 @@ future<executor::request_return_type> executor::describe_continuous_backups(clie
    co_return make_jsonable(std::move(response));
 }

-// Create the metadata for the keyspace in which we put the alternator
-// table if it doesn't already exist.
+// Create the keyspace in which we put the alternator table, if it doesn't
+// already exist.
 // Currently, we automatically configure the keyspace based on the number
 // of nodes in the cluster: A cluster with 3 or more live nodes, gets RF=3.
 // A smaller cluster (presumably, a test only), gets RF=1. The user may
 // manually create the keyspace to override this predefined behavior.
-static lw_shared_ptr<keyspace_metadata> create_keyspace_metadata(std::string_view keyspace_name, service::storage_proxy& sp, gms::gossiper& gossiper, api::timestamp_type ts) {
-    int endpoint_count = gossiper.num_endpoints();
+static future<std::vector<mutation>> create_keyspace(std::string_view keyspace_name, service::storage_proxy& sp, service::migration_manager& mm, gms::gossiper& gossiper, api::timestamp_type ts) {
+    sstring keyspace_name_str(keyspace_name);
+    int endpoint_count = gossiper.get_endpoint_states().size();
    int rf = 3;
    if (endpoint_count < rf) {
        rf = 1;
        elogger.warn("Creating keyspace '{}' for Alternator with unsafe RF={} because cluster only has {} nodes.",
-                keyspace_name, rf, endpoint_count);
+                keyspace_name_str, rf, endpoint_count);
    }
    auto opts = get_network_topology_options(sp, gossiper, rf);
+    auto ksm = keyspace_metadata::new_keyspace(keyspace_name_str, "org.apache.cassandra.locator.NetworkTopologyStrategy", std::move(opts), true);

-    return keyspace_metadata::new_keyspace(keyspace_name, "org.apache.cassandra.locator.NetworkTopologyStrategy", std::move(opts), true);
+    co_return mm.prepare_new_keyspace_announcement(ksm, ts);
 }

 future<> executor::start() {
--- a/alternator/executor.hh
+++ b/alternator/executor.hh
@@ -22,7 +22,6 @@
 #include "alternator/error.hh"
 #include "stats.hh"
 #include "utils/rjson.hh"
-#include "utils/updateable_value.hh"

 namespace db {
    class system_distributed_keyspace;
@@ -171,16 +170,8 @@ public:
    static constexpr auto KEYSPACE_NAME_PREFIX = "alternator_";
    static constexpr std::string_view INTERNAL_TABLE_PREFIX = ".scylla.alternator.";

-    executor(gms::gossiper& gossiper,
-             service::storage_proxy& proxy,
-             service::migration_manager& mm,
-             db::system_distributed_keyspace& sdks,
-             cdc::metadata& cdc_metadata,
-             smp_service_group ssg,
-             utils::updateable_value<uint32_t> default_timeout_in_ms)
-        : _gossiper(gossiper), _proxy(proxy), _mm(mm), _sdks(sdks), _cdc_metadata(cdc_metadata), _ssg(ssg) {
-        s_default_timeout_in_ms = std::move(default_timeout_in_ms);
-    }
+    executor(gms::gossiper& gossiper, service::storage_proxy& proxy, service::migration_manager& mm, db::system_distributed_keyspace& sdks, cdc::metadata& cdc_metadata, smp_service_group ssg)
+        : _gossiper(gossiper), _proxy(proxy), _mm(mm), _sdks(sdks), _cdc_metadata(cdc_metadata), _ssg(ssg) {}

    future<request_return_type> create_table(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
    future<request_return_type> describe_table(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
@@ -208,16 +199,13 @@ public:
    future<request_return_type> describe_continuous_backups(client_state& client_state, service_permit permit, rjson::value request);

    future<> start();
-    future<> stop() {
-        // disconnect from the value source, but keep the value unchanged.
-        s_default_timeout_in_ms = utils::updateable_value<uint32_t>{s_default_timeout_in_ms()};
-        return make_ready_future<>();
-    }
+    future<> stop() { return make_ready_future<>(); }

    static sstring table_name(const schema&);
    static db::timeout_clock::time_point default_timeout();
+    static void set_default_timeout(db::timeout_clock::duration timeout);
 private:
-    static thread_local utils::updateable_value<uint32_t> s_default_timeout_in_ms;
+    static db::timeout_clock::duration s_default_timeout;
 public:
    static schema_ptr find_table(service::storage_proxy&, const rjson::value& request);

@@ -225,31 +213,30 @@ private:
    friend class rmw_operation;

    static void describe_key_schema(rjson::value& parent, const schema&, std::unordered_map<std::string,std::string> * = nullptr);
+    static void describe_key_schema(rjson::value& parent, const schema& schema, std::unordered_map<std::string,std::string>&);
    
 public:
-    static void describe_key_schema(rjson::value& parent, const schema& schema, std::unordered_map<std::string,std::string>&);
-
    static std::optional<rjson::value> describe_single_item(schema_ptr,
        const query::partition_slice&,
        const cql3::selection::selection&,
        const query::result&,
        const std::optional<attrs_to_get>&);

-    static future<std::vector<rjson::value>> describe_multi_item(schema_ptr schema,
-        const query::partition_slice&& slice,
-        shared_ptr<cql3::selection::selection> selection,
-        foreign_ptr<lw_shared_ptr<query::result>> query_result,
-        shared_ptr<const std::optional<attrs_to_get>> attrs_to_get);
+    static std::vector<rjson::value> describe_multi_item(schema_ptr schema,
+        const query::partition_slice& slice,
+        const cql3::selection::selection& selection,
+        const query::result& query_result,
+        const std::optional<attrs_to_get>& attrs_to_get);

    static void describe_single_item(const cql3::selection::selection&,
-        const std::vector<managed_bytes_opt>&,
+        const std::vector<bytes_opt>&,
        const std::optional<attrs_to_get>&,
        rjson::value&,
        bool = false);

    static void add_stream_options(const rjson::value& stream_spec, schema_builder&, service::storage_proxy& sp);
    static void supplement_table_info(rjson::value& descr, const schema& schema, service::storage_proxy& sp);
-    static void supplement_table_stream_info(rjson::value& descr, const schema& schema, const service::storage_proxy& sp);
+    static void supplement_table_stream_info(rjson::value& descr, const schema& schema, service::storage_proxy& sp);
 };

 // is_big() checks approximately if the given JSON value is "bigger" than
--- a/alternator/expressions.cc
+++ b/alternator/expressions.cc
@@ -29,7 +29,7 @@
 namespace alternator {

 template <typename Func, typename Result = std::result_of_t<Func(expressionsParser&)>>
-static Result do_with_parser(std::string_view input, Func&& f) {
+Result do_with_parser(std::string_view input, Func&& f) {
    expressionsLexer::InputStreamType input_stream{
        reinterpret_cast<const ANTLR_UINT8*>(input.data()),
        ANTLR_ENC_UTF8,
@@ -43,41 +43,31 @@ static Result do_with_parser(std::string_view input, Func&& f) {
    return result;
 }

-template <typename Func, typename Result = std::result_of_t<Func(expressionsParser&)>>
-static Result parse(const char* input_name, std::string_view input, Func&& f) {
-    if (input.length() > 4096) {
-        throw expressions_syntax_error(format("{} expression size {} exceeds allowed maximum 4096.",
-            input_name, input.length()));
-    }
-    try {
-        return do_with_parser(input, f);
-    } catch (expressions_syntax_error& e) {
-        // If already an expressions_syntax_error, don't print the type's
-        // name (it's just ugly), just the message.
-        // TODO: displayRecognitionError could set a position inside the
-        // expressions_syntax_error in throws, and we could use it here to
-        // mark the broken position in 'input'.
-        throw expressions_syntax_error(format("Failed parsing {} '{}': {}",
-            input_name, input, e.what()));
-    } catch (...) {
-        throw expressions_syntax_error(format("Failed parsing {} '{}': {}",
-            input_name, input, std::current_exception()));
-    }
-}
-
 parsed::update_expression
 parse_update_expression(std::string_view query) {
-    return parse("UpdateExpression", query,  std::mem_fn(&expressionsParser::update_expression));
+    try {
+        return do_with_parser(query,  std::mem_fn(&expressionsParser::update_expression));
+    } catch (...) {
+        throw expressions_syntax_error(format("Failed parsing UpdateExpression '{}': {}", query, std::current_exception()));
+    }
 }

 std::vector<parsed::path>
 parse_projection_expression(std::string_view query) {
-    return parse ("ProjectionExpression", query,  std::mem_fn(&expressionsParser::projection_expression));
+    try {
+        return do_with_parser(query,  std::mem_fn(&expressionsParser::projection_expression));
+    } catch (...) {
+        throw expressions_syntax_error(format("Failed parsing ProjectionExpression '{}': {}", query, std::current_exception()));
+    }
 }

 parsed::condition_expression
-parse_condition_expression(std::string_view query, const char* caller) {
-    return parse(caller, query,  std::mem_fn(&expressionsParser::condition_expression));
+parse_condition_expression(std::string_view query) {
+    try {
+        return do_with_parser(query,  std::mem_fn(&expressionsParser::condition_expression));
+    } catch (...) {
+        throw expressions_syntax_error(format("Failed parsing ConditionExpression '{}': {}", query, std::current_exception()));
+    }
 }

 namespace parsed {
@@ -428,14 +418,9 @@ void for_condition_expression_on(const parsed::condition_expression& ce, const n
 // calculate_size() is ConditionExpression's size() function, i.e., it takes
 // a JSON-encoded value and returns its "size" as defined differently for the
 // different types - also as a JSON-encoded number.
-// If the value's type (e.g. number) has no size defined, there are two cases:
-// 1. If from_data (the value came directly from an attribute of the data),
-//    It returns a JSON-encoded "null" value. Comparisons against this
-//    non-numeric value will later fail, so eventually the application will
-//    get a ConditionalCheckFailedException.
-// 2. Otherwise (the value came from a constant in the query or some other
-//    calculation), throw a ValidationException.
-static rjson::value calculate_size(const rjson::value& v, bool from_data) {
+// It returns a JSON-encoded "null" value if this value's type has no size
+// defined. Comparisons against this non-numeric value will later fail.
+static rjson::value calculate_size(const rjson::value& v) {
    // NOTE: If v is improperly formatted for our JSON value encoding, it
    // must come from the request itself, not from the database, so it makes
    // sense to throw a ValidationException if we see such a problem.
@@ -464,12 +449,10 @@ static rjson::value calculate_size(const rjson::value& v, bool from_data) {
            throw api_error::validation(format("invalid byte string: {}", v));
        }
        ret = base64_decoded_len(rjson::to_string_view(it->value));
-    } else if (from_data) {
+    } else {
        rjson::value json_ret = rjson::empty_object();
        rjson::add(json_ret, "null", rjson::value(true));
        return json_ret;
-    } else {
-        throw api_error::validation(format("Unsupported operand type {} for function size()", it->name));
    }
    rjson::value json_ret = rjson::empty_object();
    rjson::add(json_ret, "N", rjson::from_string(std::to_string(ret)));
@@ -551,7 +534,7 @@ std::unordered_map<std::string_view, function_handler_type*> function_handlers {
                        format("{}: size() accepts 1 parameter, got {}", caller, f._parameters.size()));
            }
            rjson::value v = calculate_value(f._parameters[0], caller, previous_item);
-            return calculate_size(v, f._parameters[0].is_path());
+            return calculate_size(v);
        }
    },
    {"attribute_exists", [] (calculate_value_caller caller, const rjson::value* previous_item, const parsed::value::function_call& f) {
@@ -679,7 +662,7 @@ static rjson::value extract_path(const rjson::value* item,
            // objects. But today Alternator does not validate the structure
            // of nested documents before storing them, so this can happen on
            // read.
-            throw api_error::validation(format("{}: malformed item read: {}", caller, *item));
+            throw api_error::validation(format("{}: malformed item read: {}", caller, *item));
        }
        const char* type = v->MemberBegin()->name.GetString();
        v = &(v->MemberBegin()->value);
--- a/alternator/expressions.g
+++ b/alternator/expressions.g
@@ -74,22 +74,7 @@ options {
 */
@parser::context {
    void displayRecognitionError(ANTLR_UINT8** token_names, ExceptionBaseType* ex) {
-        const char* err;
-        switch (ex->getType()) {
-        case antlr3::ExceptionType::FAILED_PREDICATE_EXCEPTION:
-            err = "expression nested too deeply";
-            break;
-        default:
-            err = "syntax error";
-            break;
-        }
-        // Alternator expressions are always single line so ex->get_line()
-        // is always 1, no sense to print it.
-        // TODO: return the position as part of the exception, so the
-        // caller in expressions.cc that knows the expression string can
-        // mark the error position in the final error message.
-        throw expressions_syntax_error(format("{} at char {}", err,
-            ex->get_charPositionInLine()));
+        throw expressions_syntax_error("syntax error");
    }
 }
@lexer::context {
@@ -98,23 +83,6 @@ options {
    }
 }

-/* Unfortunately, ANTLR uses recursion - not the heap - to parse recursive
- * expressions. To make things even worse, ANTLR has no way to limit the
- * depth of this recursion (unlike Yacc which has YYMAXDEPTH). So deeply-
- * nested expression like "(((((((((((((..." can easily crash Scylla on a
- * stack overflow (see issue #14477).
- *
- * We are lucky that in the grammar for DynamoDB expressions (below),
- * only a few specific rules can recurse, so it was fairly easy to add a
- * "depth" counter to a few specific rules, and then use a predicate
- * "{depth<MAX_DEPTH}?" to avoid parsing if the depth exceeds this limit,
- * and throw a FAILED_PREDICATE_EXCEPTION in that case, which we will
- * report to the user as a "expression nested too deeply" error.
- */
-@parser::members {
-    static constexpr int MAX_DEPTH = 400;
-}
-
 /*
 * Lexical analysis phase, i.e., splitting the input up to tokens.
 * Lexical analyzer rules have names starting in capital letters.
@@ -187,20 +155,19 @@ path returns [parsed::path p]:
      | '[' INTEGER ']'           { $p.add_index(std::stoi($INTEGER.text)); }
    )*;

-/* See comment above why the "depth" counter was needed here */
-value[int depth] returns [parsed::value v]:
+value returns [parsed::value v]:
      VALREF       { $v.set_valref($VALREF.text); }
    | path         { $v.set_path($path.p); }
-    | {depth<MAX_DEPTH}? NAME { $v.set_func_name($NAME.text); }
-     '(' x=value[depth+1]    { $v.add_func_parameter($x.v); }
-     (',' x=value[depth+1]   { $v.add_func_parameter($x.v); })*
+    | NAME         { $v.set_func_name($NAME.text); }
+     '(' x=value   { $v.add_func_parameter($x.v); }
+     (',' x=value  { $v.add_func_parameter($x.v); })*
     ')'
    ;

 update_expression_set_rhs returns [parsed::set_rhs rhs]:
-    v=value[0]  { $rhs.set_value(std::move($v.v)); }
-    (   '+' v=value[0]  { $rhs.set_plus(std::move($v.v)); }
-      | '-' v=value[0]  { $rhs.set_minus(std::move($v.v)); }
+    v=value  { $rhs.set_value(std::move($v.v)); }
+    (   '+' v=value  { $rhs.set_plus(std::move($v.v)); }
+      | '-' v=value  { $rhs.set_minus(std::move($v.v)); }
    )?
    ;

@@ -238,7 +205,7 @@ projection_expression returns [std::vector<parsed::path> v]:


 primitive_condition returns [parsed::primitive_condition c]:
-      v=value[0]      { $c.add_value(std::move($v.v));
+      v=value         { $c.add_value(std::move($v.v));
                        $c.set_operator(parsed::primitive_condition::type::VALUE); }
      (  (  '='       { $c.set_operator(parsed::primitive_condition::type::EQ); }
          | '<' '>'   { $c.set_operator(parsed::primitive_condition::type::NE); }
@@ -247,14 +214,14 @@ primitive_condition returns [parsed::primitive_condition c]:
          | '>'       { $c.set_operator(parsed::primitive_condition::type::GT); }
          | '>' '='   { $c.set_operator(parsed::primitive_condition::type::GE); }
         )
-         v=value[0]   { $c.add_value(std::move($v.v)); }
+         v=value      { $c.add_value(std::move($v.v)); }
       | BETWEEN      { $c.set_operator(parsed::primitive_condition::type::BETWEEN); }
-         v=value[0]   { $c.add_value(std::move($v.v)); }
+         v=value      { $c.add_value(std::move($v.v)); }
         AND
-         v=value[0]   { $c.add_value(std::move($v.v)); }
+         v=value      { $c.add_value(std::move($v.v)); }
       | IN '('       { $c.set_operator(parsed::primitive_condition::type::IN); }
-         v=value[0]   { $c.add_value(std::move($v.v)); }
-         (',' v=value[0] { $c.add_value(std::move($v.v)); })*
+         v=value      { $c.add_value(std::move($v.v)); }
+         (',' v=value { $c.add_value(std::move($v.v)); })*
         ')'
      )?
    ;
@@ -264,20 +231,19 @@ primitive_condition returns [parsed::primitive_condition c]:
 // common rule prefixes, and (lack of) support for operator precedence.
 // These rules could have been written more clearly using a more powerful
 // parser generator - such as Yacc.
-// See comment above why the "depth" counter was needed here.
-boolean_expression[int depth] returns [parsed::condition_expression e]:
-	  b=boolean_expression_1[depth]       { $e.append(std::move($b.e), '|'); }
-	  (OR b=boolean_expression_1[depth]   { $e.append(std::move($b.e), '|'); } )*
+boolean_expression returns [parsed::condition_expression e]:
+	  b=boolean_expression_1       { $e.append(std::move($b.e), '|'); }
+	  (OR b=boolean_expression_1   { $e.append(std::move($b.e), '|'); } )*
 	;
-boolean_expression_1[int depth] returns [parsed::condition_expression e]:
-	  b=boolean_expression_2[depth]       { $e.append(std::move($b.e), '&'); }
-	  (AND b=boolean_expression_2[depth]  { $e.append(std::move($b.e), '&'); } )*
+boolean_expression_1 returns [parsed::condition_expression e]:
+	  b=boolean_expression_2       { $e.append(std::move($b.e), '&'); }
+	  (AND b=boolean_expression_2  { $e.append(std::move($b.e), '&'); } )*
 	;
-boolean_expression_2[int depth] returns [parsed::condition_expression e]:
+boolean_expression_2 returns [parsed::condition_expression e]:
 	  p=primitive_condition        { $e.set_primitive(std::move($p.c)); }
-	| {depth<MAX_DEPTH}? NOT b=boolean_expression_2[depth+1]   { $e = std::move($b.e); $e.apply_not(); }
-	| {depth<MAX_DEPTH}? '(' b=boolean_expression[depth+1] ')' { $e = std::move($b.e); }
+	| NOT b=boolean_expression_2   { $e = std::move($b.e); $e.apply_not(); }
+	| '(' b=boolean_expression ')' { $e = std::move($b.e); }
    ;

 condition_expression returns [parsed::condition_expression e]:
-    boolean_expression[0] { e=std::move($boolean_expression.e); } EOF;
+    boolean_expression { e=std::move($boolean_expression.e); } EOF;
--- a/alternator/expressions.hh
+++ b/alternator/expressions.hh
@@ -28,7 +28,7 @@ public:

 parsed::update_expression parse_update_expression(std::string_view query);
 std::vector<parsed::path> parse_projection_expression(std::string_view query);
-parsed::condition_expression parse_condition_expression(std::string_view query, const char* caller);
+parsed::condition_expression parse_condition_expression(std::string_view query);

 void resolve_update_expression(parsed::update_expression& ue,
        const rjson::value* expression_attribute_names,
--- a/alternator/serialization.cc
+++ b/alternator/serialization.cc
@@ -50,115 +50,6 @@ type_representation represent_type(alternator_type atype) {
    return it->second;
 }

-// Get the magnitude and precision of a big_decimal - as these concepts are
-// defined by DynamoDB - to allow us to enforce limits on those as explained
-// in issue #6794. The "magnitude" of 9e123 is 123 and of -9e-123 is -123,
-// the "precision" of 12.34e56 is the number of significant digits - 4.
-//
-// Unfortunately it turned out to be quite difficult to take a big_decimal and
-// calculate its magnitude and precision from its scale() and unscaled_value().
-// So in the following ugly implementation we calculate them from the string
-// representation instead. We assume the number was already parsed
-// successfully to a big_decimal so it follows its syntax rules.
-//
-// FIXME: rewrite this function to take a big_decimal, not a string.
-// Maybe a snippet like this can help:
-// boost::multiprecision::cpp_int digits = boost::multiprecision::log10(num.unscaled_value().convert_to<boost::multiprecision::mpf_float_50>()).convert_to<boost::multiprecision::cpp_int>() + 1;
-
-
-internal::magnitude_and_precision internal::get_magnitude_and_precision(std::string_view s) {
-    size_t e_or_end = s.find_first_of("eE");
-    std::string_view base = s.substr(0, e_or_end);
-    if (s[0]=='-' || s[0]=='+') {
-        base = base.substr(1);
-    }
-    int magnitude = 0;
-    int precision = 0;
-    size_t dot_or_end = base.find_first_of(".");
-    size_t nonzero = base.find_first_not_of("0");
-    if (dot_or_end != std::string_view::npos) {
-        if (nonzero == dot_or_end) {
-            // 0.000031 => magnitude = -5 (like 3.1e-5), precision = 2.
-            std::string_view fraction = base.substr(dot_or_end + 1);
-            size_t nonzero2 = fraction.find_first_not_of("0");
-            if (nonzero2 != std::string_view::npos) {
-                magnitude = -nonzero2 - 1;
-                precision = fraction.size() - nonzero2;
-            }
-        } else {
-            // 000123.45678 => magnitude = 2, precision = 8.
-            magnitude = dot_or_end - nonzero - 1;
-            precision = base.size() - nonzero - 1;
-        }
-        // trailing zeros don't count to precision, e.g., precision
-        // of 1000.0, 1.0 or 1.0000 are just 1.
-        size_t last_significant = base.find_last_not_of(".0");
-        if (last_significant == std::string_view::npos) {
-            precision = 0;
-        } else if (last_significant < dot_or_end) {
-            // e.g., 1000.00 reduce 5 = 7 - (0+1) - 1 from precision
-            precision -= base.size() - last_significant - 2;
-        } else {
-            // e.g., 1235.60 reduce 5 = 7 - (5+1) from precision
-            precision -= base.size() - last_significant - 1;
-        }
-    } else if (nonzero == std::string_view::npos) {
-        // all-zero integer 000000
-        magnitude = 0;
-        precision = 0;
-    } else {
-        magnitude = base.size() - 1 - nonzero;
-        precision = base.size() - nonzero;
-        // trailing zeros don't count to precision, e.g., precision
-        // of 1000 is just 1.
-        size_t last_significant = base.find_last_not_of("0");
-        if (last_significant == std::string_view::npos) {
-            precision = 0;
-        } else {
-            // e.g., 1000 reduce 3 = 4 - (0+1)
-            precision -= base.size() - last_significant - 1;
-        }
-    }
-    if (precision && e_or_end != std::string_view::npos) {
-        std::string_view exponent = s.substr(e_or_end + 1);
-        if (exponent.size() > 4) {
-            // don't even bother atoi(), exponent is too large
-            magnitude = exponent[0]=='-' ? -9999 : 9999;
-        } else {
-            try {
-                magnitude += boost::lexical_cast<int32_t>(exponent);
-            } catch (...) {
-                magnitude = 9999;
-            }
-        }
-    }
-    return magnitude_and_precision {magnitude, precision};
-}
-
-// Parse a number read from user input, validating that it has a valid
-// numeric format and also in the allowed magnitude and precision ranges
-// (see issue #6794). Throws an api_error::validation if the validation
-// failed.
-static big_decimal parse_and_validate_number(std::string_view s) {
-    try {
-        big_decimal ret(s);
-        auto [magnitude, precision] = internal::get_magnitude_and_precision(s);
-        if (magnitude > 125) {
-            throw api_error::validation(format("Number overflow: {}. Attempting to store a number with magnitude larger than supported range.", s));
-        }
-        if (magnitude < -130) {
-            throw api_error::validation(format("Number underflow: {}. Attempting to store a number with magnitude lower than supported range.", s));
-        }
-        if (precision > 38) {
-            throw api_error::validation(format("Number too precise: {}. Attempting to store a number with more significant digits than supported.", s));
-        }
-        return ret;
-    } catch (const marshal_exception& e) {
-        throw api_error::validation(format("The parameter cannot be converted to a numeric value: {}", s));
-    }
-
-}
-
 struct from_json_visitor {
    const rjson::value& v;
    bytes_ostream& bo;
@@ -176,7 +67,11 @@ struct from_json_visitor {
        bo.write(boolean_type->decompose(v.GetBool()));
    }
    void operator()(const decimal_type_impl& t) const {
-        bo.write(decimal_type->decompose(parse_and_validate_number(rjson::to_string_view(v))));
+        try {
+            bo.write(t.from_string(rjson::to_string_view(v)));
+        } catch (const marshal_exception& e) {
+            throw api_error::validation(format("The parameter cannot be converted to a numeric value: {}", v));
+        }
    }
    // default
    void operator()(const abstract_type& t) const {
@@ -308,8 +203,6 @@ bytes get_key_from_typed_value(const rjson::value& key_typed_value, const column
        // FIXME: it's difficult at this point to get information if value was provided
        // in request or comes from the storage, for now we assume it's user's fault.
        return *unwrap_bytes(value, true);
-    } else if (column.type == decimal_type) {
-        return decimal_type->decompose(parse_and_validate_number(rjson::to_string_view(value)));
    } else {
        return column.type->from_string(value_view);
    }
@@ -402,13 +295,16 @@ big_decimal unwrap_number(const rjson::value& v, std::string_view diagnostic) {
    if (it->name != "N") {
        throw api_error::validation(format("{}: expected number, found type '{}'", diagnostic, it->name));
    }
-    if (!it->value.IsString()) {
-        // We shouldn't reach here. Callers normally validate their input
-        // earlier with validate_value().
-        throw api_error::validation(format("{}: improperly formatted number constant", diagnostic));
+    try {
+        if (!it->value.IsString()) {
+            // We shouldn't reach here. Callers normally validate their input
+            // earlier with validate_value().
+            throw api_error::validation(format("{}: improperly formatted number constant", diagnostic));
+        }
+        return big_decimal(rjson::to_string_view(it->value));
+    } catch (const marshal_exception& e) {
+        throw api_error::validation(format("The parameter cannot be converted to a numeric value: {}", it->value));
    }
-    big_decimal ret = parse_and_validate_number(rjson::to_string_view(it->value));
-    return ret;
 }

 std::optional<big_decimal> try_unwrap_number(const rjson::value& v) {
@@ -420,8 +316,8 @@ std::optional<big_decimal> try_unwrap_number(const rjson::value& v) {
        return std::nullopt;
    }
    try {
-        return parse_and_validate_number(rjson::to_string_view(it->value));
-    } catch (api_error&) {
+        return big_decimal(rjson::to_string_view(it->value));
+    } catch (const marshal_exception& e) {
        return std::nullopt;
    }
 }
--- a/alternator/serialization.hh
+++ b/alternator/serialization.hh
@@ -94,12 +94,5 @@ std::optional<rjson::value> set_diff(const rjson::value& v1, const rjson::value&
 // Returns a null value if one of the arguments is not actually a list.
 rjson::value list_concatenate(const rjson::value& v1, const rjson::value& v2);

-namespace internal {
-struct magnitude_and_precision {
-    int magnitude;
-    int precision;
-};
-magnitude_and_precision get_magnitude_and_precision(std::string_view);
-}

 }
--- a/alternator/server.cc
+++ b/alternator/server.cc
@@ -424,7 +424,7 @@ future<executor::request_return_type> server::handle_api_request(std::unique_ptr
    co_await client_state.maybe_update_per_service_level_params();

    tracing::trace_state_ptr trace_state = maybe_trace_query(client_state, username, op, content);
-    tracing::trace(trace_state, "{}", op);
+    tracing::trace(trace_state, op);
    rjson::value json_request = co_await _json_parser.parse(std::move(content));
    co_return co_await callback_it->second(_executor, client_state, trace_state,
            make_service_permit(std::move(units)), std::move(json_request), std::move(req));
--- a/alternator/streams.cc
+++ b/alternator/streams.cc
@@ -1096,7 +1096,7 @@ void executor::add_stream_options(const rjson::value& stream_specification, sche
    }
 }

-void executor::supplement_table_stream_info(rjson::value& descr, const schema& schema, const service::storage_proxy& sp) {
+void executor::supplement_table_stream_info(rjson::value& descr, const schema& schema, service::storage_proxy& sp) {
    auto& opts = schema.cdc_options();
    if (opts.enabled()) {
        auto db = sp.data_dictionary();
--- a/alternator/ttl.cc
+++ b/alternator/ttl.cc
@@ -241,7 +241,7 @@ static bool is_expired(const rjson::value& expiration_time, gc_clock::time_point
 // understands it is an expiration event - not a user-initiated deletion.
 static future<> expire_item(service::storage_proxy& proxy,
                            const service::query_state& qs,
-                            const std::vector<managed_bytes_opt>& row,
+                            const std::vector<bytes_opt>& row,
                            schema_ptr schema,
                            api::timestamp_type ts) {
    // Prepare the row key to delete
@@ -260,7 +260,7 @@ static future<> expire_item(service::storage_proxy& proxy,
            // FIXME: log or increment a metric if this happens.
            return make_ready_future<>();
        }
-        exploded_pk.push_back(to_bytes(*row_c));
+        exploded_pk.push_back(*row_c);
    }
    auto pk = partition_key::from_exploded(exploded_pk);
    mutation m(schema, pk);
@@ -280,7 +280,7 @@ static future<> expire_item(service::storage_proxy& proxy,
                // FIXME: log or increment a metric if this happens.
                return make_ready_future<>();
            }
-            exploded_ck.push_back(to_bytes(*row_c));
+            exploded_ck.push_back(*row_c);
        }
        auto ck = clustering_key::from_exploded(exploded_ck);
        m.partition().clustered_row(*schema, ck).apply(tombstone(ts, gc_clock::now()));
@@ -387,7 +387,7 @@ class token_ranges_owned_by_this_shard {
    class ranges_holder_primary {
        const dht::token_range_vector _token_ranges;
     public:
-        ranges_holder_primary(const locator::vnode_effective_replication_map_ptr& erm, gms::gossiper& g, gms::inet_address ep)
+        ranges_holder_primary(const locator::effective_replication_map_ptr& erm, gms::gossiper& g, gms::inet_address ep)
            : _token_ranges(erm->get_primary_ranges(ep)) {}
        std::size_t size() const { return _token_ranges.size(); }
        const dht::token_range& operator[](std::size_t i) const {
@@ -430,7 +430,6 @@ class token_ranges_owned_by_this_shard {
    size_t _range_idx;
    size_t _end_idx;
    std::optional<dht::selective_token_range_sharder> _intersecter;
-    locator::effective_replication_map_ptr _erm;
 public:
    token_ranges_owned_by_this_shard(replica::database& db, gms::gossiper& g, schema_ptr s)
        :  _s(s)
@@ -438,7 +437,6 @@ public:
                g, utils::fb_utilities::get_broadcast_address())
        , _range_idx(random_offset(0, _token_ranges.size() - 1))
        , _end_idx(_range_idx + _token_ranges.size())
-        , _erm(s->table().get_effective_replication_map())
    {
        tlogger.debug("Generating token ranges starting from base range {} of {}", _range_idx, _token_ranges.size());
    }
@@ -471,7 +469,7 @@ public:
                    return std::nullopt;
                }
            }
-            _intersecter.emplace(_erm->get_sharder(*_s), _token_ranges[_range_idx % _token_ranges.size()], this_shard_id());
+            _intersecter.emplace(_s->get_sharder(), _token_ranges[_range_idx % _token_ranges.size()], this_shard_id());
        }
    }

@@ -595,7 +593,7 @@ static future<> scan_table_ranges(
            continue;
        }
        for (const auto& row : rows) {
-            const managed_bytes_opt& cell = row[*expiration_column];
+            const bytes_opt& cell = row[*expiration_column];
            if (!cell) {
                continue;
            }
--- a/api/CMakeLists.txt
+++ b/api/CMakeLists.txt
@@ -14,7 +14,6 @@ set(swagger_files
  api-doc/hinted_handoff.json
  api-doc/lsa.json
  api-doc/messaging_service.json
-  api-doc/metrics.json
  api-doc/storage_proxy.json
  api-doc/storage_service.json
  api-doc/stream_manager.json
--- a/api/api-doc/column_family.json
+++ b/api/api-doc/column_family.json
@@ -437,68 +437,6 @@
            }
         ]
      },
-      {
-         "path":"/column_family/tombstone_gc/{name}",
-         "operations":[
-            {
-               "method":"GET",
-               "summary":"Check if tombstone GC is enabled for a given table",
-               "type":"boolean",
-               "nickname":"get_tombstone_gc",
-               "produces":[
-                  "application/json"
-               ],
-               "parameters":[
-                  {
-                     "name":"name",
-                     "description":"The table name in keyspace:name format",
-                     "required":true,
-                     "allowMultiple":false,
-                     "type":"string",
-                     "paramType":"path"
-                  }
-               ]
-            },
-            {
-               "method":"POST",
-               "summary":"Enable tombstone GC for a given table",
-               "type":"void",
-               "nickname":"enable_tombstone_gc",
-               "produces":[
-                  "application/json"
-               ],
-               "parameters":[
-                  {
-                     "name":"name",
-                     "description":"The table name in keyspace:name format",
-                     "required":true,
-                     "allowMultiple":false,
-                     "type":"string",
-                     "paramType":"path"
-                  }
-               ]
-            },
-            {
-               "method":"DELETE",
-               "summary":"Disable tombstone GC for a given table",
-               "type":"void",
-               "nickname":"disable_tombstone_gc",
-               "produces":[
-                  "application/json"
-               ],
-               "parameters":[
-                  {
-                     "name":"name",
-                     "description":"The table name in keyspace:name format",
-                     "required":true,
-                     "allowMultiple":false,
-                     "type":"string",
-                     "paramType":"path"
-                  }
-               ]
-            }
-         ]
-      },
      {
         "path":"/column_family/estimate_keys/{name}",
         "operations":[
--- a/api/api-doc/error_injection.json
+++ b/api/api-doc/error_injection.json
@@ -34,14 +34,6 @@
                     "allowMultiple":false,
                     "type":"boolean",
                     "paramType":"query"
-                  },
-                  {
-                     "name":"parameters",
-                     "description":"dict of parameters to pass to the injection (json format)",
-                     "required":false,
-                     "allowMultiple":false,
-                     "type":"dict",
-                     "paramType":"body"
                  }
               ]
            },
@@ -66,30 +58,6 @@
            }
         ]
      },
-      {
-         "path":"/v2/error_injection/injection/{injection}/message",
-         "operations":[
-            {
-               "method":"POST",
-               "summary":"Send message to trigger an event in injection's code",
-               "type":"void",
-               "nickname":"message_injection",
-               "produces":[
-                  "application/json"
-               ],
-               "parameters":[
-                  {
-                     "name":"injection",
-                     "description":"injection name, should correspond to an injection added in code",
-                     "required":true,
-                     "allowMultiple":false,
-                     "type":"string",
-                     "paramType":"path"
-                  }
-               ]
-            }
-         ]
-      },
      {
         "path":"/v2/error_injection/injection",
         "operations":[
@@ -118,15 +86,5 @@
            }
         ]
      }
-   ],
-   "components":{
-      "schemas": {
-         "dict": {
-            "type": "object",
-            "additionalProperties": {
-               "type": "string"
-            }
-         }
-      }
-   }
+   ]
 }
--- a/api/api-doc/messaging_service.json
+++ b/api/api-doc/messaging_service.json
@@ -245,7 +245,7 @@
                 "GOSSIP_SHUTDOWN",
                 "DEFINITIONS_UPDATE",
                 "TRUNCATE",
-                 "UNUSED__REPLICATION_FINISHED",
+                 "REPLICATION_FINISHED",
                 "MIGRATION_REQUEST",
                 "PREPARE_MESSAGE",
                 "PREPARE_DONE_MESSAGE",
--- a/api/api-doc/metrics.def.json
+++ b/api/api-doc/metrics.def.json
@@ -1,34 +0,0 @@
-    "metrics_config": {
-        "id": "metrics_config",
-        "summary": "An entry in the metrics configuration",
-        "properties": {
-            "source_labels": {
-                "type": "array",
-                "items": {
-                    "type": "string"
-                },
-                "description": "The source labels, a match is based on concatination of the labels"
-            },
-            "action": {
-                "type": "string",
-                "description": "The action to perfrom on match",
-                "enum": ["skip_when_empty", "report_when_empty", "replace", "keep", "drop", "drop_label"]
-            },
-            "target_label": {
-                "type": "string",
-                "description": "The application state version"
-            },
-            "replacement": {
-                "type": "string",
-                "description": "The replacement string to use when replacing a value"
-            },
-            "regex": {
-                "type": "string",
-                "description": "The regex string to use when replacing a value"
-            },
-            "separator": {
-                "type": "string",
-                "description": "The separator string to use when concatinating the labels"
-            }
-        }
-    }
--- a/api/api-doc/metrics.json
+++ b/api/api-doc/metrics.json
@@ -1,66 +0,0 @@
-    "/v2/metrics-config/":{
-        "get":{
-            "description":"Return the metrics layer configuration",
-            "operationId":"get_metrics_config",
-            "produces":[
-                "application/json"
-            ],
-            "tags":[
-                "metrics"
-            ],
-            "parameters":[
-            ],
-            "responses":{
-                "200":{
-                "schema": {
-                    "type":"array",
-                    "items":{
-                        "$ref":"#/definitions/metrics_config",
-                        "description":"metrics Config value"
-                    }
-                    }
-                },
-                "default":{
-                    "description":"unexpected error",
-                    "schema":{
-                        "$ref":"#/definitions/ErrorModel"
-                    }
-                }
-            }
-        },
-        "post": {
-             "description":"Set the metrics layer relabel configuration",
-            "operationId":"set_metrics_config",
-            "produces":[
-                "application/json"
-            ],
-            "tags":[
-                "metrics"
-            ],
-            "parameters":[
-               {
-                "in":"body",
-                "name":"conf",
-                "description":"An array of relabel_config objects",
-                "schema": {
-                    "type":"array",
-                    "items":{
-                        "$ref":"#/definitions/metrics_config",
-                        "description":"metrics Config value"
-                    }
-                }
-               }
-            ],
-            "responses":{
-                "200":{
-                    "description": "OK"
-                },
-                "default":{
-                    "description":"unexpected error",
-                    "schema":{
-                        "$ref":"#/definitions/ErrorModel"
-                    }
-                }
-            }
-        }
-    }
--- a/api/api-doc/storage_service.json
+++ b/api/api-doc/storage_service.json
@@ -465,7 +465,7 @@
         "operations":[
            {
               "method":"GET",
-               "summary":"Retrieve the mapping of endpoint to host ID of all nodes that own tokens",
+               "summary":"Retrieve the mapping of endpoint to host ID",
               "type":"array",
               "items":{
                  "type":"mapper"
@@ -1114,14 +1114,6 @@
                     "allowMultiple":false,
                     "type":"string",
                     "paramType":"query"
-                  },
-                  {
-                     "name":"ranges_parallelism",
-                     "description":"An integer specifying the number of ranges to repair in parallel by user request. If this number is bigger than the max_repair_ranges_in_parallel calculated by Scylla core, the smaller one will be used.",
-                     "required":false,
-                     "allowMultiple":false,
-                     "type":"string",
-                     "paramType":"query"
                  }
               ]
            },
@@ -1954,7 +1946,7 @@
         "operations":[
            {
               "method":"POST",
-               "summary":"Forces this node to recalculate versions of schema objects.",
+               "summary":"Reset local schema",
               "type":"void",
               "nickname":"reset_local_schema",
               "produces":[
@@ -2118,65 +2110,6 @@
            }
         ]
      },
-      {
-         "path":"/storage_service/tombstone_gc/{keyspace}",
-         "operations":[
-            {
-               "method":"POST",
-               "summary":"Enable tombstone GC",
-               "type":"void",
-               "nickname":"enable_tombstone_gc",
-               "produces":[
-                  "application/json"
-               ],
-               "parameters":[
-                  {
-                     "name":"keyspace",
-                     "description":"The keyspace",
-                     "required":true,
-                     "allowMultiple":false,
-                     "type":"string",
-                     "paramType":"path"
-                  },
-                  {
-                     "name":"cf",
-                     "description":"Comma-separated column family names",
-                     "required":false,
-                     "allowMultiple":false,
-                     "type":"string",
-                     "paramType":"query"
-                  }
-               ]
-            },
-            {
-               "method":"DELETE",
-               "summary":"Disable tombstone GC",
-               "type":"void",
-               "nickname":"disable_tombstone_gc",
-               "produces":[
-                  "application/json"
-               ],
-               "parameters":[
-                  {
-                     "name":"keyspace",
-                     "description":"The keyspace",
-                     "required":true,
-                     "allowMultiple":false,
-                     "type":"string",
-                     "paramType":"path"
-                  },
-                  {
-                     "name":"cf",
-                     "description":"Comma-separated column family names",
-                     "required":false,
-                     "allowMultiple":false,
-                     "type":"string",
-                     "paramType":"query"
-                  }
-               ]
-            }
-         ]
-      },
      {
         "path":"/storage_service/deliver_hints",
         "operations":[
@@ -2495,23 +2428,7 @@
               ]
            }
         ]
-      },
-      {
-         "path":"/storage_service/raft_topology/reload",
-         "operations":[
-            {
-               "method":"POST",
-               "summary":"Reload Raft topology state from disk.",
-               "type":"void",
-               "nickname":"reload_raft_topology_state",
-               "produces":[
-                  "application/json"
-               ],
-               "parameters":[
-               ]
-            }
-         ]
-      }
+      }      
   ],
   "models":{
      "mapper":{
@@ -2714,7 +2631,7 @@
                "description":"File creation time"
            },
            "generation":{
-                "type":"string",
+                "type":"long",
                "description":"SSTable generation"
            },
            "level":{
--- a/api/api-doc/swagger20_header.json
+++ b/api/api-doc/swagger20_header.json
@@ -16,7 +16,7 @@
    }
  },
  "host": "{{Host}}",
-  "basePath": "/",
+  "basePath": "/v2",
  "schemes": [
    "http"
  ],
--- a/api/api-doc/task_manager.json
+++ b/api/api-doc/task_manager.json
@@ -1,182 +1,182 @@
 {
-   "apiVersion":"0.0.1",
-   "swaggerVersion":"1.2",
-   "basePath":"{{Protocol}}://{{Host}}",
-   "resourcePath":"/task_manager",
-   "produces":[
-      "application/json"
-   ],
-   "apis":[
-      {
-         "path":"/task_manager/list_modules",
-         "operations":[
-            {
-               "method":"GET",
-               "summary":"Get all modules names",
-               "type":"array",
-               "items":{
-                  "type":"string"
-               },
-               "nickname":"get_modules",
-               "produces":[
-                  "application/json"
-               ],
-               "parameters":[
-               ]
-            }
-         ]
-      },
-      {
-         "path":"/task_manager/list_module_tasks/{module}",
-         "operations":[
-            {
-               "method":"GET",
-               "summary":"Get a list of tasks",
-               "type":"array",
-               "items":{
-                  "type":"task_stats"
-               },
-               "nickname":"get_tasks",
-               "produces":[
-                  "application/json"
-               ],
-               "parameters":[
-                  {
-                     "name":"module",
-                     "description":"The module to query about",
-                     "required":true,
-                     "allowMultiple":false,
-                     "type":"string",
-                     "paramType":"path"
-                  },
-                  {
-                     "name":"internal",
-                     "description":"Boolean flag indicating whether internal tasks should be shown (false by default)",
-                     "required":false,
-                     "allowMultiple":false,
-                     "type":"boolean",
-                     "paramType":"query"
-                  },
-                  {
-                     "name":"keyspace",
-                     "description":"The keyspace to query about",
-                     "required":false,
-                     "allowMultiple":false,
-                     "type":"string",
-                     "paramType":"query"
-                  },
-                  {
-                     "name":"table",
-                     "description":"The table to query about",
-                     "required":false,
-                     "allowMultiple":false,
-                     "type":"string",
-                     "paramType":"query"
-                  }
-               ]
-            }
-         ]
-      },
-      {
-         "path":"/task_manager/task_status/{task_id}",
-         "operations":[
-            {
-               "method":"GET",
-               "summary":"Get task status",
-               "type":"task_status",
-               "nickname":"get_task_status",
-               "produces":[
-                  "application/json"
-               ],
-               "parameters":[
-                  {
-                     "name":"task_id",
-                     "description":"The uuid of a task to query about",
-                     "required":true,
-                     "allowMultiple":false,
-                     "type":"string",
-                     "paramType":"path"
-                  }
-               ]
-            }
-         ]
-      },
-      {
-         "path":"/task_manager/abort_task/{task_id}",
-         "operations":[
-            {
-               "method":"POST",
-               "summary":"Abort running task and its descendants",
-               "type":"void",
-               "nickname":"abort_task",
-               "produces":[
-                  "application/json"
-               ],
-               "parameters":[
-                  {
-                     "name":"task_id",
-                     "description":"The uuid of a task to abort",
-                     "required":true,
-                     "allowMultiple":false,
-                     "type":"string",
-                     "paramType":"path"
-                  }
-               ]
-            }
-         ]
-      },
-      {
-         "path":"/task_manager/wait_task/{task_id}",
-         "operations":[
-            {
-               "method":"GET",
-               "summary":"Wait for a task to complete",
-               "type":"task_status",
-               "nickname":"wait_task",
-               "produces":[
-                  "application/json"
-               ],
-               "parameters":[
-                  {
-                     "name":"task_id",
-                     "description":"The uuid of a task to wait for",
-                     "required":true,
-                     "allowMultiple":false,
-                     "type":"string",
-                     "paramType":"path"
-                  }
-               ]
-            }
-         ]
-      },
-      {
-         "path":"/task_manager/task_status_recursive/{task_id}",
-         "operations":[
-            {
-               "method":"GET",
-               "summary":"Get statuses of the task and all its descendants",
-               "type":"array",
-               "items":{
-                  "type":"task_status"
-               },
-               "nickname":"get_task_status_recursively",
-               "produces":[
-                  "application/json"
-               ],
-               "parameters":[
-                  {
-                     "name":"task_id",
-                     "description":"The uuid of a task to query about",
-                     "required":true,
-                     "allowMultiple":false,
-                     "type":"string",
-                     "paramType":"path"
-                  }
-               ]
-            }
-         ]
-      },
-      {
+    "apiVersion":"0.0.1",
+    "swaggerVersion":"1.2",
+    "basePath":"{{Protocol}}://{{Host}}",
+    "resourcePath":"/task_manager",
+    "produces":[
+       "application/json"
+    ],
+    "apis":[
+       {
+          "path":"/task_manager/list_modules",
+          "operations":[
+             {
+                "method":"GET",
+                "summary":"Get all modules names",
+                "type":"array",
+                "items":{
+                   "type":"string"
+                },
+                "nickname":"get_modules",
+                "produces":[
+                   "application/json"
+                ],
+                "parameters":[
+                ]
+             }
+          ]
+       },
+       {
+          "path":"/task_manager/list_module_tasks/{module}",
+          "operations":[
+             {
+                "method":"GET",
+                "summary":"Get a list of tasks",
+                "type":"array",
+                "items":{
+                    "type":"task_stats"
+                },
+                "nickname":"get_tasks",
+                "produces":[
+                   "application/json"
+                ],
+                "parameters":[
+                    {
+                        "name":"module",
+                        "description":"The module to query about",
+                        "required":true,
+                        "allowMultiple":false,
+                        "type":"string",
+                        "paramType":"path"
+                    },
+                    {
+                        "name":"internal",
+                        "description":"Boolean flag indicating whether internal tasks should be shown (false by default)",
+                        "required":false,
+                        "allowMultiple":false,
+                        "type":"boolean",
+                        "paramType":"query"
+                    },
+                    {
+                        "name":"keyspace",
+                        "description":"The keyspace to query about",
+                        "required":false,
+                        "allowMultiple":false,
+                        "type":"string",
+                        "paramType":"query"
+                    },
+                    {
+                        "name":"table",
+                        "description":"The table to query about",
+                        "required":false,
+                        "allowMultiple":false,
+                        "type":"string",
+                        "paramType":"query"
+                    }
+                ]
+             }
+          ]
+       },
+       {
+          "path":"/task_manager/task_status/{task_id}",
+          "operations":[
+             {
+                "method":"GET",
+                "summary":"Get task status",
+                "type":"task_status",
+                "nickname":"get_task_status",
+                "produces":[
+                   "application/json"
+                ],
+                "parameters":[
+                    {
+                        "name":"task_id",
+                        "description":"The uuid of a task to query about",
+                        "required":true,
+                        "allowMultiple":false,
+                        "type":"string",
+                        "paramType":"path"
+                    }
+                ]
+             }
+          ]
+       },
+       {
+          "path":"/task_manager/abort_task/{task_id}",
+          "operations":[
+             {
+                "method":"POST",
+                "summary":"Abort running task and its descendants",
+                "type":"void",
+                "nickname":"abort_task",
+                "produces":[
+                   "application/json"
+                ],
+                "parameters":[
+                   {
+                      "name":"task_id",
+                      "description":"The uuid of a task to abort",
+                      "required":true,
+                      "allowMultiple":false,
+                      "type":"string",
+                      "paramType":"path"
+                   }
+                ]
+             }
+          ]
+       },
+       {
+        "path":"/task_manager/wait_task/{task_id}",
+        "operations":[
+           {
+              "method":"GET",
+              "summary":"Wait for a task to complete",
+              "type":"task_status",
+              "nickname":"wait_task",
+              "produces":[
+                 "application/json"
+              ],
+              "parameters":[
+                 {
+                    "name":"task_id",
+                    "description":"The uuid of a task to wait for",
+                    "required":true,
+                    "allowMultiple":false,
+                    "type":"string",
+                    "paramType":"path"
+                 }
+              ]
+           }
+        ]
+     },
+     {
+      "path":"/task_manager/task_status_recursive/{task_id}",
+      "operations":[
+         {
+            "method":"GET",
+            "summary":"Get statuses of the task and all its descendants",
+            "type":"array",
+            "items":{
+               "type":"task_status"
+            },
+            "nickname":"get_task_status_recursively",
+            "produces":[
+               "application/json"
+            ],
+            "parameters":[
+                {
+                    "name":"task_id",
+                    "description":"The uuid of a task to query about",
+                    "required":true,
+                    "allowMultiple":false,
+                    "type":"string",
+                    "paramType":"path"
+                }
+            ]
+         }
+      ]
+     },
+     {
         "path":"/task_manager/ttl",
         "operations":[
            {
@@ -199,96 +199,88 @@
               ]
            }
         ]
-      }
-   ],
-   "models":{
-      "task_stats" :{
-         "id": "task_stats",
-         "description":"A task statistics object",
-         "properties":{
-            "task_id":{
-               "type":"string",
-               "description":"The uuid of a task"
-            },
-            "state":{
-               "type":"string",
-               "enum":[
+     }
+    ],
+    "models":{
+       "task_stats" :{
+           "id": "task_stats",
+           "description":"A task statistics object",
+           "properties":{
+             "task_id":{
+                "type":"string",
+                "description":"The uuid of a task"
+             },
+             "state":{
+                "type":"string",
+                "enum":[
                  "created",
                  "running",
                  "done",
                  "failed"
-               ],
-               "description":"The state of a task"
-            },
-            "type":{
-               "type":"string",
-               "description":"The description of the task"
-            },
-            "scope":{
-               "type":"string",
-               "description":"The scope of the task"
-            },
-            "keyspace":{
-               "type":"string",
-               "description":"The keyspace the task is working on (if applicable)"
-            },
-            "table":{
-               "type":"string",
-               "description":"The table the task is working on (if applicable)"
-            },
-            "entity":{
-               "type":"string",
-               "description":"Task-specific entity description"
-            },
-            "sequence_number":{
-               "type":"long",
-               "description":"The running sequence number of the task"
-            }
-         }
-      },
-      "task_status":{
-         "id":"task_status",
-         "description":"A task status object",
-         "properties":{
-            "id":{
-               "type":"string",
-               "description":"The uuid of the task"
-            },
-            "type":{
-               "type":"string",
-               "description":"The description of the task"
-            },
-            "scope":{
-               "type":"string",
-               "description":"The scope of the task"
-            },
-            "state":{
+                ],
+                "description":"The state of a task"
+             },
+             "type":{
+                "type":"string",
+                "description":"The description of the task"
+             },
+             "keyspace":{
+                "type":"string",
+                "description":"The keyspace the task is working on (if applicable)"
+             },
+             "table":{
+                "type":"string",
+                "description":"The table the task is working on (if applicable)"
+             },
+             "entity":{
+                "type":"string",
+                "description":"Task-specific entity description"
+             },
+             "sequence_number":{
+                "type":"long",
+                "description":"The running sequence number of the task"
+             }
+           }
+       },
+       "task_status":{
+          "id":"task_status",
+          "description":"A task status object",
+          "properties":{
+             "id":{
+                "type":"string",
+                "description":"The uuid of the task"
+             },
+             "type":{
+                "type":"string",
+                "description":"The description of the task"
+             },
+             "state":{
               "type":"string",
               "enum":[
-                  "created",
-                  "running",
-                  "done",
-                  "failed"
+                 "created",
+                 "running",
+                 "done",
+                 "failed"
               ],
-               "description":"The state of the task"
-            },
-            "is_abortable":{
-               "type":"boolean",
-               "description":"Boolean flag indicating whether the task can be aborted"
-            },
-            "start_time":{
-               "type":"datetime",
-               "description":"The start time of the task"
-            },
-            "end_time":{
-               "type":"datetime",
-               "description":"The end time of the task (unspecified when the task is not completed)"
-            },
-            "error":{
-               "type":"string",
-               "description":"Error string, if the task failed"
-            },
-            "parent_id":{
+                "description":"The state of the task"
+             },
+             "is_abortable":{
+                "type":"boolean",
+                "description":"Boolean flag indicating whether the task can be aborted"
+             },
+             "start_time":{
+                "type":"datetime",
+                "description":"The start time of the task"
+             },
+             "end_time":{
+                "type":"datetime",
+                "description":"The end time of the task (unspecified when the task is not completed)"
+             },
+             "error":{
+                "type":"string",
+                "description":"Error string, if the task failed"
+             },
+             "parent_id":{
               "type":"string",
               "description":"The uuid of the parent task"
            },
@@ -326,12 +318,12 @@
            },
            "children_ids":{
               "type":"array",
-               "items":{
-                  "type":"string"
-               },
+                "items":{
+                    "type":"string"
+                },
               "description":"Task IDs of children of this task"
            }
-         }
-      }
-   }
-}
+          }
+       }
+    }
+ }
--- a/api/api-doc/task_manager_test.json
+++ b/api/api-doc/task_manager_test.json
@@ -1,153 +1,153 @@
 {
-   "apiVersion":"0.0.1",
-   "swaggerVersion":"1.2",
-   "basePath":"{{Protocol}}://{{Host}}",
-   "resourcePath":"/task_manager_test",
-   "produces":[
-      "application/json"
-   ],
-   "apis":[
-      {
-         "path":"/task_manager_test/test_module",
-         "operations":[
-            {
-               "method":"POST",
-               "summary":"Register test module in task manager",
-               "type":"void",
-               "nickname":"register_test_module",
-               "produces":[
-                  "application/json"
-               ],
-               "parameters":[
-               ]
-            },
-            {
-               "method":"DELETE",
-               "summary":"Unregister test module in task manager",
-               "type":"void",
-               "nickname":"unregister_test_module",
-               "produces":[
-                  "application/json"
-               ],
-               "parameters":[
-               ]
-            }
-         ]
-      },
-      {
-         "path":"/task_manager_test/test_task",
-         "operations":[
-            {
-               "method":"POST",
-               "summary":"Register test task",
-               "type":"string",
-               "nickname":"register_test_task",
-               "produces":[
-                  "application/json"
-               ],
-               "parameters":[
-                  {
-                     "name":"task_id",
-                     "description":"The uuid of a task to register",
-                     "required":false,
-                     "allowMultiple":false,
-                     "type":"string",
-                     "paramType":"query"
-                  },
-                  {
-                     "name":"shard",
-                     "description":"The shard of the task",
-                     "required":false,
-                     "allowMultiple":false,
-                     "type":"long",
-                     "paramType":"query"
-                  },
-                  {
-                     "name":"parent_id",
-                     "description":"The uuid of a parent task",
-                     "required":false,
-                     "allowMultiple":false,
-                     "type":"string",
-                     "paramType":"query"
-                  },
-                  {
-                     "name":"keyspace",
-                     "description":"The keyspace the task is working on",
-                     "required":false,
-                     "allowMultiple":false,
-                     "type":"string",
-                     "paramType":"query"
-                  },
-                  {
-                     "name":"table",
-                     "description":"The table the task is working on",
-                     "required":false,
-                     "allowMultiple":false,
-                     "type":"string",
-                     "paramType":"query"
-                  },
-                  {
-                     "name":"entity",
-                     "description":"Task-specific entity description",
-                     "required":false,
-                     "allowMultiple":false,
-                     "type":"string",
-                     "paramType":"query"
-                  }
-               ]
-            },
-            {
-               "method":"DELETE",
-               "summary":"Unregister test task",
-               "type":"void",
-               "nickname":"unregister_test_task",
-               "produces":[
-                  "application/json"
-               ],
-               "parameters":[
-                  {
-                     "name":"task_id",
-                     "description":"The uuid of a task to register",
-                     "required":true,
-                     "allowMultiple":false,
-                     "type":"string",
-                     "paramType":"query"
-                  }
-               ]
-            }
-         ]
-      },
-      {
-         "path":"/task_manager_test/finish_test_task/{task_id}",
-         "operations":[
-            {
-               "method":"POST",
-               "summary":"Finish test task",
-               "type":"void",
-               "nickname":"finish_test_task",
-               "produces":[
-                  "application/json"
-               ],
-               "parameters":[
-                  {
-                     "name":"task_id",
-                     "description":"The uuid of a task to finish",
-                     "required":true,
-                     "allowMultiple":false,
-                     "type":"string",
-                     "paramType":"path"
-                  },
-                  {
-                     "name":"error",
-                     "description":"The error with which task fails (if it does)",
-                     "required":false,
-                     "allowMultiple":false,
-                     "type":"string",
-                     "paramType":"query"
-                  }
-               ]
-            }
-         ]
-      }
-   ]
-}
+    "apiVersion":"0.0.1",
+    "swaggerVersion":"1.2",
+    "basePath":"{{Protocol}}://{{Host}}",
+    "resourcePath":"/task_manager_test",
+    "produces":[
+       "application/json"
+    ],
+    "apis":[
+       {
+          "path":"/task_manager_test/test_module",
+          "operations":[
+             {
+                "method":"POST",
+                "summary":"Register test module in task manager",
+                "type":"void",
+                "nickname":"register_test_module",
+                "produces":[
+                   "application/json"
+                ],
+                "parameters":[
+                ]
+             },
+             {
+                "method":"DELETE",
+                "summary":"Unregister test module in task manager",
+                "type":"void",
+                "nickname":"unregister_test_module",
+                "produces":[
+                   "application/json"
+                ],
+                "parameters":[
+                ]
+             }
+          ]
+       },
+       {
+          "path":"/task_manager_test/test_task",
+          "operations":[
+             {
+                "method":"POST",
+                "summary":"Register test task",
+                "type":"string",
+                "nickname":"register_test_task",
+                "produces":[
+                   "application/json"
+                ],
+                "parameters":[
+                    {
+                        "name":"task_id",
+                        "description":"The uuid of a task to register",
+                        "required":false,
+                        "allowMultiple":false,
+                        "type":"string",
+                        "paramType":"query"
+                    },
+                    {
+                        "name":"shard",
+                        "description":"The shard of the task",
+                        "required":false,
+                        "allowMultiple":false,
+                        "type":"long",
+                        "paramType":"query"
+                    },
+                    {
+                        "name":"parent_id",
+                        "description":"The uuid of a parent task",
+                        "required":false,
+                        "allowMultiple":false,
+                        "type":"string",
+                        "paramType":"query"
+                    },
+                    {
+                        "name":"keyspace",
+                        "description":"The keyspace the task is working on",
+                        "required":false,
+                        "allowMultiple":false,
+                        "type":"string",
+                        "paramType":"query"
+                    },
+                    {
+                        "name":"table",
+                        "description":"The table the task is working on",
+                        "required":false,
+                        "allowMultiple":false,
+                        "type":"string",
+                        "paramType":"query"
+                    },
+                    {
+                        "name":"entity",
+                        "description":"Task-specific entity description",
+                        "required":false,
+                        "allowMultiple":false,
+                        "type":"string",
+                        "paramType":"query"
+                    }
+                ]
+             },
+             {
+                "method":"DELETE",
+                "summary":"Unregister test task",
+                "type":"void",
+                "nickname":"unregister_test_task",
+                "produces":[
+                   "application/json"
+                ],
+                "parameters":[
+                    {
+                        "name":"task_id",
+                        "description":"The uuid of a task to register",
+                        "required":true,
+                        "allowMultiple":false,
+                        "type":"string",
+                        "paramType":"query"
+                    }
+                ]
+             }
+          ]
+       },
+       {
+          "path":"/task_manager_test/finish_test_task/{task_id}",
+          "operations":[
+             {
+                "method":"POST",
+                "summary":"Finish test task",
+                "type":"void",
+                "nickname":"finish_test_task",
+                "produces":[
+                   "application/json"
+                ],
+                "parameters":[
+                   {
+                      "name":"task_id",
+                      "description":"The uuid of a task to finish",
+                      "required":true,
+                      "allowMultiple":false,
+                      "type":"string",
+                      "paramType":"path"
+                   },
+                   {
+                      "name":"error",
+                      "description":"The error with which task fails (if it does)",
+                      "required":false,
+                      "allowMultiple":false,
+                      "type":"string",
+                      "paramType":"query"
+                   }
+                ]
+             }
+          ]
+       }
+    ]
+ }
--- a/api/api.cc
+++ b/api/api.cc
@@ -60,10 +60,8 @@ future<> set_server_init(http_context& ctx) {
        rb->set_api_doc(r);
        rb02->set_api_doc(r);
        rb02->register_api_file(r, "swagger20_header");
-        rb02->register_api_file(r, "metrics");
        rb->register_function(r, "system",
                "The system related API");
-        rb02->add_definitions_file(r, "metrics");
        set_system(ctx, r);
    });
 }
@@ -71,7 +69,7 @@ future<> set_server_init(http_context& ctx) {
 future<> set_server_config(http_context& ctx, const db::config& cfg) {
    auto rb02 = std::make_shared < api_registry_builder20 > (ctx.api_doc, "/v2");
    return ctx.http_server.set_routes([&ctx, &cfg, rb02](routes& r) {
-        set_config(rb02, ctx, r, cfg, false);
+        set_config(rb02, ctx, r, cfg);
    });
 }

@@ -102,16 +100,12 @@ future<> unset_rpc_controller(http_context& ctx) {
    return ctx.http_server.set_routes([&ctx] (routes& r) { unset_rpc_controller(ctx, r); });
 }

-future<> set_server_storage_service(http_context& ctx, sharded<service::storage_service>& ss, service::raft_group0_client& group0_client) {
-    return register_api(ctx, "storage_service", "The storage service API", [&ss, &group0_client] (http_context& ctx, routes& r) {
-            set_storage_service(ctx, r, ss, group0_client);
+future<> set_server_storage_service(http_context& ctx, sharded<service::storage_service>& ss, sharded<gms::gossiper>& g, sharded<cdc::generation_service>& cdc_gs, sharded<db::system_keyspace>& sys_ks) {
+    return register_api(ctx, "storage_service", "The storage service API", [&ss, &g, &cdc_gs, &sys_ks] (http_context& ctx, routes& r) {
+            set_storage_service(ctx, r, ss, g.local(), cdc_gs, sys_ks);
        });
 }

-future<> unset_server_storage_service(http_context& ctx) {
-    return ctx.http_server.set_routes([&ctx] (routes& r) { unset_storage_service(ctx, r); });
-}
-
 future<> set_server_sstables_loader(http_context& ctx, sharded<sstables_loader>& sst_loader) {
    return ctx.http_server.set_routes([&ctx, &sst_loader] (routes& r) { set_sstables_loader(ctx, r, sst_loader); });
 }
@@ -193,10 +187,10 @@ future<> unset_server_messaging_service(http_context& ctx) {
    return ctx.http_server.set_routes([&ctx] (routes& r) { unset_messaging_service(ctx, r); });
 }

-future<> set_server_storage_proxy(http_context& ctx, sharded<service::storage_proxy>& proxy) {
+future<> set_server_storage_proxy(http_context& ctx, sharded<service::storage_service>& ss) {
    return register_api(ctx, "storage_proxy",
-                "The storage proxy API", [&proxy] (http_context& ctx, routes& r) {
-                    set_storage_proxy(ctx, r, proxy);
+                "The storage proxy API", [&ss] (http_context& ctx, routes& r) {
+                    set_storage_proxy(ctx, r, ss);
                });
 }

@@ -220,10 +214,10 @@ future<> set_server_cache(http_context& ctx) {
            "The cache service API", set_cache_service);
 }

-future<> set_hinted_handoff(http_context& ctx, sharded<service::storage_proxy>& proxy) {
+future<> set_hinted_handoff(http_context& ctx, sharded<gms::gossiper>& g) {
    return register_api(ctx, "hinted_handoff",
-                "The hinted handoff API", [&proxy] (http_context& ctx, routes& r) {
-                    set_hinted_handoff(ctx, r, proxy);
+                "The hinted handoff API", [&g] (http_context& ctx, routes& r) {
+                    set_hinted_handoff(ctx, r, g.local());
                });
 }

@@ -270,36 +264,28 @@ future<> set_server_done(http_context& ctx) {
    });
 }

-future<> set_server_task_manager(http_context& ctx, sharded<tasks::task_manager>& tm, lw_shared_ptr<db::config> cfg) {
+future<> set_server_task_manager(http_context& ctx, lw_shared_ptr<db::config> cfg) {
    auto rb = std::make_shared < api_registry_builder > (ctx.api_doc);

-    return ctx.http_server.set_routes([rb, &ctx, &tm, &cfg = *cfg](routes& r) {
+    return ctx.http_server.set_routes([rb, &ctx, &cfg = *cfg](routes& r) {
        rb->register_function(r, "task_manager",
                "The task manager API");
-        set_task_manager(ctx, r, tm, cfg);
+        set_task_manager(ctx, r, cfg);
    });
 }

-future<> unset_server_task_manager(http_context& ctx) {
-    return ctx.http_server.set_routes([&ctx] (routes& r) { unset_task_manager(ctx, r); });
-}
-
 #ifndef SCYLLA_BUILD_MODE_RELEASE

-future<> set_server_task_manager_test(http_context& ctx, sharded<tasks::task_manager>& tm) {
+future<> set_server_task_manager_test(http_context& ctx) {
    auto rb = std::make_shared < api_registry_builder > (ctx.api_doc);

-    return ctx.http_server.set_routes([rb, &ctx, &tm](routes& r) mutable {
+    return ctx.http_server.set_routes([rb, &ctx](routes& r) mutable {
        rb->register_function(r, "task_manager_test",
                "The task manager test API");
-        set_task_manager_test(ctx, r, tm);
+        set_task_manager_test(ctx, r);
    });
 }

-future<> unset_server_task_manager_test(http_context& ctx) {
-    return ctx.http_server.set_routes([&ctx] (routes& r) { unset_task_manager_test(ctx, r); });
-}
-
 #endif

 void req_params::process(const request& req) {
--- a/api/api_init.hh
+++ b/api/api_init.hh
@@ -22,7 +22,6 @@ namespace service {
 class load_meter;
 class storage_proxy;
 class storage_service;
-class raft_group0_client;

 } // namespace service

@@ -52,6 +51,7 @@ class system_keyspace;
 }
 namespace netw { class messaging_service; }
 class repair_service;
+namespace cdc { class generation_service; }

 namespace gms {

@@ -61,10 +61,6 @@ class gossiper;

 namespace auth { class service; }

-namespace tasks {
-class task_manager;
-}
-
 namespace api {

 struct http_context {
@@ -72,12 +68,15 @@ struct http_context {
    sstring api_doc;
    httpd::http_server_control http_server;
    distributed<replica::database>& db;
+    distributed<service::storage_proxy>& sp;
    service::load_meter& lmeter;
    const sharded<locator::shared_token_metadata>& shared_token_metadata;
+    sharded<tasks::task_manager>& tm;

    http_context(distributed<replica::database>& _db,
-            service::load_meter& _lm, const sharded<locator::shared_token_metadata>& _stm)
-            : db(_db), lmeter(_lm), shared_token_metadata(_stm) {
+            distributed<service::storage_proxy>& _sp,
+            service::load_meter& _lm, const sharded<locator::shared_token_metadata>& _stm, sharded<tasks::task_manager>& _tm)
+            : db(_db), sp(_sp), lmeter(_lm), shared_token_metadata(_stm), tm(_tm) {
    }

    const locator::token_metadata& get_token_metadata();
@@ -87,8 +86,7 @@ future<> set_server_init(http_context& ctx);
 future<> set_server_config(http_context& ctx, const db::config& cfg);
 future<> set_server_snitch(http_context& ctx, sharded<locator::snitch_ptr>& snitch);
 future<> unset_server_snitch(http_context& ctx);
-future<> set_server_storage_service(http_context& ctx, sharded<service::storage_service>& ss, service::raft_group0_client&);
-future<> unset_server_storage_service(http_context& ctx);
+future<> set_server_storage_service(http_context& ctx, sharded<service::storage_service>& ss, sharded<gms::gossiper>& g, sharded<cdc::generation_service>& cdc_gs, sharded<db::system_keyspace>& sys_ks);
 future<> set_server_sstables_loader(http_context& ctx, sharded<sstables_loader>& sst_loader);
 future<> unset_server_sstables_loader(http_context& ctx);
 future<> set_server_view_builder(http_context& ctx, sharded<db::view::view_builder>& vb);
@@ -108,19 +106,17 @@ future<> set_server_load_sstable(http_context& ctx, sharded<db::system_keyspace>
 future<> unset_server_load_sstable(http_context& ctx);
 future<> set_server_messaging_service(http_context& ctx, sharded<netw::messaging_service>& ms);
 future<> unset_server_messaging_service(http_context& ctx);
-future<> set_server_storage_proxy(http_context& ctx, sharded<service::storage_proxy>& proxy);
+future<> set_server_storage_proxy(http_context& ctx, sharded<service::storage_service>& ss);
 future<> unset_server_storage_proxy(http_context& ctx);
 future<> set_server_stream_manager(http_context& ctx, sharded<streaming::stream_manager>& sm);
 future<> unset_server_stream_manager(http_context& ctx);
-future<> set_hinted_handoff(http_context& ctx, sharded<service::storage_proxy>& p);
+future<> set_hinted_handoff(http_context& ctx, sharded<gms::gossiper>& g);
 future<> unset_hinted_handoff(http_context& ctx);
 future<> set_server_gossip_settle(http_context& ctx, sharded<gms::gossiper>& g);
 future<> set_server_cache(http_context& ctx);
 future<> set_server_compaction_manager(http_context& ctx);
 future<> set_server_done(http_context& ctx);
-future<> set_server_task_manager(http_context& ctx, sharded<tasks::task_manager>& tm, lw_shared_ptr<db::config> cfg);
-future<> unset_server_task_manager(http_context& ctx);
-future<> set_server_task_manager_test(http_context& ctx, sharded<tasks::task_manager>& tm);
-future<> unset_server_task_manager_test(http_context& ctx);
+future<> set_server_task_manager(http_context& ctx, lw_shared_ptr<db::config> cfg);
+future<> set_server_task_manager_test(http_context& ctx);

 }
--- a/api/authorization_cache.cc
+++ b/api/authorization_cache.cc
@@ -11,7 +11,6 @@
 #include "api/authorization_cache.hh"
 #include "api/api.hh"
 #include "auth/common.hh"
-#include "auth/service.hh"

 namespace api {
 using namespace json;
--- a/api/column_family.cc
+++ b/api/column_family.cc
@@ -43,7 +43,7 @@ std::tuple<sstring, sstring> parse_fully_qualified_cf_name(sstring name) {
    return std::make_tuple(name.substr(0, pos), name.substr(end));
 }

-table_id get_uuid(const sstring& ks, const sstring& cf, const replica::database& db) {
+const table_id& get_uuid(const sstring& ks, const sstring& cf, const replica::database& db) {
    try {
        return db.find_uuid(ks, cf);
    } catch (replica::no_such_column_family& e) {
@@ -51,7 +51,7 @@ table_id get_uuid(const sstring& ks, const sstring& cf, const replica::database&
    }
 }

-table_id get_uuid(const sstring& name, const replica::database& db) {
+const table_id& get_uuid(const sstring& name, const replica::database& db) {
    auto [ks, cf] = parse_fully_qualified_cf_name(name);
    return get_uuid(ks, cf, db);
 }
@@ -135,9 +135,9 @@ static future<json::json_return_type>  get_cf_histogram(http_context& ctx, const
 static future<json::json_return_type> get_cf_histogram(http_context& ctx, utils::timed_rate_moving_average_summary_and_histogram replica::column_family_stats::*f) {
    std::function<utils::ihistogram(const replica::database&)> fun = [f] (const replica::database& db)  {
        utils::ihistogram res;
-        db.get_tables_metadata().for_each_table([&] (table_id, lw_shared_ptr<replica::table> table) mutable {
-            res += (table->get_stats().*f).hist;
-        });
+        for (auto i : db.get_column_families()) {
+            res += (i.second->get_stats().*f).hist;
+        }
        return res;
    };
    return ctx.db.map(fun).then([](const std::vector<utils::ihistogram> &res) {
@@ -162,9 +162,9 @@ static future<json::json_return_type>  get_cf_rate_and_histogram(http_context& c
 static future<json::json_return_type> get_cf_rate_and_histogram(http_context& ctx, utils::timed_rate_moving_average_summary_and_histogram replica::column_family_stats::*f) {
    std::function<utils::rate_moving_average_and_histogram(const replica::database&)> fun = [f] (const replica::database& db)  {
        utils::rate_moving_average_and_histogram res;
-        db.get_tables_metadata().for_each_table([&] (table_id, lw_shared_ptr<replica::table> table) {
-            res += (table->get_stats().*f).rate();
-        });
+        for (auto i : db.get_column_families()) {
+            res += (i.second->get_stats().*f).rate();
+        }
        return res;
    };
    return ctx.db.map(fun).then([](const std::vector<utils::rate_moving_average_and_histogram> &res) {
@@ -306,21 +306,21 @@ ratio_holder filter_recent_false_positive_as_ratio_holder(const sstables::shared
 void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace>& sys_ks) {
    cf::get_column_family_name.set(r, [&ctx] (const_req req){
        std::vector<sstring> res;
-        ctx.db.local().get_tables_metadata().for_each_table_id([&] (const std::pair<sstring, sstring>& kscf, table_id) {
-            res.push_back(kscf.first + ":" + kscf.second);
-        });
+        for (auto i: ctx.db.local().get_column_families_mapping()) {
+            res.push_back(i.first.first + ":" + i.first.second);
+        }
        return res;
    });

    cf::get_column_family.set(r, [&ctx] (std::unique_ptr<http::request> req){
-        std::list<cf::column_family_info> res;
-            ctx.db.local().get_tables_metadata().for_each_table_id([&] (const std::pair<sstring, sstring>& kscf, table_id) {
+            std::list<cf::column_family_info> res;
+            for (auto i: ctx.db.local().get_column_families_mapping()) {
                cf::column_family_info info;
-                info.ks = kscf.first;
-                info.cf =  kscf.second;
+                info.ks = i.first.first;
+                info.cf =  i.first.second;
                info.type = "ColumnFamilies";
                res.push_back(info);
-            });
+            }
            return make_ready_future<json::json_return_type>(json::stream_range_as_array(std::move(res), std::identity()));
        });

@@ -871,7 +871,6 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
    });

    cf::enable_auto_compaction.set(r, [&ctx](std::unique_ptr<http::request> req) {
-        apilog.info("column_family/enable_auto_compaction: name={}", req->param["name"]);
        return ctx.db.invoke_on(0, [&ctx, req = std::move(req)] (replica::database& db) {
            auto g = replica::database::autocompaction_toggle_guard(db);
            return foreach_column_family(ctx, req->param["name"], [](replica::column_family &cf) {
@@ -883,7 +882,6 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
    });

    cf::disable_auto_compaction.set(r, [&ctx](std::unique_ptr<http::request> req) {
-        apilog.info("column_family/disable_auto_compaction: name={}", req->param["name"]);
        return ctx.db.invoke_on(0, [&ctx, req = std::move(req)] (replica::database& db) {
            auto g = replica::database::autocompaction_toggle_guard(db);
            return foreach_column_family(ctx, req->param["name"], [](replica::column_family &cf) {
@@ -894,30 +892,6 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
        });
    });

-    cf::get_tombstone_gc.set(r, [&ctx] (const_req req) {
-        auto uuid = get_uuid(req.param["name"], ctx.db.local());
-        replica::table& t = ctx.db.local().find_column_family(uuid);
-        return t.tombstone_gc_enabled();
-    });
-
-    cf::enable_tombstone_gc.set(r, [&ctx](std::unique_ptr<http::request> req) {
-        apilog.info("column_family/enable_tombstone_gc: name={}", req->param["name"]);
-        return foreach_column_family(ctx, req->param["name"], [](replica::table& t) {
-            t.set_tombstone_gc_enabled(true);
-        }).then([] {
-            return make_ready_future<json::json_return_type>(json_void());
-        });
-    });
-
-    cf::disable_tombstone_gc.set(r, [&ctx](std::unique_ptr<http::request> req) {
-        apilog.info("column_family/disable_tombstone_gc: name={}", req->param["name"]);
-        return foreach_column_family(ctx, req->param["name"], [](replica::table& t) {
-            t.set_tombstone_gc_enabled(false);
-        }).then([] {
-            return make_ready_future<json::json_return_type>(json_void());
-        });
-    });
-
    cf::get_built_indexes.set(r, [&ctx, &sys_ks](std::unique_ptr<http::request> req) {
        auto ks_cf = parse_fully_qualified_cf_name(req->param["name"]);
        auto&& ks = std::get<0>(ks_cf);
@@ -981,7 +955,6 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace

    cf::set_compaction_strategy_class.set(r, [&ctx](std::unique_ptr<http::request> req) {
        sstring strategy = req->get_query_param("class_name");
-        apilog.info("column_family/set_compaction_strategy_class: name={} strategy={}", req->param["name"], strategy);
        return foreach_column_family(ctx, req->param["name"], [strategy](replica::column_family& cf) {
            cf.set_compaction_strategy(sstables::compaction_strategy::type(strategy));
        }).then([] {
@@ -1017,12 +990,11 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
        auto key = req->get_query_param("key");
        auto uuid = get_uuid(req->param["name"], ctx.db.local());

-        return ctx.db.map_reduce0([key, uuid] (replica::database& db) -> future<std::unordered_set<sstring>> {
-            auto sstables = co_await db.find_column_family(uuid).get_sstables_by_partition_key(key);
-            co_return boost::copy_range<std::unordered_set<sstring>>(sstables | boost::adaptors::transformed([] (auto s) { return s->get_filename(); }));
+        return ctx.db.map_reduce0([key, uuid] (replica::database& db) {
+            return db.find_column_family(uuid).get_sstables_by_partition_key(key);
        }, std::unordered_set<sstring>(),
-        [](std::unordered_set<sstring> a, std::unordered_set<sstring>&& b) mutable {
-            a.merge(b);
+            [](std::unordered_set<sstring> a, std::unordered_set<sstring>&& b) mutable {
+            a.insert(b.begin(),b.end());
            return a;
        }).then([](const std::unordered_set<sstring>& res) {
            return make_ready_future<json::json_return_type>(container_to_vec(res));
@@ -1051,13 +1023,9 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
            fail(unimplemented::cause::API);
        }

-        apilog.info("column_family/force_major_compaction: name={}", req->param["name"]);
        auto [ks, cf] = parse_fully_qualified_cf_name(req->param["name"]);
        auto keyspace = validate_keyspace(ctx, ks);
-        std::vector<table_info> table_infos = {table_info{
-            .name = cf,
-            .id = ctx.db.local().find_uuid(ks, cf)
-        }};
+        std::vector<table_id> table_infos = {ctx.db.local().find_uuid(ks, cf)};

        auto& compaction_module = ctx.db.local().get_compaction_manager().get_task_manager_module();
        auto task = co_await compaction_module.make_and_start_task<major_keyspace_compaction_task_impl>({}, std::move(keyspace), ctx.db, std::move(table_infos));
--- a/api/column_family.hh
+++ b/api/column_family.hh
@@ -23,7 +23,7 @@ namespace api {
 void set_column_family(http_context& ctx, httpd::routes& r, sharded<db::system_keyspace>& sys_ks);
 void unset_column_family(http_context& ctx, httpd::routes& r);

-table_id get_uuid(const sstring& name, const replica::database& db);
+const table_id& get_uuid(const sstring& name, const replica::database& db);
 future<> foreach_column_family(http_context& ctx, const sstring& name, std::function<void(replica::column_family&)> f);


@@ -68,10 +68,9 @@ struct map_reduce_column_families_locally {
    std::function<std::unique_ptr<std::any>(std::unique_ptr<std::any>, std::unique_ptr<std::any>)> reducer;
    future<std::unique_ptr<std::any>> operator()(replica::database& db) const {
        auto res = seastar::make_lw_shared<std::unique_ptr<std::any>>(std::make_unique<std::any>(init));
-        return db.get_tables_metadata().for_each_table_gently([res, this] (table_id, seastar::lw_shared_ptr<replica::table> table) {
-            *res = reducer(std::move(*res), mapper(*table.get()));
-            return make_ready_future();
-        }).then([res] () {
+        return do_for_each(db.get_column_families(), [res, this](const std::pair<table_id, seastar::lw_shared_ptr<replica::table>>& i) {
+            *res = reducer(std::move(*res), mapper(*i.second.get()));
+        }).then([res] {
            return std::move(*res);
        });
    }
--- a/api/compaction_manager.cc
+++ b/api/compaction_manager.cc
@@ -68,8 +68,8 @@ void set_compaction_manager(http_context& ctx, routes& r) {
    cm::get_pending_tasks_by_table.set(r, [&ctx] (std::unique_ptr<http::request> req) {
        return ctx.db.map_reduce0([](replica::database& db) {
            return do_with(std::unordered_map<std::pair<sstring, sstring>, uint64_t, utils::tuple_hash>(), [&db](std::unordered_map<std::pair<sstring, sstring>, uint64_t, utils::tuple_hash>& tasks) {
-                return db.get_tables_metadata().for_each_table_gently([&tasks] (table_id, lw_shared_ptr<replica::table> table) {
-                    replica::table& cf = *table.get();
+                return do_for_each(db.get_column_families(), [&tasks](const std::pair<table_id, seastar::lw_shared_ptr<replica::table>>& i) -> future<> {
+                    replica::table& cf = *i.second.get();
                    tasks[std::make_pair(cf.schema()->ks_name(), cf.schema()->cf_name())] = cf.estimate_pending_compactions();
                    return make_ready_future<>();
                }).then([&tasks] {
--- a/api/config.cc
+++ b/api/config.cc
@@ -45,7 +45,7 @@ future<> get_config_swagger_entry(std::string_view name, const std::string& desc
    } else {
        ss <<',';
    };
-    ss << "\"/v2/config/" << name <<"\": {"
+    ss << "\"/config/" << name <<"\": {"
      "\"get\": {"
        "\"description\": \"" << boost::replace_all_copy(boost::replace_all_copy(boost::replace_all_copy(description,"\n","\\n"),"\"", "''"), "\t", " ") <<"\","
        "\"operationId\": \"find_config_"<< name <<"\","
@@ -76,9 +76,9 @@ future<> get_config_swagger_entry(std::string_view name, const std::string& desc

 namespace cs = httpd::config_json;

-void set_config(std::shared_ptr < api_registry_builder20 > rb, http_context& ctx, routes& r, const db::config& cfg, bool first) {
-    rb->register_function(r, [&cfg, first] (output_stream<char>& os) {
-        return do_with(first, [&os, &cfg] (bool& first) {
+void set_config(std::shared_ptr < api_registry_builder20 > rb, http_context& ctx, routes& r, const db::config& cfg) {
+    rb->register_function(r, [&cfg] (output_stream<char>& os) {
+        return do_with(true, [&os, &cfg] (bool& first) {
            auto f = make_ready_future();
            for (auto&& cfg_ref : cfg.values()) {
                auto&& cfg = cfg_ref.get();
--- a/api/config.hh
+++ b/api/config.hh
@@ -13,5 +13,5 @@

 namespace api {

-void set_config(std::shared_ptr<httpd::api_registry_builder20> rb, http_context& ctx, httpd::routes& r, const db::config& cfg, bool first = false);
+void set_config(std::shared_ptr<httpd::api_registry_builder20> rb, http_context& ctx, httpd::routes& r, const db::config& cfg);
 }
--- a/api/error_injection.cc
+++ b/api/error_injection.cc
@@ -12,9 +12,7 @@
 #include <seastar/http/exception.hh>
 #include "log.hh"
 #include "utils/error_injection.hh"
-#include "utils/rjson.hh"
 #include <seastar/core/future-util.hh>
-#include <seastar/util/short_streams.hh>

 namespace api {
 using namespace seastar::httpd;
@@ -26,27 +24,10 @@ void set_error_injection(http_context& ctx, routes& r) {
    hf::enable_injection.set(r, [](std::unique_ptr<request> req) {
        sstring injection = req->param["injection"];
        bool one_shot = req->get_query_param("one_shot") == "True";
-        auto params = req->content;
-
-        const size_t max_params_size = 1024 * 1024;
-        if (params.size() > max_params_size) {
-            // This is a hard limit, because we don't want to allocate
-            // too much memory or block the thread for too long.
-            throw httpd::bad_param_exception(format("Injection parameters are too long, max length is {}", max_params_size));
-        }
-
-        try {
-            auto parameters = params.empty()
-                ? utils::error_injection_parameters{}
-                : rjson::parse_to_map<utils::error_injection_parameters>(params);
-
-            auto& errinj = utils::get_local_injector();
-            return errinj.enable_on_all(injection, one_shot, std::move(parameters)).then([] {
-                return make_ready_future<json::json_return_type>(json::json_void());
-            });
-        } catch (const rjson::error& e) {
-            throw httpd::bad_param_exception(format("Failed to parse injections parameters: {}", e.what()));
-        }
+        auto& errinj = utils::get_local_injector();
+        return errinj.enable_on_all(injection, one_shot).then([] {
+            return make_ready_future<json::json_return_type>(json::json_void());
+        });
    });

    hf::get_enabled_injections_on_all.set(r, [](std::unique_ptr<request> req) {
@@ -71,13 +52,6 @@ void set_error_injection(http_context& ctx, routes& r) {
        });
    });

-    hf::message_injection.set(r, [](std::unique_ptr<request> req) {
-        sstring injection = req->param["injection"];
-        auto& errinj = utils::get_local_injector();
-        return errinj.receive_message_on_all(injection).then([] {
-            return make_ready_future<json::json_return_type>(json::json_void());
-        });
-    });
 }

 } // namespace api
--- a/api/failure_detector.cc
+++ b/api/failure_detector.cc
@@ -19,25 +19,24 @@ namespace fd = httpd::failure_detector_json;
 void set_failure_detector(http_context& ctx, routes& r, gms::gossiper& g) {
    fd::get_all_endpoint_states.set(r, [&g](std::unique_ptr<request> req) {
        std::vector<fd::endpoint_state> res;
-        res.reserve(g.num_endpoints());
-        g.for_each_endpoint_state([&] (const gms::inet_address& addr, const gms::endpoint_state& eps) {
+        for (auto i : g.get_endpoint_states()) {
            fd::endpoint_state val;
-            val.addrs = fmt::to_string(addr);
-            val.is_alive = g.is_alive(addr);
-            val.generation = eps.get_heart_beat_state().get_generation().value();
-            val.version = eps.get_heart_beat_state().get_heart_beat_version().value();
-            val.update_time = eps.get_update_timestamp().time_since_epoch().count();
-            for (const auto& [as_type, app_state] : eps.get_application_state_map()) {
+            val.addrs = fmt::to_string(i.first);
+            val.is_alive = i.second.is_alive();
+            val.generation = i.second.get_heart_beat_state().get_generation().value();
+            val.version = i.second.get_heart_beat_state().get_heart_beat_version().value();
+            val.update_time = i.second.get_update_timestamp().time_since_epoch().count();
+            for (auto a : i.second.get_application_state_map()) {
                fd::version_value version_val;
                // We return the enum index and not it's name to stay compatible to origin
                // method that the state index are static but the name can be changed.
-                version_val.application_state = static_cast<std::underlying_type<gms::application_state>::type>(as_type);
-                version_val.value = app_state.value();
-                version_val.version = app_state.version().value();
+                version_val.application_state = static_cast<std::underlying_type<gms::application_state>::type>(a.first);
+                version_val.value = a.second.value();
+                version_val.version = a.second.version().value();
                val.application_state.push(version_val);
            }
-            res.emplace_back(std::move(val));
-        });
+            res.push_back(val);
+        }
        return make_ready_future<json::json_return_type>(res);
    });

@@ -57,9 +56,9 @@ void set_failure_detector(http_context& ctx, routes& r, gms::gossiper& g) {

    fd::get_simple_states.set(r, [&g] (std::unique_ptr<request> req) {
        std::map<sstring, sstring> nodes_status;
-        g.for_each_endpoint_state([&] (const gms::inet_address& node, const gms::endpoint_state&) {
-            nodes_status.emplace(node.to_sstring(), g.is_alive(node) ? "UP" : "DOWN");
-        });
+        for (auto& entry : g.get_endpoint_states()) {
+            nodes_status.emplace(entry.first.to_sstring(), entry.second.is_alive() ? "UP" : "DOWN");
+        }
        return make_ready_future<json::json_return_type>(map_to_key_value<fd::mapper>(nodes_status));
    });

@@ -71,7 +70,7 @@ void set_failure_detector(http_context& ctx, routes& r, gms::gossiper& g) {
    });

    fd::get_endpoint_state.set(r, [&g] (std::unique_ptr<request> req) {
-        auto state = g.get_endpoint_state_ptr(gms::inet_address(req->param["addr"]));
+        auto* state = g.get_endpoint_state_for_endpoint_ptr(gms::inet_address(req->param["addr"]));
        if (!state) {
            return make_ready_future<json::json_return_type>(format("unknown endpoint {}", req->param["addr"]));
        }
--- a/api/gossiper.cc
+++ b/api/gossiper.cc
@@ -6,11 +6,8 @@
 * SPDX-License-Identifier: AGPL-3.0-or-later
 */

-#include <seastar/core/coroutine.hh>
-
 #include "gossiper.hh"
 #include "api/api-doc/gossiper.json.hh"
-#include "gms/endpoint_state.hh"
 #include "gms/gossiper.hh"

 namespace api {
@@ -18,9 +15,9 @@ using namespace seastar::httpd;
 using namespace json;

 void set_gossiper(http_context& ctx, routes& r, gms::gossiper& g) {
-    httpd::gossiper_json::get_down_endpoint.set(r, [&g] (std::unique_ptr<request> req) -> future<json::json_return_type> {
-        auto res = co_await g.get_unreachable_members_synchronized();
-        co_return json::json_return_type(container_to_vec(res));
+    httpd::gossiper_json::get_down_endpoint.set(r, [&g] (const_req req) {
+        auto res = g.get_unreachable_members();
+        return container_to_vec(res);
    });


@@ -30,11 +27,9 @@ void set_gossiper(http_context& ctx, routes& r, gms::gossiper& g) {
        });
    });

-    httpd::gossiper_json::get_endpoint_downtime.set(r, [&g] (std::unique_ptr<request> req) -> future<json::json_return_type> {
-        gms::inet_address ep(req->param["addr"]);
-        // synchronize unreachable_members on all shards
-        co_await g.get_unreachable_members_synchronized();
-        co_return g.get_endpoint_downtime(ep);
+    httpd::gossiper_json::get_endpoint_downtime.set(r, [&g] (const_req req) {
+        gms::inet_address ep(req.param["addr"]);
+        return g.get_endpoint_downtime(ep);
    });

    httpd::gossiper_json::get_current_generation_number.set(r, [&g] (std::unique_ptr<http::request> req) {
@@ -64,7 +59,7 @@ void set_gossiper(http_context& ctx, routes& r, gms::gossiper& g) {

    httpd::gossiper_json::force_remove_endpoint.set(r, [&g](std::unique_ptr<http::request> req) {
        gms::inet_address ep(req->param["addr"]);
-        return g.force_remove_endpoint(ep, gms::null_permit_id).then([] {
+        return g.force_remove_endpoint(ep).then([] {
            return make_ready_future<json::json_return_type>(json_void());
        });
    });
--- a/api/hinted_handoff.cc
+++ b/api/hinted_handoff.cc
@@ -13,6 +13,7 @@
 #include "api/api-doc/hinted_handoff.json.hh"

 #include "gms/inet_address.hh"
+#include "gms/gossiper.hh"
 #include "service/storage_proxy.hh"

 namespace api {
@@ -21,33 +22,38 @@ using namespace json;
 using namespace seastar::httpd;
 namespace hh = httpd::hinted_handoff_json;

-void set_hinted_handoff(http_context& ctx, routes& r, sharded<service::storage_proxy>& proxy) {
-    hh::create_hints_sync_point.set(r, [&proxy] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
-        auto parse_hosts_list = [] (sstring arg) {
+void set_hinted_handoff(http_context& ctx, routes& r, gms::gossiper& g) {
+    hh::create_hints_sync_point.set(r, [&ctx, &g] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
+        auto parse_hosts_list = [&g] (sstring arg) {
            std::vector<sstring> hosts_str = split(arg, ",");
            std::vector<gms::inet_address> hosts;
            hosts.reserve(hosts_str.size());

-            for (const auto& host_str : hosts_str) {
-                try {
-                    gms::inet_address host;
-                    host = gms::inet_address(host_str);
-                    hosts.push_back(host);
-                } catch (std::exception& e) {
-                    throw httpd::bad_param_exception(format("Failed to parse host address {}: {}", host_str, e.what()));
+            if (hosts_str.empty()) {
+                // No target_hosts specified means that we should wait for hints for all nodes to be sent
+                const auto members_set = g.get_live_members();
+                std::copy(members_set.begin(), members_set.end(), std::back_inserter(hosts));
+            } else {
+                for (const auto& host_str : hosts_str) {
+                    try {
+                        gms::inet_address host;
+                        host = gms::inet_address(host_str);
+                        hosts.push_back(host);
+                    } catch (std::exception& e) {
+                        throw httpd::bad_param_exception(format("Failed to parse host address {}: {}", host_str, e.what()));
+                    }
                }
            }
-
            return hosts;
        };

        std::vector<gms::inet_address> target_hosts = parse_hosts_list(req->get_query_param("target_hosts"));
-        return proxy.local().create_hint_sync_point(std::move(target_hosts)).then([] (db::hints::sync_point sync_point) {
+        return ctx.sp.local().create_hint_sync_point(std::move(target_hosts)).then([] (db::hints::sync_point sync_point) {
            return json::json_return_type(sync_point.encode());
        });
    });

-    hh::get_hints_sync_point.set(r, [&proxy] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
+    hh::get_hints_sync_point.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
        db::hints::sync_point sync_point;
        const sstring encoded = req->get_query_param("id");
        try {
@@ -81,7 +87,7 @@ void set_hinted_handoff(http_context& ctx, routes& r, sharded<service::storage_p
        using return_type = hh::ns_get_hints_sync_point::get_hints_sync_point_return_type;
        using return_type_wrapper = hh::ns_get_hints_sync_point::return_type_wrapper;

-        return proxy.local().wait_for_hint_sync_point(std::move(sync_point), deadline).then([] {
+        return ctx.sp.local().wait_for_hint_sync_point(std::move(sync_point), deadline).then([] {
            return json::json_return_type(return_type_wrapper(return_type::DONE));
        }).handle_exception_type([] (const timed_out_error&) {
            return json::json_return_type(return_type_wrapper(return_type::IN_PROGRESS));
--- a/api/hinted_handoff.hh
+++ b/api/hinted_handoff.hh
@@ -8,14 +8,17 @@

 #pragma once

-#include <seastar/core/sharded.hh>
 #include "api.hh"

-namespace service { class storage_proxy; }
+namespace gms {
+
+class gossiper;
+
+}

 namespace api {

-void set_hinted_handoff(http_context& ctx, httpd::routes& r, sharded<service::storage_proxy>& p);
+void set_hinted_handoff(http_context& ctx, httpd::routes& r, gms::gossiper& g);
 void unset_hinted_handoff(http_context& ctx, httpd::routes& r);

 }
--- a/api/storage_proxy.cc
+++ b/api/storage_proxy.cc
@@ -10,6 +10,7 @@
 #include "service/storage_proxy.hh"
 #include "api/api-doc/storage_proxy.json.hh"
 #include "api/api-doc/utils.json.hh"
+#include "service/storage_service.hh"
 #include "db/config.hh"
 #include "utils/histogram.hh"
 #include "replica/database.hh"
@@ -115,17 +116,17 @@ utils_json::estimated_histogram time_to_json_histogram(const utils::time_estimat
    return res;
 }

-static future<json::json_return_type>  sum_estimated_histogram(sharded<service::storage_proxy>& proxy, utils::timed_rate_moving_average_summary_and_histogram service::storage_proxy_stats::stats::*f) {
-    return two_dimensional_map_reduce(proxy, [f] (service::storage_proxy_stats::stats& stats) {
+static future<json::json_return_type>  sum_estimated_histogram(http_context& ctx, utils::timed_rate_moving_average_summary_and_histogram service::storage_proxy_stats::stats::*f) {
+    return two_dimensional_map_reduce(ctx.sp, [f] (service::storage_proxy_stats::stats& stats) {
        return (stats.*f).histogram();
    }, utils::time_estimated_histogram_merge, utils::time_estimated_histogram()).then([](const utils::time_estimated_histogram& val) {
        return make_ready_future<json::json_return_type>(time_to_json_histogram(val));
    });
 }

-static future<json::json_return_type>  sum_estimated_histogram(sharded<service::storage_proxy>& proxy, utils::estimated_histogram service::storage_proxy_stats::stats::*f) {
+static future<json::json_return_type>  sum_estimated_histogram(http_context& ctx, utils::estimated_histogram service::storage_proxy_stats::stats::*f) {

-    return two_dimensional_map_reduce(proxy, f, utils::estimated_histogram_merge,
+    return two_dimensional_map_reduce(ctx.sp, f, utils::estimated_histogram_merge,
            utils::estimated_histogram()).then([](const utils::estimated_histogram& val) {
        utils_json::estimated_histogram res;
        res = val;
@@ -133,8 +134,8 @@ static future<json::json_return_type>  sum_estimated_histogram(sharded<service::
    });
 }

-static future<json::json_return_type>  total_latency(sharded<service::storage_proxy>& proxy, utils::timed_rate_moving_average_summary_and_histogram service::storage_proxy_stats::stats::*f) {
-    return two_dimensional_map_reduce(proxy, [f] (service::storage_proxy_stats::stats& stats) {
+static future<json::json_return_type>  total_latency(http_context& ctx, utils::timed_rate_moving_average_summary_and_histogram service::storage_proxy_stats::stats::*f) {
+    return two_dimensional_map_reduce(ctx.sp, [f] (service::storage_proxy_stats::stats& stats) {
            return (stats.*f).hist.mean * (stats.*f).hist.count;
        }, std::plus<double>(), 0.0).then([](double val) {
        int64_t res = val;
@@ -183,43 +184,43 @@ sum_timer_stats_storage_proxy(distributed<proxy>& d,
    });
 }

-void set_storage_proxy(http_context& ctx, routes& r, sharded<service::storage_proxy>& proxy) {
+void set_storage_proxy(http_context& ctx, routes& r, sharded<service::storage_service>& ss) {
    sp::get_total_hints.set(r, [](std::unique_ptr<http::request> req)  {
        //TBD
        unimplemented();
        return make_ready_future<json::json_return_type>(0);
    });

-    sp::get_hinted_handoff_enabled.set(r, [&proxy](std::unique_ptr<http::request> req)  {
-        const auto& filter = proxy.local().get_hints_host_filter();
+    sp::get_hinted_handoff_enabled.set(r, [&ctx](std::unique_ptr<http::request> req)  {
+        const auto& filter = ctx.sp.local().get_hints_host_filter();
        return make_ready_future<json::json_return_type>(!filter.is_disabled_for_all());
    });

-    sp::set_hinted_handoff_enabled.set(r, [&proxy](std::unique_ptr<http::request> req)  {
+    sp::set_hinted_handoff_enabled.set(r, [&ctx](std::unique_ptr<http::request> req)  {
        auto enable = req->get_query_param("enable");
        auto filter = (enable == "true" || enable == "1")
                ? db::hints::host_filter(db::hints::host_filter::enabled_for_all_tag {})
                : db::hints::host_filter(db::hints::host_filter::disabled_for_all_tag {});
-        return proxy.invoke_on_all([filter = std::move(filter)] (service::storage_proxy& sp) {
+        return ctx.sp.invoke_on_all([filter = std::move(filter)] (service::storage_proxy& sp) {
            return sp.change_hints_host_filter(filter);
        }).then([] {
            return make_ready_future<json::json_return_type>(json_void());
        });
    });

-    sp::get_hinted_handoff_enabled_by_dc.set(r, [&proxy](std::unique_ptr<http::request> req)  {
+    sp::get_hinted_handoff_enabled_by_dc.set(r, [&ctx](std::unique_ptr<http::request> req)  {
        std::vector<sstring> res;
-        const auto& filter = proxy.local().get_hints_host_filter();
+        const auto& filter = ctx.sp.local().get_hints_host_filter();
        const auto& dcs = filter.get_dcs();
        res.reserve(res.size());
        std::copy(dcs.begin(), dcs.end(), std::back_inserter(res));
        return make_ready_future<json::json_return_type>(res);
    });

-    sp::set_hinted_handoff_enabled_by_dc_list.set(r, [&proxy](std::unique_ptr<http::request> req)  {
+    sp::set_hinted_handoff_enabled_by_dc_list.set(r, [&ctx](std::unique_ptr<http::request> req)  {
        auto dcs = req->get_query_param("dcs");
        auto filter = db::hints::host_filter::parse_from_dc_list(std::move(dcs));
-        return proxy.invoke_on_all([filter = std::move(filter)] (service::storage_proxy& sp) {
+        return ctx.sp.invoke_on_all([filter = std::move(filter)] (service::storage_proxy& sp) {
            return sp.change_hints_host_filter(filter);
        }).then([] {
            return make_ready_future<json::json_return_type>(json_void());
@@ -341,131 +342,144 @@ void set_storage_proxy(http_context& ctx, routes& r, sharded<service::storage_pr
        return make_ready_future<json::json_return_type>(json_void());
    });

-    sp::get_read_repair_attempted.set(r, [&proxy](std::unique_ptr<http::request> req)  {
-        return sum_stats_storage_proxy(proxy, &service::storage_proxy_stats::stats::read_repair_attempts);
+    sp::get_read_repair_attempted.set(r, [&ctx](std::unique_ptr<http::request> req)  {
+        return sum_stats_storage_proxy(ctx.sp, &service::storage_proxy_stats::stats::read_repair_attempts);
    });

-    sp::get_read_repair_repaired_blocking.set(r, [&proxy](std::unique_ptr<http::request> req)  {
-        return sum_stats_storage_proxy(proxy, &service::storage_proxy_stats::stats::read_repair_repaired_blocking);
+    sp::get_read_repair_repaired_blocking.set(r, [&ctx](std::unique_ptr<http::request> req)  {
+        return sum_stats_storage_proxy(ctx.sp, &service::storage_proxy_stats::stats::read_repair_repaired_blocking);
    });

-    sp::get_read_repair_repaired_background.set(r, [&proxy](std::unique_ptr<http::request> req)  {
-        return sum_stats_storage_proxy(proxy, &service::storage_proxy_stats::stats::read_repair_repaired_background);
+    sp::get_read_repair_repaired_background.set(r, [&ctx](std::unique_ptr<http::request> req)  {
+        return sum_stats_storage_proxy(ctx.sp, &service::storage_proxy_stats::stats::read_repair_repaired_background);
    });

-    sp::get_cas_read_timeouts.set(r, [&proxy](std::unique_ptr<http::request> req) {
-        return sum_timed_rate_as_long(proxy, &proxy::stats::cas_read_timeouts);
+    sp::get_schema_versions.set(r, [&ss](std::unique_ptr<http::request> req)  {
+        return ss.local().describe_schema_versions().then([] (auto result) {
+            std::vector<sp::mapper_list> res;
+            for (auto e : result) {
+                sp::mapper_list entry;
+                entry.key = std::move(e.first);
+                entry.value = std::move(e.second);
+                res.emplace_back(std::move(entry));
+            }
+            return make_ready_future<json::json_return_type>(std::move(res));
+        });
    });

-    sp::get_cas_read_unavailables.set(r, [&proxy](std::unique_ptr<http::request> req) {
-        return sum_timed_rate_as_long(proxy, &proxy::stats::cas_read_unavailables);
+    sp::get_cas_read_timeouts.set(r, [&ctx](std::unique_ptr<http::request> req) {
+        return sum_timed_rate_as_long(ctx.sp, &proxy::stats::cas_read_timeouts);
    });

-    sp::get_cas_write_timeouts.set(r, [&proxy](std::unique_ptr<http::request> req) {
-        return sum_timed_rate_as_long(proxy, &proxy::stats::cas_write_timeouts);
+    sp::get_cas_read_unavailables.set(r, [&ctx](std::unique_ptr<http::request> req) {
+        return sum_timed_rate_as_long(ctx.sp, &proxy::stats::cas_read_unavailables);
    });

-    sp::get_cas_write_unavailables.set(r, [&proxy](std::unique_ptr<http::request> req) {
-        return sum_timed_rate_as_long(proxy, &proxy::stats::cas_write_unavailables);
+    sp::get_cas_write_timeouts.set(r, [&ctx](std::unique_ptr<http::request> req) {
+        return sum_timed_rate_as_long(ctx.sp, &proxy::stats::cas_write_timeouts);
    });

-    sp::get_cas_write_metrics_unfinished_commit.set(r, [&proxy](std::unique_ptr<http::request> req) {
-        return sum_stats(proxy, &proxy::stats::cas_write_unfinished_commit);
+    sp::get_cas_write_unavailables.set(r, [&ctx](std::unique_ptr<http::request> req) {
+        return sum_timed_rate_as_long(ctx.sp, &proxy::stats::cas_write_unavailables);
    });

-    sp::get_cas_write_metrics_contention.set(r, [&proxy](std::unique_ptr<http::request> req) {
-        return sum_estimated_histogram(proxy, &proxy::stats::cas_write_contention);
+    sp::get_cas_write_metrics_unfinished_commit.set(r, [&ctx](std::unique_ptr<http::request> req) {
+        return sum_stats(ctx.sp, &proxy::stats::cas_write_unfinished_commit);
    });

-    sp::get_cas_write_metrics_condition_not_met.set(r, [&proxy](std::unique_ptr<http::request> req) {
-        return sum_stats(proxy, &proxy::stats::cas_write_condition_not_met);
+    sp::get_cas_write_metrics_contention.set(r, [&ctx](std::unique_ptr<http::request> req) {
+        return sum_estimated_histogram(ctx, &proxy::stats::cas_write_contention);
    });

-    sp::get_cas_write_metrics_failed_read_round_optimization.set(r, [&proxy](std::unique_ptr<http::request> req) {
-        return sum_stats(proxy, &proxy::stats::cas_failed_read_round_optimization);
+    sp::get_cas_write_metrics_condition_not_met.set(r, [&ctx](std::unique_ptr<http::request> req) {
+        return sum_stats(ctx.sp, &proxy::stats::cas_write_condition_not_met);
    });

-    sp::get_cas_read_metrics_unfinished_commit.set(r, [&proxy](std::unique_ptr<http::request> req) {
-        return sum_stats(proxy, &proxy::stats::cas_read_unfinished_commit);
+    sp::get_cas_write_metrics_failed_read_round_optimization.set(r, [&ctx](std::unique_ptr<http::request> req) {
+        return sum_stats(ctx.sp, &proxy::stats::cas_failed_read_round_optimization);
    });

-    sp::get_cas_read_metrics_contention.set(r, [&proxy](std::unique_ptr<http::request> req) {
-        return sum_estimated_histogram(proxy, &proxy::stats::cas_read_contention);
+    sp::get_cas_read_metrics_unfinished_commit.set(r, [&ctx](std::unique_ptr<http::request> req) {
+        return sum_stats(ctx.sp, &proxy::stats::cas_read_unfinished_commit);
    });

-    sp::get_read_metrics_timeouts.set(r, [&proxy](std::unique_ptr<http::request> req) {
-        return sum_timed_rate_as_long(proxy, &service::storage_proxy_stats::stats::read_timeouts);
+    sp::get_cas_read_metrics_contention.set(r, [&ctx](std::unique_ptr<http::request> req) {
+        return sum_estimated_histogram(ctx, &proxy::stats::cas_read_contention);
    });

-    sp::get_read_metrics_unavailables.set(r, [&proxy](std::unique_ptr<http::request> req) {
-        return sum_timed_rate_as_long(proxy, &service::storage_proxy_stats::stats::read_unavailables);
+    sp::get_read_metrics_timeouts.set(r, [&ctx](std::unique_ptr<http::request> req) {
+        return sum_timed_rate_as_long(ctx.sp, &service::storage_proxy_stats::stats::read_timeouts);
    });

-    sp::get_range_metrics_timeouts.set(r, [&proxy](std::unique_ptr<http::request> req) {
-        return sum_timed_rate_as_long(proxy, &service::storage_proxy_stats::stats::range_slice_timeouts);
+    sp::get_read_metrics_unavailables.set(r, [&ctx](std::unique_ptr<http::request> req) {
+        return sum_timed_rate_as_long(ctx.sp, &service::storage_proxy_stats::stats::read_unavailables);
    });

-    sp::get_range_metrics_unavailables.set(r, [&proxy](std::unique_ptr<http::request> req) {
-        return sum_timed_rate_as_long(proxy, &service::storage_proxy_stats::stats::range_slice_unavailables);
+    sp::get_range_metrics_timeouts.set(r, [&ctx](std::unique_ptr<http::request> req) {
+        return sum_timed_rate_as_long(ctx.sp, &service::storage_proxy_stats::stats::range_slice_timeouts);
    });

-    sp::get_write_metrics_timeouts.set(r, [&proxy](std::unique_ptr<http::request> req) {
-        return sum_timed_rate_as_long(proxy, &service::storage_proxy_stats::stats::write_timeouts);
+    sp::get_range_metrics_unavailables.set(r, [&ctx](std::unique_ptr<http::request> req) {
+        return sum_timed_rate_as_long(ctx.sp, &service::storage_proxy_stats::stats::range_slice_unavailables);
    });

-    sp::get_write_metrics_unavailables.set(r, [&proxy](std::unique_ptr<http::request> req) {
-        return sum_timed_rate_as_long(proxy, &service::storage_proxy_stats::stats::write_unavailables);
+    sp::get_write_metrics_timeouts.set(r, [&ctx](std::unique_ptr<http::request> req) {
+        return sum_timed_rate_as_long(ctx.sp, &service::storage_proxy_stats::stats::write_timeouts);
    });

-    sp::get_read_metrics_timeouts_rates.set(r, [&proxy](std::unique_ptr<http::request> req) {
-        return sum_timed_rate_as_obj(proxy, &service::storage_proxy_stats::stats::read_timeouts);
+    sp::get_write_metrics_unavailables.set(r, [&ctx](std::unique_ptr<http::request> req) {
+        return sum_timed_rate_as_long(ctx.sp, &service::storage_proxy_stats::stats::write_unavailables);
    });

-    sp::get_read_metrics_unavailables_rates.set(r, [&proxy](std::unique_ptr<http::request> req) {
-        return sum_timed_rate_as_obj(proxy, &service::storage_proxy_stats::stats::read_unavailables);
+    sp::get_read_metrics_timeouts_rates.set(r, [&ctx](std::unique_ptr<http::request> req) {
+        return sum_timed_rate_as_obj(ctx.sp, &service::storage_proxy_stats::stats::read_timeouts);
    });

-    sp::get_range_metrics_timeouts_rates.set(r, [&proxy](std::unique_ptr<http::request> req) {
-        return sum_timed_rate_as_obj(proxy, &service::storage_proxy_stats::stats::range_slice_timeouts);
+    sp::get_read_metrics_unavailables_rates.set(r, [&ctx](std::unique_ptr<http::request> req) {
+        return sum_timed_rate_as_obj(ctx.sp, &service::storage_proxy_stats::stats::read_unavailables);
    });

-    sp::get_range_metrics_unavailables_rates.set(r, [&proxy](std::unique_ptr<http::request> req) {
-        return sum_timed_rate_as_obj(proxy, &service::storage_proxy_stats::stats::range_slice_unavailables);
+    sp::get_range_metrics_timeouts_rates.set(r, [&ctx](std::unique_ptr<http::request> req) {
+        return sum_timed_rate_as_obj(ctx.sp, &service::storage_proxy_stats::stats::range_slice_timeouts);
    });

-    sp::get_write_metrics_timeouts_rates.set(r, [&proxy](std::unique_ptr<http::request> req) {
-        return sum_timed_rate_as_obj(proxy, &service::storage_proxy_stats::stats::write_timeouts);
+    sp::get_range_metrics_unavailables_rates.set(r, [&ctx](std::unique_ptr<http::request> req) {
+        return sum_timed_rate_as_obj(ctx.sp, &service::storage_proxy_stats::stats::range_slice_unavailables);
    });

-    sp::get_write_metrics_unavailables_rates.set(r, [&proxy](std::unique_ptr<http::request> req) {
-        return sum_timed_rate_as_obj(proxy, &service::storage_proxy_stats::stats::write_unavailables);
+    sp::get_write_metrics_timeouts_rates.set(r, [&ctx](std::unique_ptr<http::request> req) {
+        return sum_timed_rate_as_obj(ctx.sp, &service::storage_proxy_stats::stats::write_timeouts);
    });

-    sp::get_range_metrics_latency_histogram_depricated.set(r, [&proxy](std::unique_ptr<http::request> req) {
-        return sum_histogram_stats_storage_proxy(proxy, &service::storage_proxy_stats::stats::range);
+    sp::get_write_metrics_unavailables_rates.set(r, [&ctx](std::unique_ptr<http::request> req) {
+        return sum_timed_rate_as_obj(ctx.sp, &service::storage_proxy_stats::stats::write_unavailables);
    });

-    sp::get_write_metrics_latency_histogram_depricated.set(r, [&proxy](std::unique_ptr<http::request> req) {
-        return sum_histogram_stats_storage_proxy(proxy, &service::storage_proxy_stats::stats::write);
+    sp::get_range_metrics_latency_histogram_depricated.set(r, [&ctx](std::unique_ptr<http::request> req) {
+        return sum_histogram_stats_storage_proxy(ctx.sp, &service::storage_proxy_stats::stats::range);
    });

-    sp::get_read_metrics_latency_histogram_depricated.set(r, [&proxy](std::unique_ptr<http::request> req) {
-        return sum_histogram_stats_storage_proxy(proxy, &service::storage_proxy_stats::stats::read);
+    sp::get_write_metrics_latency_histogram_depricated.set(r, [&ctx](std::unique_ptr<http::request> req) {
+        return sum_histogram_stats_storage_proxy(ctx.sp, &service::storage_proxy_stats::stats::write);
    });

-    sp::get_range_metrics_latency_histogram.set(r, [&proxy](std::unique_ptr<http::request> req) {
-        return sum_timer_stats_storage_proxy(proxy, &service::storage_proxy_stats::stats::range);
+    sp::get_read_metrics_latency_histogram_depricated.set(r, [&ctx](std::unique_ptr<http::request> req) {
+        return sum_histogram_stats_storage_proxy(ctx.sp, &service::storage_proxy_stats::stats::read);
    });

-    sp::get_write_metrics_latency_histogram.set(r, [&proxy](std::unique_ptr<http::request> req) {
-        return sum_timer_stats_storage_proxy(proxy, &service::storage_proxy_stats::stats::write);
-    });
-    sp::get_cas_write_metrics_latency_histogram.set(r, [&proxy](std::unique_ptr<http::request> req) {
-        return sum_timer_stats(proxy, &proxy::stats::cas_write);
+    sp::get_range_metrics_latency_histogram.set(r, [&ctx](std::unique_ptr<http::request> req) {
+        return sum_timer_stats_storage_proxy(ctx.sp, &service::storage_proxy_stats::stats::range);
    });

-    sp::get_cas_read_metrics_latency_histogram.set(r, [&proxy](std::unique_ptr<http::request> req) {
-        return sum_timer_stats(proxy, &proxy::stats::cas_read);
+    sp::get_write_metrics_latency_histogram.set(r, [&ctx](std::unique_ptr<http::request> req) {
+        return sum_timer_stats_storage_proxy(ctx.sp, &service::storage_proxy_stats::stats::write);
+    });
+    sp::get_cas_write_metrics_latency_histogram.set(r, [&ctx](std::unique_ptr<http::request> req) {
+        return sum_timer_stats(ctx.sp, &proxy::stats::cas_write);
+    });
+
+    sp::get_cas_read_metrics_latency_histogram.set(r, [&ctx](std::unique_ptr<http::request> req) {
+        return sum_timer_stats(ctx.sp, &proxy::stats::cas_read);
    });

    sp::get_view_write_metrics_latency_histogram.set(r, [](std::unique_ptr<http::request> req) {
@@ -476,31 +490,31 @@ void set_storage_proxy(http_context& ctx, routes& r, sharded<service::storage_pr
        return make_ready_future<json::json_return_type>(get_empty_moving_average());
    });

-    sp::get_read_metrics_latency_histogram.set(r, [&proxy](std::unique_ptr<http::request> req) {
-        return sum_timer_stats_storage_proxy(proxy, &service::storage_proxy_stats::stats::read);
+    sp::get_read_metrics_latency_histogram.set(r, [&ctx](std::unique_ptr<http::request> req) {
+        return sum_timer_stats_storage_proxy(ctx.sp, &service::storage_proxy_stats::stats::read);
    });

-    sp::get_read_estimated_histogram.set(r, [&proxy](std::unique_ptr<http::request> req) {
-        return sum_estimated_histogram(proxy, &service::storage_proxy_stats::stats::read);
+    sp::get_read_estimated_histogram.set(r, [&ctx](std::unique_ptr<http::request> req) {
+        return sum_estimated_histogram(ctx, &service::storage_proxy_stats::stats::read);
    });

-    sp::get_read_latency.set(r, [&proxy](std::unique_ptr<http::request> req) {
-        return total_latency(proxy, &service::storage_proxy_stats::stats::read);
+    sp::get_read_latency.set(r, [&ctx](std::unique_ptr<http::request> req) {
+        return total_latency(ctx, &service::storage_proxy_stats::stats::read);
    });
-    sp::get_write_estimated_histogram.set(r, [&proxy](std::unique_ptr<http::request> req) {
-        return sum_estimated_histogram(proxy, &service::storage_proxy_stats::stats::write);
+    sp::get_write_estimated_histogram.set(r, [&ctx](std::unique_ptr<http::request> req) {
+        return sum_estimated_histogram(ctx, &service::storage_proxy_stats::stats::write);
    });

-    sp::get_write_latency.set(r, [&proxy](std::unique_ptr<http::request> req) {
-        return total_latency(proxy, &service::storage_proxy_stats::stats::write);
+    sp::get_write_latency.set(r, [&ctx](std::unique_ptr<http::request> req) {
+        return total_latency(ctx, &service::storage_proxy_stats::stats::write);
    });

-    sp::get_range_estimated_histogram.set(r, [&proxy](std::unique_ptr<http::request> req) {
-        return sum_timer_stats_storage_proxy(proxy, &service::storage_proxy_stats::stats::range);
+    sp::get_range_estimated_histogram.set(r, [&ctx](std::unique_ptr<http::request> req) {
+        return sum_timer_stats_storage_proxy(ctx.sp, &service::storage_proxy_stats::stats::range);
    });

-    sp::get_range_latency.set(r, [&proxy](std::unique_ptr<http::request> req) {
-        return total_latency(proxy, &service::storage_proxy_stats::stats::range);
+    sp::get_range_latency.set(r, [&ctx](std::unique_ptr<http::request> req) {
+        return total_latency(ctx, &service::storage_proxy_stats::stats::range);
    });
 }

@@ -533,6 +547,7 @@ void unset_storage_proxy(http_context& ctx, routes& r) {
    sp::get_read_repair_attempted.unset(r);
    sp::get_read_repair_repaired_blocking.unset(r);
    sp::get_read_repair_repaired_background.unset(r);
+    sp::get_schema_versions.unset(r);
    sp::get_cas_read_timeouts.unset(r);
    sp::get_cas_read_unavailables.unset(r);
    sp::get_cas_write_timeouts.unset(r);
--- a/api/storage_proxy.hh
+++ b/api/storage_proxy.hh
@@ -11,11 +11,11 @@
 #include <seastar/core/sharded.hh>
 #include "api.hh"

-namespace service { class storage_proxy; }
+namespace service { class storage_service; }

 namespace api {

-void set_storage_proxy(http_context& ctx, httpd::routes& r, sharded<service::storage_proxy>& proxy);
+void set_storage_proxy(http_context& ctx, httpd::routes& r, sharded<service::storage_service>& ss);
 void unset_storage_proxy(http_context& ctx, httpd::routes& r);

 }
--- a/api/storage_service.cc
+++ b/api/storage_service.cc
@@ -8,7 +8,6 @@

 #include "storage_service.hh"
 #include "api/api-doc/storage_service.json.hh"
-#include "api/api-doc/storage_proxy.json.hh"
 #include "db/config.hh"
 #include "db/schema_tables.hh"
 #include "utils/hash.hh"
@@ -43,6 +42,7 @@
 #include "thrift/controller.hh"
 #include "locator/token_metadata.hh"
 #include "cdc/generation_service.hh"
+#include "service/storage_proxy.hh"
 #include "locator/abstract_replication_strategy.hh"
 #include "sstables_loader.hh"
 #include "db/view/view_builder.hh"
@@ -52,10 +52,22 @@ using namespace std::chrono_literals;

 extern logging::logger apilog;

+namespace std {
+
+std::ostream& operator<<(std::ostream& os, const api::table_info& ti) {
+    fmt::print(os, "table{{name={}, id={}}}", ti.name, ti.id);
+    return os;
+}
+
+} // namespace std
+
 namespace api {

+const locator::token_metadata& http_context::get_token_metadata() {
+        return *shared_token_metadata.local().get();
+}
+
 namespace ss = httpd::storage_service_json;
-namespace sp = httpd::storage_proxy_json;
 using namespace json;

 sstring validate_keyspace(http_context& ctx, sstring ks_name) {
@@ -208,47 +220,32 @@ seastar::future<json::json_return_type> run_toppartitions_query(db::toppartition
    });
 }

-static future<json::json_return_type> set_tables(http_context& ctx, const sstring& keyspace, std::vector<sstring> tables, std::function<future<>(replica::table&)> set) {
+future<json::json_return_type> set_tables_autocompaction(http_context& ctx, const sstring &keyspace, std::vector<sstring> tables, bool enabled) {
    if (tables.empty()) {
        tables = map_keys(ctx.db.local().find_keyspace(keyspace).metadata().get()->cf_meta_data());
    }

-    return do_with(keyspace, std::move(tables), [&ctx, set] (const sstring& keyspace, const std::vector<sstring>& tables) {
-        return ctx.db.invoke_on_all([&keyspace, &tables, set] (replica::database& db) {
-            return parallel_for_each(tables, [&db, &keyspace, set] (const sstring& table) {
-                replica::table& t = db.find_column_family(keyspace, table);
-                return set(t);
-            });
+    apilog.info("set_tables_autocompaction: enabled={} keyspace={} tables={}", enabled, keyspace, tables);
+    return do_with(keyspace, std::move(tables), [&ctx, enabled] (const sstring &keyspace, const std::vector<sstring>& tables) {
+        return ctx.db.invoke_on(0, [&ctx, &keyspace, &tables, enabled] (replica::database& db) {
+            auto g = replica::database::autocompaction_toggle_guard(db);
+            return ctx.db.invoke_on_all([&keyspace, &tables, enabled] (replica::database& db) {
+                return parallel_for_each(tables, [&db, &keyspace, enabled] (const sstring& table) {
+                    replica::column_family& cf = db.find_column_family(keyspace, table);
+                    if (enabled) {
+                        cf.enable_auto_compaction();
+                    } else {
+                        return cf.disable_auto_compaction();
+                    }
+                    return make_ready_future<>();
+                });
+            }).finally([g = std::move(g)] {});
        });
    }).then([] {
        return make_ready_future<json::json_return_type>(json_void());
    });
 }

-future<json::json_return_type> set_tables_autocompaction(http_context& ctx, const sstring &keyspace, std::vector<sstring> tables, bool enabled) {
-    apilog.info("set_tables_autocompaction: enabled={} keyspace={} tables={}", enabled, keyspace, tables);
-
-    return ctx.db.invoke_on(0, [&ctx, keyspace, tables = std::move(tables), enabled] (replica::database& db) {
-        auto g = replica::database::autocompaction_toggle_guard(db);
-        return set_tables(ctx, keyspace, tables, [enabled] (replica::table& cf) {
-            if (enabled) {
-                cf.enable_auto_compaction();
-            } else {
-                return cf.disable_auto_compaction();
-            }
-            return make_ready_future<>();
-        }).finally([g = std::move(g)] {});
-    });
-}
-
-future<json::json_return_type> set_tables_tombstone_gc(http_context& ctx, const sstring &keyspace, std::vector<sstring> tables, bool enabled) {
-    apilog.info("set_tables_tombstone_gc: enabled={} keyspace={} tables={}", enabled, keyspace, tables);
-    return set_tables(ctx, keyspace, std::move(tables), [enabled] (replica::table& t) {
-        t.set_tombstone_gc_enabled(enabled);
-        return make_ready_future<>();
-    });
-}
-
 void set_transport_controller(http_context& ctx, routes& r, cql_transport::controller& ctl) {
    ss::start_native_transport.set(r, [&ctl](std::unique_ptr<http::request> req) {
        return smp::submit_to(0, [&] {
@@ -317,7 +314,7 @@ void set_repair(http_context& ctx, routes& r, sharded<repair_service>& repair) {
    ss::repair_async.set(r, [&ctx, &repair](std::unique_ptr<http::request> req) {
        static std::vector<sstring> options = {"primaryRange", "parallelism", "incremental",
                "jobThreads", "ranges", "columnFamilies", "dataCenters", "hosts", "ignore_nodes", "trace",
-                "startToken", "endToken", "ranges_parallelism"};
+                "startToken", "endToken" };
        std::unordered_map<sstring, sstring> options_map;
        for (auto o : options) {
            auto s = req->get_query_param(o);
@@ -462,21 +459,29 @@ static future<json::json_return_type> describe_ring_as_json(sharded<service::sto
    co_return json::json_return_type(stream_range_as_array(co_await ss.local().describe_ring(keyspace), token_range_endpoints_to_json));
 }

-void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_service>& ss, service::raft_group0_client& group0_client) {
-    ss::local_hostid.set(r, [&ss](std::unique_ptr<http::request> req) {
-        auto id = ss.local().get_token_metadata().get_my_id();
+static std::vector<table_id> get_table_ids(const std::vector<table_info>& table_infos) {
+    std::vector<table_id> table_ids{table_infos.size()};
+    boost::transform(table_infos, table_ids.begin(), [] (const auto& ti) {
+        return ti.id;
+    });
+    return table_ids;
+}
+
+void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_service>& ss, gms::gossiper& g, sharded<cdc::generation_service>& cdc_gs, sharded<db::system_keyspace>& sys_ks) {
+    ss::local_hostid.set(r, [&ctx](std::unique_ptr<http::request> req) {
+        auto id = ctx.db.local().get_config().host_id;
        return make_ready_future<json::json_return_type>(id.to_sstring());
    });

-    ss::get_tokens.set(r, [&ss] (std::unique_ptr<http::request> req) {
-        return make_ready_future<json::json_return_type>(stream_range_as_array(ss.local().get_token_metadata().sorted_tokens(), [](const dht::token& i) {
+    ss::get_tokens.set(r, [&ctx] (std::unique_ptr<http::request> req) {
+        return make_ready_future<json::json_return_type>(stream_range_as_array(ctx.get_token_metadata().sorted_tokens(), [](const dht::token& i) {
           return fmt::to_string(i);
        }));
    });

-    ss::get_node_tokens.set(r, [&ss] (std::unique_ptr<http::request> req) {
+    ss::get_node_tokens.set(r, [&ctx] (std::unique_ptr<http::request> req) {
        gms::inet_address addr(req->param["endpoint"]);
-        return make_ready_future<json::json_return_type>(stream_range_as_array(ss.local().get_token_metadata().get_tokens(addr), [](const dht::token& i) {
+        return make_ready_future<json::json_return_type>(stream_range_as_array(ctx.get_token_metadata().get_tokens(addr), [](const dht::token& i) {
           return fmt::to_string(i);
       }));
    });
@@ -544,8 +549,8 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
        });
    });

-    ss::get_leaving_nodes.set(r, [&ss](const_req req) {
-        return container_to_vec(ss.local().get_token_metadata().get_leaving_endpoints());
+    ss::get_leaving_nodes.set(r, [&ctx](const_req req) {
+        return container_to_vec(ctx.get_token_metadata().get_leaving_endpoints());
    });

    ss::get_moving_nodes.set(r, [](const_req req) {
@@ -553,8 +558,8 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
        return container_to_vec(addr);
    });

-    ss::get_joining_nodes.set(r, [&ss](const_req req) {
-        auto points = ss.local().get_token_metadata().get_bootstrap_tokens();
+    ss::get_joining_nodes.set(r, [&ctx](const_req req) {
+        auto points = ctx.get_token_metadata().get_bootstrap_tokens();
        std::unordered_set<sstring> addr;
        for (auto i: points) {
            addr.insert(fmt::to_string(i.second));
@@ -614,7 +619,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_

    ss::describe_any_ring.set(r, [&ctx, &ss](std::unique_ptr<http::request> req) {
        // Find an arbitrary non-system keyspace.
-        auto keyspaces = ctx.db.local().get_non_local_vnode_based_strategy_keyspaces();
+        auto keyspaces = ctx.db.local().get_non_local_strategy_keyspaces();
        if (keyspaces.empty()) {
            throw std::runtime_error("No keyspace provided and no non system kespace exist");
        }
@@ -626,9 +631,9 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
        return describe_ring_as_json(ss, validate_keyspace(ctx, req->param));
    });

-    ss::get_host_id_map.set(r, [&ss](const_req req) {
+    ss::get_host_id_map.set(r, [&ctx](const_req req) {
        std::vector<ss::mapper> res;
-        return map_to_key_value(ss.local().get_token_metadata().get_endpoint_to_host_id_map_for_reading(), res);
+        return map_to_key_value(ctx.get_token_metadata().get_endpoint_to_host_id_map_for_reading(), res);
    });

    ss::get_load.set(r, [&ctx](std::unique_ptr<http::request> req) {
@@ -648,9 +653,9 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
        });
    });

-    ss::get_current_generation_number.set(r, [&ss](std::unique_ptr<http::request> req) {
+    ss::get_current_generation_number.set(r, [&g](std::unique_ptr<http::request> req) {
        gms::inet_address ep(utils::fb_utilities::get_broadcast_address());
-        return ss.local().gossiper().get_current_generation_number(ep).then([](gms::generation_type res) {
+        return g.get_current_generation_number(ep).then([](gms::generation_type res) {
            return make_ready_future<json::json_return_type>(res.value());
        });
    });
@@ -661,10 +666,11 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
                req.get_query_param("key")));
    });

-    ss::cdc_streams_check_and_repair.set(r, [&ss] (std::unique_ptr<http::request> req) {
-        return ss.invoke_on(0, [] (service::storage_service& ss) {
-            return ss.check_and_repair_cdc_streams();
-        }).then([] {
+    ss::cdc_streams_check_and_repair.set(r, [&cdc_gs] (std::unique_ptr<http::request> req) {
+        if (!cdc_gs.local_is_initialized()) {
+            throw std::runtime_error("get_cdc_generation_service: not initialized yet");
+        }
+        return cdc_gs.local().check_and_repair_cdc_streams().then([] {
            return make_ready_future<json::json_return_type>(json_void());
        });
    });
@@ -676,7 +682,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
        apilog.debug("force_keyspace_compaction: keyspace={} tables={}", keyspace, table_infos);

        auto& compaction_module = db.local().get_compaction_manager().get_task_manager_module();
-        auto task = co_await compaction_module.make_and_start_task<major_keyspace_compaction_task_impl>({}, std::move(keyspace), db, table_infos);
+        auto task = co_await compaction_module.make_and_start_task<major_keyspace_compaction_task_impl>({}, std::move(keyspace), db, get_table_ids(table_infos));
        try {
            co_await task->done();
        } catch (...) {
@@ -699,7 +705,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
        }

        auto& compaction_module = db.local().get_compaction_manager().get_task_manager_module();
-        auto task = co_await compaction_module.make_and_start_task<cleanup_keyspace_compaction_task_impl>({}, std::move(keyspace), db, table_infos);
+        auto task = co_await compaction_module.make_and_start_task<cleanup_keyspace_compaction_task_impl>({}, std::move(keyspace), db, get_table_ids(table_infos));
        try {
            co_await task->done();
        } catch (...) {
@@ -714,7 +720,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
        apilog.info("perform_keyspace_offstrategy_compaction: keyspace={} tables={}", keyspace, table_infos);
        bool res = false;
        auto& compaction_module = ctx.db.local().get_compaction_manager().get_task_manager_module();
-        auto task = co_await compaction_module.make_and_start_task<offstrategy_keyspace_compaction_task_impl>({}, std::move(keyspace), ctx.db, table_infos, res);
+        auto task = co_await compaction_module.make_and_start_task<offstrategy_keyspace_compaction_task_impl>({}, std::move(keyspace), ctx.db, get_table_ids(table_infos), res);
        try {
            co_await task->done();
        } catch (...) {
@@ -732,7 +738,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
        apilog.info("upgrade_sstables: keyspace={} tables={} exclude_current_version={}", keyspace, table_infos, exclude_current_version);

        auto& compaction_module = db.local().get_compaction_manager().get_task_manager_module();
-        auto task = co_await compaction_module.make_and_start_task<upgrade_sstables_compaction_task_impl>({}, std::move(keyspace), db, table_infos, exclude_current_version);
+        auto task = co_await compaction_module.make_and_start_task<upgrade_sstables_compaction_task_impl>({}, std::move(keyspace), db, get_table_ids(table_infos), exclude_current_version);
        try {
            co_await task->done();
        } catch (...) {
@@ -773,16 +779,21 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_

    ss::remove_node.set(r, [&ss](std::unique_ptr<http::request> req) {
        auto host_id = validate_host_id(req->get_query_param("host_id"));
-        std::vector<sstring> ignore_nodes_strs = utils::split_comma_separated_list(req->get_query_param("ignore_nodes"));
+        std::vector<sstring> ignore_nodes_strs= split(req->get_query_param("ignore_nodes"), ",");
        apilog.info("remove_node: host_id={} ignore_nodes={}", host_id, ignore_nodes_strs);
        auto ignore_nodes = std::list<locator::host_id_or_endpoint>();
-        for (const sstring& n : ignore_nodes_strs) {
+        for (std::string n : ignore_nodes_strs) {
            try {
-                auto hoep = locator::host_id_or_endpoint(n);
-                if (!ignore_nodes.empty() && hoep.has_host_id() != ignore_nodes.front().has_host_id()) {
-                    throw std::runtime_error("All nodes should be identified using the same method: either Host IDs or ip addresses.");
+                std::replace(n.begin(), n.end(), '\"', ' ');
+                std::replace(n.begin(), n.end(), '\'', ' ');
+                boost::trim_all(n);
+                if (!n.empty()) {
+                    auto hoep = locator::host_id_or_endpoint(n);
+                    if (!ignore_nodes.empty() && hoep.has_host_id() != ignore_nodes.front().has_host_id()) {
+                        throw std::runtime_error("All nodes should be identified using the same method: either Host IDs or ip addresses.");
+                    }
+                    ignore_nodes.push_back(std::move(hoep));
                }
-                ignore_nodes.push_back(std::move(hoep));
            } catch (...) {
                throw std::runtime_error(format("Failed to parse ignore_nodes parameter: ignore_nodes={}, node={}: {}", ignore_nodes_strs, n, std::current_exception()));
            }
@@ -895,11 +906,11 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
        return make_ready_future<json::json_return_type>(json_void());
    });

-    ss::is_initialized.set(r, [&ss](std::unique_ptr<http::request> req) {
-        return ss.local().get_operation_mode().then([&ss] (auto mode) {
+    ss::is_initialized.set(r, [&ss, &g](std::unique_ptr<http::request> req) {
+        return ss.local().get_operation_mode().then([&g] (auto mode) {
            bool is_initialized = mode >= service::storage_service::mode::STARTING;
            if (mode == service::storage_service::mode::NORMAL) {
-                is_initialized = ss.local().gossiper().is_enabled();
+                is_initialized = g.is_enabled();
            }
            return make_ready_future<json::json_return_type>(is_initialized);
        });
@@ -968,9 +979,10 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
                ks.set_incremental_backups(value);
            }

-            db.get_tables_metadata().for_each_table([&] (table_id, lw_shared_ptr<replica::table> table) {
-                table->set_incremental_backups(value);
-            });
+            for (auto& pair: db.get_column_families()) {
+                auto cf_ptr = pair.second;
+                cf_ptr->set_incremental_backups(value);
+            }
        }).then([] {
            return make_ready_future<json::json_return_type>(json_void());
        });
@@ -1011,11 +1023,13 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
        return make_ready_future<json::json_return_type>(res);
    });

-    ss::reset_local_schema.set(r, [&ss](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
+    ss::reset_local_schema.set(r, [&ctx, &sys_ks](std::unique_ptr<http::request> req) {
        // FIXME: We should truncate schema tables if more than one node in the cluster.
+        auto& fs = ctx.sp.local().features();
        apilog.info("reset_local_schema");
-        co_await ss.local().reload_schema();
-        co_return json_void();
+        return db::schema_tables::recalculate_schema_version(sys_ks, ctx.sp, fs).then([] {
+            return make_ready_future<json::json_return_type>(json_void());
+        });
    });

    ss::set_trace_probability.set(r, [](std::unique_ptr<http::request> req) {
@@ -1097,22 +1111,6 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
        return set_tables_autocompaction(ctx, keyspace, tables, false);
    });

-    ss::enable_tombstone_gc.set(r, [&ctx](std::unique_ptr<http::request> req) {
-        auto keyspace = validate_keyspace(ctx, req->param);
-        auto tables = parse_tables(keyspace, ctx, req->query_parameters, "cf");
-
-        apilog.info("enable_tombstone_gc: keyspace={} tables={}", keyspace, tables);
-        return set_tables_tombstone_gc(ctx, keyspace, tables, true);
-    });
-
-    ss::disable_tombstone_gc.set(r, [&ctx](std::unique_ptr<http::request> req) {
-        auto keyspace = validate_keyspace(ctx, req->param);
-        auto tables = parse_tables(keyspace, ctx, req->query_parameters, "cf");
-
-        apilog.info("disable_tombstone_gc: keyspace={} tables={}", keyspace, tables);
-        return set_tables_tombstone_gc(ctx, keyspace, tables, false);
-    });
-
    ss::deliver_hints.set(r, [](std::unique_ptr<http::request> req) {
        //TBD
        unimplemented();
@@ -1120,12 +1118,12 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
        return make_ready_future<json::json_return_type>(json_void());
      });

-    ss::get_cluster_name.set(r, [&ss](const_req req) {
-        return ss.local().gossiper().get_cluster_name();
+    ss::get_cluster_name.set(r, [&g](const_req req) {
+        return g.get_cluster_name();
    });

-    ss::get_partitioner_name.set(r, [&ss](const_req req) {
-        return ss.local().gossiper().get_partitioner_name();
+    ss::get_partitioner_name.set(r, [&g](const_req req) {
+        return g.get_partitioner_name();
    });

    ss::get_tombstone_warn_threshold.set(r, [](std::unique_ptr<http::request> req) {
@@ -1243,7 +1241,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_

                auto& ext = db.get_config().extensions();

-                db.get_tables_metadata().for_each_table([&] (table_id, lw_shared_ptr<replica::table> t) {
+                for (auto& t : db.get_column_families() | boost::adaptors::map_values) {
                    auto& schema = t->schema();
                    if ((ks.empty() || ks == schema->ks_name()) && (cf.empty() || cf == schema->cf_name())) {
                        // at most Nsstables long
@@ -1259,7 +1257,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
                            ss::sstable info;

                            info.timestamp = t;
-                            info.generation = fmt::to_string(sstable->generation());
+                            info.generation = sstables::generation_value(sstable->generation());
                            info.level = sstable->get_sstable_level();
                            info.size = sstable->bytes_on_disk();
                            info.data_size = sstable->ondisk_data_size();
@@ -1324,7 +1322,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
                        }
                        res.emplace_back(std::move(tst));
                    }
-                });
+                }
                std::sort(res.begin(), res.end(), [](const ss::table_sstables& t1, const ss::table_sstables& t2) {
                    return t1.keyspace() < t2.keyspace() || (t1.keyspace() == t2.keyspace() && t1.table() < t2.table());
                });
@@ -1334,123 +1332,6 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
            });
        });
    });
-
-    ss::reload_raft_topology_state.set(r,
-            [&ss, &group0_client] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
-        co_await ss.invoke_on(0, [&group0_client] (service::storage_service& ss) -> future<> {
-            apilog.info("Waiting for group 0 read/apply mutex before reloading Raft topology state...");
-            auto holder = co_await group0_client.hold_read_apply_mutex();
-            apilog.info("Reloading Raft topology state");
-            // Using topology_transition() instead of topology_state_load(), because the former notifies listeners
-            co_await ss.topology_transition();
-            apilog.info("Reloaded Raft topology state");
-        });
-        co_return json_void();
-    });
-
-    sp::get_schema_versions.set(r, [&ss](std::unique_ptr<http::request> req)  {
-        return ss.local().describe_schema_versions().then([] (auto result) {
-            std::vector<sp::mapper_list> res;
-            for (auto e : result) {
-                sp::mapper_list entry;
-                entry.key = std::move(e.first);
-                entry.value = std::move(e.second);
-                res.emplace_back(std::move(entry));
-            }
-            return make_ready_future<json::json_return_type>(std::move(res));
-        });
-    });
-}
-
-void unset_storage_service(http_context& ctx, routes& r) {
-    ss::local_hostid.unset(r);
-    ss::get_tokens.unset(r);
-    ss::get_node_tokens.unset(r);
-    ss::get_commitlog.unset(r);
-    ss::get_token_endpoint.unset(r);
-    ss::toppartitions_generic.unset(r);
-    ss::get_leaving_nodes.unset(r);
-    ss::get_moving_nodes.unset(r);
-    ss::get_joining_nodes.unset(r);
-    ss::get_release_version.unset(r);
-    ss::get_scylla_release_version.unset(r);
-    ss::get_schema_version.unset(r);
-    ss::get_all_data_file_locations.unset(r);
-    ss::get_saved_caches_location.unset(r);
-    ss::get_range_to_endpoint_map.unset(r);
-    ss::get_pending_range_to_endpoint_map.unset(r);
-    ss::describe_any_ring.unset(r);
-    ss::describe_ring.unset(r);
-    ss::get_host_id_map.unset(r);
-    ss::get_load.unset(r);
-    ss::get_load_map.unset(r);
-    ss::get_current_generation_number.unset(r);
-    ss::get_natural_endpoints.unset(r);
-    ss::cdc_streams_check_and_repair.unset(r);
-    ss::force_keyspace_compaction.unset(r);
-    ss::force_keyspace_cleanup.unset(r);
-    ss::perform_keyspace_offstrategy_compaction.unset(r);
-    ss::upgrade_sstables.unset(r);
-    ss::force_keyspace_flush.unset(r);
-    ss::decommission.unset(r);
-    ss::move.unset(r);
-    ss::remove_node.unset(r);
-    ss::get_removal_status.unset(r);
-    ss::force_remove_completion.unset(r);
-    ss::set_logging_level.unset(r);
-    ss::get_logging_levels.unset(r);
-    ss::get_operation_mode.unset(r);
-    ss::is_starting.unset(r);
-    ss::get_drain_progress.unset(r);
-    ss::drain.unset(r);
-    ss::truncate.unset(r);
-    ss::get_keyspaces.unset(r);
-    ss::stop_gossiping.unset(r);
-    ss::start_gossiping.unset(r);
-    ss::is_gossip_running.unset(r);
-    ss::stop_daemon.unset(r);
-    ss::is_initialized.unset(r);
-    ss::join_ring.unset(r);
-    ss::is_joined.unset(r);
-    ss::set_stream_throughput_mb_per_sec.unset(r);
-    ss::get_stream_throughput_mb_per_sec.unset(r);
-    ss::get_compaction_throughput_mb_per_sec.unset(r);
-    ss::set_compaction_throughput_mb_per_sec.unset(r);
-    ss::is_incremental_backups_enabled.unset(r);
-    ss::set_incremental_backups_enabled.unset(r);
-    ss::rebuild.unset(r);
-    ss::bulk_load.unset(r);
-    ss::bulk_load_async.unset(r);
-    ss::reschedule_failed_deletions.unset(r);
-    ss::sample_key_range.unset(r);
-    ss::reset_local_schema.unset(r);
-    ss::set_trace_probability.unset(r);
-    ss::get_trace_probability.unset(r);
-    ss::get_slow_query_info.unset(r);
-    ss::set_slow_query.unset(r);
-    ss::enable_auto_compaction.unset(r);
-    ss::disable_auto_compaction.unset(r);
-    ss::enable_tombstone_gc.unset(r);
-    ss::disable_tombstone_gc.unset(r);
-    ss::deliver_hints.unset(r);
-    ss::get_cluster_name.unset(r);
-    ss::get_partitioner_name.unset(r);
-    ss::get_tombstone_warn_threshold.unset(r);
-    ss::set_tombstone_warn_threshold.unset(r);
-    ss::get_tombstone_failure_threshold.unset(r);
-    ss::set_tombstone_failure_threshold.unset(r);
-    ss::get_batch_size_failure_threshold.unset(r);
-    ss::set_batch_size_failure_threshold.unset(r);
-    ss::set_hinted_handoff_throttle_in_kb.unset(r);
-    ss::get_metrics_load.unset(r);
-    ss::get_exceptions.unset(r);
-    ss::get_total_hints_in_progress.unset(r);
-    ss::get_total_hints.unset(r);
-    ss::get_ownership.unset(r);
-    ss::get_effective_ownership.unset(r);
-    ss::sstable_info.unset(r);
-    ss::reload_raft_topology_state.unset(r);
-    sp::get_schema_versions.unset(r);
 }

 enum class scrub_status {
@@ -1613,12 +1494,27 @@ void set_snapshot(http_context& ctx, routes& r, sharded<db::snapshot_ctl>& snap_
            throw httpd::bad_param_exception(fmt::format("Unknown argument for 'quarantine_mode' parameter: {}", quarantine_mode_str));
        }

-        sstables::compaction_stats stats;
-        auto& compaction_module = db.local().get_compaction_manager().get_task_manager_module();
-        auto task = co_await compaction_module.make_and_start_task<scrub_sstables_compaction_task_impl>({}, std::move(keyspace), db, column_families, opts, stats);
+        const auto& reduce_compaction_stats = [] (const compaction_manager::compaction_stats_opt& lhs, const compaction_manager::compaction_stats_opt& rhs) {
+            sstables::compaction_stats stats{};
+            stats += lhs.value();
+            stats += rhs.value();
+            return stats;
+        };
+
        try {
-            co_await task->done();
-            if (stats.validation_errors) {
+            auto opt_stats = co_await db.map_reduce0([&] (replica::database& db) {
+                return map_reduce(column_families, [&] (sstring cfname) -> future<std::optional<sstables::compaction_stats>> {
+                    auto& cm = db.get_compaction_manager();
+                    auto& cf = db.find_column_family(keyspace, cfname);
+                    sstables::compaction_stats stats{};
+                    co_await cf.parallel_foreach_table_state([&] (compaction::table_state& ts) mutable -> future<> {
+                        auto r = co_await cm.perform_sstable_scrub(ts, opts);
+                        stats += r.value_or(sstables::compaction_stats{});
+                    });
+                    co_return stats;
+                }, std::make_optional(sstables::compaction_stats{}), reduce_compaction_stats);
+            }, std::make_optional(sstables::compaction_stats{}), reduce_compaction_stats);
+            if (opt_stats && opt_stats->validation_errors) {
                co_return json::json_return_type(static_cast<int>(scrub_status::validation_errors));
            }
        } catch (const sstables::compaction_aborted_exception&) {
--- a/api/storage_service.hh
+++ b/api/storage_service.hh
@@ -25,6 +25,7 @@ class system_keyspace;
 }
 namespace netw { class messaging_service; }
 class repair_service;
+namespace cdc { class generation_service; }
 class sstables_loader;

 namespace gms {
@@ -50,6 +51,11 @@ sstring validate_keyspace(http_context& ctx, const httpd::parameters& param);
 // If the parameter is found and empty, returns a list of all table names in the keyspace.
 std::vector<sstring> parse_tables(const sstring& ks_name, http_context& ctx, const std::unordered_map<sstring, sstring>& query_params, sstring param_name);

+struct table_info {
+    sstring name;
+    table_id id;
+};
+
 // splits a request parameter assumed to hold a comma-separated list of table names
 // verify that the tables are found, otherwise a bad_param_exception exception is thrown
 // containing the description of the respective no_such_column_family error.
@@ -57,8 +63,7 @@ std::vector<sstring> parse_tables(const sstring& ks_name, http_context& ctx, con
 // if the parameter is not found or is empty, returns a list of all table infos in the keyspace.
 std::vector<table_info> parse_table_infos(const sstring& ks_name, http_context& ctx, const std::unordered_map<sstring, sstring>& query_params, sstring param_name);

-void set_storage_service(http_context& ctx, httpd::routes& r, sharded<service::storage_service>& ss, service::raft_group0_client&);
-void unset_storage_service(http_context& ctx, httpd::routes& r);
+void set_storage_service(http_context& ctx, httpd::routes& r, sharded<service::storage_service>& ss, gms::gossiper& g, sharded<cdc::generation_service>& cdc_gs, sharded<db::system_keyspace>& sys_ks);
 void set_sstables_loader(http_context& ctx, httpd::routes& r, sharded<sstables_loader>& sst_loader);
 void unset_sstables_loader(http_context& ctx, httpd::routes& r);
 void set_view_builder(http_context& ctx, httpd::routes& r, sharded<db::view::view_builder>& vb);
@@ -74,3 +79,9 @@ void unset_snapshot(http_context& ctx, httpd::routes& r);
 seastar::future<json::json_return_type> run_toppartitions_query(db::toppartitions_query& q, http_context &ctx, bool legacy_request = false);

 } // namespace api
+
+namespace std {
+
+std::ostream& operator<<(std::ostream& os, const api::table_info& ti);
+
+} // namespace std
--- a/api/system.cc
+++ b/api/system.cc
@@ -7,18 +7,10 @@
 */

 #include "api/api-doc/system.json.hh"
-#include "api/api-doc/metrics.json.hh"
-
 #include "api/api.hh"

 #include <seastar/core/reactor.hh>
-#include <seastar/core/metrics_api.hh>
-#include <seastar/core/relabel_config.hh>
 #include <seastar/http/exception.hh>
-#include <seastar/util/short_streams.hh>
-#include <seastar/http/short_streams.hh>
-#include "utils/rjson.hh"
-
 #include "log.hh"
 #include "replica/database.hh"

@@ -28,77 +20,8 @@ namespace api {
 using namespace seastar::httpd;

 namespace hs = httpd::system_json;
-namespace hm = httpd::metrics_json;

 void set_system(http_context& ctx, routes& r) {
-    hm::get_metrics_config.set(r, [](const_req req) {
-        std::vector<hm::metrics_config> res;
-        res.resize(seastar::metrics::get_relabel_configs().size());
-        size_t i = 0;
-        for (auto&& r : seastar::metrics::get_relabel_configs()) {
-            res[i].action = r.action;
-            res[i].target_label = r.target_label;
-            res[i].replacement = r.replacement;
-            res[i].separator = r.separator;
-            res[i].source_labels = r.source_labels;
-            res[i].regex = r.expr.str();
-            i++;
-        }
-        return res;
-    });
-
-    hm::set_metrics_config.set(r, [](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
-        rapidjson::Document doc;
-        doc.Parse(req->content.c_str());
-        if (!doc.IsArray()) {
-            throw bad_param_exception("Expected a json array");
-        }
-        std::vector<seastar::metrics::relabel_config> relabels;
-        relabels.resize(doc.Size());
-        for (rapidjson::SizeType i = 0; i < doc.Size(); i++) {
-            const auto& element = doc[i];
-            if (element.HasMember("source_labels")) {
-                std::vector<std::string> source_labels;
-                source_labels.resize(element["source_labels"].Size());
-
-                for (size_t j = 0; j < element["source_labels"].Size(); j++) {
-                    source_labels[j] = element["source_labels"][j].GetString();
-                }
-                relabels[i].source_labels = source_labels;
-            }
-            if (element.HasMember("action")) {
-                relabels[i].action = seastar::metrics::relabel_config_action(element["action"].GetString());
-            }
-            if (element.HasMember("replacement")) {
-                relabels[i].replacement = element["replacement"].GetString();
-            }
-            if (element.HasMember("separator")) {
-                relabels[i].separator = element["separator"].GetString();
-            }
-            if (element.HasMember("target_label")) {
-                relabels[i].target_label = element["target_label"].GetString();
-            }
-            if (element.HasMember("regex")) {
-                relabels[i].expr = element["regex"].GetString();
-            }
-        }
-        return do_with(std::move(relabels), false, [](const std::vector<seastar::metrics::relabel_config>& relabels, bool& failed) {
-            return smp::invoke_on_all([&relabels, &failed] {
-                return metrics::set_relabel_configs(relabels).then([&failed](const metrics::metric_relabeling_result& result) {
-                    if (result.metrics_relabeled_due_to_collision > 0) {
-                        failed = true;
-                    }
-                    return;
-                });
-            }).then([&failed](){
-                if (failed) {
-                    throw bad_param_exception("conflicts found during relabeling");
-                }
-                return make_ready_future<json::json_return_type>(seastar::json::json_void());
-            });
-        });
-    });
-
    hs::get_system_uptime.set(r, [](const_req req) {
        return std::chrono::duration_cast<std::chrono::milliseconds>(engine().uptime()).count();
    });
--- a/api/task_manager.cc
+++ b/api/task_manager.cc
@@ -44,7 +44,6 @@ struct task_stats {
        : task_id(task->id().to_sstring())
        , state(task->get_status().state)
        , type(task->type())
-        , scope(task->get_status().scope)
        , keyspace(task->get_status().keyspace)
        , table(task->get_status().table)
        , entity(task->get_status().entity)
@@ -54,7 +53,6 @@ struct task_stats {
    sstring task_id;
    tasks::task_manager::task_state state;
    std::string type;
-    std::string scope;
    std::string keyspace;
    std::string table;
    std::string entity;
@@ -71,7 +69,6 @@ tm::task_status make_status(full_task_status status) {
    tm::task_status res{};
    res.id = status.task_status.id.to_sstring();
    res.type = status.type;
-    res.scope = status.task_status.scope;
    res.state = status.task_status.state;
    res.is_abortable = bool(status.abortable);
    res.start_time = st;
@@ -111,23 +108,18 @@ future<full_task_status> retrieve_status(const tasks::task_manager::foreign_task
    co_return s;
 }

-void set_task_manager(http_context& ctx, routes& r, sharded<tasks::task_manager>& tm, db::config& cfg) {
-    tm::get_modules.set(r, [&tm] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
-        std::vector<std::string> v = boost::copy_range<std::vector<std::string>>(tm.local().get_modules() | boost::adaptors::map_keys);
+void set_task_manager(http_context& ctx, routes& r, db::config& cfg) {
+    tm::get_modules.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
+        std::vector<std::string> v = boost::copy_range<std::vector<std::string>>(ctx.tm.local().get_modules() | boost::adaptors::map_keys);
        co_return v;
    });

-    tm::get_tasks.set(r, [&tm] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
+    tm::get_tasks.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
        using chunked_stats = utils::chunked_vector<task_stats>;
        auto internal = tasks::is_internal{req_param<bool>(*req, "internal", false)};
-        std::vector<chunked_stats> res = co_await tm.map([&req, internal] (tasks::task_manager& tm) {
+        std::vector<chunked_stats> res = co_await ctx.tm.map([&req, internal] (tasks::task_manager& tm) {
            chunked_stats local_res;
-            tasks::task_manager::module_ptr module;
-            try {
-                module = tm.find_module(req->param["module"]);
-            } catch (...) {
-                throw bad_param_exception(fmt::format("{}", std::current_exception()));
-            }
+            auto module = tm.find_module(req->param["module"]);
            const auto& filtered_tasks = module->get_tasks() | boost::adaptors::filtered([&params = req->query_parameters, internal] (const auto& task) {
                return (internal || !task.second->is_internal()) && filter_tasks(task.second, params);
            });
@@ -156,76 +148,57 @@ void set_task_manager(http_context& ctx, routes& r, sharded<tasks::task_manager>
        co_return std::move(f);
    });

-    tm::get_task_status.set(r, [&tm] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
+    tm::get_task_status.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
        auto id = tasks::task_id{utils::UUID{req->param["task_id"]}};
-        tasks::task_manager::foreign_task_ptr task;
-        try {
-            task = co_await tasks::task_manager::invoke_on_task(tm, id, std::function([] (tasks::task_manager::task_ptr task) -> future<tasks::task_manager::foreign_task_ptr> {
-                if (task->is_complete()) {
-                    task->unregister_task();
-                }
-                co_return std::move(task);
-            }));
-        } catch (tasks::task_manager::task_not_found& e) {
-            throw bad_param_exception(e.what());
-        }
+        auto task = co_await tasks::task_manager::invoke_on_task(ctx.tm, id, std::function([] (tasks::task_manager::task_ptr task) -> future<tasks::task_manager::foreign_task_ptr> {
+            auto state = task->get_status().state;
+            if (state == tasks::task_manager::task_state::done || state == tasks::task_manager::task_state::failed) {
+                task->unregister_task();
+            }
+            co_return std::move(task);
+        }));
        auto s = co_await retrieve_status(task);
        co_return make_status(s);
    });

-    tm::abort_task.set(r, [&tm] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
+    tm::abort_task.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
        auto id = tasks::task_id{utils::UUID{req->param["task_id"]}};
-        try {
-            co_await tasks::task_manager::invoke_on_task(tm, id, [] (tasks::task_manager::task_ptr task) -> future<> {
-                if (!task->is_abortable()) {
-                    co_await coroutine::return_exception(std::runtime_error("Requested task cannot be aborted"));
-                }
-                co_await task->abort();
-            });
-        } catch (tasks::task_manager::task_not_found& e) {
-            throw bad_param_exception(e.what());
-        }
+        co_await tasks::task_manager::invoke_on_task(ctx.tm, id, [] (tasks::task_manager::task_ptr task) -> future<> {
+            if (!task->is_abortable()) {
+                co_await coroutine::return_exception(std::runtime_error("Requested task cannot be aborted"));
+            }
+            co_await task->abort();
+        });
        co_return json_void();
    });

-    tm::wait_task.set(r, [&tm] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
+    tm::wait_task.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
        auto id = tasks::task_id{utils::UUID{req->param["task_id"]}};
-        tasks::task_manager::foreign_task_ptr task;
-        try {
-            task = co_await tasks::task_manager::invoke_on_task(tm, id, std::function([] (tasks::task_manager::task_ptr task) {
-                return task->done().then_wrapped([task] (auto f) {
-                    task->unregister_task();
-                    // done() is called only because we want the task to be complete before getting its status.
-                    // The future should be ignored here as the result does not matter.
-                    f.ignore_ready_future();
-                    return make_foreign(task);
-                });
-            }));
-        } catch (tasks::task_manager::task_not_found& e) {
-            throw bad_param_exception(e.what());
-        }
+        auto task = co_await tasks::task_manager::invoke_on_task(ctx.tm, id, std::function([] (tasks::task_manager::task_ptr task) {
+            return task->done().then_wrapped([task] (auto f) {
+                task->unregister_task();
+                f.get();
+                return make_foreign(task);
+            });
+        }));
        auto s = co_await retrieve_status(task);
        co_return make_status(s);
    });

-    tm::get_task_status_recursively.set(r, [&_tm = tm] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
-        auto& tm = _tm;
+    tm::get_task_status_recursively.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
+        auto& _ctx = ctx;
        auto id = tasks::task_id{utils::UUID{req->param["task_id"]}};
        std::queue<tasks::task_manager::foreign_task_ptr> q;
        utils::chunked_vector<full_task_status> res;

-        tasks::task_manager::foreign_task_ptr task;
-        try {
-            // Get requested task.
-            task = co_await tasks::task_manager::invoke_on_task(tm, id, std::function([] (tasks::task_manager::task_ptr task) -> future<tasks::task_manager::foreign_task_ptr> {
-                if (task->is_complete()) {
-                    task->unregister_task();
-                }
-                co_return task;
-            }));
-        } catch (tasks::task_manager::task_not_found& e) {
-            throw bad_param_exception(e.what());
-        }
+        // Get requested task.
+        auto task = co_await tasks::task_manager::invoke_on_task(_ctx.tm, id, std::function([] (tasks::task_manager::task_ptr task) -> future<tasks::task_manager::foreign_task_ptr> {
+            auto state = task->get_status().state;
+            if (state == tasks::task_manager::task_state::done || state == tasks::task_manager::task_state::failed) {
+                task->unregister_task();
+            }
+            co_return task;
+        }));

        // Push children's statuses in BFS order.
        q.push(co_await task.copy());   // Task cannot be moved since we need it to be alive during whole loop execution.
@@ -255,23 +228,9 @@ void set_task_manager(http_context& ctx, routes& r, sharded<tasks::task_manager>

    tm::get_and_update_ttl.set(r, [&cfg] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
        uint32_t ttl = cfg.task_ttl_seconds();
-        try {
-            co_await cfg.task_ttl_seconds.set_value_on_all_shards(req->query_parameters["ttl"], utils::config_file::config_source::API);
-        } catch (...) {
-            throw bad_param_exception(fmt::format("{}", std::current_exception()));
-        }
+        co_await cfg.task_ttl_seconds.set_value_on_all_shards(req->query_parameters["ttl"], utils::config_file::config_source::API);
        co_return json::json_return_type(ttl);
    });
 }

-void unset_task_manager(http_context& ctx, routes& r) {
-    tm::get_modules.unset(r);
-    tm::get_tasks.unset(r);
-    tm::get_task_status.unset(r);
-    tm::abort_task.unset(r);
-    tm::wait_task.unset(r);
-    tm::get_task_status_recursively.unset(r);
-    tm::get_and_update_ttl.unset(r);
-}
-
 }
--- a/api/task_manager.hh
+++ b/api/task_manager.hh
@@ -8,17 +8,11 @@

 #pragma once

-#include <seastar/core/sharded.hh>
 #include "api.hh"
 #include "db/config.hh"

-namespace tasks {
-    class task_manager;
-}
-
 namespace api {

-void set_task_manager(http_context& ctx, httpd::routes& r, sharded<tasks::task_manager>& tm, db::config& cfg);
-void unset_task_manager(http_context& ctx, httpd::routes& r);
+void set_task_manager(http_context& ctx, httpd::routes& r, db::config& cfg);

 }
--- a/api/task_manager_test.cc
+++ b/api/task_manager_test.cc
@@ -20,17 +20,17 @@ namespace tmt = httpd::task_manager_test_json;
 using namespace json;
 using namespace seastar::httpd;

-void set_task_manager_test(http_context& ctx, routes& r, sharded<tasks::task_manager>& tm) {
-    tmt::register_test_module.set(r, [&tm] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
-        co_await tm.invoke_on_all([] (tasks::task_manager& tm) {
+void set_task_manager_test(http_context& ctx, routes& r) {
+    tmt::register_test_module.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
+        co_await ctx.tm.invoke_on_all([] (tasks::task_manager& tm) {
            auto m = make_shared<tasks::test_module>(tm);
            tm.register_module("test", m);
        });
        co_return json_void();
    });

-    tmt::unregister_test_module.set(r, [&tm] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
-        co_await tm.invoke_on_all([] (tasks::task_manager& tm) -> future<> {
+    tmt::unregister_test_module.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
+        co_await ctx.tm.invoke_on_all([] (tasks::task_manager& tm) -> future<> {
            auto module_name = "test";
            auto module = tm.find_module(module_name);
            co_await module->stop();
@@ -38,8 +38,8 @@ void set_task_manager_test(http_context& ctx, routes& r, sharded<tasks::task_man
        co_return json_void();
    });

-    tmt::register_test_task.set(r, [&tm] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
-        sharded<tasks::task_manager>& tms = tm;
+    tmt::register_test_task.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
+        sharded<tasks::task_manager>& tms = ctx.tm;
        auto it = req->query_parameters.find("task_id");
        auto id = it != req->query_parameters.end() ? tasks::task_id{utils::UUID{it->second}} : tasks::task_id::create_null_id();
        it = req->query_parameters.find("shard");
@@ -54,7 +54,7 @@ void set_task_manager_test(http_context& ctx, routes& r, sharded<tasks::task_man
        tasks::task_info data;
        if (it != req->query_parameters.end()) {
            data.id = tasks::task_id{utils::UUID{it->second}};
-            auto parent_ptr = co_await tasks::task_manager::lookup_task_on_all_shards(tm, data.id);
+            auto parent_ptr = co_await tasks::task_manager::lookup_task_on_all_shards(ctx.tm, data.id);
            data.shard = parent_ptr->get_status().shard;
        }

@@ -69,50 +69,34 @@ void set_task_manager_test(http_context& ctx, routes& r, sharded<tasks::task_man
        co_return id.to_sstring();
    });

-    tmt::unregister_test_task.set(r, [&tm] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
+    tmt::unregister_test_task.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
        auto id = tasks::task_id{utils::UUID{req->query_parameters["task_id"]}};
-        try {
-            co_await tasks::task_manager::invoke_on_task(tm, id, [] (tasks::task_manager::task_ptr task) -> future<> {
-                tasks::test_task test_task{task};
-                co_await test_task.unregister_task();
-            });
-        } catch (tasks::task_manager::task_not_found& e) {
-            throw bad_param_exception(e.what());
-        }
+        co_await tasks::task_manager::invoke_on_task(ctx.tm, id, [] (tasks::task_manager::task_ptr task) -> future<> {
+            tasks::test_task test_task{task};
+            co_await test_task.unregister_task();
+        });
        co_return json_void();
    });

-    tmt::finish_test_task.set(r, [&tm] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
+    tmt::finish_test_task.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
        auto id = tasks::task_id{utils::UUID{req->param["task_id"]}};
        auto it = req->query_parameters.find("error");
        bool fail = it != req->query_parameters.end();
        std::string error = fail ? it->second : "";

-        try {
-            co_await tasks::task_manager::invoke_on_task(tm, id, [fail, error = std::move(error)] (tasks::task_manager::task_ptr task) {
-                tasks::test_task test_task{task};
-                if (fail) {
-                    test_task.finish_failed(std::make_exception_ptr(std::runtime_error(error)));
-                } else {
-                    test_task.finish();
-                }
-                return make_ready_future<>();
-            });
-        } catch (tasks::task_manager::task_not_found& e) {
-            throw bad_param_exception(e.what());
-        }
+        co_await tasks::task_manager::invoke_on_task(ctx.tm, id, [fail, error = std::move(error)] (tasks::task_manager::task_ptr task) {
+            tasks::test_task test_task{task};
+            if (fail) {
+                test_task.finish_failed(std::make_exception_ptr(std::runtime_error(error)));
+            } else {
+                test_task.finish();
+            }
+            return make_ready_future<>();
+        });
        co_return json_void();
    });
 }

-void unset_task_manager_test(http_context& ctx, routes& r) {
-    tmt::register_test_module.unset(r);
-    tmt::unregister_test_module.unset(r);
-    tmt::register_test_task.unset(r);
-    tmt::unregister_test_task.unset(r);
-    tmt::finish_test_task.unset(r);
-}
-
 }

 #endif
--- a/api/task_manager_test.hh
+++ b/api/task_manager_test.hh
@@ -10,17 +10,11 @@

 #pragma once

-#include <seastar/core/sharded.hh>
 #include "api.hh"

-namespace tasks {
-class task_manager;
-}
-
 namespace api {

-void set_task_manager_test(http_context& ctx, httpd::routes& r, sharded<tasks::task_manager>& tm);
-void unset_task_manager_test(http_context& ctx, httpd::routes& r);
+void set_task_manager_test(http_context& ctx, httpd::routes& r);

 }

--- a/auth/CMakeLists.txt
+++ b/auth/CMakeLists.txt
@@ -7,7 +7,6 @@ target_sources(scylla_auth
    allow_all_authorizer.cc
    authenticated_user.cc
    authenticator.cc
-    certificate_authenticator.cc
    common.cc
    default_authorizer.cc
    password_authenticator.cc
@@ -31,7 +30,6 @@ target_link_libraries(scylla_auth
  PRIVATE
    cql3
    idl
-    wasmtime_bindings
-    libxcrypt::libxcrypt)
+    wasmtime_bindings)

 add_whole_archive(auth scylla_auth)
--- a/auth/authenticated_user.hh
+++ b/auth/authenticated_user.hh
@@ -35,9 +35,16 @@ public:
    ///
    authenticated_user() = default;
    explicit authenticated_user(std::string_view name);
-    friend bool operator==(const authenticated_user&, const authenticated_user&) noexcept = default;
 };

+inline bool operator==(const authenticated_user& u1, const authenticated_user& u2) noexcept {
+    return u1.name == u2.name;
+}
+
+inline bool operator!=(const authenticated_user& u1, const authenticated_user& u2) noexcept {
+    return !(u1 == u2);
+}
+
 const authenticated_user& anonymous_user() noexcept;

 inline bool is_anonymous(const authenticated_user& u) noexcept {
--- a/auth/authenticator.cc
+++ b/auth/authenticator.cc
@@ -18,7 +18,3 @@

 const sstring auth::authenticator::USERNAME_KEY("username");
 const sstring auth::authenticator::PASSWORD_KEY("password");
-
-future<std::optional<auth::authenticated_user>> auth::authenticator::authenticate(session_dn_func) const {
-    return make_ready_future<std::optional<auth::authenticated_user>>(std::nullopt);
-}
--- a/auth/authenticator.hh
+++ b/auth/authenticator.hh
@@ -15,8 +15,6 @@
 #include <set>
 #include <stdexcept>
 #include <unordered_map>
-#include <optional>
-#include <functional>

 #include <seastar/core/enum.hh>
 #include <seastar/core/future.hh>
@@ -38,16 +36,6 @@ namespace auth {

 class authenticated_user;

-// Query alt name info as a single (subject style) string
-using alt_name_func = std::function<future<std::string>()>;
-
-struct certificate_info {
-    std::string subject;
-    alt_name_func get_alt_names;
-};
-
-using session_dn_func = std::function<future<std::optional<certificate_info>>()>;
-
 ///
 /// Abstract client for authenticating role identity.
 ///
@@ -99,13 +87,6 @@ public:
    ///
    virtual future<authenticated_user> authenticate(const credentials_map& credentials) const = 0;

-    ///
-    /// Authenticate (early) using transport info
-    ///
-    /// \returns nullopt if not supported/required. exceptional future if failed
-    ///
-    virtual future<std::optional<authenticated_user>> authenticate(session_dn_func) const;
-
    ///
    /// Create an authentication record for a new user. This is required before the user can log-in.
    ///
--- a/auth/authorizer.hh
+++ b/auth/authorizer.hh
@@ -39,6 +39,10 @@ inline bool operator==(const permission_details& pd1, const permission_details&
            == std::forward_as_tuple(pd2.role_name, pd2.resource, pd2.permissions.mask());
 }

+inline bool operator!=(const permission_details& pd1, const permission_details& pd2) {
+    return !(pd1 == pd2);
+}
+
 inline bool operator<(const permission_details& pd1, const permission_details& pd2) {
    return std::forward_as_tuple(pd1.role_name, pd1.resource, pd1.permissions)
            < std::forward_as_tuple(pd2.role_name, pd2.resource, pd2.permissions);
--- a/auth/certificate_authenticator.cc
+++ b/auth/certificate_authenticator.cc
@@ -1,181 +0,0 @@
-/*
- * Copyright (C) 2022-present ScyllaDB
- *
- */
-
-/*
- * SPDX-License-Identifier: AGPL-3.0-or-later
- */
-
-#include "auth/certificate_authenticator.hh"
-
-#include <regex>
-
-#include "utils/class_registrator.hh"
-#include "data_dictionary/data_dictionary.hh"
-#include "cql3/query_processor.hh"
-#include "db/config.hh"
-
-static const auto CERT_AUTH_NAME = "com.scylladb.auth.CertificateAuthenticator";
-const std::string_view auth::certificate_authenticator_name(CERT_AUTH_NAME);
-
-static logging::logger clogger("certificate_authenticator");
-
-static const std::string cfg_source_attr = "source";
-static const std::string cfg_query_attr = "query";
-
-static const std::string cfg_source_subject = "SUBJECT";
-static const std::string cfg_source_altname = "ALTNAME";
-
-static const class_registrator<auth::authenticator
-    , auth::certificate_authenticator
-    , cql3::query_processor&
-    , ::service::migration_manager&> cert_auth_reg(CERT_AUTH_NAME);
-
-enum class auth::certificate_authenticator::query_source {
-    subject, altname
-};
-
-auth::certificate_authenticator::certificate_authenticator(cql3::query_processor& qp, ::service::migration_manager&)
-    : _queries([&] {
-        auto& conf = qp.db().get_config();
-        auto queries = conf.auth_certificate_role_queries();
-
-        if (queries.empty()) {
-            throw std::invalid_argument("No role extraction queries specified.");
-        }
-
-        std::vector<std::pair<query_source, boost::regex>> res;
-
-        for (auto& map : queries) {
-            // first, check for any invalid config keys
-            if (map.size() == 2) {
-                try {
-                    auto& source = map.at(cfg_source_attr);
-                    std::string query = map.at(cfg_query_attr);
-
-                    std::transform(source.begin(), source.end(), source.begin(), ::toupper);
-
-                    boost::regex ex(query);
-                    if (ex.mark_count() != 1) {
-                        throw std::invalid_argument("Role query must have exactly one mark expression");
-                    }
-
-                    clogger.debug("Append role query: {} : {}", source, query);
-
-                    if (source == cfg_source_subject) {
-                        res.emplace_back(query_source::subject, std::move(ex));
-                    } else if (source == cfg_source_altname) {
-                        res.emplace_back(query_source::altname, std::move(ex));
-                    } else {
-                        throw std::invalid_argument(fmt::format("Invalid source: {}", map.at(cfg_source_attr)));
-                    }
-                    continue;
-                } catch (std::out_of_range&) {
-                    // just fallthrough
-                } catch (std::regex_error&) {
-                    std::throw_with_nested(std::invalid_argument(fmt::format("Invalid query expression: {}", map.at(cfg_query_attr))));
-                }
-            }
-            throw std::invalid_argument(fmt::format("Invalid query: {}", map));
-        }
-        return res;
-    }())
-{}
-
-auth::certificate_authenticator::~certificate_authenticator() = default;
-
-future<> auth::certificate_authenticator::start() {
-    co_return;
-}
-
-future<> auth::certificate_authenticator::stop() {
-    co_return;
-}
-
-std::string_view auth::certificate_authenticator::qualified_java_name() const {
-    return certificate_authenticator_name;
-}
-
-bool auth::certificate_authenticator::require_authentication() const {
-    return true;
-}
-
-auth::authentication_option_set auth::certificate_authenticator::supported_options() const {
-    return {};
-}
-
-auth::authentication_option_set auth::certificate_authenticator::alterable_options() const {
-    return {};
-}
-
-future<std::optional<auth::authenticated_user>> auth::certificate_authenticator::authenticate(session_dn_func f) const {
-    if (!f) {
-        co_return std::nullopt;
-    }
-    auto dninfo = co_await f();
-    if (!dninfo) {
-        throw exceptions::authentication_exception("No valid certificate found");
-    }
-
-    auto& subject = dninfo->subject;
-    std::optional<std::string> altname ;
-
-    const std::string* source_str = nullptr;
-
-    for (auto& [source, expr] : _queries) {
-        switch (source) {
-            default:
-            case query_source::subject:
-                source_str = &subject;
-                break;
-            case query_source::altname:
-                if (!altname) {
-                    altname = dninfo->get_alt_names ? co_await dninfo->get_alt_names() : std::string{};
-                }
-                source_str = &*altname;
-                break;
-        }
-
-        clogger.debug("Checking {}: {}", int(source), *source_str);
-
-        boost::smatch m;
-        if (boost::regex_search(*source_str, m, expr)) {
-            auto username = m[1].str();
-            clogger.debug("Return username: {}", username);
-            co_return username;
-        }
-    }
-    throw exceptions::authentication_exception(format("Subject '{}'/'{}' does not match any query expression", subject, altname));
-}
-
-
-future<auth::authenticated_user> auth::certificate_authenticator::authenticate(const credentials_map&) const {
-    throw exceptions::authentication_exception("Cannot authenticate using attribute map");
-}
-
-future<> auth::certificate_authenticator::create(std::string_view role_name, const authentication_options& options) const {
-    // TODO: should we keep track of roles/enforce existence? Role manager should deal with this...
-    co_return;
-}
-
-future<> auth::certificate_authenticator::alter(std::string_view role_name, const authentication_options& options) const {
-    co_return;
-}
-
-future<> auth::certificate_authenticator::drop(std::string_view role_name) const {
-    co_return;
-}
-
-future<auth::custom_options> auth::certificate_authenticator::query_custom_options(std::string_view) const {
-    co_return auth::custom_options{};
-}
-
-const auth::resource_set& auth::certificate_authenticator::protected_resources() const {
-    static const resource_set resources;
-    return resources;
-}
-
-::shared_ptr<auth::sasl_challenge> auth::certificate_authenticator::new_sasl_challenge() const {
-    throw exceptions::authentication_exception("Login authentication not supported");
-}
--- a/auth/certificate_authenticator.hh
+++ b/auth/certificate_authenticator.hh
@@ -1,62 +0,0 @@
-/*
- * Copyright (C) 2022-present ScyllaDB
- *
- */
-
-/*
- * SPDX-License-Identifier: AGPL-3.0-or-later
- */
-
-#pragma once
-
-#include <boost/regex.hpp>
-#include "auth/authenticator.hh"
-
-namespace cql3 {
-
-class query_processor;
-
-} // namespace cql3
-
-namespace service {
-class migration_manager;
-}
-
-namespace auth {
-
-extern const std::string_view certificate_authenticator_name;
-
-class certificate_authenticator : public authenticator {
-    enum class query_source;
-    std::vector<std::pair<query_source, boost::regex>> _queries;
-public:
-    certificate_authenticator(cql3::query_processor&, ::service::migration_manager&);
-    ~certificate_authenticator();
-
-    future<> start() override;
-    future<> stop() override;
-
-    std::string_view qualified_java_name() const override;
-
-    bool require_authentication() const override;
-
-    authentication_option_set supported_options() const override;
-    authentication_option_set alterable_options() const override;
-
-    future<authenticated_user> authenticate(const credentials_map& credentials) const override;
-    future<std::optional<authenticated_user>> authenticate(session_dn_func) const override;
-
-    future<> create(std::string_view role_name, const authentication_options& options) const override;
-    future<> alter(std::string_view role_name, const authentication_options& options) const override;
-    future<> drop(std::string_view role_name) const override;
-
-    future<custom_options> query_custom_options(std::string_view role_name) const override;
-
-    const resource_set& protected_resources() const override;
-
-    ::shared_ptr<sasl_challenge> new_sasl_challenge() const override;
-private:
-};
-
-}
-
--- a/auth/common.cc
+++ b/auth/common.cc
@@ -71,8 +71,7 @@ static future<> create_metadata_table_if_missing_impl(
        auto group0_guard = co_await mm.start_group0_operation();
        auto ts = group0_guard.write_timestamp();
        try {
-            co_return co_await mm.announce(co_await ::service::prepare_new_column_family_announcement(qp.proxy(), table, ts),
-                    std::move(group0_guard), format("auth: create {} metadata table", table->cf_name()));
+            co_return co_await mm.announce(co_await mm.prepare_new_column_family_announcement(table, ts), std::move(group0_guard));
        } catch (exceptions::already_exists_exception&) {}
    }
 }
@@ -85,6 +84,20 @@ future<> create_metadata_table_if_missing(
    return futurize_invoke(create_metadata_table_if_missing_impl, table_name, qp, cql, mm);
 }

+future<> wait_for_schema_agreement(::service::migration_manager& mm, const replica::database& db, seastar::abort_source& as) {
+    static const auto pause = [] { return sleep(std::chrono::milliseconds(500)); };
+
+    return do_until([&db, &as] {
+        as.check();
+        return db.get_version() != replica::database::empty_version;
+    }, pause).then([&mm, &as] {
+        return do_until([&mm, &as] {
+            as.check();
+            return mm.have_schema_agreement();
+        }, pause);
+    });
+}
+
 ::service::query_state& internal_distributed_query_state() noexcept {
 #ifdef DEBUG
    // Give the much slower debug tests more headroom for completing auth queries.
--- a/auth/common.hh
+++ b/auth/common.hh
@@ -22,6 +22,7 @@
 #include "log.hh"
 #include "seastarx.hh"
 #include "utils/exponential_backoff_retry.hh"
+#include "service/query_state.hh"

 using namespace std::chrono_literals;

@@ -31,7 +32,6 @@ class database;

 namespace service {
 class migration_manager;
-class query_state;
 }

 namespace cql3 {
@@ -67,6 +67,8 @@ future<> create_metadata_table_if_missing(
        std::string_view cql,
        ::service::migration_manager&) noexcept;

+future<> wait_for_schema_agreement(::service::migration_manager&, const replica::database&, seastar::abort_source&);
+
 ///
 /// Time-outs for internal, non-local CQL queries.
 ///
--- a/auth/default_authorizer.cc
+++ b/auth/default_authorizer.cc
@@ -129,7 +129,7 @@ future<> default_authorizer::start() {
                _migration_manager).then([this] {
            _finished = do_after_system_ready(_as, [this] {
                return async([this] {
-                    _migration_manager.wait_for_schema_agreement(_qp.db().real_database(), db::timeout_clock::time_point::max(), &_as).get0();
+                    wait_for_schema_agreement(_migration_manager, _qp.db().real_database(), _as).get0();

                    if (legacy_metadata_exists()) {
                        if (!any_granted().get0()) {
--- a/auth/password_authenticator.cc
+++ b/auth/password_authenticator.cc
@@ -29,7 +29,6 @@
 #include "utils/class_registrator.hh"
 #include "replica/database.hh"
 #include "cql3/query_processor.hh"
-#include "db/config.hh"

 namespace auth {

@@ -51,23 +50,14 @@ static const class_registrator<

 static thread_local auto rng_for_salt = std::default_random_engine(std::random_device{}());

-static std::string_view get_config_value(std::string_view value, std::string_view def) {
-    return value.empty() ? def : value;
-}
-
-std::string password_authenticator::default_superuser(const db::config& cfg) {
-    return std::string(get_config_value(cfg.auth_superuser_name(), DEFAULT_USER_NAME));
-}
-
 password_authenticator::~password_authenticator() {
 }

 password_authenticator::password_authenticator(cql3::query_processor& qp, ::service::migration_manager& mm)
    : _qp(qp)
    , _migration_manager(mm)
-    , _stopped(make_ready_future<>()) 
-    , _superuser(default_superuser(qp.db().get_config()))
-{}
+    , _stopped(make_ready_future<>()) {
+}

 static bool has_salted_hash(const cql3::untyped_result_set_row& row) {
    return !row.get_or<sstring>(SALTED_HASH, "").empty();
@@ -116,17 +106,13 @@ future<> password_authenticator::migrate_legacy_metadata() const {
 }

 future<> password_authenticator::create_default_if_missing() const {
-    return default_role_row_satisfies(_qp, &has_salted_hash, _superuser).then([this](bool exists) {
+    return default_role_row_satisfies(_qp, &has_salted_hash).then([this](bool exists) {
        if (!exists) {
-            std::string salted_pwd(get_config_value(_qp.db().get_config().auth_superuser_salted_password(), ""));
-            if (salted_pwd.empty()) {
-                salted_pwd = passwords::hash(DEFAULT_USER_PASSWORD, rng_for_salt);
-            }
            return _qp.execute_internal(
                    update_row_query(),
                    db::consistency_level::QUORUM,
                    internal_distributed_query_state(),
-                    {salted_pwd, _superuser},
+                    {passwords::hash(DEFAULT_USER_PASSWORD, rng_for_salt), DEFAULT_USER_NAME},
                    cql3::query_processor::cache_internal::no).then([](auto&&) {
                plogger.info("Created default superuser authentication record.");
            });
@@ -146,9 +132,9 @@ future<> password_authenticator::start() {

         _stopped = do_after_system_ready(_as, [this] {
             return async([this] {
-                 _migration_manager.wait_for_schema_agreement(_qp.db().real_database(), db::timeout_clock::time_point::max(), &_as).get0();
+                 wait_for_schema_agreement(_migration_manager, _qp.db().real_database(), _as).get0();

-                 if (any_nondefault_role_row_satisfies(_qp, &has_salted_hash, _superuser).get0()) {
+                 if (any_nondefault_role_row_satisfies(_qp, &has_salted_hash).get0()) {
                     if (legacy_metadata_exists()) {
                         plogger.warn("Ignoring legacy authentication metadata since nondefault data already exist.");
                     }
@@ -175,8 +161,6 @@ future<> password_authenticator::stop() {
 }

 db::consistency_level password_authenticator::consistency_for_user(std::string_view role_name) {
-    // TODO: this is plain dung. Why treat hardcoded default special, but for example a user-created
-    // super user uses plain LOCAL_ONE?
    if (role_name == DEFAULT_USER_NAME) {
        return db::consistency_level::QUORUM;
    }
--- a/auth/password_authenticator.hh
+++ b/auth/password_authenticator.hh
@@ -14,10 +14,6 @@

 #include "auth/authenticator.hh"

-namespace db {
-    class config;
-}
-
 namespace cql3 {

 class query_processor;
@@ -37,11 +33,9 @@ class password_authenticator : public authenticator {
    ::service::migration_manager& _migration_manager;
    future<> _stopped;
    seastar::abort_source _as;
-    std::string _superuser;

 public:
    static db::consistency_level consistency_for_user(std::string_view role_name);
-    static std::string default_superuser(const db::config&);

    password_authenticator(cql3::query_processor&, ::service::migration_manager&);

--- a/auth/resource.cc
+++ b/auth/resource.cc
@@ -79,13 +79,6 @@ static permission_set applicable_permissions(const service_level_resource_view &
 }

 static permission_set applicable_permissions(const functions_resource_view& fv) {
-    if (fv.function_name() || fv.function_signature()) {
-        return permission_set::of<
-                permission::ALTER,
-                permission::DROP,
-                permission::AUTHORIZE,
-                permission::EXECUTE>();
-    }
    return permission_set::of<
            permission::CREATE,
            permission::ALTER,
@@ -299,7 +292,7 @@ std::optional<std::vector<std::string_view>> functions_resource_view::function_a

    std::vector<std::string_view> parts;
    if (_resource._parts[3] == "") {
-        return parts;
+        return {};
    }
    for (size_t i = 3; i < _resource._parts.size(); i++) {
        parts.push_back(_resource._parts[i]);
--- a/auth/resource.hh
+++ b/auth/resource.hh
@@ -117,12 +117,20 @@ private:
    friend class functions_resource_view;

    friend bool operator<(const resource&, const resource&);
-    friend bool operator==(const resource&, const resource&) = default;
+    friend bool operator==(const resource&, const resource&);
    friend resource parse_resource(std::string_view);
 };

 bool operator<(const resource&, const resource&);

+inline bool operator==(const resource& r1, const resource& r2) {
+    return (r1._kind == r2._kind) && (r1._parts == r2._parts);
+}
+
+inline bool operator!=(const resource& r1, const resource& r2) {
+    return !(r1 == r2);
+}
+
 std::ostream& operator<<(std::ostream&, const resource&);

 class resource_kind_mismatch : public std::invalid_argument {
--- a/auth/role_or_anonymous.cc
+++ b/auth/role_or_anonymous.cc
@@ -17,6 +17,10 @@ std::ostream& operator<<(std::ostream& os, const role_or_anonymous& mr) {
    return os;
 }

+bool operator==(const role_or_anonymous& mr1, const role_or_anonymous& mr2) noexcept {
+    return mr1.name == mr2.name;
+}
+
 bool is_anonymous(const role_or_anonymous& mr) noexcept {
    return !mr.name.has_value();
 }
--- a/auth/role_or_anonymous.hh
+++ b/auth/role_or_anonymous.hh
@@ -26,11 +26,16 @@ public:
    role_or_anonymous() = default;
    role_or_anonymous(std::string_view name) : name(name) {
    }
-    friend bool operator==(const role_or_anonymous&, const role_or_anonymous&) noexcept = default;
 };

 std::ostream& operator<<(std::ostream&, const role_or_anonymous&);

+bool operator==(const role_or_anonymous&, const role_or_anonymous&) noexcept;
+
+inline bool operator!=(const role_or_anonymous& mr1, const role_or_anonymous& mr2) noexcept {
+    return !(mr1 == mr2);
+}
+
 bool is_anonymous(const role_or_anonymous&) noexcept;

 }
--- a/auth/roles-metadata.cc
+++ b/auth/roles-metadata.cc
@@ -46,43 +46,59 @@ constexpr std::string_view qualified_name("system_auth.roles");

 future<bool> default_role_row_satisfies(
        cql3::query_processor& qp,
-        std::function<bool(const cql3::untyped_result_set_row&)> p,
-        std::optional<std::string> rolename) {
+        std::function<bool(const cql3::untyped_result_set_row&)> p) {
    static const sstring query = format("SELECT * FROM {} WHERE {} = ?",
            meta::roles_table::qualified_name,
            meta::roles_table::role_col_name);

-    for (auto cl : { db::consistency_level::ONE, db::consistency_level::QUORUM }) {
-        auto results = co_await qp.execute_internal(query, cl
-            , internal_distributed_query_state()
-            , {rolename.value_or(std::string(meta::DEFAULT_SUPERUSER_NAME))}
-            , cql3::query_processor::cache_internal::yes
-            );
-        if (!results->empty()) {
-            co_return p(results->one());
-        }
-    }
-    co_return false;
+    return do_with(std::move(p), [&qp](const auto& p) {
+        return qp.execute_internal(
+                query,
+                db::consistency_level::ONE,
+                {meta::DEFAULT_SUPERUSER_NAME},
+                cql3::query_processor::cache_internal::yes).then([&qp, &p](::shared_ptr<cql3::untyped_result_set> results) {
+            if (results->empty()) {
+                return qp.execute_internal(
+                        query,
+                        db::consistency_level::QUORUM,
+                        internal_distributed_query_state(),
+                        {meta::DEFAULT_SUPERUSER_NAME},
+                        cql3::query_processor::cache_internal::yes).then([&p](::shared_ptr<cql3::untyped_result_set> results) {
+                    if (results->empty()) {
+                        return make_ready_future<bool>(false);
+                    }
+
+                    return make_ready_future<bool>(p(results->one()));
+                });
+            }
+
+            return make_ready_future<bool>(p(results->one()));
+        });
+    });
 }

 future<bool> any_nondefault_role_row_satisfies(
        cql3::query_processor& qp,
-        std::function<bool(const cql3::untyped_result_set_row&)> p,
-        std::optional<std::string> rolename) {
+        std::function<bool(const cql3::untyped_result_set_row&)> p) {
    static const sstring query = format("SELECT * FROM {}", meta::roles_table::qualified_name);

-    auto results = co_await qp.execute_internal(query, db::consistency_level::QUORUM
-        , internal_distributed_query_state(), cql3::query_processor::cache_internal::no
-        );
-    if (results->empty()) {
-        co_return false;
-    }
-    static const sstring col_name = sstring(meta::roles_table::role_col_name);
+    return do_with(std::move(p), [&qp](const auto& p) {
+        return qp.execute_internal(
+                query,
+                db::consistency_level::QUORUM,
+                internal_distributed_query_state(),
+                cql3::query_processor::cache_internal::no).then([&p](::shared_ptr<cql3::untyped_result_set> results) {
+            if (results->empty()) {
+                return false;
+            }

-    co_return boost::algorithm::any_of(*results, [&](const cql3::untyped_result_set_row& row) {
-        auto superuser = rolename ? std::string_view(*rolename) : meta::DEFAULT_SUPERUSER_NAME;
-        const bool is_nondefault = row.get_as<sstring>(col_name) != superuser;
-        return is_nondefault && p(row);
+            static const sstring col_name = sstring(meta::roles_table::role_col_name);
+
+            return boost::algorithm::any_of(*results, [&p](const cql3::untyped_result_set_row& row) {
+                const bool is_nondefault = row.get_as<sstring>(col_name) != meta::DEFAULT_SUPERUSER_NAME;
+                return is_nondefault && p(row);
+            });
+        });
    });
 }

--- a/auth/roles-metadata.hh
+++ b/auth/roles-metadata.hh
@@ -43,17 +43,13 @@ constexpr std::string_view role_col_name{"role", 4};
 ///
 future<bool> default_role_row_satisfies(
        cql3::query_processor&,
-        std::function<bool(const cql3::untyped_result_set_row&)>,
-        std::optional<std::string> rolename = {}
-        );
+        std::function<bool(const cql3::untyped_result_set_row&)>);

 ///
 /// Check that any nondefault role satisfies a predicate. `false` if no nondefault roles exist.
 ///
 future<bool> any_nondefault_role_row_satisfies(
        cql3::query_processor&,
-        std::function<bool(const cql3::untyped_result_set_row&)>,
-        std::optional<std::string> rolename = {}
-        );
+        std::function<bool(const cql3::untyped_result_set_row&)>);

 }
--- a/auth/service.cc
+++ b/auth/service.cc
@@ -7,7 +7,6 @@
 */

 #include <seastar/core/coroutine.hh>
-#include "auth/resource.hh"
 #include "auth/service.hh"

 #include <algorithm>
@@ -21,7 +20,6 @@
 #include "auth/allow_all_authorizer.hh"
 #include "auth/common.hh"
 #include "auth/role_or_anonymous.hh"
-#include "cql3/functions/function_name.hh"
 #include "cql3/functions/functions.hh"
 #include "cql3/query_processor.hh"
 #include "cql3/untyped_result_set.hh"
@@ -68,7 +66,6 @@ private:
    void on_update_function(const sstring& ks_name, const sstring& function_name) override {}
    void on_update_aggregate(const sstring& ks_name, const sstring& aggregate_name) override {}
    void on_update_view(const sstring& ks_name, const sstring& view_name, bool columns_changed) override {}
-    void on_update_tablet_metadata() override {}

    void on_drop_keyspace(const sstring& ks_name) override {
        // Do it in the background.
@@ -78,12 +75,6 @@ private:
        }).handle_exception([] (std::exception_ptr e) {
            log.error("Unexpected exception while revoking all permissions on dropped keyspace: {}", e);
        });
-        (void)_authorizer.revoke_all(
-            auth::make_functions_resource(ks_name)).handle_exception_type([](const unsupported_authorization_operation&) {
-            // Nothing.
-        }).handle_exception([] (std::exception_ptr e) {
-            log.error("Unexpected exception while revoking all permissions on functions in dropped keyspace: {}", e);
-        });
    }

    void on_drop_column_family(const sstring& ks_name, const sstring& cf_name) override {
@@ -98,22 +89,8 @@ private:
    }

    void on_drop_user_type(const sstring& ks_name, const sstring& type_name) override {}
-    void on_drop_function(const sstring& ks_name, const sstring& function_name) override {
-        (void)_authorizer.revoke_all(
-            auth::make_functions_resource(ks_name, function_name)).handle_exception_type([](const unsupported_authorization_operation&) {
-            // Nothing.
-        }).handle_exception([] (std::exception_ptr e) {
-            log.error("Unexpected exception while revoking all permissions on dropped function: {}", e);
-        });
-    }
-    void on_drop_aggregate(const sstring& ks_name, const sstring& aggregate_name) override {
-        (void)_authorizer.revoke_all(
-            auth::make_functions_resource(ks_name, aggregate_name)).handle_exception_type([](const unsupported_authorization_operation&) {
-            // Nothing.
-        }).handle_exception([] (std::exception_ptr e) {
-            log.error("Unexpected exception while revoking all permissions on dropped aggregate: {}", e);
-        });
-    }
+    void on_drop_function(const sstring& ks_name, const sstring& function_name) override {}
+    void on_drop_aggregate(const sstring& ks_name, const sstring& aggregate_name) override {}
    void on_drop_view(const sstring& ks_name, const sstring& view_name) override {}
 };

@@ -178,8 +155,7 @@ future<> service::create_keyspace_if_missing(::service::migration_manager& mm) c
                    opts,
                    true);

-            co_return co_await mm.announce(::service::prepare_new_keyspace_announcement(db.real_database(), ksm, ts),
-                    std::move(group0_guard), format("auth_service: create {} keyspace", meta::AUTH_KS));
+            co_return co_await mm.announce(mm.prepare_new_keyspace_announcement(ksm, ts), std::move(group0_guard));
        }
    }
 }
--- a/auth/standard_role_manager.cc
+++ b/auth/standard_role_manager.cc
@@ -28,8 +28,6 @@
 #include "log.hh"
 #include "utils/class_registrator.hh"
 #include "replica/database.hh"
-#include "service/migration_manager.hh"
-#include "password_authenticator.hh"

 namespace auth {

@@ -129,13 +127,6 @@ static bool has_can_login(const cql3::untyped_result_set_row& row) {
    return row.has("can_login") && !(boolean_type->deserialize(row.get_blob("can_login")).is_null());
 }

-standard_role_manager::standard_role_manager(cql3::query_processor& qp, ::service::migration_manager& mm)
-    : _qp(qp)
-    , _migration_manager(mm)
-    , _stopped(make_ready_future<>())
-    , _superuser(password_authenticator::default_superuser(qp.db().get_config()))
-{}
-
 std::string_view standard_role_manager::qualified_java_name() const noexcept {
    return "org.apache.cassandra.auth.CassandraRoleManager";
 }
@@ -177,7 +168,7 @@ future<> standard_role_manager::create_metadata_tables_if_missing() const {
 }

 future<> standard_role_manager::create_default_role_if_missing() const {
-    return default_role_row_satisfies(_qp, &has_can_login, _superuser).then([this](bool exists) {
+    return default_role_row_satisfies(_qp, &has_can_login).then([this](bool exists) {
        if (!exists) {
            static const sstring query = format("INSERT INTO {} ({}, is_superuser, can_login) VALUES (?, true, true)",
                    meta::roles_table::qualified_name,
@@ -187,9 +178,9 @@ future<> standard_role_manager::create_default_role_if_missing() const {
                    query,
                    db::consistency_level::QUORUM,
                    internal_distributed_query_state(),
-                    {_superuser},
-                    cql3::query_processor::cache_internal::no).then([this](auto&&) {
-                log.info("Created default superuser role '{}'.", _superuser);
+                    {meta::DEFAULT_SUPERUSER_NAME},
+                    cql3::query_processor::cache_internal::no).then([](auto&&) {
+                log.info("Created default superuser role '{}'.", meta::DEFAULT_SUPERUSER_NAME);
                return make_ready_future<>();
            });
        }
@@ -241,7 +232,7 @@ future<> standard_role_manager::start() {
        return this->create_metadata_tables_if_missing().then([this] {
            _stopped = auth::do_after_system_ready(_as, [this] {
                return seastar::async([this] {
-                    _migration_manager.wait_for_schema_agreement(_qp.db().real_database(), db::timeout_clock::time_point::max(), &_as).get0();
+                    wait_for_schema_agreement(_migration_manager, _qp.db().real_database(), _as).get0();

                    if (any_nondefault_role_row_satisfies(_qp, &has_can_login).get0()) {
                        if (this->legacy_metadata_exists()) {
--- a/auth/standard_role_manager.hh
+++ b/auth/standard_role_manager.hh
@@ -34,10 +34,13 @@ class standard_role_manager final : public role_manager {
    ::service::migration_manager& _migration_manager;
    future<> _stopped;
    seastar::abort_source _as;
-    std::string _superuser;

 public:
-    standard_role_manager(cql3::query_processor&, ::service::migration_manager&);
+    standard_role_manager(cql3::query_processor& qp, ::service::migration_manager& mm)
+            : _qp(qp)
+            , _migration_manager(mm)
+            , _stopped(make_ready_future<>()) {
+    }

    virtual std::string_view qualified_java_name() const noexcept override;

--- a/backlog_controller.hh
+++ b/backlog_controller.hh
@@ -37,8 +37,10 @@
 // The constants q1 and q2 are used to determine the proportional factor at each stage.
 class backlog_controller {
 public:
-    using scheduling_group = seastar::scheduling_group;
-
+    struct scheduling_group {
+        seastar::scheduling_group cpu = default_scheduling_group();
+        seastar::io_priority_class io = default_priority_class();
+    };
    future<> shutdown() {
        _update_timer.cancel();
        return std::move(_inflight_update);
@@ -56,11 +58,11 @@ protected:
    };

    scheduling_group _scheduling_group;
+    timer<> _update_timer;

    std::vector<control_point> _control_points;

    std::function<float()> _current_backlog;
-    timer<> _update_timer;
    // updating shares for an I/O class may contact another shard and returns a future.
    future<> _inflight_update;

@@ -80,9 +82,9 @@ protected:
                       std::vector<control_point> control_points, std::function<float()> backlog,
                       float static_shares = 0)
        : _scheduling_group(std::move(sg))
+        , _update_timer([this] { adjust(); })
        , _control_points()
        , _current_backlog(std::move(backlog))
-        , _update_timer([this] { adjust(); })
        , _inflight_update(make_ready_future<>())
        , _static_shares(static_shares)
    {
--- a/bin/cqlsh
+++ b/bin/cqlsh
@@ -1,8 +0,0 @@
-#!/bin/bash
-
-# Copyright (C) 2023-present ScyllaDB
-# SPDX-License-Identifier: AGPL-3.0-or-later
-
-here=$(dirname "$0")
-exec "$here/../tools/cqlsh/bin/cqlsh" "$@"
-
--- a/bytes.hh
+++ b/bytes.hh
@@ -17,7 +17,7 @@
 #include <functional>
 #include <compare>
 #include "utils/mutable_view.hh"
-#include "utils/simple_hashers.hh"
+#include <xxhash.h>

 using bytes = basic_sstring<int8_t, uint32_t, 31, false>;
 using bytes_view = std::basic_string_view<int8_t>;
@@ -160,7 +160,18 @@ struct appending_hash<bytes_view> {
    }
 };

-using bytes_view_hasher = simple_xx_hasher;
+struct bytes_view_hasher : public hasher {
+    XXH64_state_t _state;
+    bytes_view_hasher(uint64_t seed = 0) noexcept {
+        XXH64_reset(&_state, seed);
+    }
+    void update(const char* ptr, size_t length) noexcept {
+        XXH64_update(&_state, ptr, length);
+    }
+    size_t finalize() {
+        return static_cast<size_t>(XXH64_digest(&_state));
+    }
+};

 namespace std {
 template <>
--- a/bytes_ostream.hh
+++ b/bytes_ostream.hh
@@ -53,10 +53,6 @@ public:
        using difference_type = std::ptrdiff_t;
        using pointer = bytes_view*;
        using reference = bytes_view&;
-
-        struct implementation {
-            blob_storage* current_chunk;
-        };
    private:
        chunk* _current = nullptr;
    public:
@@ -79,11 +75,11 @@ public:
            ++(*this);
            return tmp;
        }
-        bool operator==(const fragment_iterator&) const = default;
-        implementation extract_implementation() const {
-            return implementation {
-                .current_chunk = _current,
-            };
+        bool operator==(const fragment_iterator& other) const {
+            return _current == other._current;
+        }
+        bool operator!=(const fragment_iterator& other) const {
+            return _current != other._current;
        }
    };
    using const_iterator = fragment_iterator;
@@ -436,6 +432,10 @@ public:
        return true;
    }

+    bool operator!=(const bytes_ostream& other) const {
+        return !(*this == other);
+    }
+
    // Makes this instance empty.
    //
    // The first buffer is not deallocated, so callers may rely on the
--- a/cache_flat_mutation_reader.hh
+++ b/cache_flat_mutation_reader.hh
@@ -110,9 +110,6 @@ class cache_flat_mutation_reader final : public flat_mutation_reader_v2::impl {
    flat_mutation_reader_v2* _underlying = nullptr;
    flat_mutation_reader_v2_opt _underlying_holder;

-    gc_clock::time_point _read_time;
-    gc_clock::time_point _gc_before;
-
    future<> do_fill_buffer();
    future<> ensure_underlying();
    void copy_from_cache_to_buffer();
@@ -181,20 +178,6 @@ class cache_flat_mutation_reader final : public flat_mutation_reader_v2::impl {
    const schema& table_schema() {
        return *_snp->schema();
    }
-
-    gc_clock::time_point get_read_time() {
-        return _read_context.tombstone_gc_state() ? gc_clock::now() : gc_clock::time_point::min();
-    }
-
-    gc_clock::time_point get_gc_before(const schema& schema, dht::decorated_key dk, const gc_clock::time_point query_time) {
-        auto gc_state = _read_context.tombstone_gc_state();
-        if (gc_state) {
-            return gc_state->get_gc_before_for_key(schema.shared_from_this(), dk, query_time);
-        }
-
-        return gc_clock::time_point::min();
-    }
-
 public:
    cache_flat_mutation_reader(schema_ptr s,
                               dht::decorated_key dk,
@@ -213,8 +196,6 @@ public:
        , _read_context_holder()
        , _read_context(ctx)    // ctx is owned by the caller, who's responsible for closing it.
        , _next_row(*_schema, *_snp, false, _read_context.is_reversed())
-        , _read_time(get_read_time())
-        , _gc_before(get_gc_before(*_schema, dk, _read_time))
    {
        clogger.trace("csm {}: table={}.{}, reversed={}, snap={}", fmt::ptr(this), _schema->ks_name(), _schema->cf_name(), _read_context.is_reversed(),
                      fmt::ptr(&*_snp));
@@ -749,51 +730,9 @@ void cache_flat_mutation_reader::copy_from_cache_to_buffer() {
        }
    }

+    // We add the row to the buffer even when it's full.
+    // This simplifies the code. For more info see #3139.
    if (_next_row_in_range) {
-        bool remove_row = false;
-
-        if (_read_context.tombstone_gc_state() // do not compact rows when tombstone_gc_state is not set (used in some unit tests)
-            && !_next_row.dummy()
-            && _snp->at_latest_version()
-            && _snp->at_oldest_version()) {
-            deletable_row& row = _next_row.latest_row();
-            tombstone range_tomb = _next_row.range_tombstone_for_row();
-            auto t = row.deleted_at();
-            t.apply(range_tomb);
-
-            auto row_tomb_expired = [&](row_tombstone tomb) {
-                return (tomb && tomb.max_deletion_time() < _gc_before);
-            };
-
-            auto is_row_dead = [&](const deletable_row& row) {
-                auto& m = row.marker();
-                return (!m.is_missing() && m.is_dead(_read_time) && m.deletion_time() < _gc_before);
-            };
-
-            if (row_tomb_expired(t) || is_row_dead(row)) {
-                can_gc_fn always_gc = [&](tombstone) { return true; };
-                const schema& row_schema = _next_row.latest_row_schema();
-
-                _read_context.cache()._tracker.on_row_compacted();
-
-                with_allocator(_snp->region().allocator(), [&] {
-                    deletable_row row_copy(row_schema, row);
-                    row_copy.compact_and_expire(row_schema, t.tomb(), _read_time, always_gc, _gc_before, nullptr);
-                    std::swap(row, row_copy);
-                });
-                remove_row = row.empty();
-
-                auto tomb_expired = [&](tombstone tomb) {
-                    return (tomb && tomb.deletion_time < _gc_before);
-                };
-
-                auto latests_range_tomb = _next_row.get_iterator_in_latest_version()->range_tombstone();
-                if (tomb_expired(latests_range_tomb)) {
-                    _next_row.get_iterator_in_latest_version()->set_range_tombstone({});
-                }
-            }
-        }
-
        if (_next_row.range_tombstone_for_row() != _current_tombstone) [[unlikely]] {
            auto tomb = _next_row.range_tombstone_for_row();
            auto new_lower_bound = position_in_partition::before_key(_next_row.position());
@@ -803,31 +742,8 @@ void cache_flat_mutation_reader::copy_from_cache_to_buffer() {
            _current_tombstone = tomb;
            _read_context.cache()._tracker.on_range_tombstone_read();
        }
-
-        if (remove_row) {
-            _read_context.cache()._tracker.on_row_compacted_away();
-
-            _lower_bound = position_in_partition::after_key(*_schema, _next_row.position());
-
-            partition_snapshot_row_weakref row_ref(_next_row);
-            move_to_next_entry();
-
-            with_allocator(_snp->region().allocator(), [&] {
-                cache_tracker& tracker = _read_context.cache()._tracker;
-                if (row_ref->is_linked()) {
-                    tracker.get_lru().remove(*row_ref);
-                }
-                row_ref->on_evicted(tracker);
-            });
-
-            _snp->region().allocator().invalidate_references();
-            _next_row.force_valid();
-        } else {
-            // We add the row to the buffer even when it's full.
-            // This simplifies the code. For more info see #3139.
-            add_to_buffer(_next_row);
-            move_to_next_entry();
-        }
+        add_to_buffer(_next_row);
+        move_to_next_entry();
    } else {
        move_to_next_range();
    }
@@ -978,7 +894,7 @@ void cache_flat_mutation_reader::add_to_buffer(const partition_snapshot_row_curs
    if (!row.dummy()) {
        _read_context.cache().on_row_hit();
        if (_read_context.digest_requested()) {
-            row.latest_row_prepare_hash();
+            row.latest_row().cells().prepare_hash(table_schema(), column_kind::regular_column);
        }
        add_clustering_row_to_buffer(mutation_fragment_v2(*_schema, _permit, row.row()));
    } else {
--- a/cartesian_product.hh
+++ b/cartesian_product.hh
@@ -68,6 +68,7 @@ public:
            _pos = -1;
        }
        bool operator==(const iterator& o) const { return _pos == o._pos; }
+        bool operator!=(const iterator& o) const { return _pos != o._pos; }
    };
 public:
    cartesian_product(const std::vector<std::vector<T>>& vec_of_vecs) : _vec_of_vecs(vec_of_vecs) {}
--- a/cdc/cdc_options.hh
+++ b/cdc/cdc_options.hh
@@ -65,6 +65,7 @@ public:
    void ttl(int v) { _ttl = v; }

    bool operator==(const options& o) const;
+    bool operator!=(const options& o) const;
 };

 } // namespace cdc
--- a/cdc/generation.cc
+++ b/cdc/generation.cc
@@ -13,7 +13,6 @@
 #include <seastar/core/sleep.hh>
 #include <seastar/core/coroutine.hh>

-#include "gms/endpoint_state.hh"
 #include "keys.hh"
 #include "schema/schema_builder.hh"
 #include "replica/database.hh"
@@ -26,7 +25,6 @@
 #include "gms/inet_address.hh"
 #include "gms/gossiper.hh"
 #include "gms/feature_service.hh"
-#include "utils/error_injection.hh"
 #include "utils/UUID_gen.hh"

 #include "cdc/generation.hh"
@@ -68,10 +66,10 @@ static constexpr auto stream_id_index_shift = stream_id_version_shift + stream_i
 static constexpr auto stream_id_random_shift = stream_id_index_shift + stream_id_index_bits;

 /**
- * Responsibility for encoding stream_id moved from the create_stream_ids
- * function to this constructor, to keep knowledge of composition in a
- * single place. Note the make_new_generation_description function
- * defines the "order" in which we view vnodes etc.
+ * Responsibility for encoding stream_id moved from factory method to
+ * this constructor, to keep knowledge of composition in a single place.
+ * Note this is private and friended to topology_description_generator,
+ * because it is the one that defines the "order" in which we view vnodes etc.
 */
 stream_id::stream_id(dht::token token, size_t vnode_index)
    : _value(bytes::initialized_later(), 2 * sizeof(int64_t))
@@ -155,18 +153,18 @@ bool token_range_description::operator==(const token_range_description& o) const
        && sharding_ignore_msb == o.sharding_ignore_msb;
 }

-topology_description::topology_description(utils::chunked_vector<token_range_description> entries)
+topology_description::topology_description(std::vector<token_range_description> entries)
    : _entries(std::move(entries)) {}

 bool topology_description::operator==(const topology_description& o) const {
    return _entries == o._entries;
 }

-const utils::chunked_vector<token_range_description>& topology_description::entries() const& {
+const std::vector<token_range_description>& topology_description::entries() const& {
    return _entries;
 }

-utils::chunked_vector<token_range_description>&& topology_description::entries() && {
+std::vector<token_range_description>&& topology_description::entries() && {
    return std::move(_entries);
 }

@@ -185,48 +183,98 @@ static std::vector<stream_id> create_stream_ids(
    return result;
 }

+class topology_description_generator final {
+    const std::unordered_set<dht::token>& _bootstrap_tokens;
+    const locator::token_metadata_ptr _tmptr;
+    const noncopyable_function<std::pair<size_t, uint8_t> (dht::token)>& _get_sharding_info;
+
+    // Compute a set of tokens that split the token ring into vnodes
+    auto get_tokens() const {
+        auto tokens = _tmptr->sorted_tokens();
+        auto it = tokens.insert(
+                tokens.end(), _bootstrap_tokens.begin(), _bootstrap_tokens.end());
+        std::sort(it, tokens.end());
+        std::inplace_merge(tokens.begin(), it, tokens.end());
+        tokens.erase(std::unique(tokens.begin(), tokens.end()), tokens.end());
+        return tokens;
+    }
+
+    token_range_description create_description(size_t index, dht::token start, dht::token end) const {
+        token_range_description desc;
+
+        desc.token_range_end = end;
+
+        auto [shard_count, ignore_msb] = _get_sharding_info(end);
+        desc.streams = create_stream_ids(index, start, end, shard_count, ignore_msb);
+        desc.sharding_ignore_msb = ignore_msb;
+
+        return desc;
+    }
+public:
+    topology_description_generator(
+            const std::unordered_set<dht::token>& bootstrap_tokens,
+            const locator::token_metadata_ptr tmptr,
+            // This function must return sharding parameters for a node that owns the vnode ending with
+            // the given token. Returns <shard_count, ignore_msb> pair.
+            const noncopyable_function<std::pair<size_t, uint8_t> (dht::token)>& get_sharding_info)
+        : _bootstrap_tokens(bootstrap_tokens)
+        , _tmptr(std::move(tmptr))
+        , _get_sharding_info(get_sharding_info)
+    {}
+
+    /*
+     * Generate a set of CDC stream identifiers such that for each shard
+     * and vnode pair there exists a stream whose token falls into this vnode
+     * and is owned by this shard. It is sometimes not possible to generate
+     * a CDC stream identifier for some (vnode, shard) pair because not all
+     * shards have to own tokens in a vnode. Small vnode can be totally owned
+     * by a single shard. In such case, a stream identifier that maps to
+     * end of the vnode is generated.
+     *
+     * Then build a cdc::topology_description which maps tokens to generated
+     * stream identifiers, such that if token T is owned by shard S in vnode V,
+     * it gets mapped to the stream identifier generated for (S, V).
+     */
+    // Run in seastar::async context.
+    topology_description generate() const {
+        const auto tokens = get_tokens();
+
+        std::vector<token_range_description> vnode_descriptions;
+        vnode_descriptions.reserve(tokens.size());
+
+        vnode_descriptions.push_back(
+                create_description(0, tokens.back(), tokens.front()));
+        for (size_t idx = 1; idx < tokens.size(); ++idx) {
+            vnode_descriptions.push_back(
+                    create_description(idx, tokens[idx - 1], tokens[idx]));
+        }
+
+        return {std::move(vnode_descriptions)};
+    }
+};
+
 bool should_propose_first_generation(const gms::inet_address& me, const gms::gossiper& g) {
    auto my_host_id = g.get_host_id(me);
-    return g.for_each_endpoint_state_until([&] (const gms::inet_address& node, const gms::endpoint_state& eps) {
-        return stop_iteration(my_host_id < g.get_host_id(node));
-    }) == stop_iteration::no;
+    auto& eps = g.get_endpoint_states();
+    return std::none_of(eps.begin(), eps.end(),
+            [&] (const std::pair<gms::inet_address, gms::endpoint_state>& ep) {
+        return my_host_id < g.get_host_id(ep.first);
+    });
 }

-bool is_cdc_generation_optimal(const cdc::topology_description& gen, const locator::token_metadata& tm) {
-    if (tm.sorted_tokens().size() != gen.entries().size()) {
-        // We probably have garbage streams from old generations
-        cdc_log.info("Generation size does not match the token ring");
-        return false;
-    } else {
-        std::unordered_set<dht::token> gen_ends;
-        for (const auto& entry : gen.entries()) {
-            gen_ends.insert(entry.token_range_end);
-        }
-        for (const auto& metadata_token : tm.sorted_tokens()) {
-            if (!gen_ends.contains(metadata_token)) {
-                cdc_log.warn("CDC generation missing token {}", metadata_token);
-                return false;
-            }
-        }
-        return true;
-    }
-}
-
-static future<utils::chunked_vector<mutation>> get_common_cdc_generation_mutations(
+future<utils::chunked_vector<mutation>> get_cdc_generation_mutations(
        schema_ptr s,
-        const partition_key& pkey,
-        noncopyable_function<clustering_key (dht::token)>&& get_ckey_from_range_end,
+        utils::UUID id,
        const cdc::topology_description& desc,
        size_t mutation_size_threshold,
        api::timestamp_type ts) {
    utils::chunked_vector<mutation> res;
-    res.emplace_back(s, pkey);
+    res.emplace_back(s, partition_key::from_singular(*s, id));
+    res.back().set_static_cell(to_bytes("num_ranges"), int32_t(desc.entries().size()), ts);
    size_t size_estimate = 0;
-    size_t total_size_estimate = 0;
    for (auto& e : desc.entries()) {
        if (size_estimate >= mutation_size_threshold) {
-            total_size_estimate += size_estimate;
-            res.emplace_back(s, pkey);
+            res.emplace_back(s, partition_key::from_singular(*s, id));
            size_estimate = 0;
        }

@@ -237,60 +285,16 @@ static future<utils::chunked_vector<mutation>> get_common_cdc_generation_mutatio
        }

        size_estimate += e.streams.size() * 20;
-        auto ckey = get_ckey_from_range_end(e.token_range_end);
+        auto ckey = clustering_key::from_singular(*s, dht::token::to_int64(e.token_range_end));
        res.back().set_cell(ckey, to_bytes("streams"), make_set_value(db::cdc_streams_set_type, std::move(streams)), ts);
        res.back().set_cell(ckey, to_bytes("ignore_msb"), int8_t(e.sharding_ignore_msb), ts);

        co_await coroutine::maybe_yield();
    }

-    total_size_estimate += size_estimate;
-
-    // Copy mutations n times, where n is picked so that the memory size of all mutations together exceeds `max_command_size`.
-    utils::get_local_injector().inject("cdc_generation_mutations_replication", [&res, total_size_estimate, mutation_size_threshold] {
-        utils::chunked_vector<mutation> new_res;
-
-        size_t number_of_copies = (mutation_size_threshold / total_size_estimate + 1) * 2;
-        for (size_t i = 0; i < number_of_copies; ++i) {
-            std::copy(res.begin(), res.end(), std::back_inserter(new_res));
-        }
-
-        res = std::move(new_res);
-    });
-
    co_return res;
 }

-future<utils::chunked_vector<mutation>> get_cdc_generation_mutations_v2(
-        schema_ptr s,
-        utils::UUID id,
-        const cdc::topology_description& desc,
-        size_t mutation_size_threshold,
-        api::timestamp_type ts) {
-    auto pkey = partition_key::from_singular(*s, id);
-    auto get_ckey = [s] (dht::token range_end) {
-        return clustering_key::from_singular(*s, dht::token::to_int64(range_end));
-    };
-
-    auto res = co_await get_common_cdc_generation_mutations(s, pkey, std::move(get_ckey), desc, mutation_size_threshold, ts);
-    res.back().set_static_cell(to_bytes("num_ranges"), int32_t(desc.entries().size()), ts);
-    co_return res;
-}
-
-future<utils::chunked_vector<mutation>> get_cdc_generation_mutations_v3(
-        schema_ptr s,
-        utils::UUID id,
-        const cdc::topology_description& desc,
-        size_t mutation_size_threshold,
-        api::timestamp_type ts) {
-    auto pkey = partition_key::from_singular(*s, CDC_GENERATIONS_V3_KEY);
-    auto get_ckey = [&] (dht::token range_end) {
-        return clustering_key::from_exploded(*s, {timeuuid_type->decompose(id), long_type->decompose(dht::token::to_int64(range_end))}) ;
-    };
-
-    co_return co_await get_common_cdc_generation_mutations(s, pkey, std::move(get_ckey), desc, mutation_size_threshold, ts);
-}
-
 // non-static for testing
 size_t limit_of_streams_in_topology_description() {
    // Each stream takes 16B and we don't want to exceed 4MB so we can have
@@ -323,47 +327,13 @@ topology_description limit_number_of_streams_if_needed(topology_description&& de
    return topology_description(std::move(entries));
 }

-// Compute a set of tokens that split the token ring into vnodes.
-static auto get_tokens(const std::unordered_set<dht::token>& bootstrap_tokens, const locator::token_metadata_ptr tmptr) {
-    auto tokens = tmptr->sorted_tokens();
-    auto it = tokens.insert(tokens.end(), bootstrap_tokens.begin(), bootstrap_tokens.end());
-    std::sort(it, tokens.end());
-    std::inplace_merge(tokens.begin(), it, tokens.end());
-    tokens.erase(std::unique(tokens.begin(), tokens.end()), tokens.end());
-    return tokens;
-}
-
-static token_range_description create_token_range_description(
-        size_t index,
-        dht::token start,
-        dht::token end,
-        const noncopyable_function<std::pair<size_t, uint8_t> (dht::token)>& get_sharding_info) {
-    token_range_description desc;
-
-    desc.token_range_end = end;
-
-    auto [shard_count, ignore_msb] = get_sharding_info(end);
-    desc.streams = create_stream_ids(index, start, end, shard_count, ignore_msb);
-    desc.sharding_ignore_msb = ignore_msb;
-
-    return desc;
-}
-
-cdc::topology_description make_new_generation_description(
+std::pair<utils::UUID, cdc::topology_description> make_new_generation_data(
        const std::unordered_set<dht::token>& bootstrap_tokens,
        const noncopyable_function<std::pair<size_t, uint8_t>(dht::token)>& get_sharding_info,
        const locator::token_metadata_ptr tmptr) {
-    const auto tokens = get_tokens(bootstrap_tokens, tmptr);
-
-    utils::chunked_vector<token_range_description> vnode_descriptions;
-    vnode_descriptions.reserve(tokens.size());
-
-    vnode_descriptions.push_back(create_token_range_description(0, tokens.back(), tokens.front(), get_sharding_info));
-    for (size_t idx = 1; idx < tokens.size(); ++idx) {
-        vnode_descriptions.push_back(create_token_range_description(idx, tokens[idx - 1], tokens[idx], get_sharding_info));
-    }
-
-    return {std::move(vnode_descriptions)};
+    auto gen = topology_description_generator(bootstrap_tokens, tmptr, get_sharding_info).generate();
+    auto uuid = utils::make_random_uuid();
+    return {uuid, std::move(gen)};
 }

 db_clock::time_point new_generation_timestamp(bool add_delay, std::chrono::milliseconds ring_delay) {
@@ -395,9 +365,7 @@ future<cdc::generation_id> generation_service::legacy_make_new_generation(const
            return {sc > 0 ? sc : 1, get_sharding_ignore_msb(*endpoint, _gossiper)};
        }
    };
-
-    auto uuid = utils::make_random_uuid();
-    auto gen = make_new_generation_description(bootstrap_tokens, get_sharding_info, tmptr);
+    auto [uuid, gen] = make_new_generation_data(bootstrap_tokens, get_sharding_info, tmptr);

    // Our caller should ensure that there are normal tokens in the token ring.
    auto normal_token_owners = tmptr->count_normal_token_owners();
@@ -451,12 +419,8 @@ future<cdc::generation_id> generation_service::legacy_make_new_generation(const
 * but if the cluster already supports CDC, then every newly joining node will propose a new CDC generation,
 * which means it will gossip the generation's timestamp.
 */
-static std::optional<cdc::generation_id> get_generation_id_for(const gms::inet_address& endpoint, const gms::endpoint_state& eps) {
-    const auto* gen_id_ptr = eps.get_application_state_ptr(gms::application_state::CDC_GENERATION_ID);
-    if (!gen_id_ptr) {
-        return std::nullopt;
-    }
-    auto gen_id_string = gen_id_ptr->value();
+static std::optional<cdc::generation_id> get_generation_id_for(const gms::inet_address& endpoint, const gms::gossiper& g) {
+    auto gen_id_string = g.get_application_state_value(endpoint, gms::application_state::CDC_GENERATION_ID);
    cdc_log.trace("endpoint={}, gen_id_string={}", endpoint, gen_id_string);
    return gms::versioned_value::cdc_generation_id_from_string(gen_id_string);
 }
@@ -660,21 +624,21 @@ future<> generation_service::maybe_rewrite_streams_descriptions() {

    // For each CDC log table get the TTL setting (from CDC options) and the table's creation time
    std::vector<time_and_ttl> times_and_ttls;
-    _db.get_tables_metadata().for_each_table([&] (table_id, lw_shared_ptr<replica::table> t) {
-        auto& s = *t->schema();
+    for (auto& [_, cf] : _db.get_column_families()) {
+        auto& s = *cf->schema();
        auto base = cdc::get_base_table(_db, s.ks_name(), s.cf_name());
        if (!base) {
            // Not a CDC log table.
-            return;
+            continue;
        }
        auto& cdc_opts = base->cdc_options();
        if (!cdc_opts.enabled()) {
            // This table is named like a CDC log table but it's not one.
-            return;
+            continue;
        }

        times_and_ttls.push_back(time_and_ttl{as_timepoint(s.id().uuid()), cdc_opts.ttl()});
-    });
+    }

    if (times_and_ttls.empty()) {
        // There's no point in rewriting old generations' streams (they don't contain any data).
@@ -762,8 +726,8 @@ future<> generation_service::stop() {
        cdc_log.error("CDC stream rewrite failed: ", std::current_exception());
    }

-    if (_joined && (this_shard_id() == 0)) {
-        co_await leave_ring();
+    if (this_shard_id() == 0) {
+        co_await _gossiper.unregister_(shared_from_this());
    }

    _stopped = true;
@@ -775,6 +739,7 @@ generation_service::~generation_service() {

 future<> generation_service::after_join(std::optional<cdc::generation_id>&& startup_gen_id) {
    assert_shard_zero(__PRETTY_FUNCTION__);
+    assert(_sys_ks.local().bootstrap_complete());

    _gen_id = std::move(startup_gen_id);
    _gossiper.register_(shared_from_this());
@@ -792,24 +757,18 @@ future<> generation_service::after_join(std::optional<cdc::generation_id>&& star
    _cdc_streams_rewrite_complete = maybe_rewrite_streams_descriptions();
 }

-future<> generation_service::leave_ring() {
-    assert_shard_zero(__PRETTY_FUNCTION__);
-    _joined = false;
-    co_await _gossiper.unregister_(shared_from_this());
-}
-
-future<> generation_service::on_join(gms::inet_address ep, gms::endpoint_state_ptr ep_state, gms::permit_id pid) {
+future<> generation_service::on_join(gms::inet_address ep, gms::endpoint_state ep_state) {
    assert_shard_zero(__PRETTY_FUNCTION__);

-    auto val = ep_state->get_application_state_ptr(gms::application_state::CDC_GENERATION_ID);
+    auto val = ep_state.get_application_state_ptr(gms::application_state::CDC_GENERATION_ID);
    if (!val) {
        return make_ready_future();
    }

-    return on_change(ep, gms::application_state::CDC_GENERATION_ID, *val, pid);
+    return on_change(ep, gms::application_state::CDC_GENERATION_ID, *val);
 }

-future<> generation_service::on_change(gms::inet_address ep, gms::application_state app_state, const gms::versioned_value& v, gms::permit_id) {
+future<> generation_service::on_change(gms::inet_address ep, gms::application_state app_state, const gms::versioned_value& v) {
    assert_shard_zero(__PRETTY_FUNCTION__);

    if (app_state != gms::application_state::CDC_GENERATION_ID) {
@@ -829,21 +788,22 @@ future<> generation_service::check_and_repair_cdc_streams() {
    }

    std::optional<cdc::generation_id> latest = _gen_id;
-    _gossiper.for_each_endpoint_state([&] (const gms::inet_address& addr, const gms::endpoint_state& state) {
+    const auto& endpoint_states = _gossiper.get_endpoint_states();
+    for (const auto& [addr, state] : endpoint_states) {
        if (_gossiper.is_left(addr)) {
            cdc_log.info("check_and_repair_cdc_streams ignored node {} because it is in LEFT state", addr);
-            return;
+            continue;
        }
        if (!_gossiper.is_normal(addr)) {
            throw std::runtime_error(format("All nodes must be in NORMAL or LEFT state while performing check_and_repair_cdc_streams"
                    " ({} is in state {})", addr, _gossiper.get_gossip_status(state)));
        }

-        const auto gen_id = get_generation_id_for(addr, state);
+        const auto gen_id = get_generation_id_for(addr, _gossiper);
        if (!latest || (gen_id && get_ts(*gen_id) > get_ts(*latest))) {
            latest = gen_id;
        }
-    });
+    }

    auto tmptr = _token_metadata.get();
    auto sys_dist_ks = get_sys_dist_ks();
@@ -898,9 +858,24 @@ future<> generation_service::check_and_repair_cdc_streams() {
                " even though some node gossiped about it.",
                latest, db_clock::now());
            should_regenerate = true;
-        } else if (!is_cdc_generation_optimal(*gen, *tmptr)) {
-            should_regenerate = true;
-            cdc_log.info("CDC generation {} needs repair, regenerating", latest);
+        } else {
+            if (tmptr->sorted_tokens().size() != gen->entries().size()) {
+                // We probably have garbage streams from old generations
+                cdc_log.info("Generation size does not match the token ring, regenerating");
+                should_regenerate = true;
+            } else {
+                std::unordered_set<dht::token> gen_ends;
+                for (const auto& entry : gen->entries()) {
+                    gen_ends.insert(entry.token_range_end);
+                }
+                for (const auto& metadata_token : tmptr->sorted_tokens()) {
+                    if (!gen_ends.contains(metadata_token)) {
+                        cdc_log.warn("CDC generation {} missing token {}. Regenerating.", latest, metadata_token);
+                        should_regenerate = true;
+                        break;
+                    }
+                }
+            }
        }
    }

@@ -960,13 +935,17 @@ future<> generation_service::legacy_handle_cdc_generation(std::optional<cdc::gen
        co_return;
    }

-    if (!_sys_dist_ks.local_is_initialized() || !_sys_dist_ks.local().started()) {
-        on_internal_error(cdc_log, "Legacy handle CDC generation with sys.dist.ks. down");
+    if (!_sys_ks.local().bootstrap_complete() || !_sys_dist_ks.local_is_initialized()
+            || !_sys_dist_ks.local().started()) {
+        // The service should not be listening for generation changes until after the node
+        // is bootstrapped. Therefore we would previously assume that this condition
+        // can never become true and call on_internal_error here, but it turns out that
+        // it may become true on decommission: the node enters NEEDS_BOOTSTRAP
+        // state before leaving the token ring, so bootstrap_complete() becomes false.
+        // In that case we can simply return.
+        co_return;
    }

-    // The service should not be listening for generation changes until after the node
-    // is bootstrapped and since the node leaves the ring on decommission
-
    if (co_await container().map_reduce(and_reducer(), [ts = get_ts(*gen_id)] (generation_service& svc) {
        return !svc._cdc_metadata.prepare(ts);
    })) {
@@ -1029,12 +1008,12 @@ future<> generation_service::legacy_scan_cdc_generations() {
    assert_shard_zero(__PRETTY_FUNCTION__);

    std::optional<cdc::generation_id> latest;
-    _gossiper.for_each_endpoint_state([&] (const gms::inet_address& node, const gms::endpoint_state& eps) {
-        auto gen_id = get_generation_id_for(node, eps);
+    for (const auto& ep: _gossiper.get_endpoint_states()) {
+        auto gen_id = get_generation_id_for(ep.first, _gossiper);
        if (!latest || (gen_id && get_ts(*gen_id) > get_ts(*latest))) {
            latest = gen_id;
        }
-    });
+    }

    if (latest) {
        cdc_log.info("Latest generation seen during startup: {}", *latest);
@@ -1111,8 +1090,19 @@ shared_ptr<db::system_distributed_keyspace> generation_service::get_sys_dist_ks(
    return _sys_dist_ks.local_shared();
 }

+std::ostream& operator<<(std::ostream& os, const generation_id& gen_id) { // streams the active alternative: v1 as "ts", v2 as "(ts, id)"
+    std::visit(make_visitor(
+    [&os] (const generation_id_v1& id) { os << id.ts; },
+    [&os] (const generation_id_v2& id) { os << "(" << id.ts << ", " << id.id << ")"; }
+    ), gen_id);
+    return os; // return the stream so insertions can be chained
+}
+
 db_clock::time_point get_ts(const generation_id& gen_id) {
-    return std::visit([] (auto& id) { return id.ts; }, gen_id);
+    return std::visit(make_visitor(
+    [] (const generation_id_v1& id) { return id.ts; },
+    [] (const generation_id_v2& id) { return id.ts; }
+    ), gen_id);
 }

 } // namespace cdc
--- a/cdc/generation.hh
+++ b/cdc/generation.hh
@@ -92,13 +92,13 @@ struct token_range_description {
 * in the `_entries` vector. See the comment above `token_range_description` for explanation.
 */
 class topology_description {
-    utils::chunked_vector<token_range_description> _entries;
+    std::vector<token_range_description> _entries;
 public:
-    topology_description(utils::chunked_vector<token_range_description> entries);
+    topology_description(std::vector<token_range_description> entries);
    bool operator==(const topology_description&) const;

-    const utils::chunked_vector<token_range_description>& entries() const&;
-    utils::chunked_vector<token_range_description>&& entries() &&;
+    const std::vector<token_range_description>& entries() const&;
+    std::vector<token_range_description>&& entries() &&;
 };

 /**
@@ -133,28 +133,7 @@ public:
 */
 bool should_propose_first_generation(const gms::inet_address& me, const gms::gossiper&);

-/*
- * Checks if the CDC generation is optimal, which is true if its `topology_description` is consistent
- * with `token_metadata`.
-*/
-bool is_cdc_generation_optimal(const cdc::topology_description& gen, const locator::token_metadata& tm);
-
-/*
- * Generate a set of CDC stream identifiers such that for each shard
- * and vnode pair there exists a stream whose token falls into this vnode
- * and is owned by this shard. It is sometimes not possible to generate
- * a CDC stream identifier for some (vnode, shard) pair because not all
- * shards have to own tokens in a vnode. Small vnode can be totally owned
- * by a single shard. In such case, a stream identifier that maps to
- * end of the vnode is generated.
- *
- * Then build a cdc::topology_description which maps tokens to generated
- * stream identifiers, such that if token T is owned by shard S in vnode V,
- * it gets mapped to the stream identifier generated for (S, V).
- *
- * Run in seastar::async context.
- */
-cdc::topology_description make_new_generation_description(
+std::pair<utils::UUID, cdc::topology_description> make_new_generation_data(
    const std::unordered_set<dht::token>& bootstrap_tokens,
    const noncopyable_function<std::pair<size_t, uint8_t> (dht::token)>& get_sharding_info,
    const locator::token_metadata_ptr);
@@ -165,20 +144,9 @@ db_clock::time_point new_generation_timestamp(bool add_delay, std::chrono::milli
 // using `mutation_size_threshold` to decide on the mutation sizes. The partition key of each mutation
 // is given by `gen_uuid`. The timestamp of each cell in each mutation is given by `mutation_timestamp`.
 //
-// Works only for the CDC_GENERATIONS_V2 schema (in system_distributed keyspace).
-future<utils::chunked_vector<mutation>> get_cdc_generation_mutations_v2(
-    schema_ptr, utils::UUID gen_uuid, const cdc::topology_description&,
-    size_t mutation_size_threshold, api::timestamp_type mutation_timestamp);
-
-// The partition key of all rows in the single-partition CDC_GENERATIONS_V3 schema (in system keyspace).
-static constexpr auto CDC_GENERATIONS_V3_KEY = "cdc_generations";
-
-// Translates the CDC generation data given by a `cdc::topology_description` into a vector of mutations,
-// using `mutation_size_threshold` to decide on the mutation sizes. The first clustering key column is
-// given by `gen_uuid`. The timestamp of each cell in each mutation is given by `mutation_timestamp`.
-//
-// Works only for the CDC_GENERATIONS_V3 schema (in system keyspace).
-future<utils::chunked_vector<mutation>> get_cdc_generation_mutations_v3(
+// Works for only specific schemas: CDC_GENERATIONS_V2 (in system_distributed_keyspace)
+// and CDC_GENERATIONS_V3 (in system_keyspace).
+future<utils::chunked_vector<mutation>> get_cdc_generation_mutations(
    schema_ptr, utils::UUID gen_uuid, const cdc::topology_description&,
    size_t mutation_size_threshold, api::timestamp_type mutation_timestamp);

--- a/cdc/generation_id.hh
+++ b/cdc/generation_id.hh
@@ -28,35 +28,7 @@ struct generation_id_v2 {

 using generation_id = std::variant<generation_id_v1, generation_id_v2>;

+std::ostream& operator<<(std::ostream&, const generation_id&);
 db_clock::time_point get_ts(const generation_id&);

 } // namespace cdc
-
-template <>
-struct fmt::formatter<cdc::generation_id_v1> {
-    constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
-    template <typename FormatContext>
-    auto format(const cdc::generation_id_v1& gen_id, FormatContext& ctx) const {
-        return fmt::format_to(ctx.out(), "{}", gen_id.ts);
-    }
-};
-
-template <>
-struct fmt::formatter<cdc::generation_id_v2> {
-    constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
-    template <typename FormatContext>
-    auto format(const cdc::generation_id_v2& gen_id, FormatContext& ctx) const {
-        return fmt::format_to(ctx.out(), "({}, {})", gen_id.ts, gen_id.id);
-    }
-};
-
-template <>
-struct fmt::formatter<cdc::generation_id> {
-    constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
-    template <typename FormatContext>
-    auto format(const cdc::generation_id& gen_id, FormatContext& ctx) const {
-        return std::visit([&ctx] (auto& id) {
-            return fmt::format_to(ctx.out(), "{}", id);
-        }, gen_id);
-    }
-};
--- a/cdc/generation_service.hh
+++ b/cdc/generation_service.hh
@@ -98,20 +98,19 @@ public:
     * Must be called on shard 0 - that's where the generation management happens.
     */
    future<> after_join(std::optional<cdc::generation_id>&& startup_gen_id);
-    future<> leave_ring();

    cdc::metadata& get_cdc_metadata() {
        return _cdc_metadata;
    }

-    virtual future<> before_change(gms::inet_address, gms::endpoint_state_ptr, gms::application_state, const gms::versioned_value&) override { return make_ready_future(); }
-    virtual future<> on_alive(gms::inet_address, gms::endpoint_state_ptr, gms::permit_id) override { return make_ready_future(); }
-    virtual future<> on_dead(gms::inet_address, gms::endpoint_state_ptr, gms::permit_id) override { return make_ready_future(); }
-    virtual future<> on_remove(gms::inet_address, gms::permit_id) override { return make_ready_future(); }
-    virtual future<> on_restart(gms::inet_address, gms::endpoint_state_ptr, gms::permit_id) override { return make_ready_future(); }
+    virtual future<> before_change(gms::inet_address, gms::endpoint_state, gms::application_state, const gms::versioned_value&) override { return make_ready_future(); }
+    virtual future<> on_alive(gms::inet_address, gms::endpoint_state) override { return make_ready_future(); }
+    virtual future<> on_dead(gms::inet_address, gms::endpoint_state) override { return make_ready_future(); }
+    virtual future<> on_remove(gms::inet_address) override { return make_ready_future(); }
+    virtual future<> on_restart(gms::inet_address, gms::endpoint_state) override { return make_ready_future(); }

-    virtual future<> on_join(gms::inet_address, gms::endpoint_state_ptr, gms::permit_id) override;
-    virtual future<> on_change(gms::inet_address, gms::application_state, const gms::versioned_value&, gms::permit_id) override;
+    virtual future<> on_join(gms::inet_address, gms::endpoint_state) override;
+    virtual future<> on_change(gms::inet_address, gms::application_state, const gms::versioned_value&) override;

    future<> check_and_repair_cdc_streams();

--- a/cdc/log.cc
+++ b/cdc/log.cc
@@ -160,7 +160,7 @@ public:
        });
    }

-    void on_before_create_column_family(const keyspace_metadata& ksm, const schema& schema, std::vector<mutation>& mutations, api::timestamp_type timestamp) override {
+    void on_before_create_column_family(const schema& schema, std::vector<mutation>& mutations, api::timestamp_type timestamp) override {
        if (schema.cdc_options().enabled()) {
            auto& db = _ctxt._proxy.get_db().local();
            auto logname = log_name(schema.cf_name());
@@ -395,6 +395,9 @@ bool cdc::options::operator==(const options& o) const {
    return enabled() == o.enabled() && _preimage == o._preimage && _postimage == o._postimage && _ttl == o._ttl
            && _delta_mode == o._delta_mode;
 }
+bool cdc::options::operator!=(const options& o) const {
+    return !(*this == o); // defined purely as the negation of operator==
+}

 namespace cdc {

@@ -632,6 +635,9 @@ public:
    bool operator==(const collection_iterator& x) const {
        return _v == x._v;
    }
+    bool operator!=(const collection_iterator& x) const {
+        return !(*this == x); // defined purely as the negation of operator==
+    }
 private:
    void next() {
        --_rem;
--- a/cdc/metadata.cc
+++ b/cdc/metadata.cc
@@ -40,7 +40,7 @@ static cdc::stream_id get_stream(

 // non-static for testing
 cdc::stream_id get_stream(
-        const utils::chunked_vector<cdc::token_range_description>& entries,
+        const std::vector<cdc::token_range_description>& entries,
        dht::token tok) {
    if (entries.empty()) {
        on_internal_error(cdc_log, "get_stream: entries empty");
--- a/cdc/split.cc
+++ b/cdc/split.cc
@@ -389,7 +389,7 @@ struct extract_changes_visitor {
    }

    void partition_delete(const tombstone& t) {
-        _result[t.timestamp].partition_deletions = partition_deletion{t};
+        _result[t.timestamp].partition_deletions = {t};
    }

    constexpr bool finished() const { return false; }
--- a/cell_locking.hh
+++ b/cell_locking.hh
@@ -93,6 +93,9 @@ public:
        bool operator==(const iterator& other) const {
            return _position == other._position;
        }
+        bool operator!=(const iterator& other) const {
+            return !(*this == other); // defined purely as the negation of operator==
+        }
    };
 public:
    explicit partition_cells_range(const mutation_partition& mp) : _mp(mp) { }
--- a/checked-file-impl.hh
+++ b/checked-file-impl.hh
@@ -21,27 +21,27 @@ public:
            : file_impl(*get_file_impl(f)),  _error_handler(error_handler), _file(f) {
    }

-    virtual future<size_t> write_dma(uint64_t pos, const void* buffer, size_t len, io_intent* intent) override {
+    virtual future<size_t> write_dma(uint64_t pos, const void* buffer, size_t len, const io_priority_class& pc) override {
        return do_io_check(_error_handler, [&] {
-            return get_file_impl(_file)->write_dma(pos, buffer, len, intent);
+            return get_file_impl(_file)->write_dma(pos, buffer, len, pc);
        });
    }

-    virtual future<size_t> write_dma(uint64_t pos, std::vector<iovec> iov, io_intent* intent) override {
+    virtual future<size_t> write_dma(uint64_t pos, std::vector<iovec> iov, const io_priority_class& pc) override {
        return do_io_check(_error_handler, [&] {
-            return get_file_impl(_file)->write_dma(pos, iov, intent);
+            return get_file_impl(_file)->write_dma(pos, iov, pc);
        });
    }

-    virtual future<size_t> read_dma(uint64_t pos, void* buffer, size_t len, io_intent* intent) override {
+    virtual future<size_t> read_dma(uint64_t pos, void* buffer, size_t len, const io_priority_class& pc) override {
        return do_io_check(_error_handler, [&] {
-            return get_file_impl(_file)->read_dma(pos, buffer, len, intent);
+            return get_file_impl(_file)->read_dma(pos, buffer, len, pc);
        });
    }

-    virtual future<size_t> read_dma(uint64_t pos, std::vector<iovec> iov, io_intent* intent) override {
+    virtual future<size_t> read_dma(uint64_t pos, std::vector<iovec> iov, const io_priority_class& pc) override {
        return do_io_check(_error_handler, [&] {
-            return get_file_impl(_file)->read_dma(pos, iov, intent);
+            return get_file_impl(_file)->read_dma(pos, iov, pc);
        });
    }

@@ -99,9 +99,9 @@ public:
        });
    }

-    virtual future<temporary_buffer<uint8_t>> dma_read_bulk(uint64_t offset, size_t range_size, io_intent* intent) override {
+    virtual future<temporary_buffer<uint8_t>> dma_read_bulk(uint64_t offset, size_t range_size, const io_priority_class& pc) override {
        return do_io_check(_error_handler, [&] {
-            return get_file_impl(_file)->dma_read_bulk(offset, range_size, intent);
+            return get_file_impl(_file)->dma_read_bulk(offset, range_size, pc);
        });
    }
 private:
--- a/clocks-impl.cc
+++ b/clocks-impl.cc
@@ -15,6 +15,12 @@

 std::atomic<int64_t> clocks_offset;

+std::ostream& operator<<(std::ostream& os, db_clock::time_point tp) { // prints tp using the "%Y/%m/%d %T" layout
+    auto t = db_clock::to_time_t(tp);
+    ::tm t_buf; // caller-provided storage that gmtime_r fills in
+    return os << std::put_time(::gmtime_r(&t, &t_buf), "%Y/%m/%d %T"); // gmtime_r: broken-down time in UTC, no shared static buffer
+}
+
 std::string format_timestamp(api::timestamp_type ts) {
    auto t = std::time_t(std::chrono::duration_cast<std::chrono::seconds>(api::timestamp_clock::duration(ts)).count());
    ::tm t_buf;
--- a/clustering_interval_set.hh
+++ b/clustering_interval_set.hh
@@ -75,7 +75,8 @@ public:
            const interval::interval_type& iv = *_i;
            return position_range{iv.lower().position(), iv.upper().position()};
        }
-        bool operator==(const position_range_iterator& other) const = default;
+        bool operator==(const position_range_iterator& other) const { return _i == other._i; }
+        bool operator!=(const position_range_iterator& other) const { return _i != other._i; }
        position_range_iterator& operator++() {
            ++_i;
            return *this;
--- a/cmake/Findrapidxml.cmake
+++ b/cmake/Findrapidxml.cmake
@@ -1,27 +0,0 @@
-#
-# Copyright 2023-present ScyllaDB
-#
-
-#
-# SPDX-License-Identifier: AGPL-3.0-or-later
-#
-find_path(rapidxml_INCLUDE_DIR
-  NAMES rapidxml.h rapidxml/rapidxml.hpp)
-
-mark_as_advanced(
-  rapidxml_INCLUDE_DIR)
-
-include(FindPackageHandleStandardArgs)
-
-find_package_handle_standard_args(rapidxml
-  REQUIRED_VARS
-    rapidxml_INCLUDE_DIR)
-
-if(rapidxml_FOUND)
-  if(NOT TARGET rapidxml::rapidxml)
-    add_library(rapidxml::rapidxml INTERFACE IMPORTED)
-    set_target_properties(rapidxml::rapidxml
-      PROPERTIES
-        INTERFACE_INCLUDE_DIRECTORIES ${rapidxml_INCLUDE_DIR})
-  endif()
-endif()
--- a/cmake/add_version_library.cmake
+++ b/cmake/add_version_library.cmake
@@ -1,31 +1,20 @@
 ###
 ### Generate version file and supply appropriate compile definitions for release.cc
 ###
-function(generate_scylla_version)
+function(add_version_library name source)
  set(version_file ${CMAKE_CURRENT_BINARY_DIR}/SCYLLA-VERSION-FILE)
  set(release_file ${CMAKE_CURRENT_BINARY_DIR}/SCYLLA-RELEASE-FILE)
-  set(product_file ${CMAKE_CURRENT_BINARY_DIR}/SCYLLA-PRODUCT-FILE)
  execute_process(
    COMMAND ${CMAKE_SOURCE_DIR}/SCYLLA-VERSION-GEN --output-dir "${CMAKE_CURRENT_BINARY_DIR}"
    WORKING_DIRECTORY ${CMAKE_SOURCE_DIR})
-
  file(STRINGS ${version_file} scylla_version)
  file(STRINGS ${release_file} scylla_release)
-  file(STRINGS ${product_file} scylla_product)

-  string(REPLACE "-" "~" scylla_version_tilde ${scylla_version})
-
-  set(Scylla_VERSION "${scylla_version_tilde}" CACHE INTERNAL "")
-  set(Scylla_RELEASE "${scylla_release}" CACHE INTERNAL "")
-  set(Scylla_PRODUCT "${scylla_product}" CACHE INTERNAL "")
-endfunction(generate_scylla_version)
-
-function(add_version_library name source)
  add_library(${name} OBJECT ${source})
  target_compile_definitions(${name}
    PRIVATE
-      SCYLLA_VERSION=\"${Scylla_VERSION}\"
-      SCYLLA_RELEASE=\"${Scylla_RELEASE}\")
+      SCYLLA_VERSION=\"${scylla_version}\"
+      SCYLLA_RELEASE=\"${scylla_release}\")
  target_link_libraries(${name}
    PRIVATE
      Seastar::seastar)
--- a/cmake/add_whole_archive.cmake
+++ b/cmake/add_whole_archive.cmake
@@ -5,6 +5,15 @@
 # actually compiling a sample program.
 function(add_whole_archive name library)
   add_library(${name} INTERFACE)
-  target_link_libraries(${name} INTERFACE
-    "$<LINK_LIBRARY:WHOLE_ARCHIVE,${library}>")
+  if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.24) # the LINK_LIBRARY generator expression needs CMake >= 3.24
+    target_link_libraries(${name} INTERFACE
+      "$<LINK_LIBRARY:WHOLE_ARCHIVE,${library}>")
+  else() # older CMake: spell out the whole-archive linker flags by hand
+    add_dependencies(${name} ${library})
+    target_include_directories(${name} INTERFACE
+      ${CMAKE_SOURCE_DIR})
+    target_link_options(${name} INTERFACE # attach to the wrapper target being defined, not a hard-coded one
+      "$<$<CXX_COMPILER_ID:Clang>:SHELL:LINKER:-force_load $<TARGET_LINKER_FILE:${library}>>"
+      "$<$<CXX_COMPILER_ID:GNU>:SHELL:LINKER:--whole-archive $<TARGET_LINKER_FILE:${library}> LINKER:--no-whole-archive>")
+  endif()
 endfunction()
--- a/cmake/build_submodule.cmake
+++ b/cmake/build_submodule.cmake
@@ -1,50 +0,0 @@
-function(build_submodule name dir)
-  cmake_parse_arguments(parsed_args "NOARCH" "" "" ${ARGN})
-  set(version_release "${Scylla_VERSION}-${Scylla_RELEASE}")
-  set(product_version_release
-    "${Scylla_PRODUCT}-${Scylla_VERSION}-${Scylla_RELEASE}")
-  set(working_dir ${CMAKE_CURRENT_SOURCE_DIR}/${dir})
-  if(parsed_args_NOARCH)
-    set(arch "noarch")
-  else()
-    set(arch "${CMAKE_SYSTEM_PROCESSOR}")
-  endif()
-  set(reloc_args ${parsed_args_UNPARSED_ARGUMENTS})
-  set(reloc_pkg "${working_dir}/build/${Scylla_PRODUCT}-${name}-${version_release}.${arch}.tar.gz")
-  add_custom_command(
-    OUTPUT ${reloc_pkg}
-    COMMAND reloc/build_reloc.sh --version ${product_version_release} --nodeps ${reloc_args}
-    WORKING_DIRECTORY "${working_dir}"
-    JOB_POOL submodule_pool)
-  add_custom_target(dist-${name}-tar
-    DEPENDS ${reloc_pkg})
-  add_custom_target(dist-${name}-rpm
-    COMMAND reloc/build_rpm.sh --reloc-pkg ${reloc_pkg}
-    DEPENDS ${reloc_pkg}
-    WORKING_DIRECTORY "${working_dir}")
-  add_custom_target(dist-${name}-deb
-    COMMAND reloc/build_deb.sh --reloc-pkg ${reloc_pkg}
-    DEPENDS ${reloc_pkg}
-    WORKING_DIRECTORY "${working_dir}")
-  add_custom_target(dist-${name}
-    DEPENDS dist-${name}-tar dist-${name}-rpm dist-${name}-deb)
-endfunction()
-
-macro(dist_submodule name dir pkgs)
-  # defined as a macro, so that we can append the path to the dist tarball to
-  # specfied "pkgs"
-  cmake_parse_arguments(parsed_args "NOARCH" "" "" ${ARGN})
-  if(parsed_args_NOARCH)
-    set(arch "noarch")
-  else()
-    set(arch "${CMAKE_SYSTEM_PROCESSOR}")
-  endif()
-  set(pkg_name "${Scylla_PRODUCT}-${name}-${Scylla_VERSION}-${Scylla_RELEASE}.${arch}.tar.gz")
-  set(reloc_pkg "${CMAKE_SOURCE_DIR}/tools/${dir}/build/${pkg_name}")
-  set(dist_pkg "${CMAKE_CURRENT_BINARY_DIR}/${pkg_name}")
-  add_custom_command(
-    OUTPUT ${dist_pkg}
-    COMMAND ${CMAKE_COMMAND} -E copy ${reloc_pkg} ${dist_pkg}
-    DEPENDS dist-${name}-tar)
-  list(APPEND ${pkgs} "${dist_pkg}")
-endmacro()
--- a/cmake/generate_cql_grammar.cmake
+++ b/cmake/generate_cql_grammar.cmake
@@ -1,5 +1,7 @@
-find_program (ANTLR3 antlr3
-  REQUIRED)
+find_program (ANTLR3 antlr3)
+if(NOT ANTLR3)
+  message(FATAL_ERROR "antlr3 is required")  # FATAL_ERROR (not FATAL) is the mode that aborts configuration
+endif()

 # Parse antlr3 grammar files and generate C++ sources
 function(generate_cql_grammar)
--- a/cmake/mode.COVERAGE.cmake
+++ b/cmake/mode.COVERAGE.cmake
@@ -1,23 +0,0 @@
-set(Seastar_OptimizationLevel_COVERAGE "g")
-set(CMAKE_CXX_FLAGS_COVERAGE
-  ""
-  CACHE
-  INTERNAL
-  "")
-string(APPEND CMAKE_CXX_FLAGS_COVERAGE
-  " -O${Seastar_OptimizationLevel_SANITIZE}")
-
-set(Seastar_DEFINITIONS_COVERAGE
-  SCYLLA_BUILD_MODE=debug
-  DEBUG
-  SANITIZE
-  DEBUG_LSA_SANITIZER
-  SCYLLA_ENABLE_ERROR_INJECTION)
-
-set(CMAKE_CXX_FLAGS_COVERAGE
-  " -O${Seastar_OptimizationLevel_COVERAGE} -fprofile-instr-generate -fcoverage-mapping -g -gz")
-
-set(CMAKE_STATIC_LINKER_FLAGS_COVERAGE
-  "-fprofile-instr-generate -fcoverage-mapping")
-
-set(stack_usage_threshold_in_KB 40)
--- a/cmake/mode.RELEASE.cmake
+++ b/cmake/mode.RELEASE.cmake
@@ -12,15 +12,16 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64|aarch64")
 else()
  set(clang_inline_threshold 2500)
 endif()
-add_compile_options(
-  "$<$<CXX_COMPILER_ID:GNU>:--param;inline-unit-growth=300>"
-  "$<$<CXX_COMPILER_ID:Clang>:-mllvm;-inline-threshold=${clang_inline_threshold}>"
+string(APPEND CMAKE_CXX_FLAGS_RELEASE
+  " $<$<CXX_COMPILER_ID:GNU>:--param inline-unit-growth=300>"
+  " $<$<CXX_COMPILER_ID:Clang>:-mllvm -inline-threshold=${clang_inline_threshold}>"
  # clang generates 16-byte loads that break store-to-load forwarding
  # gcc also has some trouble: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103554
-  "-fno-slp-vectorize")
-set(Seastar_DEFINITIONS_RELEASE
+  " -fno-slp-vectorize")
+set(Seastar_DEFINITIONS_RELEASE
  SCYLLA_BUILD_MODE=release)

-add_link_options("LINKER:--gc-sections")
+set(CMAKE_STATIC_LINKER_FLAGS_RELEASE
+  "-Wl,--gc-sections")

 set(stack_usage_threshold_in_KB 13)
--- a/cmake/mode.SANITIZE.cmake
+++ b/cmake/mode.SANITIZE.cmake
@@ -1,17 +0,0 @@
-set(Seastar_OptimizationLevel_SANITIZE "s")
-set(CMAKE_CXX_FLAGS_SANITIZE
-  ""
-  CACHE
-  INTERNAL
-  "")
-string(APPEND CMAKE_CXX_FLAGS_SANITIZE
-  " -O${Seastar_OptimizationLevel_SANITIZE}")
-
-set(Seastar_DEFINITIONS_SANITIZE
-  SCYLLA_BUILD_MODE=sanitize
-  DEBUG
-  SANITIZE
-  DEBUG_LSA_SANITIZER
-  SCYLLA_ENABLE_ERROR_INJECTION)
-
-set(stack_usage_threshold_in_KB 50)
--- a/cmake/mode.common.cmake
+++ b/cmake/mode.common.cmake
@@ -1,7 +1,9 @@
 set(disabled_warnings
  c++11-narrowing
  mismatched-tags
+  missing-braces
  overloaded-virtual
+  parentheses-equality
  unsupported-friend)
 include(CheckCXXCompilerFlag)
 foreach(warning ${disabled_warnings})
@@ -11,117 +13,27 @@ foreach(warning ${disabled_warnings})
  endif()
 endforeach()
 list(TRANSFORM _supported_warnings PREPEND "-Wno-")
-add_compile_options(
-  "-Wall"
-  "-Werror"
-  "-Wno-error=deprecated-declarations"
-  "-Wimplicit-fallthrough"
-  ${_supported_warnings})
+string(JOIN " " CMAKE_CXX_FLAGS "-Wall" "-Werror" ${_supported_warnings})

 function(default_target_arch arch)
  set(x86_instruction_sets i386 i686 x86_64)
  if(CMAKE_SYSTEM_PROCESSOR IN_LIST x86_instruction_sets)
    set(${arch} "westmere" PARENT_SCOPE)
-  elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
-    # we always use intrinsics like vmull.p64 for speeding up crc32 calculations
-    # on the aarch64 architectures, and they require the crypto extension, so
-    # we have to add "+crypto" in the architecture flags passed to -march. the
-    # same applies to crc32 instructions, which need the ARMv8-A CRC32 extension
-    # please note, Seastar also sets -march when compiled with DPDK enabled.
+  elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")  # string comparison; EQUAL only compares numbers
    set(${arch} "armv8-a+crc+crypto" PARENT_SCOPE)
  else()
    set(${arch} "" PARENT_SCOPE)
  endif()
 endfunction()

-function(pad_at_begin output fill str length)
-  # pad the given `${str} with `${fill}`, right aligned. with the syntax of
-  # fmtlib:
-  #   fmt::print("{:#>{}}", str, length)
-  # where `#` is the `${fill}` char
-  string(LENGTH "${str}" str_len)
-  math(EXPR padding_len "${length} - ${str_len}")
-  if(padding_len GREATER 0)
-    string(REPEAT ${fill} ${padding_len} padding)
-  endif()
-  set(${output} "${padding}${str}" PARENT_SCOPE)
-endfunction()
-
-# The relocatable package includes its own dynamic linker. We don't
-# know the path it will be installed to, so for now use a very long
-# path so that patchelf doesn't need to edit the program headers.  The
-# kernel imposes a limit of 4096 bytes including the null. The other
-# constraint is that the build-id has to be in the first page, so we
-# can't use all 4096 bytes for the dynamic linker.
-# In here we just guess that 2000 extra / should be enough to cover
-# any path we get installed to but not so large that the build-id is
-# pushed to the second page.
-# At the end of the build we check that the build-id is indeed in the
-# first page. At install time we check that patchelf doesn't modify
-# the program headers.
-function(get_padded_dynamic_linker_option output length)
-  set(dynamic_linker_option "-dynamic-linker")
-  # capture the drive-generated command line first
-  execute_process(
-    COMMAND ${CMAKE_C_COMPILER} "-###" /dev/null -o t
-    ERROR_VARIABLE driver_command_line
-    ERROR_STRIP_TRAILING_WHITESPACE)
-  # extract the argument for the "-dynamic-linker" option
-  if(driver_command_line MATCHES ".*\"?${dynamic_linker_option}\"? \"?([^ \"]*)\"? .*")
-    set(dynamic_linker ${CMAKE_MATCH_1})
-  else()
-    message(FATAL_ERROR "Unable to find ${dynamic_linker_option} in driver-generated command: "
-      "${driver_command_line}")
-  endif()
-  # prefixing a path with "/"s does not actually change it means
-  pad_at_begin(padded_dynamic_linker "/" "${dynamic_linker}" ${length})
-  set(${output} "${dynamic_linker_option}=${padded_dynamic_linker}" PARENT_SCOPE)
-endfunction()
-
-add_compile_options("-ffile-prefix-map=${CMAKE_SOURCE_DIR}=.")
-
 default_target_arch(target_arch)
 if(target_arch)
-  add_compile_options("-march=${target_arch}")
+    string(APPEND CMAKE_CXX_FLAGS " -march=${target_arch}")
 endif()

 math(EXPR _stack_usage_threshold_in_bytes "${stack_usage_threshold_in_KB} * 1024")
 set(_stack_usage_threshold_flag "-Wstack-usage=${_stack_usage_threshold_in_bytes}")
 check_cxx_compiler_flag(${_stack_usage_threshold_flag} _stack_usage_flag_supported)
 if(_stack_usage_flag_supported)
-  add_compile_options("${_stack_usage_threshold_flag}")
+  string(APPEND CMAKE_CXX_FLAGS " ${_stack_usage_threshold_flag}")
 endif()
-
-# Force SHA1 build-id generation
-add_link_options("LINKER:--build-id=sha1")
-include(CheckLinkerFlag)
-set(Scylla_USE_LINKER
-    ""
-    CACHE
-    STRING
-    "Use specified linker instead of the default one")
-if(Scylla_USE_LINKER)
-    set(linkers "${Scylla_USE_LINKER}")
-else()
-    set(linkers "lld" "gold")
-endif()
-
-foreach(linker ${linkers})
-    set(linker_flag "-fuse-ld=${linker}")
-    check_linker_flag(CXX ${linker_flag} "CXX_LINKER_HAVE_${linker}")
-    if(CXX_LINKER_HAVE_${linker})
-        add_link_options("${linker_flag}")
-        break()
-    elseif(Scylla_USE_LINKER)
-        message(FATAL_ERROR "${Scylla_USE_LINKER} is not supported.")
-    endif()
-endforeach()
-
-if(DEFINED ENV{NIX_CC})
-  get_padded_dynamic_linker_option(dynamic_linker_option 0)
-else()
-  # gdb has a SO_NAME_MAX_PATH_SIZE of 512, so limit the path size to
-  # that. The 512 includes the null at the end, hence the 511 bellow.
-  get_padded_dynamic_linker_option(dynamic_linker_option 511)
-endif()
-add_link_options("${dynamic_linker_option}")
--- a/compaction/compaction.cc
+++ b/compaction/compaction.cc
--- a/compaction/compaction.hh
+++ b/compaction/compaction.hh
@@ -13,8 +13,8 @@
 #include "compaction/compaction_descriptor.hh"
 #include "gc_clock.hh"
 #include "compaction_weight_registration.hh"
+#include "service/priority_manager.hh"
 #include "utils/UUID.hh"
-#include "utils/pretty_printers.hh"
 #include "table_state.hh"
 #include <seastar/core/thread.hh>
 #include <seastar/core/abort_source.hh>
@@ -25,6 +25,21 @@ namespace sstables {

 bool is_eligible_for_compaction(const sstables::shared_sstable& sst) noexcept;

+class pretty_printed_data_size {
+    uint64_t _size;
+public:
+    pretty_printed_data_size(uint64_t size) : _size(size) {}
+    friend std::ostream& operator<<(std::ostream&, pretty_printed_data_size);
+};
+
+class pretty_printed_throughput {
+    uint64_t _size;
+    std::chrono::duration<float> _duration;
+public:
+    pretty_printed_throughput(uint64_t size, std::chrono::duration<float> dur) : _size(size), _duration(std::move(dur)) {}
+    friend std::ostream& operator<<(std::ostream&, pretty_printed_throughput);
+};
+
 // Return the name of the compaction type
 // as used over the REST api, e.g. "COMPACTION" or "CLEANUP".
 sstring compaction_name(compaction_type type);
@@ -48,7 +63,6 @@ struct compaction_info {
 };

 struct compaction_data {
-    uint64_t compaction_size = 0;
    uint64_t total_partitions = 0;
    uint64_t total_keys_written = 0;
    sstring stop_requested;
@@ -78,15 +92,12 @@ struct compaction_stats {
    uint64_t start_size = 0;
    uint64_t end_size = 0;
    uint64_t validation_errors = 0;
-    // Bloom filter checks during max purgeable calculation
-    uint64_t bloom_filter_checks = 0;

    compaction_stats& operator+=(const compaction_stats& r) {
        ended_at = std::max(ended_at, r.ended_at);
        start_size += r.start_size;
        end_size += r.end_size;
        validation_errors += r.validation_errors;
-        bloom_filter_checks += r.bloom_filter_checks;
        return *this;
    }
    friend compaction_stats operator+(const compaction_stats& l, const compaction_stats& r) {
@@ -101,27 +112,12 @@ struct compaction_result {
    compaction_stats stats;
 };

-class read_monitor_generator;
-
-class compaction_progress_monitor {
-    std::unique_ptr<read_monitor_generator> _generator = nullptr;
-    uint64_t _progress = 0;
-public:
-    void set_generator(std::unique_ptr<read_monitor_generator> generator);
-    void reset_generator();
-    // Returns number of bytes processed with _generator.
-    uint64_t get_progress() const;
-
-    friend class compaction;
-    friend future<compaction_result> scrub_sstables_validate_mode(sstables::compaction_descriptor, compaction_data&, table_state&, compaction_progress_monitor&);
-};
-
 // Compact a list of N sstables into M sstables.
 // Returns info about the finished compaction, which includes vector to new sstables.
 //
 // compaction_descriptor is responsible for specifying the type of compaction, and influencing
 // compaction behavior through its available member fields.
-future<compaction_result> compact_sstables(sstables::compaction_descriptor descriptor, compaction_data& cdata, table_state& table_s, compaction_progress_monitor& progress_monitor);
+future<compaction_result> compact_sstables(sstables::compaction_descriptor descriptor, compaction_data& cdata, table_state& table_s);

 // Return list of expired sstables for column family cf.
 // A sstable is fully expired *iff* its max_local_deletion_time precedes gc_before and its
@@ -134,4 +130,7 @@ get_fully_expired_sstables(const table_state& table_s, const std::vector<sstable
 // For tests, can drop after we virtualize sstables.
 flat_mutation_reader_v2 make_scrubbing_reader(flat_mutation_reader_v2 rd, compaction_type_options::scrub::mode scrub_mode, uint64_t& validation_errors);

+// For tests, can drop after we virtualize sstables.
+future<uint64_t> scrub_validate_mode_validate_reader(flat_mutation_reader_v2 rd, const compaction_data& info);
+
 }
--- a/compaction/compaction_backlog_manager.hh
+++ b/compaction/compaction_backlog_manager.hh
@@ -12,6 +12,7 @@
 #include <memory>
 #include <seastar/core/shared_ptr.hh>
 #include "sstables/shared_sstable.hh"
+#include "sstables/progress_monitor.hh"
 #include "timestamp.hh"

 class compaction_backlog_manager;
@@ -59,20 +60,18 @@ public:
    using ongoing_compactions = std::unordered_map<sstables::shared_sstable, backlog_read_progress_manager*>;

    struct impl {
-        // FIXME: Should provide strong exception safety guarantees
-        virtual void replace_sstables(const std::vector<sstables::shared_sstable>& old_ssts, const std::vector<sstables::shared_sstable>& new_ssts) = 0;
+        virtual void replace_sstables(std::vector<sstables::shared_sstable> old_ssts, std::vector<sstables::shared_sstable> new_ssts) = 0;
        virtual double backlog(const ongoing_writes& ow, const ongoing_compactions& oc) const = 0;
        virtual ~impl() { }
    };

    compaction_backlog_tracker(std::unique_ptr<impl> impl) : _impl(std::move(impl)) {}
    compaction_backlog_tracker(compaction_backlog_tracker&&);
-    compaction_backlog_tracker& operator=(compaction_backlog_tracker&&) = delete;
+    compaction_backlog_tracker& operator=(compaction_backlog_tracker&&) noexcept;
    compaction_backlog_tracker(const compaction_backlog_tracker&) = delete;
    ~compaction_backlog_tracker();

    double backlog() const;
-    // FIXME: Should provide strong exception safety guarantees
    void replace_sstables(const std::vector<sstables::shared_sstable>& old_ssts, const std::vector<sstables::shared_sstable>& new_ssts);
    void register_partially_written_sstable(sstables::shared_sstable sst, backlog_write_progress_manager& wp);
    void register_compacting_sstable(sstables::shared_sstable sst, backlog_read_progress_manager& rp);
--- a/compaction/compaction_descriptor.hh
+++ b/compaction/compaction_descriptor.hh
@@ -18,6 +18,7 @@
 #include "sstables/sstable_set.hh"
 #include "utils/UUID.hh"
 #include "dht/i_partitioner.hh"
+#include "compaction_weight_registration.hh"
 #include "compaction_fwd.hh"

 namespace sstables {
@@ -72,12 +73,6 @@ public:
            only, // scrub only quarantined sstables
        };
        quarantine_mode quarantine_operation_mode = quarantine_mode::include;
-
-        using quarantine_invalid_sstables = bool_class<class quarantine_invalid_sstables_tag>;
-
-        // Should invalid sstables be moved into quarantine.
-        // Only applies to validate-mode.
-        quarantine_invalid_sstables quarantine_sstables = quarantine_invalid_sstables::yes;
    };
    struct reshard {
    };
@@ -114,8 +109,8 @@ public:
        return compaction_type_options(upgrade{});
    }

-    static compaction_type_options make_scrub(scrub::mode mode, scrub::quarantine_invalid_sstables quarantine_sstables = scrub::quarantine_invalid_sstables::yes) {
-        return compaction_type_options(scrub{.operation_mode = mode, .quarantine_sstables = quarantine_sstables});
+    static compaction_type_options make_scrub(scrub::mode mode) {
+        return compaction_type_options(scrub{mode});
    }

    template <typename... Visitor>
@@ -123,11 +118,6 @@ public:
        return std::visit(std::forward<Visitor>(visitor)..., _options);
    }

-    template <typename OptionType>
-    const auto& as() const {
-        return std::get<OptionType>(_options);
-    }
-
    const options_variant& options() const { return _options; }

    compaction_type type() const;
@@ -161,12 +151,12 @@ struct compaction_descriptor {
    compaction_type_options options = compaction_type_options::make_regular();
    // If engaged, compaction will cleanup the input sstables by skipping non-owned ranges.
    compaction::owned_ranges_ptr owned_ranges;
-    // Required for reshard compaction.
-    const dht::sharder* sharder;

    compaction_sstable_creator_fn creator;
    compaction_sstable_replacer_fn replacer;

+    ::io_priority_class io_priority = default_priority_class();
+
    // Denotes if this compaction task is comprised solely of completely expired SSTables
    sstables::has_only_fully_expired has_only_fully_expired = has_only_fully_expired::no;

@@ -176,6 +166,7 @@ struct compaction_descriptor {
    static constexpr uint64_t default_max_sstable_bytes = std::numeric_limits<uint64_t>::max();

    explicit compaction_descriptor(std::vector<sstables::shared_sstable> sstables,
+                                   ::io_priority_class io_priority,
                                   int level = default_level,
                                   uint64_t max_sstable_bytes = default_max_sstable_bytes,
                                   run_id run_identifier = run_id::create_random_id(),
@@ -187,15 +178,18 @@ struct compaction_descriptor {
        , run_identifier(run_identifier)
        , options(options)
        , owned_ranges(std::move(owned_ranges_))
+        , io_priority(io_priority)
    {}

    explicit compaction_descriptor(sstables::has_only_fully_expired has_only_fully_expired,
-                                   std::vector<sstables::shared_sstable> sstables)
+                                   std::vector<sstables::shared_sstable> sstables,
+                                   ::io_priority_class io_priority)
        : sstables(std::move(sstables))
        , level(default_level)
        , max_sstable_bytes(default_max_sstable_bytes)
        , run_identifier(run_id::create_random_id())
        , options(compaction_type_options::make_regular())
+        , io_priority(io_priority)
        , has_only_fully_expired(has_only_fully_expired)
    {}

--- a/compaction/compaction_manager.cc
+++ b/compaction/compaction_manager.cc
--- a/Show More
+++ b/Show More