Compare commits


108 Commits

Beni Peled
1c82766f33 release: prepare for 5.0.4 2022-09-21 09:16:13 +03:00
Piotr Sarna
e1f78c33b4 Merge 'Fix mutation commutativity with shadowable tombstone'
from Tomasz Grabiec

This series fixes a lack of mutation associativity, which manifests as
sporadic failures in
row_cache_test.cc::test_concurrent_reads_and_eviction due to differences
in mutations applied and read.

No known production impact.

Refs https://github.com/scylladb/scylladb/issues/11307

Closes #11312

* github.com:scylladb/scylladb:
  test: mutation_test: Add explicit test for mutation commutativity
  test: random_mutation_generator: Workaround for non-associativity of mutations with shadowable tombstones
  db: mutation_partition: Drop unnecessary maybe_shadow()
  db: mutation_partition: Maintain shadowable tombstone invariant when applying a hard tombstone
  mutation_partition: row: make row marker shadowing symmetric

(cherry picked from commit 484004e766)
2022-09-20 23:21:06 +02:00
Tomasz Grabiec
0634b5f734 test: row_cache: Use more narrow key range to stress overlapping reads more
This makes catching issues related to concurrent access of same or
adjacent entries more likely. For example, catches #11239.

Closes #11260

(cherry picked from commit 8ee5b69f80)
2022-09-20 23:20:43 +02:00
Avi Kivity
6f020b26e1 Merge 'Backport 3 fixes for the evictable reader v2' from Botond Dénes
This pull request backports 3 important fixes from adc08d0ab9. Said 3 commits fixed important bugs in the v2 variant of the evictable reader, but were not backported because they were part of a large series doing v2 conversion in general. This means that 5.0 was left with a buggy evictable reader v2, which is used by repair. So far in the wild we've seen one bug manifest itself: the evictable reader getting stuck, spinning in a tight loop in `evictable_reader_v2::do_fill_buffer()`, in turn making repair get stuck too.

Fixes: #11223

Closes #11540

* github.com:scylladb/scylladb:
  test/boost/mutation_reader_test: add v2 specific evictable reader tests
  evictable_reader_v2: terminate active range tombstones on reader recreation
  evictable_reader_v2: restore handling of non-monotonically increasing positions
  evictable_reader_v2: simplify handling of reader recreation
2022-09-20 13:42:10 +03:00
Pavel Emelyanov
7f8dcc5657 messaging_service: Fix gossiper verb group
When configuring tcp-nodelay unconditionally, the messaging service assumes
the gossiper uses group index 1, though that changed some time ago and those
verbs now belong to group 0.

fixes: #11465

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
(cherry picked from commit 2c74062962)
2022-09-19 10:31:58 +03:00
Botond Dénes
20451760fe tools/scylla-sstable: fix description template
Quote '{' and '}' used in the CQL example, so format doesn't try to
interpret them.

Fixes: #11571

Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <20220221140652.173015-1-bdenes@scylladb.com>
(cherry picked from commit 10880fb0a7)
2022-09-19 06:54:25 +03:00
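
A minimal sketch of the escaping rule this fix relies on, assuming the fmt formatting library; the CQL text below is illustrative, not the actual help template:

```
#include <fmt/core.h>
#include <string>

int main() {
    // Literal braces must be doubled ("{{" and "}}"); otherwise fmt treats
    // them as replacement fields and throws fmt::format_error at runtime.
    std::string example = fmt::format(
        "INSERT INTO {}.{} JSON '{{\"pk\": 1}}'", "ks", "tbl");
    fmt::print("{}\n", example);
    return 0;
}
```
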
Michał Chojnowski
51b031d04e sstables: add a flag for disabling long-term index caching
Long-term index caching in the global cache, as introduced in 4.6, is a major
pessimization for workloads where accesses to the index are (spatially) sparse.
We want to have a way to disable it for the affected workloads.

There is already infrastructure in place for disabling it for BYPASS CACHE
queries. One way of solving the issue is hijacking that infrastructure.

This patch adds a global flag (and a corresponding CLI option) which controls
index caching. Setting the flag to `false` causes all index reads to behave
like they would in BYPASS CACHE queries.

Consequences of this choice:

- The per-SSTable partition_index_cache is unused. Every index_reader has
  its own, and they die together. Independent reads can no longer reuse the
  work of other reads which hit the same index pages. This is not crucial,
  since partition accesses have no (natural) spatial locality. Note that
  the original reason for partition_index_cache -- the ability to share
  reads for the lower and upper bound of the query -- is unaffected.
- The per-SSTable cached_file is unused. Every index_reader has its own
  (uncached) input stream from the index file, and every
  bsearch_clustered_cursor has its own cached_file, which dies together with
  the cursor. Note that the cursor still can perform its binary search with
  caching. However, it won't be able to reuse the file pages read by
  index_reader. In particular, if the promoted index is small, and fits inside
  the same file page as its index_entry, that page will be re-read.
  It can also happen that index_reader will read the same index file page
  multiple times. When the summary is so dense that multiple index pages fit in
  one index file page, advancing the upper bound, which reads the next index
  page, will read the same index file page. Since summary:disk ratio is 1:2000,
  this is expected to happen for partitions with size greater than 2000
  partition keys.

Fixes #11202

(cherry picked from commit cdb3e71045)
2022-09-18 13:29:35 +03:00
Botond Dénes
82d1446ca9 test/boost/mutation_reader_test: add v2 specific evictable reader tests
One is a reincarnation of the recently removed
test_multishard_combining_reader_non_strictly_monotonic_positions. The
latter was actually targeting the evictable reader but through the
multishard reader, probably for historic reasons (evictable reader was
part of the multishard reader family).
The other one checks that active range tombstone changes are properly
terminated when the partition ends abruptly after recreating the reader.

(cherry picked from commit 014a23bf2a)
2022-09-15 13:51:13 +03:00
Botond Dénes
e0acb0766d evictable_reader_v2: terminate active range tombstones on reader recreation
Reader recreation messes with the continuity of the mutation fragment
stream because it breaks snapshot isolation. We cannot guarantee that a
range tombstone, or even the partition, started before the recreation will
continue after it. So we have to make sure to wrap up all loose ends when
recreating the reader. We already close uncontinued partitions. This
commit also takes care of closing any range tombstone started by
unconditionally emitting a null range tombstone. This is legal to do,
even if no range tombstone was in effect.

(cherry picked from commit 9e48237b86)
2022-09-14 19:15:50 +03:00
Botond Dénes
4f26d489a0 evictable_reader_v2: restore handling of non-monotonically increasing positions
We thought that, unlike v1, v2 would not need this. But it does.
Handled similarly to how v1 did it: we ensure each buffer represents
forward progress, when the last fragment in the buffer is a range
tombstone change:
* Ensure the content of the buffer represents progress w.r.t.
  _next_position_in_partition, thus ensuring the next time we recreate
  the reader it will continue from a later position.
* Continue reading until the next (peeked) fragment has a strictly
  larger position.

The code is just much nicer because it uses coroutines.

(cherry picked from commit 6db08ddeb2)
2022-09-14 19:15:49 +03:00
Botond Dénes
43cbc5c836 evictable_reader_v2: simplify handling of reader recreation
The evictable reader has a handful of flags dictating what to do after
the reader is recreated: what to validate, what to drop, etc. We
actually need a single flag telling us if the reader was recreated or
not, all other things can be derived from existing fields.
This patch does exactly that. Furthermore it folds do_fill_buffer() into
fill_buffer() and replaces the awkward-to-use `should_drop_fragment()`
with `examine_first_fragments()`, which does a much better job of
encapsulating all validation and fragment dropping logic.
This code reorganization also fixes two bugs introduced by the v2
conversion:
* The loop in `do_fill_buffer()` could become infinite in certain
  circumstances due to a difference between the v1 and v2 versions of
  `is_end_of_stream()`.
* The position of the first non-dropped fragment was not validated
  (this was integrated into the range tombstone trimming which was
  thrown out by the conversion).

(cherry picked from commit 498d03836b)
2022-09-14 19:15:49 +03:00
Nadav Har'El
f0c521efdf alternator: clean error shutdown in case of TLS misconfiguration
The way our boot-time service "controllers" are written, if a
controller's start_server() finds an error and throws, it cannot rely on
the caller (main.cc) to call stop_server(), and must clean up
resources already created (e.g., sharded services) before returning
or risk crashes on assertion failures.

This patch fixes such a mistake in Alternator's initialization.
As noted in issue #10025, if the Alternator TLS configuration is
broken - especially the certificate or key files are missing -
Scylla would crash on an assertion failure, instead of reporting
the error as expected. Before this patch such a misconfiguration
would result in the unintelligible:

<alternator::server>::~sharded() [Service = alternator::server]:
Assertion `_instances.empty()' failed. Aborting on shard 0.

After this patch we get the right error message:

ERROR 2022-03-21 15:25:07,553 [shard 0] init - Startup failed:
std::_Nested_exception<std::runtime_error> (Failed to set up Alternator
TLS credentials): std::_Nested_exception<std::runtime_error> (Could not
read certificate file conf/scylla.crt): std::filesystem::__cxx11::
filesystem_error (error system:2, filesystem error: open failed:
No such file or directory [conf/scylla.crt])

Arguably this error message is a bit ugly, so I opened
https://github.com/scylladb/seastar/issues/1029, but at least it says
exactly what the error is.

Fixes #10025
Fixes #11520

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20220321133323.3150939-1-nyh@scylladb.com>
(cherry picked from commit 7f89c8b3e3)
2022-09-11 14:43:18 +03:00
Beni Peled
b9a61c8e9a release: prepare for 5.0.3 2022-09-07 11:16:52 +03:00
Karol Baryła
32aa1e5287 transport/server.cc: Return correct size of decompressed lz4 buffer
An incorrect size is returned from the function, which could lead to
crashes or undefined behavior. Fix by erroring out in these cases.

Fixes #11476

(cherry picked from commit 1c2eef384d)
2022-09-07 10:58:42 +03:00
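
A hedged sketch of the kind of check described above, using the standard lz4 C API; the buffer handling and helper name are illustrative, not Scylla's transport code:

```
#include <lz4.h>
#include <stdexcept>
#include <vector>

// Decompress a frame and report the real decompressed size; a negative return
// from LZ4_decompress_safe means the input is malformed, so error out instead
// of returning a bogus size to the caller.
std::vector<char> decompress(const char* src, int compressed_size, int max_uncompressed_size) {
    std::vector<char> out(max_uncompressed_size);
    int n = LZ4_decompress_safe(src, out.data(), compressed_size, max_uncompressed_size);
    if (n < 0) {
        throw std::runtime_error("CQL frame LZ4 decompression failed");
    }
    out.resize(n); // the actual size, not the capacity of the output buffer
    return out;
}
```
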
Nadav Har'El
da6a126d79 cross-tree: fix header file self-sufficiency
Scylla's coding standard requires that each header is self-sufficient,
i.e., it includes whatever other headers it needs - so it can be included
without having to include any other header before it.

We have a test for this, "ninja dev-headers", but it isn't run very
frequently, and it turns out our code deviated from this requirement
in a few places. This patch fixes those places, and after it
"ninja dev-headers" succeeds again.

This is needed because our CI runs "ninja dev-headers".

Fixes #10995

Signed-off-by: Nadav Har'El <nyh@scylladb.com>

Closes #11457
2022-09-06 15:45:34 +03:00
Avi Kivity
d07e902983 Merge 'database: evict all inactive reads for table when detaching table' from Botond Dénes
Currently, when detaching the table from the database, we force-evict all queriers for said table. This series broadens the scope of this force-evict to include all inactive reads registered at the semaphore. This ensures that any regular inactive read "forgotten" for any reason in the semaphore will not end up accessing a dangling table reference when destroyed later.

Fixes: https://github.com/scylladb/scylladb/issues/11264

Closes #11273

* github.com:scylladb/scylladb:
  querier: querier_cache: remove now unused evict_all_for_table()
  database: detach_column_family(): use reader_concurrency_semaphore::evict_inactive_reads_for_table()
  reader_concurrency_semaphore: add evict_inactive_reads_for_table()

(cherry picked from commit afa7960926)
2022-09-02 11:39:43 +03:00
Piotr Sarna
3c0fc42f84 cql3: fix misleading error message for service level timeouts
The error message incorrectly stated that the timeout value cannot
be longer than 24h, but it can - the actual restriction is that the
value cannot be expressed in units like days or months, which was done
in order to significantly simplify the parsing routines (and the fact
that timeouts counted in days are not expected to be common).

Fixes #10286

Closes #10294

(cherry picked from commit 85e95a8cc3)
2022-09-01 20:34:12 +03:00
Piotr Grabowski
964ccf9192 type_json: support integers in scientific format
Add support for specifying integers in scientific format (for example
1.234e8) in INSERT JSON statement:

INSERT INTO table JSON '{"int_column": 1e7}';

Inserting a floating-point number ending with .0 is allowed, as
the fractional part is zero. Non-zero fractional part (for example
12.34) is disallowed. A new test is added to test all those behaviors.

Before the JSON parsing library was switched to RapidJSON from JsonCpp,
this statement used to work correctly, because JsonCpp transparently
casts doubles to integer values.

This behavior differs from Cassandra, which disallows those types of
numbers (1e7, 123.0 and 12.34).

Fix typo in if condition: "if (value.GetUint64())" to
"if (value.IsUint64())".

Fixes #10100

(cherry picked from commit efe7456f0a)
2022-09-01 16:03:49 +03:00
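
A rough sketch of the accepted/rejected cases, assuming RapidJSON's value API; the helper name and conversion policy are illustrative, not the actual Scylla code:

```
#include <rapidjson/document.h>
#include <cmath>
#include <cstdint>
#include <optional>

// 1e7 and 123.0 parse as doubles with a zero fractional part and are accepted
// as integers; 12.34 has a non-zero fractional part and is rejected.
std::optional<int64_t> as_integer(const rapidjson::Value& v) {
    if (v.IsInt64()) {
        return v.GetInt64();
    }
    if (v.IsDouble()) {
        double d = v.GetDouble();
        double integral_part;
        if (std::modf(d, &integral_part) == 0.0) {
            return static_cast<int64_t>(integral_part);
        }
    }
    return std::nullopt; // non-integral number: reject
}
```
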
Avi Kivity
dfdc128faf Merge 'row_cache: Fix missing row if upper bound of population range is evicted and has adjacent dummy' from Tomasz Grabiec
Scenario:

cache = [
    row(pos=2, continuous=false),
    row(pos=after(2), dummy=true)
]

Scanning read starts, starts populating [-inf, before(2)] from sstables.

row(pos=2) is evicted.

cache = [
    row(pos=after(2), dummy=true)
]

Scanning read finishes reading from sstables.

Refreshes cache cursor via
partition_snapshot_row_cursor::maybe_refresh(), which calls
partition_snapshot_row_cursor::advance_to() because iterators are
invalidated. This advances the cursor to
after(2). no_clustering_row_between(2, after(2)) returns true, so
advance_to() returns true, and maybe_refresh() returns true. This is
interpreted by the cache reader as "the cursor has not moved forward",
so it marks the range as complete, without emitting the row with
pos=2. Also, it marks row(pos=after(2)) as continuous, so later reads
will also miss the row.

The bug is in advance_to(), which is using
no_clustering_row_between(a, b) to determine its result, which by
definition excludes the starting key.

Discovered by row_cache_test.cc::test_concurrent_reads_and_eviction
with reduced key range in the random_mutation_generator (1024 -> 16).

Fixes #11239

Closes #11240

* github.com:scylladb/scylladb:
  test: mvcc: Fix illegal use of maybe_refresh()
  tests: row_cache_test: Add test_eviction_of_upper_bound_of_population_range()
  tests: row_cache_test: Introduce one_shot mode to throttle
  row_cache: Fix missing row if upper bound of population range is evicted and has adjacent dummy
2022-08-11 18:36:44 +02:00
Yaron Kaikov
299122e78d release: prepare for 5.0.2 2022-08-07 16:15:02 +03:00
Avi Kivity
23a34d7e42 Merge 'Backport: Fix map subscript crashes when map or subscript is null' from Nadav Har'El
This is a backport of https://github.com/scylladb/scylla/pull/10420 to branch 5.0.
Branch 5.0 had somewhat different code in this expression area, so the backport was not automatic, but it was nevertheless fairly straightforward - just copy the exact same checking code to its right place, and keep the exact same tests to verify we indeed fixed the bug.

Refs #10535.

The original cover letter from https://github.com/scylladb/scylla/pull/10420:

In the filtering expression "WHERE m[?] = 2", our implementation was buggy when either the map, or the subscript, was NULL (and also when the latter was an UNSET_VALUE). Our code ended up dereferencing null objects, yielding bizarre errors when we were lucky, or crashes when we were less lucky - see examples of both in issues https://github.com/scylladb/scylla/issues/10361, https://github.com/scylladb/scylla/issues/10399, https://github.com/scylladb/scylla/pull/10401. The existing test test_null.py::test_map_subscript_null reproduced all these bugs sporadically.

In this series we improve the test to reproduce the separate bugs separately, and also reproduce additional problems (like the UNSET_VALUE). We then define both m[NULL] and NULL[2] to result in NULL instead of the existing undefined (and buggy, and crashing) behavior. This new definition is consistent with our usual SQL-inspired tradition that NULL "wins" in expressions - e.g., NULL < 2 is also defined as resulting in NULL.

However, this decision differs from Cassandra, where m[NULL] is considered an error but NULL[2] is allowed. We believe that making m[NULL] be a NULL instead of an error is more consistent, and moreover - necessary if we ever want to support more complicated expressions like m[a], where the column a can be NULL for some rows and non-NULL for others, and it doesn't make sense to return an "invalid query" error in the middle of the scan.

Fixes https://github.com/scylladb/scylla/issues/10361
Fixes https://github.com/scylladb/scylla/issues/10399
Fixes https://github.com/scylladb/scylla/pull/10401

Closes #11142

* github.com:scylladb/scylla:
  test/cql-pytest: reproducer for CONTAINS NULL bug
  expressions: don't dereference invalid map subscript in filter
  expressions: fix invalid dereference in map subscript evaluation
  test/cql-pytest: improve tests for map subscripts and nulls
2022-07-28 15:31:28 +03:00
Nadav Har'El
67a2f3aa67 test/cql-pytest: reproducer for CONTAINS NULL bug
This is a reproducer for issue #10359: "CONTAINS NULL" and
"CONTAINS KEY NULL" restrictions should not match any set, but currently
do match non-empty or all sets.

The tests currently fail on Scylla, so they are marked xfail. They also fail on
Cassandra because Cassandra considers such a request an error, which
we consider a mistake (see #4776) - so the tests are marked "cassandra_bug".

Refs #10359.

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20220412130914.823646-1-nyh@scylladb.com>
(cherry picked from commit ae0e1574dc)
2022-07-27 20:03:30 +03:00
Nadav Har'El
66e8cf8cea expressions: don't dereference invalid map subscript in filter
If we have the filter expression "WHERE m[?] = 2", the existing code
simply assumed that the subscript is an object of the right type.
However, while it should indeed be the right type (we already have code
that verifies that), there are two more options: It can also be a NULL,
or an UNSET_VALUE. Either of these cases causes the existing code to
dereference a non-object as an object, leading to bizarre errors (as
in issue #10361) or even crashes (as in issue #10399).

Cassandra returns an invalid request error in these cases: "Unsupported
unset map key for column m" or "Unsupported null map key for column m".
We decided to do things differently:

 * For NULL, we consider m[NULL] to result in NULL - instead of an error.
   This behavior is more consistent with other expressions that contain
   null - for example NULL[2] and NULL<2 both result in NULL as well.
   Moreover, if in the future we allow more complex expressions, such
   as m[a] (where a is a column), we can find the subscript to be null
   for some rows and non-null for other rows - and throwing an "invalid
   query" in the middle of the filtering doesn't make sense.

 * For UNSET_VALUE, we do consider this an error like Cassandra, and use
   the same error message as Cassandra. However, the current implementation
   checks for this error only when the expression is evaluated - not
   before. It means that if the scan is empty before the filtering, the
   error will not be reported and we'll silently return an empty result
   set. We currently consider this ok, but we can also change this in the
   future by binding the expression only once (today we do it on every
   evaluation) and validating it once after this binding.

Fixes #10361
Fixes #10399

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
(cherry picked from commit fbb2a41246)
2022-07-27 19:56:17 +03:00
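
A simplified sketch of the null-propagation rule described above, with std::optional standing in for CQL NULL; types and names are illustrative, not the expression-evaluation code:

```
#include <map>
#include <optional>
#include <string>

using cql_value = std::optional<std::string>;
using cql_map = std::optional<std::map<std::string, std::string>>;

// m[NULL] and NULL[k] both evaluate to NULL instead of dereferencing a
// missing value; only when both the map and the key are present do we look up.
cql_value subscript(const cql_map& m, const cql_value& key) {
    if (!m || !key) {
        return std::nullopt;
    }
    auto it = m->find(*key);
    return it == m->end() ? cql_value{} : cql_value{it->second};
}
```
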
Nadav Har'El
35b66c844c expressions: fix invalid dereference in map subscript evaluation
When we have a filter such as "WHERE m[2] = 3" (where m is a map
column), if a row had a null value for m, our expression evaluation
code incorrectly dereferenced an unset optional and continued
processing the result of this dereference, which resulted in undefined
behavior - sometimes we were lucky enough to get "marshaling error"
but other times Scylla crashed.

The fix is trivial - just check before dereferencing the optional value
of the map. We return null in that case, which means that we consider
the result of null[2] to be null. I think this is a reasonable approach
and fits our overall approach of making null dominate expressions (e.g.,
the value of "null < 2" is also null).

The test test_filtering.py::test_filtering_null_map_with_subscript,
which used to frequently fail with marshaling errors or crashes, now
passes every time so its "xfail" mark is removed.

Fixes #10417

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
(cherry picked from commit 808a93d29b)
2022-07-27 19:50:24 +03:00
Nadav Har'El
9e7a1340b9 test/cql-pytest: improve tests for map subscripts and nulls
The test test_null.py::test_map_subscript_null turned out to reproduce
multiple bugs related to using map subscripts in filtering expressions.
One was issue #10361 (m[null] resulted in a bizarre error) or #10399
(m[null] resulted in a crash), and a different issue was #10401 (m[2]
resulted in a bizarre error or a crash if m itself was null). Moreover,
the same test uncovered different bugs depending on how it was run - alone
or with other tests - because it was using a shared table.

In this patch we introduce two separate tests in test_filtering.py
which are designed to reproduce these separate bugs instead of mixing
them into one test. The new tests also cover a few more corners which
the previous test (which focused on nulls) missed - such as UNSET_VALUE.

The two new tests (and the old test_map_subscript_null) pass on
Cassandra so still assume that the Cassandra behavior - that m[null]
should be an error - is the correct behavior. We may want to change
the desired behavior (e.g., to decide that m[null] be null, not an
error), and change the tests accordingly later - but for now the
tests follow Cassandra's behavior exactly, and pass on Cassandra
and fail on Scylla (so are marked xfail).

The bugs reproduced by these tests involve randomness or reading
uninitialized memory, so these tests sometimes pass, sometimes fail,
and sometimes even crash (as reported in #10399 and #10401). So to
reproduce these bugs run the tests multiple times. For example:

    test/cql-pytest/run --count 100 --runxfail
        test_filtering.py::test_filtering_null_map_with_subscript

Refs #10361
Refs #10399
Refs #10401

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
(cherry picked from commit 189b8845fe)
2022-07-27 19:28:17 +03:00
Benny Halevy
d5a0750ef3 multishard_mutation_query: do_query: stop ctx if lookup_readers fails
lookup_readers might fail after populating some readers
and those better be closed before returning the exception.

Fixes #10351

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>

Closes #10425

(cherry picked from commit 055141fc2e)
2022-07-25 14:52:44 +03:00
Benny Halevy
618c483c73 sstables: time_series_sstable_set: insert: make exception safe
Need to erase the shared sstable from _sstables
if insertion to _sstables_reversed fails.

Fixes #10787

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
(cherry picked from commit cd68b04fbf)
2022-07-25 14:21:45 +03:00
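
The exception-safety pattern this fix applies, shown on a toy pair of containers; the real code operates on _sstables and _sstables_reversed, so everything below is illustrative:

```
#include <functional>
#include <set>

struct two_index_set {
    std::multiset<int> forward;
    std::multiset<int, std::greater<int>> reversed;

    // If inserting into the second container throws (e.g. bad_alloc), undo the
    // first insertion so the two containers never go out of sync.
    void insert(int v) {
        auto it = forward.insert(v);
        try {
            reversed.insert(v);
        } catch (...) {
            forward.erase(it);
            throw;
        }
    }
};
```
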
Tomasz Grabiec
f10fd1bc12 test: memtable: Make failed_flush_prevents_writes() immune to background merging
Before the change, the test artificially set the soft pressure
condition, hoping that the background flusher would flush the
memtable. That won't happen if, by the time the background flusher runs,
the LSA region is updated and the soft pressure (which is not really
there) is lifted. Once apply() becomes preemptible, background partition
version merging can lift the soft pressure, making the memtable flush
not occur and making the test fail.

Fix by triggering soft pressure on retries.

Fixes #10801
Refs #10793

(cherry picked from commit 0e78ad50ea)

Closes #10802

(cherry picked from commit 3bec1cc19f)
2022-07-25 14:19:48 +03:00
Tomasz Grabiec
1891f10141 memtable: Fix missing range tombstones during reads under certain rare conditions
There is a bug introduced in e74c3c8 (4.6.0) which makes the memtable
reader skip a range tombstone for a certain pattern of deletions
under a certain sequence of events.

_rt_stream contains the result of deoverlapping range tombstones which
had the same position, which were sipped from all the versions. The
result of deoverlapping may produce a range tombstone which starts
later, at the same position as a more recent tombstone which has not
been sipped from the partition version yet. If we consume the old
range tombstone from _rt_stream and then refresh the iterators, the
refresh will skip over the newer tombstone.

The fix is to drop the logic which drains _rt_stream so that
_rt_stream is always merged with partition versions.

For the problem to trigger, there have to be multiple MVCC versions
(at least 2) which contain deletions of the following form:

[a, c] @ t0
[a, b) @ t1, [b, d] @ t2

c > b

The proper sequence for such versions is (assuming d > c):

[a, b) @ t1,
[b, d] @ t2

Due to the bug, the reader will produce:

[a, b) @ t1,
[b, c] @ t0

The reader also needs to be preempted right before processing [b, d] @
t2 and iterators need to get invalidated so that
lsa_partition_reader::do_refresh_state() is called and it skips over
[b, d] @ t2. Otherwise, the reader will emit [b, d] @ t2 later. If it
does emit the proper range tombstone, it's possible that it will violate
fragment order in the stream if _rt_stream accumulated remainders
(possible with 3 MVCC versions).

The problem goes away once MVCC versions merge.

Fixes #10913
Fixes #10830

Closes #10914

(cherry picked from commit a6aef60b93)
2022-07-19 19:33:51 +03:00
Pavel Emelyanov
b177dacd36 Update seastar submodule (auto-increase latency goal fixes)
* seastar dbf79189...9a7ba6d5 (3):
  > io: Adjust IO latency goal on fair-queue level
  > reactor: Check IOPS/bandwidth and increase latency goal
  > Revert "io_queue: Auto-increase the io-latency goal"

refs: #10927

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
2022-07-19 13:06:43 +03:00
Yaron Kaikov
283a722923 release: prepare for 5.0.1 2022-07-19 06:39:11 +03:00
Pavel Emelyanov
522d0a81e7 azure_snitch: Do nothing on non-io-cpu
All snitch drivers are supposed to snitch info on some shard and
replicate the dc/rack info across the others. All but the Azure one really do so.
The Azure one gets dc/rack on all shards, which is excessive but not
terrible; but when all shards start to replicate their data to all the
others, this may lead to use-after-frees.

fixes: #10494

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
(cherry picked from commit c6d0bc87d0)
2022-07-17 14:13:25 +03:00
Pavel Emelyanov
cd13911db4 Merge 'Scrub compaction: prevent mishandling of range tombstone changes' from Botond
With v2 having the individual bounds of a range tombstone as separate
fragments, out-of-order fragments become more difficult to handle,
especially in the presence of an active range tombstone.
Scrub in both SKIP and SEGREGATE mode closes the partition on
seeing the first invalid fragment (SEGREGATE re-opens it immediately).
If there is an active range tombstone, scrub now also has to take care
of closing said tombstone when closing the partition. In a normal stream
it could just use the last position-in-partition to create a closing
bound. But when out-of-order fragments are on the table this is not
possible: the closing bound may be found later in the stream, with a
position smaller than that of the current position-in-partition.
To prevent extending range tombstone changes like that, Scrub now aborts
the compaction on the first invalid fragment seen *inside* an active
range tombstone.
Fixing a v2 stream with range tombstone changes is definitely possible,
but non-trivial, so we defer it until there is demand for it.

This series also makes the mutation fragment stream validator check for
open range tombstones on partition-end and adds a comprehensive
test-suite for the validator.

Fixes: #10168

Tests: unit(dev)

* scrub-rtc-handling-fix/v2 of github.com/denesb/scylla.git:
  compaction/compaction: abort scrub when attempting to rectify stream with active tombstone
  test/boost/mutation_test: add test for mutation_fragment_stream_validator
  mutation_fragment_stream_validator: validate range tombstone changes

(cherry picked from commit edd0481b38)
2022-07-14 18:49:13 +03:00
Nadav Har'El
32423ebc38 Merge 'Handle errors during snapshot' from Benny Halevy
This series refactors `table::snapshot` and moves the responsibility
to flush the table before taking the snapshot to the caller.

`flush_on_all` and `snapshot_on_all` helpers are added to replica::database
(by making it a peering_sharded_service) and upper layers,
including api and snapshot-ctl now call it instead of calling cf.snapshot directly.

With that, errors are handled in table::snapshot and propagated
back to the callers.

Failure to allocate the `snapshot_manager` object is fatal,
similar to failure to allocate a continuation, since we can't
coordinate across the shards without it.

Test: unit(dev), rest_api(debug)

* github.com:scylladb/scylla:
  table: snapshot: handle errors
  table: snapshot: get rid of skip_flush param
  database: truncate: skip flush when taking snapshot
  test: rest_api: storage_service: verify_snapshot_details: add truncate
  database: snapshot_on_all: flush before snapshot if needed
  table: make snapshot method private
  database: add snapshot_on_all
  snapshot-ctl: run_snapshot_modify_operation: reject views and secondary index using the schema
  snapshot-ctl: refactor and coroutinize take_snapshot / take_column_family_snapshot
  api: storage_service: increase visibility of snapshot ops in the log
  api: storage_service: coroutinize take_snapshot and del_snapshot
  api: storage_service: take_snapshot: improve api help messages
  test: rest_api: storage_service: add test_storage_service_snapshot
  database: add flush_on_all variants
  test: rest_api: add test_storage_service_flush

(cherry picked from commit 2c39c4c284)
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>

Closes #10975
2022-07-12 15:24:24 +03:00
Pavel Emelyanov
97054ee691 view: Fix trace-state pointer use after move
It's moved into .mutate_locally() but is captured and used in its
continuation. It happens to work only because the moved-from pointer looks
like nullptr and all the tracing code checks that it is non-null before use.

tests: https://jenkins.scylladb.com/job/releng/job/Scylla-CI/1266/
       (CI job failed on post-actions thus it's red)

Fixes #11015

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Message-Id: <20220711134152.30346-1-xemul@scylladb.com>
(cherry picked from commit 5526738794)
2022-07-12 14:20:57 +03:00
Piotr Sarna
34085c364f view: exclude using static columns in the view filter
The code which applied view filtering (i.e. a condition placed
on a view column, e.g. "WHERE v = 42") erroneously used a wildcard
selection, which also assumes that static columns are needed,
if the base table contains any such columns.
The filtering code currently assumes that no such columns are fetched,
so the selection is amended to only ask for regular columns
(primary key columns are sent anyway, because they are enabled
via slice options, so no need to ask for them explicitly).

Fixes #10851

Closes #10855

(cherry picked from commit bc3a635c42)
2022-07-11 17:06:55 +03:00
Takuya ASADA
323521f4c8 install.sh: install files with correct permission in strict umask setting
To avoid failing to run scripts as a non-root user, we need to set
permissions explicitly on executables.

Fixes #10752

Closes #10840

(cherry picked from commit 13caac7ae6)
2022-07-10 16:46:30 +03:00
Asias He
1ad59d6a7b repair: Do not flush hints and batchlog if tombstone_gc_mode is not repair
The flush of hints and batchlog is needed only for tables with
tombstone_gc_mode set to repair mode. We should skip the flush if the
tombstone_gc_mode is not repair mode.

Fixes #10004

Closes #10124

(cherry picked from commit ec59f7a079)
2022-07-04 10:31:51 +03:00
Nadav Har'El
d3045df9c9 Merge 'types: fix is_string for reversed types' from Piotr Sarna
Checking if the type is string is subtly broken for reversed types,
and these types will not be recognized as strings, even though they are.
As a result, if somebody creates a column with DESC order and then
tries to use operator LIKE on it, it will fail because the type
would not be recognized as a string.

Fixes #10183

Closes #10181

* github.com:scylladb/scylla:
  test: add a case for LIKE operator on a descending order column
  types: fix is_string for reversed types

(cherry picked from commit 733672fc54)
2022-07-03 17:59:33 +03:00
Benny Halevy
be48b7aa8b compaction_manager: perform_offstrategy: run_offstrategy_compaction in maintenance scheduling group
It was assumed that offstrategy compaction is always triggered by streaming/repair
where it would inherit the caller's scheduling group.

However, offstrategy is triggered by a timer via table::_off_strategy_trigger so I don't see
how the expiration of this timer will inherit anything from streaming/repair.

Also, since d309a86, offstrategy compaction
may be triggered by the api where it will run in the default scheduling group.

The bottom line is that the compaction manager needs to explicitly perform offstrategy compaction
in the maintenance scheduling group similar to `perform_sstable_scrub_validate_mode`.

Fixes #10151

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20220302084821.2239706-1-bhalevy@scylladb.com>
(cherry picked from commit 0764e511bb)
2022-07-03 14:28:47 +03:00
Takuya ASADA
3c4688bcfa scylla_coredump_setup: support new format of Storage field
The Storage field of "coredumpctl info" changed in systemd v248: it added
"(present)" at the end of the line when the coredump file is available.

Fixes #10669

Closes #10714

(cherry picked from commit ad2344a864)
2022-07-03 13:55:18 +03:00
Nadav Har'El
cc22021876 alternator: forbid empty AttributesToGet
In DynamoDB one can retrieve only a subset of the attributes using the
AttributesToGet or ProjectionExpression parameters to read requests.
Neither allows an empty list of attributes - if you don't want any
attributes, you should use Select=COUNT instead.

Currently we correctly refuse an empty ProjectionExpression - and have
a test for it:
test_projection_expression.py::test_projection_expression_toplevel_syntax

However, Alternator is missing the same empty-forbidding logic for
AttributesToGet. An empty AttributesToGet is currently allowed, and
basically says "retrieve everything", which is sort of unexpected.

So this patch adds the missing logic, and the missing test (actually
two tests for the same thing - one using GetItem and the other Query).

Fixes #10332

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20220405113700.9768-1-nyh@scylladb.com>
(cherry picked from commit 9c1ebdceea)
2022-07-03 13:35:50 +03:00
Yaron Kaikov
c9e79cb4a3 release: prepare for 5.0.0 2022-06-28 15:51:29 +03:00
Yaron Kaikov
f28542a71e release: prepare for 5.0.rc8 2022-06-12 14:44:47 +03:00
Pavel Emelyanov
527a75a4c0 Update seastar submodule (Calculate max IO lengths as lengths)
* seastar 8b2c13b3...dbf79189 (1):
  > Merge 'Calculate max IO lengths as lengths'
     io_queue: Type alias for internal::io_direction_and_length
     io_queue, fair_group: Throw instead of assert
     io_queue: Keep max lengths on board
     io_queue: Toss request_fq_ticket()
     io_queue: Introduce make_ticket() helper
     io_queue: Remove max_ticket_size
     io_queue: Make make_ticket() non-brancy
     io_queue: Add devid to group creation log

tests: cstress(release)
fixes: #10704
2022-06-09 21:15:21 +03:00
Avi Kivity
df00f8fcfb Update seastar submodule (json crash in describe_ring)
* seastar 7a430a0830...8b2c13b346 (1):
  > Merge 'stream_range_as_array: always close output stream' from Benny Halevy

Fixes #10592.
2022-06-08 16:48:28 +03:00
Yaron Kaikov
41a00c744f release: prepare for 5.0.rc7 2022-06-02 15:13:59 +03:00
Avi Kivity
2d7b6cd702 messaging: do isolate default tenants
In 10dd08c9 ("messaging_service: supply and interpret rpc isolation_cookies",
4.2), we added a mechanism to perform rpc calls in remote scheduling groups
based on the connection identity (rather than the verb), so that
connection processing itself can run in the correct group (not just
verb processing), and so that one verb can run in different groups according
to need.

In 16d8cdadc ("messaging_service: introduce the tenant concept", 4.2), we
changed the way isolation cookies are sent:

 scheduling_group
 messaging_service::scheduling_group_for_verb(messaging_verb verb) const {
     return _scheduling_info_for_connection_index[get_rpc_client_idx(verb)].sched_group;
@@ -665,11 +694,14 @@ shared_ptr<messaging_service::rpc_protocol_client_wrapper> messaging_service::ge
     if (must_compress) {
         opts.compressor_factory = &compressor_factory;
     }
     opts.tcp_nodelay = must_tcp_nodelay;
     opts.reuseaddr = true;
-    opts.isolation_cookie = _scheduling_info_for_connection_index[idx].isolation_cookie;
+    // We send cookies only for non-default statement tenant clients.
+    if (idx > 3) {
+        opts.isolation_cookie = _scheduling_info_for_connection_index[idx].isolation_cookie;
+    }

This effectively disables the mechanism for the default tenant. As a
result some verbs will be executed in whatever group the messaging
service listener was started in. This used to be the main group,
but in 554ab03 ("main: Run init_server and join_cluster inside
maintenance scheduling group", 4.5), this was change to the maintenance
group. As a result normal read/writes now compete with maintenance
operations, raising their latency significantly.

Fix by sending the isolation cookie for all connections. With this,
a 2-node cassandra-stress load sees its 99th percentile latency increase by just
3ms during repair, compared to 10ms+ before.

Fixes #9505.

Closes #10673

(cherry picked from commit c83393e819)
2022-06-01 17:20:30 +03:00
Avi Kivity
ff79228178 Merge 'Allow trigger off strategy compaction early for node operations' from Asias He
This patch set adds two commits to allow triggering off-strategy compaction early for node operations.

*) repair: Repair table by table internally

This patch changes the way a repair job walks through tables and ranges
if multiple tables and ranges are requested by users.

Before:

```
for range in ranges
   for table in tables
       repair(range, table)
```

After:

```
for table in tables
    for range in ranges
       repair(range, table)
```

The motivation for this change is to allow off-strategy compaction to trigger
early, as soon as a table is finished. This reduces the number of
temporary sstables on disk. For example, if there are 50 tables and 256 ranges
to repair, each range will generate one sstable. Before this change, there will
be 50 * 256 sstables on disk before off-strategy compaction triggers. After this
change, once a table is finished, off-strategy compaction can compact the 256
sstables. As a result, this would reduce the number of sstables by 50X.

This is very useful for repair based node operations since multiple ranges and
tables can be requested in a single repair job.

Refs: #10462

*) repair: Trigger off strategy compaction after all ranges of a table are repaired

When the repair reason is not repair, which means the repair reason is
node operations (bootstrap, replace and so on), a single repair job contains all
the ranges of a table that need to be repaired.

To trigger off strategy compaction early and reduce the number of
temporary sstable files on disk, we can trigger the compaction as soon
as a table is finished.

Refs: #10462

Closes #10551

* github.com:scylladb/scylla:
  repair: Trigger off strategy compaction after all ranges of a table are repaired
  repair: Repair table by table internally

(cherry picked from commit e65b3ed50a)
2022-06-01 14:17:01 +03:00
Nadav Har'El
1803124cc6 alternator: allow DescribeTimeToLive even without TTL enabled
We still consider the TTL support in Alternator to be experimental, so we
don't want to allow a user to enable TTL on a table without turning on a
"--experimental-features" flag. However, there is no reason not to allow
the DescribeTimeToLive call when this experimental flag is off - this call
would simply reply with the truth - that the TTL feature is disabled for
the table!

This is important for client code (such as the Terraform module
described in issue #10660) which uses DescribeTimeToLive for
information, even when it never intends to actually enable TTL.

The patch is trivial - we simply remove the flag check in
DescribeTimeToLive, the code works just as before.

After this patch, the following test now works on Scylla without
experimental flags turned on:

    test/alternator/run test_ttl.py::test_describe_ttl_without_ttl

Refs #10660

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
(cherry picked from commit 8ecf1e306f)
2022-05-30 20:08:41 +03:00
Takuya ASADA
6fcbf66bfb scylla_sysconfig_setup: handle >=32CPUs correctly
Seems like 59adf05 has a bug: the regex pattern only handles the first
32 CPUs in the cpuset pattern and ignores the rest.
We should extend the regex pattern to handle all CPUs.

Fixes #10523

Closes #10524

(cherry picked from commit a9dfe5a8f4)
2022-05-30 14:27:27 +03:00
Takuya ASADA
e9a3dee234 scylla_sysconfig_setup: avoid parse error on perftune.py --get-cpu-mask
Currently, we just pass the entire output of perftune.py when getting the CPU
mask from the script, but that may cause a parse error since the script may
also print warning messages.

To avoid that, we need to extract CPU mask from the output.

Fixes #10082

Closes #10107

(cherry picked from commit 59adf05951)
2022-05-30 14:25:21 +03:00
Avi Kivity
279cd44c7f Update seastar submodule (xfs project attribute zeroed)
* seastar 6745a43c10...7a430a0830 (1):
  > file: don't trample on xfs flags when setting xfs size hint

Fixes #10667.
2022-05-29 17:43:43 +03:00
Avi Kivity
c99f768381 Merge 'Rework off strategy compaction locking for branch 5.0' from Raphael "Raph" Carvalho
First patch removes incorrect usage of rwlock which should be restricted to minor and major compaction tasks.

Second patch revives a semaphore, which was lost in 6737c88045, as we want major to not wait on off-strategy completion before deciding whether or not it should proceed with execution. It wouldn't proceed with execution if the user asked major to stop while waiting for a chance to run.

For master, we're going to rely on abortable variant of get_units() to allow major to be quickly aborted.

Fixes #10485.

Closes #10582

* github.com:scylladb/scylla:
  compaction_manager: Revive custom job semaphore
  compaction_manager: Remove rwlock usage in run_custom_job()
2022-05-29 17:38:01 +03:00
Tomasz Grabiec
89a540d54a sstable: partition_index_cache: Fix abort on bad_alloc during page loading
When entry loading fails and there is another request blocked on the
same page, an attempt to erase the failed entry will abort because that
would violate the entry_ptr guarantees, which are supposed to keep the
entry alive.

The fix in 92727ac36c was incomplete. It
only helped for the case of a single loader. This patch makes a more
general approach by relaxing the assert.

The assert manifested like this:

scylla: ./sstables/partition_index_cache.hh:71: sstables::partition_index_cache::entry::~entry(): Assertion `!is_referenced()' failed.

Fixes #10617

Closes #10653

(cherry picked from commit f87274f66a)
2022-05-27 09:50:32 +03:00
Yaron Kaikov
338edcc02e release: prepare for 5.0.rc6 2022-05-23 11:37:37 +03:00
Avi Kivity
a8eb5164b2 Update seastar submodule (io_queue delay metrics in 25ms granularity)
* seastar 4a30c44c4c...6745a43c10 (1):
  > metrics: Report IO total times as real numbers

Ref #10392
2022-05-19 18:20:15 +03:00
Raphael S. Carvalho
9accb44f9c compaction_manager: Revive custom job semaphore
In commit 6737c88045, we started using a single semaphore for
maintenance operations, which is a good change.

However, after introduction of off-strategy, major cannot proceed
until off-strategy is done reshaping all its input files.

If the user requests major to abort, the command will only return
once off-strategy is done, and that can take lots of time.

In master, we'll allow pending major to be quickly aborted, but
that's not possible here as abortable variant of get_units()
is not available yet.

Here, we'll allow major to proceed in parallel to off-strategy,
so major can decide whether or not it should run in parallel.

Fixes #10485.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
2022-05-16 20:46:31 -03:00
Raphael S. Carvalho
8878007106 compaction_manager: Remove rwlock usage in run_custom_job()
The rwlock usage was introduced in 2017 commit 10eaa2339e.

Resharding was online back then and we wanted to serialize it with
major.

Rwlock usage should be restricted to major and minor, as clearly
stated in the documentation, but we're still using it in
run_custom_job().

It gains us nothing, it only prevents off-strategy and other
custom jobs from running concurrently to major.

Let's kill this as we want to allow off-strategy to not prevent
a major from happening in parallel, as the former works only
on the maintenance sstable set and won't interfere with
the latter.

Refs #10485.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
2022-05-16 20:45:54 -03:00
Yaron Kaikov
9da666e778 release: prepare for 5.0.rc5 2022-05-15 22:09:16 +03:00
Benny Halevy
aca355dec1 table: clear: serialize with ongoing flush
Get all flush permits to serialize with any
ongoing flushes and prevent further flushes
during table::clear, in particular calling
discard_completed_segments for every table and
clearing the memtables in clear_and_add.

Fixes #10423

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
(cherry picked from commit aae532a96b)
2022-05-15 13:39:03 +03:00
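
A toy model of the serialization described above, using a std::shared_mutex in place of Scylla's flush permits; names and the locking primitive are illustrative only:

```
#include <mutex>
#include <shared_mutex>

struct table_like {
    std::shared_mutex flush_lock;

    void flush() {
        std::shared_lock guard(flush_lock); // flushes may run concurrently with each other
        // ... write the memtable out and discard completed commitlog segments ...
    }

    void clear() {
        std::unique_lock guard(flush_lock); // waits for in-flight flushes, blocks new ones
        // ... discard completed segments and clear the memtables ...
    }
};
```
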
Raphael S. Carvalho
efbb2efd3f compaction: LCS: don't write to disengaged optional on compaction completion
Dtest triggers the problem by:
1) creating table with LCS
2) disabling regular compaction
3) writing a few sstables
4) running maintenance compaction, e.g. cleanup

Once the maintenance compaction completes, disengaged optional _last_compacted_keys
triggers an exception in notify_completion().

_last_compacted_keys is used by regular compaction for its round-robin file-picking
policy. It stores the last compacted key for each level. Meaning it's
irrelevant for any other compaction type.

Regular compaction is responsible for initializing it when it runs for
the first time to pick files. But with it disabled, notify_completion()
will find it uninitialized, therefore resulting in bad_optional_access.

To fix this, the procedure is skipped if _last_compacted_keys is
disengaged. Regular compaction, once re-enabled, will be able to
fill _last_compacted_keys by looking at metadata of the files.

compaction_test.py::TestCompaction::test_disable_autocompaction_doesnt_
block_user_initiated_compactions[CLEANUP-LeveledCompactionStrategy]
now passes.

Fixes #10378.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>

Closes #10508

(cherry picked from commit 8e99d3912e)
2022-05-15 13:20:11 +03:00
Eliran Sinvani
44dc5c4a1d Revert "table: disable_auto_compaction: stop ongoing compactions"
This reverts commit 4affa801a5.
In issue #10146 a write throughput drop of ~50% was reported, after
bisect it was found that the change that caused it was adding some
code to the table::disable_auto_compaction which stops ongoing
compactions and returning a future that resolves once all the  compaction
tasks for a table, if any, were terminated. It turns out that this function
is used only at startup (and in REST api calls which are not used in the test)
in the distributed loader just before resharding and loading of
the sstable data. It is then reanabled after the resharding and loading
is done.
For still unknown reason, adding the extra logic of stopping ongoing
compactions made the write throughput drop to 50%.
Strangely enough this extra logic **should** (still unvalidated) not
have any side effects since no compactions for a table are supposed to
be running prior to loading it.
This regains the performance but also undo a change which eventually
should get in once we find the actual culprit.

Signed-off-by: Eliran Sinvani <eliransin@scylladb.com>

Closes #10559

Reopens #9313.

(cherry picked from commit 8e8dc2c930)
2022-05-15 08:50:38 +03:00
Juliusz Stasiewicz
6b34ba3a4f CQL: Replace assert by exception on invalid auth opcode
One user observed this assertion fail, but it's an extremely rare event.
The root cause - interlacing of processing STARTUP and OPTIONS messages -
is still there, but now it's harmless enough to leave it as is.

Fixes #10487

Closes #10503

(cherry picked from commit 603dd72f9e)
2022-05-10 14:04:52 +02:00
Yaron Kaikov
f1e25cb4a6 release: prepare for 5.0.rc4 2022-05-10 07:35:53 +03:00
Benny Halevy
c9798746ae compaction: time_window_compaction_strategy: reset estimated_remaining_tasks when running out of candidates
_estimated_remaining_tasks gets updated via get_next_non_expired_sstables ->
get_compaction_candidates, but otherwise if we return earlier from
get_sstables_for_compaction, it does not get updated and may go out of sync.

Refs #10418
(to be closed when the fix reaches branch-4.6)

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>

Closes #10419

(cherry picked from commit 01f41630a5)
2022-05-09 09:35:53 +03:00
Eliran Sinvani
7f70ffc5ce prepared_statements: Invalidate batch statement too
It seems that batch prepared statements always return false for
depends_on; this in turn renders the removal criterion from the
prepared statements cache always false, which results in the
queries not being evicted.
Here we change the function to return the true state, meaning
it will return true if one of the sub-queries is dependent
upon the keyspace and/or column family.

Fixes #10129

Signed-off-by: Eliran Sinvani <eliransin@scylladb.com>
(cherry picked from commit 4eb0398457)
2022-05-08 12:31:42 +03:00
Eliran Sinvani
551636ec89 cql3 statements: Change dependency test API to better express its
purpose

Cql statements used to have two API functions, depends_on_keyspace and
depends_on_column_family. The former took as a parameter only a table
name, which makes no sense. There could be multiple tables with the same
name, each in a different keyspace, and it doesn't make sense to
generalize the test - i.e. to ask "Does a statement depend on any table
named XXX?"
In this change we unify the two calls into one - depends_on - that takes a
keyspace name and optionally also a table name; that way every logical
dependency test that makes sense is supported by a single API call.

(cherry picked from commit bf50dbd35b)

Ref #10129
2022-05-08 12:31:02 +03:00
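
A sketch of the shape of the fix described in the two entries above, using a hypothetical statement interface rather than the actual cql3 classes: a batch depends on a keyspace/table if any of its sub-statements does.

```
#include <algorithm>
#include <memory>
#include <optional>
#include <string_view>
#include <vector>

struct statement {
    virtual ~statement() = default;
    // Optionally narrow the test to a specific table within the keyspace.
    virtual bool depends_on(std::string_view keyspace,
                            std::optional<std::string_view> table) const = 0;
};

struct batch_statement : statement {
    std::vector<std::unique_ptr<statement>> statements;

    bool depends_on(std::string_view keyspace,
                    std::optional<std::string_view> table) const override {
        // Previously this effectively returned false; returning the true state
        // lets the prepared-statement cache evict the batch on schema changes.
        return std::any_of(statements.begin(), statements.end(),
                           [&](const auto& s) { return s->depends_on(keyspace, table); });
    }
};
```
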
Raphael S. Carvalho
e1130a01e7 table: Close reader if flush fails to peek into fragment
An OOM failure while peeking into a fragment, to determine if the reader will
produce any fragments, causes Scylla to abort, as flat_mutation_reader
expects the reader to be closed before being destroyed. Let's close it if
peek() fails, to handle the scenario more gracefully.

Fixes #10027.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20220204031553.124848-1-raphaelsc@scylladb.com>
(cherry picked from commit 755cec1199)
2022-05-08 12:16:15 +03:00
Calle Wilund
b0233cb7c5 cdc: Ensure columns removed from log table are registered as dropped
If we are redefining the log table, we need to ensure any dropped
columns are registered in the "dropped_columns" table, otherwise clients will not
be able to read data older than now.
Includes unit test.

Should probably be backported to all CDC enabled versions.

Fixes #10473
Closes #10474

(cherry picked from commit 78350a7e1b)
2022-05-05 11:38:18 +02:00
Avi Kivity
e480c5bf4d Merge 'loading_cache: force minimum size of unprivileged ' from Piotr Grabowski
This series enforces a minimum size of the unprivileged section when
performing the `shrink()` operation.

When the cache is shrunk, we still drop entries first from unprivileged
section (as before this commit), however, if this section is already small
(smaller than `max_size / 2`), we will drop entries from the privileged
section.

This is necessary, as before this change the unprivileged section could
be starved. For example if the cache could store at most 50 entries and
there are 49 entries in privileged section, after adding 5 entries (that would
go to unprivileged section) 4 of them would get evicted and only the 5th one
would stay. This caused problems with BATCH statements where all
prepared statements in the batch have to stay in cache at the same time
for the batch to correctly execute.

To correctly check if the unprivileged section might get too small after
dropping an entry, `_current_size` variable, which tracked the overall size
of cache, is changed to two variables: `_unprivileged_section_size` and
`_privileged_section_size`, tracking section sizes separately.

New tests are added to check this new behavior and bookkeeping of the section
sizes. A test is added, that sets up a CQL environment with a very small
prepared statement cache, reproduces issue in #10440 and stresses the cache.

Fixes #10440.

Closes #10456

* github.com:scylladb/scylla:
  loading_cache_test: test prepared stmts cache
  loading_cache: force minimum size of unprivileged
  loading_cache: extract dropping entries to lambdas
  loading_cache: separately track size of sections
  loading_cache: fix typo in 'privileged'

(cherry picked from commit 5169ce40ef)
2022-05-04 14:35:53 +03:00
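
A toy model of the shrink policy described in the cover letter; the sizes, names and decision helper are illustrative, and the real cache bookkeeping is more involved:

```
#include <cstddef>

enum class section { privileged, unprivileged };

// Evict from the unprivileged section first, but never starve it below half
// of the maximum cache size; past that point, evict from the privileged one.
section pick_eviction_section(std::size_t privileged_size,
                              std::size_t unprivileged_size,
                              std::size_t max_size) {
    if (unprivileged_size >= max_size / 2 || privileged_size == 0) {
        return section::unprivileged;
    }
    return section::privileged;
}
```
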
Tomasz Grabiec
7d90f7e93f loading_cache: Make invalidation take immediate effect
There are two issues with current implementation of remove/remove_if:

  1) If it happens concurrently with get_ptr(), the latter may still
  populate the cache using value obtained from before remove() was
  called. remove() is used to invalidate caches, e.g. the prepared
  statements cache, and the expected semantic is that values
  calculated from before remove() should not be present in the cache
  after invalidation.

  2) As long as there is any active pointer to the cached value
  (obtained by get_ptr()), the old value from before remove() will be
  still accessible and returned by get_ptr(). This can make remove()
  have no effect indefinitely if there is persistent use of the cache.

One of the user-perceived effects of this bug is that some prepared
statements may not get invalidated after a schema change and still use
the old schema (until next invalidation). If the schema change was
modifying UDT, this can cause statement execution failures. CQL
coordinator will try to interpret bound values using the old set of
fields. If the driver uses the new schema, the coordinator will fail
to process the value with the following exception:

  User Defined Type value contained too many fields (expected 5, got 6)

The patch fixes the problem by making remove()/remove_if() erase old
entries from _loading_values immediately.

The predicate-based remove_if() variant has to also invalidate values
which are concurrently loading to be safe. The predicate cannot be
avaluated on values which are not ready. This may invalidate some
values unnecessarily, but I think it's fine.

Fixes #10117

Message-Id: <20220309135902.261734-1-tgrabiec@scylladb.com>
(cherry picked from commit 8fa704972f)
2022-05-04 14:35:37 +03:00
Avi Kivity
3e6e8579c6 loading_cache: fix indentation of timestamped_val and two nested type aliases
timestamped_val (and two other type aliases) are nested inside loading_cache,
but indented as if they were top-level names. Adjust the indent to
avoid confusion.

Closes #10118

(cherry picked from commit d1a394fd97)

Ref #10117 - backport prerequisite
2022-05-04 14:35:15 +03:00
Avi Kivity
3e98e17d18 Merge 'replica/database: drop_column_family(): properly cleanup stale querier cache entries' from Botond Dénes
Said method has to evict all querier cache entries belonging to the to-be-dropped table. This is already the case, but there was a window where new entries could sneak in, causing a stale reference to the table to be dereferenced later when they are evicted due to TTL. This window is now closed: the entries are evicted after the method has waited for all ongoing operations on said table to stop.

Fixes: #10450

Closes #10451

* github.com:scylladb/scylla:
  replica/database: drop_column_family(): drop querier cache entries after waiting for ops
  replica/database: finish coroutinizing drop_column_family()
  replica/database: make remove(const column_family&) private

(cherry picked from commit 7f1e368e92)
2022-05-01 17:22:57 +03:00
Avi Kivity
a214f8cf6e Update tools/java submodule (bad IPv6 addresses in nodetool)
* tools/java b1e09c8b8f...2241a63bda (1):
  > CASSANDRA-17581 fix NodeProbe: Malformed IPv6 address at index

Fixes #10442
2022-04-28 11:33:15 +03:00
Benny Halevy
e8b92fe34d replica: distributed_database: populate_column_family: trigger offstrategy compaction only for the base directory
In https://github.com/scylladb/scylla/issues/10218
we see off-strategy compaction happening on a table
during the initial phases of
`distributed_loader::populate_column_family`.

It is caused by triggering offstrategy compaction
too early, when sstables are populated from the staging
directory in a144d30162.

We need to trigger offstrategy compaction only for the base
table directory, never for the staging or quarantine dirs.

Fixes #10218

Test: unit(dev)
DTest: materialized_views_test.py::TestInterruptBuildProcess

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20220316152812.3344634-1-bhalevy@scylladb.com>
(cherry picked from commit a1d0f089c8)
2022-04-24 17:38:53 +03:00
Nadav Har'El
fa479c84ac config: fix some types in system.config virtual table
The system.config virtual table prints each configuration variable of
type T based on the JSON printer specified in the config_type_for<T>
in db/config.cc.

For two variable types - experimental_features and tri_mode_restriction,
the specified converter was wrong: we used value_to_json<string> or
value_to_json<vector<string>> on something which was *not* a string.
Unfortunately, value_to_json silently cast the given objects into
strings, and the result was garbage: for example, as noted in #10047,
for experimental_features, instead of printing a list of feature *names*,
e.g., "raft", we got a bizarre list of one-byte strings with each feature's
number (which isn't documented or even guaranteed not to change) as well
as carriage-return characters (!?).

The solution is a new printable_to_json<T> which works on a type T that
can be printed with operator<< - as in fact the above two types can -
and converts the type into a string or vector of strings using this
operator<<, not a cast.
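
A rough sketch of the approach, assuming a hypothetical free function and minimal escaping (the real converter in db/config.cc differs in details):

```
// Rough sketch only; the real converter in db/config.cc differs in details.
#include <sstream>
#include <string>

template <typename T>
std::string printable_to_json(const T& value) {
    std::ostringstream text;
    text << value;                      // uses the type's operator<<
    std::string out = "\"";
    for (char c : text.str()) {         // minimal escaping for the example
        if (c == '"' || c == '\\') {
            out += '\\';
        }
        out += c;
    }
    out += '"';
    return out;
}
```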

Also added a cql-pytest test for reading system.config and in particular
options of the above two types - checking that they contain sensible
strings and not "garbage" like before this patch.

Fixes #10047.

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20220209090421.298849-1-nyh@scylladb.com>
(cherry picked from commit fef7934a2d)
2022-04-14 19:29:08 +03:00
Tomasz Grabiec
40c26dd2c5 utils/chunked_managed_vector: Fix sigsegv during reserve()
Fixes the case of make_room() invoked with last_chunk_capacity_deficit
but _size not in the last reserved chunk.

Found during code review, no user impact.

Fixes #10364.

Message-Id: <20220411224741.644113-1-tgrabiec@scylladb.com>
(cherry picked from commit 0c365818c3)
2022-04-13 09:48:34 +03:00
Tomasz Grabiec
2c6f069fd1 utils/chunked_vector: Fix sigsegv during reserve()
Fixes the case of make_room() invoked with last_chunk_capacity_deficit
but _size not in the last reserved chunk.

Found during code review, no known user impact.

Fixes #10363.

Message-Id: <20220411222605.641614-1-tgrabiec@scylladb.com>
(cherry picked from commit 01eeb33c6e)
2022-04-13 09:47:24 +03:00
Avi Kivity
e27dff0c50 transport: return correct error codes when downgrading v4 {WRITE,READ}_FAILURE to {WRITE,READ}_TIMEOUT
Protocol v4 added WRITE_FAILURE and READ_FAILURE. When running under v3
we downgrade these exceptions to WRITE_TIMEOUT and READ_TIMEOUT (since
the client won't understand the v4 errors), but we still send the new
error codes. This causes the client to become confused.

Fix by updating the error codes.

A better fix is to move the error code from the constructor parameter
list and hard-code it in the constructor, but that is left for a follow-up
after this minimal fix.
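
A hedged sketch of the downgrade mapping; the enum and function names here are illustrative, not the actual transport-layer types:

```
// Illustrative names only, not the actual transport-layer types.
enum class error_code {
    write_timeout,
    read_timeout,
    write_failure,   // v4-only
    read_failure,    // v4-only
};

// When talking to a v3 client, a v4-only failure is downgraded to the
// corresponding timeout, and the error code sent on the wire must be the
// timeout's code as well - otherwise the client sees a code it does not know.
error_code downgrade_for_v3(error_code ec) {
    switch (ec) {
    case error_code::write_failure: return error_code::write_timeout;
    case error_code::read_failure:  return error_code::read_timeout;
    default:                        return ec;
    }
}
```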

Fixes #5610.

Closes #10362

(cherry picked from commit 987e6533d2)
2022-04-13 09:47:24 +03:00
Tomasz Grabiec
3f03260ffb utils/chunked_managed_vector: Fix corruption in case there is more than one chunk
If reserve() allocates more than one chunk, push_back() must not operate
on the last chunk; otherwise items can be pushed to the wrong chunk,
breaking internal invariants.

Also, pop_back() must not operate on the last chunk; it likewise breaks
when there is more than one chunk.
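
As an illustration of the invariant, here is a toy chunked vector (not the actual chunked_managed_vector code) where push_back() indexes by _size instead of using the last chunk:

```
// Toy chunked vector, illustrative only.
#include <cstddef>
#include <memory>
#include <vector>

template <typename T, std::size_t ChunkSize = 4>
class toy_chunked_vector {
    std::vector<std::unique_ptr<T[]>> _chunks;   // reserved storage
    std::size_t _size = 0;
public:
    void reserve(std::size_t n) {
        while (_chunks.size() * ChunkSize < n) {
            _chunks.push_back(std::make_unique<T[]>(ChunkSize));
        }
    }
    void push_back(const T& v) {
        if (_size == _chunks.size() * ChunkSize) {
            _chunks.push_back(std::make_unique<T[]>(ChunkSize));
        }
        // Index by _size: using _chunks.back() here would be wrong whenever
        // reserve() already allocated chunks beyond the one being filled.
        _chunks[_size / ChunkSize][_size % ChunkSize] = v;
        ++_size;
    }
    T& operator[](std::size_t i) { return _chunks[i / ChunkSize][i % ChunkSize]; }
    std::size_t size() const { return _size; }
};
```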

Currently, the container is only used in the sstable partition index
cache.

Manifests as crashes in the sstable reader when touching sstables that have
partition index pages with more than 1638 partition entries.

Introduced in 78e5b9fd85 (4.6.0)

Fixes #10290

Message-Id: <20220407174023.527059-1-tgrabiec@scylladb.com>
(cherry picked from commit 41fe01ecff)
2022-04-08 10:53:33 +03:00
Takuya ASADA
1315135fca docker: enable --log-to-stdout which was mistakenly disabled
Since our Docker image moved to Ubuntu, we have mistakenly been copying
dist/docker/etc/sysconfig/scylla-server to /etc/sysconfig, which is not
used on Ubuntu (it should be /etc/default).
So /etc/default/scylla-server is just the default configuration of the
scylla-server .deb package, where --log-to-stdout is 0, the same as in a normal installation.

We don't want to keep the duplicated configuration file anyway,
so let's drop dist/docker/etc/sysconfig/scylla-server and configure
/etc/default/scylla-server in build_docker.sh.

Fixes #10270

Closes #10280

(cherry picked from commit bdefea7c82)
2022-04-07 12:13:19 +03:00
Yaron Kaikov
f92622e0de release: prepare for 5.0.rc3 2022-04-06 14:31:03 +03:00
Takuya ASADA
3bca608db5 docker: run scylla as root
Previous versions of the Docker image ran scylla as root, but cb19048
accidentally changed it to the scylla user.
To keep compatibility we need to revert this to root.

Fixes #10261

Closes #10325

(cherry picked from commit f95a531407)
2022-04-05 12:46:25 +03:00
Takuya ASADA
a93b72d5dd docker: revert scylla-server.conf service name change
We changed the supervisor service name in cb19048, but this breaks
compatibility with scylla-operator.
To fix the issue we need to revert the service name to the previous one.

Fixes #10269

Closes #10323

(cherry picked from commit 41edc045d9)
2022-04-05 12:40:59 +03:00
Benny Halevy
d58ca2edbd range_tombstone_list: insert_from: correct rev.update range_tombstone in not overlapping case
The 2nd std::move(start) looks like a typo
introduced in fe2fa3f20d.
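
A generic illustration of this bug class (unrelated to the actual range_tombstone_list code): moving from the same variable twice leaves the second use with a moved-from value:

```
// Unrelated toy types; the point is the double move, not the real code.
#include <iostream>
#include <string>
#include <utility>

struct interval {
    std::string start;
    std::string end;
};

int main() {
    std::string start = "a";
    std::string end = "m";

    interval first{std::move(start), "f"};
    // Typo-style bug: 'start' was already moved from above, so 'second.start'
    // holds a moved-from (typically empty) string instead of "a".
    interval second{std::move(start), std::move(end)};

    std::cout << "first.start=" << first.start
              << " second.start=" << second.start << "\n";
}
```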

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20220404124741.1775076-1-bhalevy@scylladb.com>
(cherry picked from commit 2d80057617)

Fixes #10326
2022-04-05 12:39:13 +03:00
Alexey Kartashov
75740ace2a dist/docker: fix incorrect locale value
The Docker build script contains an incorrect locale specification for the LC_ALL setting;
this commit fixes that.

Fixes #10310

Closes #10321

(cherry picked from commit d86c3a8061)
2022-04-04 12:51:02 +03:00
Piotr Sarna
d7a1bf6331 cql3: fix qualifying restrictions with IN for indexing
When a query contains an IN restriction on its partition key,
it's currently not eligible for indexing. It was however
erroneously qualified as such, which led to fetching incorrect
results. This commit fixes the issue by not allowing such queries
to undergo indexing, and comes with a regression test.

Fixes #10300

Closes #10302

(cherry picked from commit c0fd53a9d7)
2022-04-03 11:20:49 +03:00
Avi Kivity
bbd7d657cc Update seastar submodule (pidof command not installed)
* seastar 1c0d622ba0...4a30c44c4c (1):
  > seastar-cpu-map.sh: switch from pidof to pgrep
Fixes #10238.
2022-03-29 12:36:06 +03:00
Avi Kivity
f5bf4c81d1 Merge 'replica/database: truncate: temporarily disable compaction on table and views before flush' from Benny Halevy
Flushing the base table triggers view building
and corresponding compactions on the view tables.

Temporarily disable compaction on both the base
table and all its views before flush and snapshot,
since those flushed sstables are about to be truncated
anyway right after the snapshot is taken.

This should make truncate go faster.

In the process, this series also embeds `database::truncate_views`
into `truncate` and coroutinizes both.

Refs #6309

Test: unit(dev)

Closes #10203

* github.com:scylladb/scylla:
  replica/database: truncate: fixup indentation
  replica/database: truncate: temporarily disable compaction on table and views before flush
  replica/database: truncate: coroutinize per-view logic
  replica/database: open-code truncate_view in truncate
  replica/database: truncate: coroutinize run_with_compaction_disabled lambda
  replica/database: coroutinize truncate
  compaction_manager: add disable_compaction method

(cherry picked from commit aab052c0d5)
2022-03-28 15:40:40 +03:00
Benny Halevy
02e8336659 atomic_cell: compare_atomic_cell_for_merge: compare ttl if expiry is equal
Following up on a57c087c89,
compare_atomic_cell_for_merge should compare the ttl value in
reverse order: when comparing two cells that are identical
in all attributes but their ttl, we want to keep the cell with the
smaller ttl value rather than the larger one, since it was written
at a later (wall-clock) time, and so would remain longer after it
expires, until purged after gc_grace seconds.
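
A hedged sketch of the comparison order described above; the field names are illustrative rather than the real atomic_cell layout:

```
// Field names are illustrative, not the real atomic_cell layout.
#include <compare>
#include <cstdint>

struct cell {
    int64_t timestamp;
    int64_t expiry;   // absolute expiry point
    int64_t ttl;      // ttl the cell was written with
};

std::strong_ordering compare_for_merge(const cell& a, const cell& b) {
    if (auto c = a.timestamp <=> b.timestamp; c != 0) {
        return c;
    }
    if (auto c = a.expiry <=> b.expiry; c != 0) {
        return c;
    }
    // Reverse order on ttl: for the same expiry, a smaller ttl means a later
    // wall-clock write time, so that cell should compare as greater and win.
    return b.ttl <=> a.ttl;
}
```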

Fixes #10173

Test: mutation_test.test_cell_ordering, unit(dev)

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20220302154328.2400717-1-bhalevy@scylladb.com>
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20220306091913.106508-1-bhalevy@scylladb.com>
(cherry picked from commit a085ef74ff)
2022-03-24 18:00:11 +02:00
Benny Halevy
601812e11b atomic_cell: compare_atomic_cell_for_merge: compare ttl if expiry is equal
Unlike atomic_cell_or_collection::equals, compare_atomic_cell_for_merge
currently returns std::strong_ordering::equal if two cells are equal in
every way except their ttls.

The problem with that is that the cells' hashes are different, and this
will cause repair to keep trying to repair discrepancies caused by the
ttl being different.

This may be triggered by e.g. the spark migrator, which derives each
cell's ttl from its expiry time and the current time.

If the cell is migrated multiple times at different times, it will generate
cells that have the same expiry (by design) but different ttl values.

Fixes #10156

Test: mutation_test.test_cell_ordering, unit(dev)

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20220302154328.2400717-1-bhalevy@scylladb.com>
(cherry picked from commit a57c087c89)
2022-03-24 18:00:11 +02:00
Benny Halevy
ea466320d2 atomic_cell: compare_atomic_cell_for_merge: fixup indentation
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20220302113833.2308533-2-bhalevy@scylladb.com>
(cherry picked from commit d43da5d6dc)
2022-03-24 18:00:11 +02:00
Benny Halevy
25ea831a15 atomic_cell: compare_atomic_cell_for_merge: simplify expiry/deletion_time comparison
No need to first check that the cells' expiry is different
or that deletion_time is different before comparing them
with `<=>`.

If they are the same, the function returns std::strong_ordering::equal
anyhow, which is the same result `<=>` gives when comparing identical values.
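
A small illustration of why the guard is redundant:

```
// The preliminary equality check adds nothing: operator<=> already reports
// 'equal' for equal values.
#include <compare>

std::strong_ordering cmp_with_guard(int a, int b) {
    if (a != b) {                          // unnecessary guard
        return a <=> b;
    }
    return std::strong_ordering::equal;
}

std::strong_ordering cmp_direct(int a, int b) {
    return a <=> b;                        // same result for all inputs
}
```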

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20220302113833.2308533-1-bhalevy@scylladb.com>
(cherry picked from commit be865a29b8)
2022-03-24 18:00:11 +02:00
Benny Halevy
8648c79c9e main: shutdown: do not abort on certain system errors
Currently any unhandled error during deferred shutdown
is rethrown in a noexcept context (in ~deferred_action),
generating a core dump.

The core dump is not helpful if the cause of the
error is "environmental", i.e. in the system, rather
than in scylla itself.

This change detects several such errors and calls
_Exit(255) to exit the process early, without leaving
a coredump behind. Otherwise, it calls abort() explicitly,
rather than letting terminate() be called implicitly
by the destructor's exception handling code.
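
A hedged sketch of this policy; the specific error list and the shutdown plumbing here are assumptions, not the actual main.cc code:

```
// Sketch only: the real error list and shutdown plumbing differ.
#include <cstdlib>
#include <exception>
#include <system_error>

[[noreturn]] void handle_shutdown_error(std::exception_ptr ep) {
    try {
        std::rethrow_exception(ep);
    } catch (const std::system_error& e) {
        // "Environmental" errors (disk full, I/O error, ...) are not scylla
        // bugs, so a core dump would not help - exit quietly instead.
        if (e.code() == std::errc::no_space_on_device ||
            e.code() == std::errc::io_error) {
            std::_Exit(255);
        }
        std::abort();
    } catch (...) {
        std::abort();
    }
}
```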

Fixes #9573

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20220227101054.1294368-1-bhalevy@scylladb.com>
(cherry picked from commit 132c9d5933)
2022-03-24 14:48:52 +02:00
Nadav Har'El
7ae4d0e6f8 Seastar: backport Seastar fix for missing string escape in JSON output
Backported Seastar fix:
  > Merge 'json/formatter: Escape strings' from Juliusz Stasiewicz

Fixes #9061

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
2022-03-23 20:29:50 +02:00
Piotr Sarna
f3564db941 expression: fix get_value for mismatched column definitions
As observed in #10026, after schema changes it somehow happened
that a column definition that does not match any of the base table
columns was passed to the expression verification code.
The function that looks up the index of a column happens to return
-1 when it doesn't find anything, so using this returned index
without checking that it's nonnegative results in accessing invalid
vector data, and a segfault or silent memory corruption.
Therefore, an explicit check is added to see if the column was actually
found. This serves two purposes:
 - avoiding segfaults/memory corruption
 - making it easier to investigate the root cause of #10026
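
A generic sketch of the added check, with illustrative names rather than the actual expression code:

```
// Illustrative names only, not the actual expression code.
#include <cstddef>
#include <stdexcept>
#include <string>
#include <vector>

int find_column_index(const std::vector<std::string>& columns, const std::string& name) {
    for (std::size_t i = 0; i < columns.size(); ++i) {
        if (columns[i] == name) {
            return static_cast<int>(i);
        }
    }
    return -1;   // "not found" sentinel, easy to misuse as an index
}

int get_value(const std::vector<int>& row,
              const std::vector<std::string>& columns,
              const std::string& name) {
    int idx = find_column_index(columns, name);
    if (idx < 0) {
        // The explicit check: without it, row[idx] reads out of bounds and
        // can segfault or silently corrupt memory.
        throw std::runtime_error("column not found: " + name);
    }
    return row[static_cast<std::size_t>(idx)];
}
```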

Closes #10039

(cherry picked from commit 7b364fec9849e9a342af1c240e3a7185bf5401ef)
2022-03-21 10:37:48 +01:00
Pavel Emelyanov
97caf12836 Update seastar submodule (IO preemption overlap)
* seastar 47573503...8ef87d48 (3):
  > io_queue: Don't let preemption overlap requests
  > io_queue: Pending needs to keep capacity instead of ticket
  > io_queue: Extend grab_capacity() return codes

Fixes #10233
2022-03-17 11:26:38 +03:00
Yaron Kaikov
839d9ef41a release: prepare for 5.0.rc2 2022-03-16 14:35:52 +02:00
Benny Halevy
782bd50f92 compaction_manager: rewrite_sstables: do not acquire table write lock
Since regular compaction may run in parallel, no per-table lock
is required.

We still acquire a read lock in this patch, for backporting
purposes, in case the branch doesn't contain
6737c88045.
But it can be removed entirely in master in a follow-up patch.

This should solve some of the slowness in cleanup compaction (and
likely in upgrade sstables) seen in #10060, and
possibly #10166.

Fixes #10175

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>

Closes #10177

(cherry picked from commit 11ea2ffc3c)
2022-03-14 13:13:48 +02:00
Avi Kivity
0a4d971b4a Merge 'utils: cached_file: Fix alloc-dealloc mismatch during eviction' from Tomasz Grabiec
cached_page::on_evicted() is invoked in the LSA allocator context, set in the
reclaimer callback installed by the cache_tracker. However,
cached_pages are allocated in the standard allocator context (note:
page content is allocated inside LSA via lsa_buffer). The LSA region
will happily deallocate these, thinking that these are large
objects which were delegated to the standard allocator. But the
_non_lsa_memory_in_use metric will underflow. When it underflows
enough, shard_segment_pool.total_memory() will become 0 and memory
reclamation will stop doing anything, leading to apparent OOM.

The fix is to switch to the standard allocator context inside
cached_page::on_evicted(). evict_range() was also given the same
treatment as a precaution; it is currently only invoked in the
standard allocator context.
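
A hedged sketch of the shape of the fix; the allocator guard below is a hypothetical stand-in for the real LSA context switch, not the actual API:

```
// Hypothetical stand-in for the real allocator-context switch, not the LSA API.
enum class alloc_ctx { lsa, standard };
inline thread_local alloc_ctx current_ctx = alloc_ctx::lsa;

struct standard_allocator_guard {
    alloc_ctx previous = current_ctx;
    standard_allocator_guard() { current_ctx = alloc_ctx::standard; }
    ~standard_allocator_guard() { current_ctx = previous; }
};

struct cached_page_sketch {
    void on_evicted() noexcept {
        // Called from the LSA reclaimer, i.e. with the LSA context current.
        // The page object was allocated with the standard allocator, so it
        // must be freed under the standard context too - otherwise the LSA
        // accounting (_non_lsa_memory_in_use) underflows.
        standard_allocator_guard guard;
        delete this;
    }
};
```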

The series also adds two safety checks to LSA to catch such problems earlier.

Fixes #10056

\cc @slivne @bhalevy

Closes #10130

* github.com:scylladb/scylla:
  lsa: Abort when trying to free a standard allocator object not allocated through the region
  lsa: Abort when _non_lsa_memory_in_use goes negative
  tests: utils: cached_file: Validate occupancy after eviction
  test: sstable_partition_index_cache_test: Fix alloc-dealloc mismatch
  utils: cached_file: Fix alloc-dealloc mismatch during eviction

(cherry picked from commit ff2cd72766)
2022-02-26 11:28:36 +02:00
Benny Halevy
22562f767f cql3: result_set: remove std::ref from comparator&
Applying std::ref on `RowComparator& cmp` hits the
following compilation error on Fedora 34 with
libstdc++-devel-11.2.1-9.fc34.x86_64

```
FAILED: build/dev/cql3/statements/select_statement.o
clang++ -MD -MT build/dev/cql3/statements/select_statement.o -MF build/dev/cql3/statements/select_statement.o.d -I/home/bhalevy/dev/scylla/seastar/include -I/home/bhalevy/dev/scylla/build/dev/seastar/gen/include -std=gnu++20 -U_FORTIFY_SOURCE -DSEASTAR_SSTRING -Werror=unused-result -fstack-clash-protection -DSEASTAR_API_LEVEL=6 -DSEASTAR_ENABLE_ALLOC_FAILURE_INJECTION -DSEASTAR_SCHEDULING_GROUPS_COUNT=16 -DSEASTAR_TYPE_ERASE_MORE -DFMT_LOCALE -DFMT_SHARED -I/usr/include/p11-kit-1  -DDEVEL -DSEASTAR_ENABLE_ALLOC_FAILURE_INJECTION -DSCYLLA_ENABLE_ERROR_INJECTION -O2 -DSCYLLA_ENABLE_WASMTIME -iquote. -iquote build/dev/gen --std=gnu++20  -ffile-prefix-map=/home/bhalevy/dev/scylla=.  -march=westmere -DBOOST_TEST_DYN_LINK   -Iabseil -fvisibility=hidden  -Wall -Werror -Wno-mismatched-tags -Wno-tautological-compare -Wno-parentheses-equality -Wno-c++11-narrowing -Wno-sometimes-uninitialized -Wno-return-stack-address -Wno-missing-braces -Wno-unused-lambda-capture -Wno-overflow -Wno-noexcept-type -Wno-error=cpp -Wno-ignored-attributes -Wno-overloaded-virtual -Wno-unused-command-line-argument -Wno-defaulted-function-deleted -Wno-redeclared-class-member -Wno-unsupported-friend -Wno-unused-variable -Wno-delete-non-abstract-non-virtual-dtor -Wno-braced-scalar-init -Wno-implicit-int-float-conversion -Wno-delete-abstract-non-virtual-dtor -Wno-uninitialized-const-reference -Wno-psabi -Wno-narrowing -Wno-array-bounds -Wno-nonnull -Wno-error=deprecated-declarations -DXXH_PRIVATE_API -DSEASTAR_TESTING_MAIN -DHAVE_LZ4_COMPRESS_DEFAULT  -c -o build/dev/cql3/statements/select_statement.o cql3/statements/select_statement.cc
In file included from cql3/statements/select_statement.cc:14:
In file included from ./cql3/statements/select_statement.hh:16:
In file included from ./cql3/statements/raw/select_statement.hh:16:
In file included from ./cql3/statements/raw/cf_statement.hh:16:
In file included from ./cql3/cf_name.hh:16:
In file included from ./cql3/keyspace_element_name.hh:16:
In file included from /home/bhalevy/dev/scylla/seastar/include/seastar/core/sstring.hh:25:
In file included from /usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/algorithm:74:
In file included from /usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/pstl/glue_algorithm_defs.h:13:
In file included from /usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/functional:58:
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/refwrap.h:319:40: error: exception specification of 'function<__gnu_cxx::__ops::_Iter_comp_iter<std::reference_wrapper<const std::function<bool (const std::vector<std::optional<seastar::basic_sstring<signed char, unsigned int, 31, false>>> &, const std::vector<std::optional<seastar::basic_sstring<signed char, unsigned int, 31, false>>> &)>>>, void>' uses itself
                = decltype(reference_wrapper::_S_fun(std::declval<_Up>()))>
                                                     ^
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/refwrap.h:319:40: note: in instantiation of exception specification for 'function<__gnu_cxx::__ops::_Iter_comp_iter<std::reference_wrapper<const std::function<bool (const std::vector<std::optional<seastar::basic_sstring<signed char, unsigned int, 31, false>>> &, const std::vector<std::optional<seastar::basic_sstring<signed char, unsigned int, 31, false>>> &)>>>, void>' requested here
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/refwrap.h:321:2: note: in instantiation of default argument for 'reference_wrapper<__gnu_cxx::__ops::_Iter_comp_iter<std::reference_wrapper<const std::function<bool (const std::vector<std::optional<seastar::basic_sstring<signed char, unsigned int, 31, false>>> &, const std::vector<std::optional<seastar::basic_sstring<signed char, unsigned int, 31, false>>> &)>>>, void>' required here
        reference_wrapper(_Up&& __uref)
        ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/type_traits:1017:57: note: while substituting deduced template arguments into function template 'reference_wrapper' [with _Up = __gnu_cxx::__ops::_Iter_comp_iter<std::reference_wrapper<const std::function<bool (const std::vector<std::optional<seastar::basic_sstring<signed char, unsigned int, 31, false>>> &, const std::vector<std::optional<seastar::basic_sstring<signed char, unsigned int, 31, false>>> &)>>>, $1 = (no value), $2 = (no value)]
      = __bool_constant<__is_nothrow_constructible(_Tp, _Args...)>;
                                                        ^
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/type_traits:1023:14: note: in instantiation of template type alias '__is_nothrow_constructible_impl' requested here
    : public __is_nothrow_constructible_impl<_Tp, _Args...>::type
             ^
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/type_traits:153:14: note: in instantiation of template class 'std::is_nothrow_constructible<__gnu_cxx::__ops::_Iter_comp_iter<std::reference_wrapper<const std::function<bool (const std::vector<std::optional<seastar::basic_sstring<signed char, unsigned int, 31, false>>> &, const std::vector<std::optional<seastar::basic_sstring<signed char, unsigned int, 31, false>>> &)>>>, __gnu_cxx::__ops::_Iter_comp_iter<std::reference_wrapper<const std::function<bool (const std::vector<std::optional<seastar::basic_sstring<signed char, unsigned int, 31, false>>> &, const std::vector<std::optional<seastar::basic_sstring<signed char, unsigned int, 31, false>>> &)>>>>' requested here
    : public conditional<_B1::value, _B2, _B1>::type
             ^
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/std_function.h:298:11: note: (skipping 8 contexts in backtrace; use -ftemplate-backtrace-limit=0 to see all)
          return __and_<typename _Base::_Local_storage,
                 ^
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/stl_algo.h:1933:13: note: in instantiation of function template specialization 'std::__partial_sort<utils::chunked_vector<std::vector<std::optional<seastar::basic_sstring<signed char, unsigned int, 31, false>>>, 131072>::iterator_type<std::vector<std::optional<seastar::basic_sstring<signed char, unsigned int, 31, false>>>>, __gnu_cxx::__ops::_Iter_comp_iter<std::reference_wrapper<const std::function<bool (const std::vector<std::optional<seastar::basic_sstring<signed char, unsigned int, 31, false>>> &, const std::vector<std::optional<seastar::basic_sstring<signed char, unsigned int, 31, false>>> &)>>>>' requested here
              std::__partial_sort(__first, __last, __last, __comp);
                   ^
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/stl_algo.h:1954:9: note: in instantiation of function template specialization 'std::__introsort_loop<utils::chunked_vector<std::vector<std::optional<seastar::basic_sstring<signed char, unsigned int, 31, false>>>, 131072>::iterator_type<std::vector<std::optional<seastar::basic_sstring<signed char, unsigned int, 31, false>>>>, long, __gnu_cxx::__ops::_Iter_comp_iter<std::reference_wrapper<const std::function<bool (const std::vector<std::optional<seastar::basic_sstring<signed char, unsigned int, 31, false>>> &, const std::vector<std::optional<seastar::basic_sstring<signed char, unsigned int, 31, false>>> &)>>>>' requested here
          std::__introsort_loop(__first, __last,
               ^
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/stl_algo.h:4875:12: note: in instantiation of function template specialization 'std::__sort<utils::chunked_vector<std::vector<std::optional<seastar::basic_sstring<signed char, unsigned int, 31, false>>>, 131072>::iterator_type<std::vector<std::optional<seastar::basic_sstring<signed char, unsigned int, 31, false>>>>, __gnu_cxx::__ops::_Iter_comp_iter<std::reference_wrapper<const std::function<bool (const std::vector<std::optional<seastar::basic_sstring<signed char, unsigned int, 31, false>>> &, const std::vector<std::optional<seastar::basic_sstring<signed char, unsigned int, 31, false>>> &)>>>>' requested here
      std::__sort(__first, __last, __gnu_cxx::__ops::__iter_comp_iter(__comp));
           ^
./cql3/result_set.hh:168:14: note: in instantiation of function template specialization 'std::sort<utils::chunked_vector<std::vector<std::optional<seastar::basic_sstring<signed char, unsigned int, 31, false>>>, 131072>::iterator_type<std::vector<std::optional<seastar::basic_sstring<signed char, unsigned int, 31, false>>>>, std::reference_wrapper<const std::function<bool (const std::vector<std::optional<seastar::basic_sstring<signed char, unsigned int, 31, false>>> &, const std::vector<std::optional<seastar::basic_sstring<signed char, unsigned int, 31, false>>> &)>>>' requested here
        std::sort(_rows.begin(), _rows.end(), std::ref(cmp));
             ^
cql3/statements/select_statement.cc:773:21: note: in instantiation of function template specialization 'cql3::result_set::sort<std::function<bool (const std::vector<std::optional<seastar::basic_sstring<signed char, unsigned int, 31, false>>> &, const std::vector<std::optional<seastar::basic_sstring<signed char, unsigned int, 31, false>>> &)>>' requested here
                rs->sort(_ordering_comparator);
                    ^
1 error generated.
ninja: build stopped: subcommand failed.
```

Fixes #10079.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20220215071955.316895-3-bhalevy@scylladb.com>
(cherry picked from commit 3e20fee070)

[avi: backport for developer quality-of-life rather than as a bug fix]
2022-02-16 10:07:11 +02:00
Raphael S. Carvalho
eb80dd1db5 Revert "sstables/compaction_manager: rewrite_sstables(): resolve maintenance group FIXME"
This reverts commit 4c05e5f966.

Moving cleanup to the maintenance group made its operation time up to
10x slower than in the previous release. It's a blocker for the 4.6 release,
so let's revert it until we figure this all out.

This probably happens because the maintenance group is fixed at a
relatively small constant, and cleanup may be incrementally
generating backlog for regular compaction, with the former
fighting for resources against the latter.

Fixes #10060.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20220213184306.91585-1-raphaelsc@scylladb.com>
(cherry picked from commit a9427f150a)
2022-02-14 18:05:43 +02:00
Avi Kivity
51d699ee21 Update seastar submodule (overzealous log silencer)
* seastar 0d250d15ac...47573503cd (1):
  > log: Fix silencer to be shard-local and logger-global
Fixes #9784.
2022-02-14 17:54:54 +02:00
Avi Kivity
83a33bff8c Point seastar submodule at scylla-seastar.git
This allows us to backport Seastar fixes to this branch.
2022-02-14 17:54:16 +02:00
Nadav Har'El
273563b9ad alternator: allow REMOVE of non-existent nested attribute
DynamoDB allows an UpdateItem operation "REMOVE x.y" when a map x
exists in the item, but x.y doesn't - the removal silently does
nothing. Alternator incorrectly generated an error in this case,
and unfortunately we didn't have a test for this case.

So in this patch we add the missing test (which fails on Alternator
before this patch - and passes on DynamoDB) and then fix the behavior.
After this patch, "REMOVE x.y" will remain an error if "x" doesn't
exist (saying "document paths not valid for this item"), but if "x"
exists and is a map, but "x.y" doesn't, the removal will silently
do nothing and will not be an error.

Fixes #10043.

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20220207133652.181994-1-nyh@scylladb.com>
(cherry picked from commit 9982a28007)
2022-02-08 11:37:31 +02:00
Yaron Kaikov
891990ec09 release: prepare for 5.0.rc1 2022-02-06 16:41:05 +02:00
Yaron Kaikov
da0cd2b107 release: prepare for 5.0.rc0 2022-02-03 08:10:30 +02:00
2447 changed files with 49536 additions and 152751 deletions

1
.gitattributes vendored
View File

@@ -1,3 +1,2 @@
*.cc diff=cpp
*.hh diff=cpp
*.svg binary

22
.github/CODEOWNERS vendored
View File

@@ -2,14 +2,14 @@
auth/* @elcallio @vladzcloudius
# CACHE
row_cache* @tgrabiec
*mutation* @tgrabiec
test/boost/mvcc* @tgrabiec
row_cache* @tgrabiec @haaawk
*mutation* @tgrabiec @haaawk
test/boost/mvcc* @tgrabiec @haaawk
# CDC
cdc/* @kbr- @elcallio @piodul @jul-stas
test/cql/cdc_* @kbr- @elcallio @piodul @jul-stas
test/boost/cdc_* @kbr- @elcallio @piodul @jul-stas
cdc/* @haaawk @kbr- @elcallio @piodul @jul-stas
test/cql/cdc_* @haaawk @kbr- @elcallio @piodul @jul-stas
test/boost/cdc_* @haaawk @kbr- @elcallio @piodul @jul-stas
# COMMITLOG / BATCHLOG
db/commitlog/* @elcallio
@@ -28,12 +28,8 @@ transport/*
cql3/* @tgrabiec @psarna @cvybhu
# COUNTERS
counters* @jul-stas
tests/counter_test* @jul-stas
# DOCS
docs/* @annastuchlik @tzach
docs/alternator @annastuchlik @tzach @nyh @psarna
counters* @haaawk @jul-stas
tests/counter_test* @haaawk @jul-stas
# GOSSIP
gms/* @tgrabiec @asias
@@ -78,7 +74,7 @@ alternator/* @nyh @psarna
test/alternator/* @nyh @psarna
# HINTED HANDOFF
db/hints/* @piodul @vladzcloudius
db/hints/* @haaawk @piodul @vladzcloudius
# REDIS
redis/* @nyh @syuu1228

View File

@@ -1,35 +0,0 @@
name: "Docs / Publish"
# For more information,
# see https://sphinx-theme.scylladb.com/stable/deployment/production.html#available-workflows
on:
push:
branches:
- master
paths:
- "docs/**"
workflow_dispatch:
jobs:
release:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3
with:
persist-credentials: false
fetch-depth: 0
- name: Set up Python
uses: actions/setup-python@v3
with:
python-version: 3.7
- name: Set up env
run: make -C docs setupenv
- name: Build docs
run: make -C docs multiversion
- name: Build redirects
run: make -C docs redirects
- name: Deploy docs to GitHub Pages
run: ./docs/_utils/deploy.sh
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

29
.github/workflows/docs-pages@v2.yaml vendored Normal file
View File

@@ -0,0 +1,29 @@
name: "Docs / Publish"
on:
push:
branches:
- master
paths:
- "docs/**"
workflow_dispatch:
jobs:
release:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v2
with:
persist-credentials: false
fetch-depth: 0
- name: Set up Python
uses: actions/setup-python@v1
with:
python-version: 3.7
- name: Build docs
run: make -C docs multiversion
- name: Deploy
run: ./docs/_utils/deploy.sh
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

View File

@@ -1,28 +0,0 @@
name: "Docs / Build PR"
# For more information,
# see https://sphinx-theme.scylladb.com/stable/deployment/production.html#available-workflows
on:
pull_request:
branches:
- master
paths:
- "docs/**"
jobs:
build:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3
with:
persist-credentials: false
fetch-depth: 0
- name: Set up Python
uses: actions/setup-python@v3
with:
python-version: 3.7
- name: Set up env
run: make -C docs setupenv
- name: Build docs
run: make -C docs test

25
.github/workflows/docs-pr@v1.yaml vendored Normal file
View File

@@ -0,0 +1,25 @@
name: "Docs / Build PR"
on:
pull_request:
branches:
- master
paths:
- "docs/**"
jobs:
build:
name: Build
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v2
with:
persist-credentials: false
fetch-depth: 0
- name: Set up Python
uses: actions/setup-python@v1
with:
python-version: 3.7
- name: Build docs
run: make -C docs test

4
.gitignore vendored
View File

@@ -22,7 +22,6 @@ resources
.pytest_cache
/expressions.tokens
tags
!db/tags/
testlog
test/*/*.reject
.vscode
@@ -30,6 +29,3 @@ docs/_build
docs/poetry.lock
compile_commands.json
.ccls-cache/
.mypy_cache
.envrc
rust/Cargo.lock

2
.gitmodules vendored
View File

@@ -1,6 +1,6 @@
[submodule "seastar"]
path = seastar
url = ../seastar
url = ../scylla-seastar
ignore = dirty
[submodule "swagger-ui"]
path = swagger-ui

View File

@@ -1,3 +0,0 @@
Avi Kivity <avi@scylladb.com> Avi Kivity' via ScyllaDB development <scylladb-dev@googlegroups.com>
Raphael S. Carvalho <raphaelsc@scylladb.com> Raphael S. Carvalho' via ScyllaDB development <scylladb-dev@googlegroups.com>
Pavel Emelyanov <xemul@scylladb.com> Pavel Emelyanov' via ScyllaDB development <scylladb-dev@googlegroups.com>

View File

@@ -189,8 +189,6 @@ set(swagger_files
api/api-doc/storage_service.json
api/api-doc/stream_manager.json
api/api-doc/system.json
api/api-doc/task_manager.json
api/api-doc/task_manager_test.json
api/api-doc/utils.json)
set(swagger_gen_files)
@@ -303,8 +301,6 @@ set(scylla_sources
api/storage_service.cc
api/stream_manager.cc
api/system.cc
api/task_manager.cc
api/task_manager_test.cc
atomic_cell.cc
auth/allow_all_authenticator.cc
auth/allow_all_authorizer.cc
@@ -341,6 +337,7 @@ set(scylla_sources
compaction/size_tiered_compaction_strategy.cc
compaction/time_window_compaction_strategy.cc
compress.cc
connection_notifier.cc
converting_mutation_partition_applier.cc
counters.cc
cql3/abstract_marker.cc
@@ -353,7 +350,6 @@ set(scylla_sources
cql3/cql3_type.cc
cql3/expr/expression.cc
cql3/expr/prepare_expr.cc
cql3/expr/restrictions.cc
cql3/functions/aggregate_fcts.cc
cql3/functions/castas_fcts.cc
cql3/functions/error_injection_fcts.cc
@@ -367,6 +363,7 @@ set(scylla_sources
cql3/prepare_context.cc
cql3/query_options.cc
cql3/query_processor.cc
cql3/relation.cc
cql3/restrictions/statement_restrictions.cc
cql3/result_set.cc
cql3/role_name.cc
@@ -377,6 +374,7 @@ set(scylla_sources
cql3/selection/selector_factories.cc
cql3/selection/simple_selector.cc
cql3/sets.cc
cql3/single_column_relation.cc
cql3/statements/alter_keyspace_statement.cc
cql3/statements/alter_service_level_statement.cc
cql3/statements/alter_table_statement.cc
@@ -428,9 +426,8 @@ set(scylla_sources
cql3/statements/sl_prop_defs.cc
cql3/statements/truncate_statement.cc
cql3/statements/update_statement.cc
cql3/statements/strongly_consistent_modification_statement.cc
cql3/statements/strongly_consistent_select_statement.cc
cql3/statements/use_statement.cc
cql3/token_relation.cc
cql3/type_json.cc
cql3/untyped_result_set.cc
cql3/update_parameters.cc
@@ -456,7 +453,6 @@ set(scylla_sources
db/large_data_handler.cc
db/legacy_schema_migrator.cc
db/marshal/type_parser.cc
db/rate_limiter.cc
db/schema_tables.cc
db/size_estimates_virtual_reader.cc
db/snapshot-ctl.cc
@@ -472,10 +468,10 @@ set(scylla_sources
dht/murmur3_partitioner.cc
dht/range_streamer.cc
dht/token.cc
replica/distributed_loader.cc
distributed_loader.cc
duration.cc
exceptions/exceptions.cc
readers/mutation_readers.cc
flat_mutation_reader.cc
frozen_mutation.cc
frozen_schema.cc
generic_server.cc
@@ -495,7 +491,7 @@ set(scylla_sources
index/secondary_index_manager.cc
init.cc
keys.cc
utils/lister.cc
lister.cc
locator/abstract_replication_strategy.cc
locator/azure_snitch.cc
locator/ec2_multi_region_snitch.cc
@@ -513,7 +509,7 @@ set(scylla_sources
locator/token_metadata.cc
lang/lua.cc
main.cc
replica/memtable.cc
memtable.cc
message/messaging_service.cc
multishard_mutation_query.cc
mutation.cc
@@ -522,7 +518,7 @@ set(scylla_sources
mutation_partition_serializer.cc
mutation_partition_view.cc
mutation_query.cc
readers/mutation_reader.cc
mutation_reader.cc
mutation_writer/feed_writers.cc
mutation_writer/multishard_writer.cc
mutation_writer/partition_based_splitting_writer.cc
@@ -532,14 +528,12 @@ set(scylla_sources
partition_version.cc
querier.cc
query.cc
query_ranges_to_vnodes.cc
query-result-set.cc
raft/fsm.cc
raft/log.cc
raft/raft.cc
raft/server.cc
raft/tracker.cc
service/broadcast_tables/experimental/lang.cc
range_tombstone.cc
range_tombstone_list.cc
tombstone_gc_options.cc
@@ -568,7 +562,6 @@ set(scylla_sources
schema_registry.cc
serializer.cc
service/client_state.cc
service/forward_service.cc
service/migration_manager.cc
service/misc_services.cc
service/pager/paging_state.cc
@@ -581,6 +574,7 @@ set(scylla_sources
service/qos/qos_common.cc
service/qos/service_level_controller.cc
service/qos/standard_service_level_distributed_data_accessor.cc
service/raft/raft_gossip_failure_detector.cc
service/raft/raft_group_registry.cc
service/raft/raft_rpc.cc
service/raft/raft_sys_table_storage.cc
@@ -619,7 +613,6 @@ set(scylla_sources
streaming/stream_task.cc
streaming/stream_transfer_task.cc
table_helper.cc
tasks/task_manager.cc
thrift/controller.cc
thrift/handler.cc
thrift/server.cc

View File

@@ -18,5 +18,3 @@ If you need help formatting or sending patches, [check out these instructions](h
The Scylla C++ source code uses the [Seastar coding style](https://github.com/scylladb/seastar/blob/master/coding-style.md) so please adhere to that in your patches. Note that Scylla code is written with `using namespace seastar`, so should not explicitly add the `seastar::` prefix to Seastar symbols. You will usually not need to add `using namespace seastar` to new source files, because most Scylla header files have `#include "seastarx.hh"`, which does this.
Header files in Scylla must be self-contained, i.e., each can be included without having to include specific other headers first. To verify that your change did not break this property, run `ninja dev-headers`. If you added or removed header files, you must `touch configure.py` first - this will cause `configure.py` to be automatically re-run to generate a fresh list of header files.
For more criteria on what reviewers consider good code, see the [review checklist](https://github.com/scylladb/scylla/blob/master/docs/dev/review-checklist.md).

View File

@@ -383,40 +383,6 @@ Open the link printed at the end. Be horrified. Go and write more tests.
For more details see `./scripts/coverage.py --help`.
### Resolving stack backtraces
Scylla may print stack backtraces to the log for several reasons.
For example:
- When aborting (e.g. due to assertion failure, internal error, or segfault)
- When detecting seastar reactor stalls (where a seastar task runs for a long time without yielding the cpu to other tasks on that shard)
The backtraces contain code pointers so they are not very helpful without resolving into code locations.
To resolve the backtraces, one needs the scylla relocatable package that contains the scylla binary (with debug information),
as well as the dynamic libraries it is linked against.
Builds from our automated build system are uploaded to the cloud
and can be searched on http://backtrace.scylladb.com/
Make sure you have the scylla server exact `build-id` to locate
its respective relocatable package, required for decoding backtraces it prints.
The build-id is printed to the system log when scylla starts.
It can also be found by executing `scylla --build-id`, or
by using the `file` utility, for example:
```
$ scylla --build-id
4cba12e6eb290a406bfa4930918db23941fd4be3
$ file scylla
scylla: ELF 64-bit LSB executable, x86-64, version 1 (SYSV), dynamically linked, interpreter /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////lib64/ld-linux-x86-64.so.2, for GNU/Linux 3.2.0, BuildID[sha1]=4cba12e6eb290a406bfa4930918db23941fd4be3, with debug_info, not stripped, too many notes (256)
```
To find the build-id of a coredump, use the `eu-unstrip` utility as follows:
```
$ eu-unstrip -n --core <coredump> | awk '/scylla$/ { s=$2; sub(/@.*$/, "", s); print s; exit(0); }'
4cba12e6eb290a406bfa4930918db23941fd4be3
```
### Core dump debugging
See [debugging.md](docs/dev/debugging.md).
See [debugging.md](debugging.md).

View File

@@ -42,7 +42,7 @@ For further information, please see:
* [Docker image build documentation] for information on how to build Docker images.
[developer documentation]: HACKING.md
[build documentation]: docs/dev/building.md
[build documentation]: docs/guides/building.md
[docker image build documentation]: dist/docker/debian/README.md
## Running Scylla
@@ -65,7 +65,7 @@ $ ./tools/toolchain/dbuild ./build/release/scylla --help
## Testing
See [test.py manual](docs/dev/testing.md).
See [test.py manual](docs/guides/testing.md).
## Scylla APIs and compatibility
By default, Scylla is compatible with Apache Cassandra and its APIs - CQL and
@@ -78,7 +78,7 @@ and the current compatibility of this feature as well as Scylla-specific extensi
## Documentation
Documentation can be found [here](docs/dev/README.md).
Documentation can be found [here](https://scylla.docs.scylladb.com).
Seastar documentation can be found [here](http://docs.seastar.io/master/index.html).
User documentation can be found [here](https://docs.scylladb.com/).

View File

@@ -1,12 +1,11 @@
#!/bin/sh
USAGE=$(cat <<-END
Usage: $(basename "$0") [-h|--help] [-o|--output-dir PATH] [--date-stamp DATE] -- generate Scylla version and build information files.
Usage: $(basename "$0") [-h|--help] [-o|--output-dir PATH] -- generate Scylla version and build information files.
Options:
-h|--help show this help message.
-o|--output-dir PATH specify destination path at which the version files are to be created.
-d|--date-stamp DATE manually set date for release parameter
By default, the script will attempt to parse 'version' file
in the current directory, which should contain a string of
@@ -32,8 +31,6 @@ using '-o PATH' option.
END
)
DATE=""
while [[ $# -gt 0 ]]; do
opt="$1"
case $opt in
@@ -46,11 +43,6 @@ while [[ $# -gt 0 ]]; do
shift
shift
;;
--date-stamp)
DATE="$2"
shift
shift
;;
*)
echo "Unexpected argument found: $1"
echo
@@ -66,33 +58,24 @@ if [ -z "$OUTPUT_DIR" ]; then
OUTPUT_DIR="$SCRIPT_DIR/build"
fi
if [ -z "$DATE" ]; then
DATE=$(date --utc +%Y%m%d)
fi
# Default scylla product/version tags
PRODUCT=scylla
VERSION=5.2.0-dev
VERSION=5.0.4
if test -f version
then
SCYLLA_VERSION=$(cat version | awk -F'-' '{print $1}')
SCYLLA_RELEASE=$(cat version | awk -F'-' '{print $2}')
else
DATE=$(date --utc +%Y%m%d)
GIT_COMMIT=$(git -C "$SCRIPT_DIR" log --pretty=format:'%h' -n 1)
SCYLLA_VERSION=$VERSION
if [ -z "$SCYLLA_RELEASE" ]; then
DATE=$(date --utc +%Y%m%d)
GIT_COMMIT=$(git -C "$SCRIPT_DIR" log --pretty=format:'%h' -n 1 --abbrev=12)
# For custom package builds, replace "0" with "counter.your_name",
# where counter starts at 1 and increments for successive versions.
# This ensures that the package manager will select your custom
# package over the standard release.
SCYLLA_BUILD=0
SCYLLA_RELEASE=$SCYLLA_BUILD.$DATE.$GIT_COMMIT
elif [ -f "$OUTPUT_DIR/SCYLLA-RELEASE-FILE" ]; then
echo "setting SCYLLA_RELEASE only makes sense in clean builds" 1>&2
exit 1
fi
# For custom package builds, replace "0" with "counter.your_name",
# where counter starts at 1 and increments for successive versions.
# This ensures that the package manager will select your custom
# package over the standard release.
SCYLLA_BUILD=0
SCYLLA_RELEASE=$SCYLLA_BUILD.$DATE.$GIT_COMMIT
fi
if [ -f "$OUTPUT_DIR/SCYLLA-RELEASE-FILE" ]; then

2
abseil

Submodule abseil updated: 7f3c0d7811...f70eadadd7

View File

@@ -129,12 +129,11 @@ future<std::string> get_key_from_roles(service::storage_proxy& proxy, std::strin
std::vector<query::clustering_range> bounds{query::clustering_range::make_open_ended_both_sides()};
const column_definition* salted_hash_col = schema->get_column_definition(bytes("salted_hash"));
if (!salted_hash_col) {
co_await coroutine::return_exception(api_error::unrecognized_client(format("Credentials cannot be fetched for: {}", username)));
co_return coroutine::make_exception(api_error::unrecognized_client(format("Credentials cannot be fetched for: {}", username)));
}
auto selection = cql3::selection::selection::for_columns(schema, {salted_hash_col});
auto partition_slice = query::partition_slice(std::move(bounds), {}, query::column_id_vector{salted_hash_col->id}, selection->get_query_options());
auto command = ::make_lw_shared<query::read_command>(schema->id(), schema->version(), partition_slice,
proxy.get_max_result_size(partition_slice), query::tombstone_limit(proxy.get_tombstone_limit()));
auto command = ::make_lw_shared<query::read_command>(schema->id(), schema->version(), partition_slice, proxy.get_max_result_size(partition_slice));
auto cl = auth::password_authenticator::consistency_for_user(username);
service::client_state client_state{service::client_state::internal_tag()};
@@ -146,11 +145,11 @@ future<std::string> get_key_from_roles(service::storage_proxy& proxy, std::strin
auto result_set = builder.build();
if (result_set->empty()) {
co_await coroutine::return_exception(api_error::unrecognized_client(format("User not found: {}", username)));
co_return coroutine::make_exception(api_error::unrecognized_client(format("User not found: {}", username)));
}
const bytes_opt& salted_hash = result_set->rows().front().front(); // We only asked for 1 row and 1 column
if (!salted_hash) {
co_await coroutine::return_exception(api_error::unrecognized_client(format("No password found for user: {}", username)));
co_return coroutine::make_exception(api_error::unrecognized_client(format("No password found for user: {}", username)));
}
co_return value_cast<sstring>(utf8_type->deserialize(*salted_hash));
}

View File

@@ -14,8 +14,6 @@
#include "db/config.hh"
#include "cdc/generation_service.hh"
#include "service/memory_limiter.hh"
#include "auth/service.hh"
#include "service/qos/service_level_controller.hh"
using namespace seastar;
@@ -30,8 +28,6 @@ controller::controller(
sharded<db::system_distributed_keyspace>& sys_dist_ks,
sharded<cdc::generation_service>& cdc_gen_svc,
sharded<service::memory_limiter>& memory_limiter,
sharded<auth::service>& auth_service,
sharded<qos::service_level_controller>& sl_controller,
const db::config& config)
: _gossiper(gossiper)
, _proxy(proxy)
@@ -39,8 +35,6 @@ controller::controller(
, _sys_dist_ks(sys_dist_ks)
, _cdc_gen_svc(cdc_gen_svc)
, _memory_limiter(memory_limiter)
, _auth_service(auth_service)
, _sl_controller(sl_controller)
, _config(config)
{
}
@@ -83,7 +77,7 @@ future<> controller::start_server() {
auto get_cdc_metadata = [] (cdc::generation_service& svc) { return std::ref(svc.get_cdc_metadata()); };
_executor.start(std::ref(_gossiper), std::ref(_proxy), std::ref(_mm), std::ref(_sys_dist_ks), sharded_parameter(get_cdc_metadata, std::ref(_cdc_gen_svc)), _ssg.value()).get();
_server.start(std::ref(_executor), std::ref(_proxy), std::ref(_gossiper), std::ref(_auth_service), std::ref(_sl_controller)).get();
_server.start(std::ref(_executor), std::ref(_proxy), std::ref(_gossiper)).get();
// Note: from this point on, if start_server() throws for any reason,
// it must first call stop_server() to stop the executor and server
// services we just started - or Scylla will cause an assertion

View File

@@ -34,14 +34,6 @@ class gossiper;
}
namespace auth {
class service;
}
namespace qos {
class service_level_controller;
}
namespace alternator {
// This is the official DynamoDB API version.
@@ -61,8 +53,6 @@ class controller : public protocol_server {
sharded<db::system_distributed_keyspace>& _sys_dist_ks;
sharded<cdc::generation_service>& _cdc_gen_svc;
sharded<service::memory_limiter>& _memory_limiter;
sharded<auth::service>& _auth_service;
sharded<qos::service_level_controller>& _sl_controller;
const db::config& _config;
std::vector<socket_address> _listen_addresses;
@@ -78,8 +68,6 @@ public:
sharded<db::system_distributed_keyspace>& sys_dist_ks,
sharded<cdc::generation_service>& cdc_gen_svc,
sharded<service::memory_limiter>& memory_limiter,
sharded<auth::service>& auth_service,
sharded<qos::service_level_controller>& sl_controller,
const db::config& config);
virtual sstring name() const override;

View File

@@ -73,9 +73,6 @@ public:
static api_error serialization(std::string msg) {
return api_error("SerializationException", std::move(msg));
}
static api_error table_not_found(std::string msg) {
return api_error("TableNotFoundException", std::move(msg));
}
static api_error internal(std::string msg) {
return api_error("InternalServerError", std::move(msg), reply::status_type::internal_server_error);
}

File diff suppressed because it is too large

View File

@@ -81,10 +81,10 @@ namespace parsed {
class path;
};
const std::map<sstring, sstring>& get_tags_of_table(schema_ptr schema);
std::optional<std::string> find_tag(const schema& s, const sstring& tag);
future<> update_tags(service::migration_manager& mm, schema_ptr schema, std::map<sstring, sstring>&& tags_map);
schema_ptr get_table(service::storage_proxy& proxy, const rjson::value& request);
bool is_alternator_keyspace(const sstring& ks_name);
// Wraps the db::get_tags_of_table and throws if the table is missing the tags extension.
const std::map<sstring, sstring>& get_tags_of_table_or_throw(schema_ptr schema);
// An attribute_path_map object is used to hold data for various attributes
// paths (parsed::path) in a hierarchy of attribute paths. Each attribute path
@@ -144,11 +144,6 @@ template<typename T>
using attribute_path_map = std::unordered_map<std::string, attribute_path_map_node<T>>;
using attrs_to_get_node = attribute_path_map_node<std::monostate>;
// attrs_to_get lists which top-level attribute are needed, and possibly also
// which part of the top-level attribute is really needed (when nested
// attribute paths appeared in the query).
// Most code actually uses optional<attrs_to_get>. There, a disengaged
// optional means we should get all attributes, not specific ones.
using attrs_to_get = attribute_path_map<std::monostate>;
@@ -196,7 +191,6 @@ public:
future<request_return_type> describe_stream(client_state& client_state, service_permit permit, rjson::value request);
future<request_return_type> get_shard_iterator(client_state& client_state, service_permit permit, rjson::value request);
future<request_return_type> get_records(client_state& client_state, tracing::trace_state_ptr, service_permit permit, rjson::value request);
future<request_return_type> describe_continuous_backups(client_state& client_state, service_permit permit, rjson::value request);
future<> start();
future<> stop() { return make_ready_future<>(); }
@@ -212,25 +206,21 @@ public:
private:
friend class rmw_operation;
static bool is_alternator_keyspace(const sstring& ks_name);
static sstring make_keyspace_name(const sstring& table_name);
static void describe_key_schema(rjson::value& parent, const schema&, std::unordered_map<std::string,std::string> * = nullptr);
static void describe_key_schema(rjson::value& parent, const schema& schema, std::unordered_map<std::string,std::string>&);
public:
public:
static std::optional<rjson::value> describe_single_item(schema_ptr,
const query::partition_slice&,
const cql3::selection::selection&,
const query::result&,
const std::optional<attrs_to_get>&);
static std::vector<rjson::value> describe_multi_item(schema_ptr schema,
const query::partition_slice& slice,
const cql3::selection::selection& selection,
const query::result& query_result,
const std::optional<attrs_to_get>& attrs_to_get);
const attrs_to_get&);
static void describe_single_item(const cql3::selection::selection&,
const std::vector<bytes_opt>&,
const std::optional<attrs_to_get>&,
const attrs_to_get&,
rjson::value&,
bool = false);

View File

@@ -29,7 +29,7 @@
namespace alternator {
template <typename Func, typename Result = std::result_of_t<Func(expressionsParser&)>>
Result do_with_parser(std::string_view input, Func&& f) {
Result do_with_parser(std::string input, Func&& f) {
expressionsLexer::InputStreamType input_stream{
reinterpret_cast<const ANTLR_UINT8*>(input.data()),
ANTLR_ENC_UTF8,
@@ -44,7 +44,7 @@ Result do_with_parser(std::string_view input, Func&& f) {
}
parsed::update_expression
parse_update_expression(std::string_view query) {
parse_update_expression(std::string query) {
try {
return do_with_parser(query, std::mem_fn(&expressionsParser::update_expression));
} catch (...) {
@@ -53,7 +53,7 @@ parse_update_expression(std::string_view query) {
}
std::vector<parsed::path>
parse_projection_expression(std::string_view query) {
parse_projection_expression(std::string query) {
try {
return do_with_parser(query, std::mem_fn(&expressionsParser::projection_expression));
} catch (...) {
@@ -62,7 +62,7 @@ parse_projection_expression(std::string_view query) {
}
parsed::condition_expression
parse_condition_expression(std::string_view query) {
parse_condition_expression(std::string query) {
try {
return do_with_parser(query, std::mem_fn(&expressionsParser::condition_expression));
} catch (...) {

View File

@@ -26,9 +26,9 @@ public:
using runtime_error::runtime_error;
};
parsed::update_expression parse_update_expression(std::string_view query);
std::vector<parsed::path> parse_projection_expression(std::string_view query);
parsed::condition_expression parse_condition_expression(std::string_view query);
parsed::update_expression parse_update_expression(std::string query);
std::vector<parsed::path> parse_projection_expression(std::string query);
parsed::condition_expression parse_condition_expression(std::string query);
void resolve_update_expression(parsed::update_expression& ue,
const rjson::value* expression_attribute_names,

View File

@@ -14,14 +14,11 @@
#include "rapidjson/writer.h"
#include "concrete_types.hh"
#include "cql3/type_json.hh"
#include "position_in_partition.hh"
static logging::logger slogger("alternator-serialization");
namespace alternator {
bool is_alternator_keyspace(const sstring& ks_name);
type_info type_info_from_string(std::string_view type) {
static thread_local const std::unordered_map<std::string_view, type_info> type_infos = {
{"S", {alternator_type::S, utf8_type}},
@@ -164,43 +161,32 @@ bytes get_key_column_value(const rjson::value& item, const column_definition& co
return get_key_from_typed_value(*key_typed_value, column);
}
// Parses the JSON encoding for a key value, which is a map with a single
// entry whose key is the type and the value is the encoded value.
// If this type does not match the desired "type_str", an api_error::validation
// error is thrown (the "name" parameter is the name of the column which will
// mentioned in the exception message).
// If the type does match, a reference to the encoded value is returned.
static const rjson::value& get_typed_value(const rjson::value& key_typed_value, std::string_view type_str, std::string_view name, std::string_view value_name) {
if (!key_typed_value.IsObject() || key_typed_value.MemberCount() != 1 ||
!key_typed_value.MemberBegin()->value.IsString()) {
throw api_error::validation(
format("Malformed value object for {} {}: {}",
value_name, name, key_typed_value));
}
auto it = key_typed_value.MemberBegin();
if (rjson::to_string_view(it->name) != type_str) {
throw api_error::validation(
format("Type mismatch: expected type {} for {} {}, got type {}",
type_str, value_name, name, it->name));
}
return it->value;
}
// Parses the JSON encoding for a key value, which is a map with a single
// entry, whose key is the type (expected to match the key column's type)
// and the value is the encoded value.
bytes get_key_from_typed_value(const rjson::value& key_typed_value, const column_definition& column) {
auto& value = get_typed_value(key_typed_value, type_to_string(column.type), column.name_as_text(), "key column");
std::string_view value_view = rjson::to_string_view(value);
if (!key_typed_value.IsObject() || key_typed_value.MemberCount() != 1 ||
!key_typed_value.MemberBegin()->value.IsString()) {
throw api_error::validation(
format("Malformed value object for key column {}: {}",
column.name_as_text(), key_typed_value));
}
auto it = key_typed_value.MemberBegin();
if (it->name != type_to_string(column.type)) {
throw api_error::validation(
format("Type mismatch: expected type {} for key column {}, got type {}",
type_to_string(column.type), column.name_as_text(), it->name));
}
std::string_view value_view = rjson::to_string_view(it->value);
if (value_view.empty()) {
throw api_error::validation(
format("The AttributeValue for a key attribute cannot contain an empty string value. Key: {}", column.name_as_text()));
}
if (column.type == bytes_type) {
return rjson::base64_decode(value);
return rjson::base64_decode(it->value);
} else {
return column.type->from_string(value_view);
return column.type->from_string(rjson::to_string_view(it->value));
}
}
@@ -251,39 +237,6 @@ clustering_key ck_from_json(const rjson::value& item, schema_ptr schema) {
return clustering_key::from_exploded(raw_ck);
}
position_in_partition pos_from_json(const rjson::value& item, schema_ptr schema) {
auto ck = ck_from_json(item, schema);
if (is_alternator_keyspace(schema->ks_name())) {
return position_in_partition::for_key(std::move(ck));
}
const auto region_item = rjson::find(item, scylla_paging_region);
const auto weight_item = rjson::find(item, scylla_paging_weight);
if (bool(region_item) != bool(weight_item)) {
throw api_error::validation("Malformed value object: region and weight has to be either both missing or both present");
}
partition_region region;
bound_weight weight;
if (region_item) {
auto region_view = rjson::to_string_view(get_typed_value(*region_item, "S", scylla_paging_region, "key region"));
auto weight_view = rjson::to_string_view(get_typed_value(*weight_item, "N", scylla_paging_weight, "key weight"));
auto region = parse_partition_region(region_view);
if (weight_view == "-1") {
weight = bound_weight::before_all_prefixed;
} else if (weight_view == "0") {
weight = bound_weight::equal;
} else if (weight_view == "1") {
weight = bound_weight::after_all_prefixed;
} else {
throw std::runtime_error(fmt::format("Invalid value for weight: {}", weight_view));
}
return position_in_partition(region, weight, region == partition_region::clustered ? std::optional(std::move(ck)) : std::nullopt);
}
if (ck.is_empty()) {
return position_in_partition(position_in_partition::partition_start_tag_t());
}
return position_in_partition::for_key(std::move(ck));
}
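The ":scylla:paging:region" and ":scylla:paging:weight" attributes read above are Scylla-internal extensions of the paging bookmark (declared in the header below); the weight is an "N"-typed value that only ever carries "-1", "0" or "1". A standalone sketch of that mapping, with a stand-in enum for Scylla's bound_weight:

#include <stdexcept>
#include <string>
#include <string_view>

// Stand-in for bound_weight; the three cases mirror the if/else chain in pos_from_json().
enum class weight { before_all_prefixed, equal, after_all_prefixed };

static weight parse_weight(std::string_view w) {
    if (w == "-1") return weight::before_all_prefixed;  // position sorts before all rows with the key prefix
    if (w == "0")  return weight::equal;                // position is exactly at the key
    if (w == "1")  return weight::after_all_prefixed;   // position sorts after all rows with the key prefix
    throw std::runtime_error("invalid weight: " + std::string(w));
}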
big_decimal unwrap_number(const rjson::value& v, std::string_view diagnostic) {
if (!v.IsObject() || v.MemberCount() != 1) {
throw api_error::validation(format("{}: invalid number object", diagnostic));

View File

@@ -17,8 +17,6 @@
#include "utils/rjson.hh"
#include "utils/big_decimal.hh"
class position_in_partition;
namespace alternator {
enum class alternator_type : int8_t {
@@ -35,9 +33,6 @@ struct type_representation {
data_type dtype;
};
inline constexpr std::string_view scylla_paging_region(":scylla:paging:region");
inline constexpr std::string_view scylla_paging_weight(":scylla:paging:weight");
type_info type_info_from_string(std::string_view type);
type_representation represent_type(alternator_type atype);
@@ -52,7 +47,6 @@ rjson::value json_key_column_value(bytes_view cell, const column_definition& col
partition_key pk_from_json(const rjson::value& item, schema_ptr schema);
clustering_key ck_from_json(const rjson::value& item, schema_ptr schema);
position_in_partition pos_from_json(const rjson::value& item, schema_ptr schema);
// If v encodes a number (i.e., it is a {"N": [...]}), returns an object representing it. Otherwise,
// raises ValidationException with diagnostic.

View File

@@ -16,11 +16,11 @@
#include <seastar/util/short_streams.hh>
#include "seastarx.hh"
#include "error.hh"
#include "service/qos/service_level_controller.hh"
#include "utils/rjson.hh"
#include "auth.hh"
#include <cctype>
#include "service/storage_proxy.hh"
#include "locator/snitch_base.hh"
#include "gms/gossiper.hh"
#include "utils/overloaded_functor.hh"
#include "utils/fb_utilities.hh"
@@ -152,10 +152,8 @@ public:
protected:
void generate_error_reply(reply& rep, const api_error& err) {
rjson::value results = rjson::empty_object();
rjson::add(results, "__type", rjson::from_string("com.amazonaws.dynamodb.v20120810#" + err._type));
rjson::add(results, "message", err._msg);
rep._content = rjson::print(std::move(results));
rep._content += "{\"__type\":\"com.amazonaws.dynamodb.v20120810#" + err._type + "\"," +
"\"message\":\"" + err._msg + "\"}";
rep._status = err._http_code;
slogger.trace("api_handler error case: {}", rep._content);
}
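Both variants above produce the same DynamoDB-style error body, {"__type":"com.amazonaws.dynamodb.v20120810#<Type>","message":"<msg>"}. A standalone sketch building it with rapidjson's SAX writer (the rjson::add/rjson::print calls in the hunk are Scylla wrappers over the same library):

#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>
#include <string>

static std::string error_body(const std::string& type, const std::string& msg) {
    rapidjson::StringBuffer sb;
    rapidjson::Writer<rapidjson::StringBuffer> w(sb);
    w.StartObject();
    w.Key("__type");
    w.String(("com.amazonaws.dynamodb.v20120810#" + type).c_str());
    w.Key("message");
    w.String(msg.c_str());   // unlike plain string concatenation, the writer escapes msg
    w.EndObject();
    return sb.GetString();
}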
@@ -201,9 +199,10 @@ protected:
// It's very easy to get a list of all live nodes on the cluster,
// using _gossiper().get_live_members(). But getting
// just the list of live nodes in this DC needs more elaborate code:
auto& topology = _proxy.get_token_metadata_ptr()->get_topology();
sstring local_dc = topology.get_datacenter();
std::unordered_set<gms::inet_address> local_dc_nodes = topology.get_datacenter_endpoints().at(local_dc);
sstring local_dc = locator::i_endpoint_snitch::get_local_snitch_ptr()->get_datacenter(
utils::fb_utilities::get_broadcast_address());
std::unordered_set<gms::inet_address> local_dc_nodes =
_proxy.get_token_metadata_ptr()->get_topology().get_datacenter_endpoints().at(local_dc);
for (auto& ip : local_dc_nodes) {
if (_gossiper.is_alive(ip)) {
rjson::push_back(results, rjson::from_string(ip.to_sstring()));
@@ -235,7 +234,7 @@ protected:
future<std::string> server::verify_signature(const request& req, const chunked_content& content) {
if (!_enforce_authorization) {
slogger.debug("Skipping authorization");
return make_ready_future<std::string>();
return make_ready_future<std::string>("<unauthenticated request>");
}
auto host_it = req._headers.find("Host");
if (host_it == req._headers.end()) {
@@ -365,9 +364,7 @@ static tracing::trace_state_ptr maybe_trace_query(service::client_state& client_
tracing::add_session_param(trace_state, "alternator_op", op);
tracing::add_query(trace_state, truncated_content_view(query, buf));
tracing::begin(trace_state, format("Alternator {}", op), client_state.get_client_address());
if (!username.empty()) {
tracing::set_username(trace_state, auth::authenticated_user(username));
}
tracing::set_username(trace_state, auth::authenticated_user(username));
}
return trace_state;
}
@@ -410,11 +407,7 @@ future<executor::request_return_type> server::handle_api_request(std::unique_ptr
auto leave = defer([this] () noexcept { _pending_requests.leave(); });
//FIXME: Client state can provide more context, e.g. client's endpoint address
// We use unique_ptr because client_state cannot be moved or copied
executor::client_state client_state = username.empty()
? service::client_state{service::client_state::internal_tag()}
: service::client_state{service::client_state::internal_tag(), _auth_service, _sl_controller, username};
co_await client_state.maybe_update_per_service_level_params();
executor::client_state client_state{executor::client_state::internal_tag()};
tracing::trace_state_ptr trace_state = maybe_trace_query(client_state, username, op, content);
tracing::trace(trace_state, op);
rjson::value json_request = co_await _json_parser.parse(std::move(content));
@@ -447,14 +440,12 @@ void server::set_routes(routes& r) {
//FIXME: A way to immediately invalidate the cache should be considered,
// e.g. when the system table which stores the keys is changed.
// For now, this propagation may take up to 1 minute.
server::server(executor& exec, service::storage_proxy& proxy, gms::gossiper& gossiper, auth::service& auth_service, qos::service_level_controller& sl_controller)
server::server(executor& exec, service::storage_proxy& proxy, gms::gossiper& gossiper)
: _http_server("http-alternator")
, _https_server("https-alternator")
, _executor(exec)
, _proxy(proxy)
, _gossiper(gossiper)
, _auth_service(auth_service)
, _sl_controller(sl_controller)
, _key_cache(1024, 1min, slogger)
, _enforce_authorization(false)
, _enabled_servers{}
@@ -529,9 +520,6 @@ server::server(executor& exec, service::storage_proxy& proxy, gms::gossiper& gos
{"GetRecords", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
return e.get_records(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
}},
{"DescribeContinuousBackups", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
return e.describe_continuous_backups(client_state, std::move(permit), std::move(json_request));
}},
} {
}
@@ -623,7 +611,7 @@ future<> server::json_parser::stop() {
const char* api_error::what() const noexcept {
if (_what_string.empty()) {
_what_string = format("{} {}: {}", static_cast<int>(_http_code), _type, _msg);
_what_string = format("{} {}: {}", _http_code, _type, _msg);
}
return _what_string.c_str();
}

View File

@@ -15,7 +15,6 @@
#include <seastar/net/tls.hh>
#include <optional>
#include "alternator/auth.hh"
#include "service/qos/service_level_controller.hh"
#include "utils/small_vector.hh"
#include "utils/updateable_value.hh"
#include <seastar/core/units.hh>
@@ -35,8 +34,6 @@ class server {
executor& _executor;
service::storage_proxy& _proxy;
gms::gossiper& _gossiper;
auth::service& _auth_service;
qos::service_level_controller& _sl_controller;
key_cache _key_cache;
bool _enforce_authorization;
@@ -68,7 +65,7 @@ class server {
json_parser _json_parser;
public:
server(executor& executor, service::storage_proxy& proxy, gms::gossiper& gossiper, auth::service& service, qos::service_level_controller& sl_controller);
server(executor& executor, service::storage_proxy& proxy, gms::gossiper& gossiper);
future<> init(net::inet_address addr, std::optional<uint16_t> port, std::optional<uint16_t> https_port, std::optional<tls::credentials_builder> creds,
bool enforce_authorization, semaphore* memory_limiter, utils::updateable_value<uint32_t> max_concurrent_requests);

View File

@@ -33,6 +33,7 @@
#include "gms/feature_service.hh"
#include "executor.hh"
#include "tags_extension.hh"
#include "rmw_operation.hh"
/**
@@ -74,8 +75,8 @@ struct rapidjson::internal::TypeHelper<ValueType, utils::UUID>
: public from_string_helper<ValueType, utils::UUID>
{};
static db_clock::time_point as_timepoint(const table_id& tid) {
return db_clock::time_point{utils::UUID_gen::unix_timestamp(tid.uuid())};
static db_clock::time_point as_timepoint(const utils::UUID& uuid) {
return db_clock::time_point{utils::UUID_gen::unix_timestamp(uuid)};
}
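as_timepoint() only makes sense because table/stream ids are time-based (version 1) UUIDs, from which utils::UUID_gen::unix_timestamp() recovers a wall-clock time. A rough standalone illustration of that extraction, assuming the standard RFC 4122 layout (this is not Scylla's code):

#include <cstdint>

// msb = most significant 64 bits of a v1 UUID: time_low | time_mid | time_hi_and_version.
static int64_t unix_millis_from_v1_uuid(uint64_t msb) {
    uint64_t ticks = ((msb & 0x0FFFULL) << 48)          // time_hi (12 bits)
                   | (((msb >> 16) & 0xFFFFULL) << 32)  // time_mid
                   | (msb >> 32);                       // time_low
    // 100ns intervals between 1582-10-15 (UUID epoch) and 1970-01-01 (unix epoch).
    constexpr uint64_t gregorian_to_unix = 0x01B21DD213814000ULL;
    return int64_t(ticks - gregorian_to_unix) / 10000;  // 100ns ticks -> milliseconds
}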
/**
@@ -106,9 +107,6 @@ public:
stream_arn(const UUID& uuid)
: UUID(uuid)
{}
stream_arn(const table_id& tid)
: UUID(tid.uuid())
{}
stream_arn(std::string_view v)
: UUID(v.substr(1))
{
@@ -158,7 +156,7 @@ future<alternator::executor::request_return_type> alternator::executor::list_str
// and we can probably expect this to be a single call.
if (streams_start) {
i = std::find_if(i, e, [&](data_dictionary::table t) {
return t.schema()->id().uuid() == streams_start
return t.schema()->id() == streams_start
&& cdc::get_base_table(db.real_database(), *t.schema())
&& is_alternator_keyspace(t.schema()->ks_name())
;
@@ -433,7 +431,7 @@ future<executor::request_return_type> executor::describe_stream(client_state& cl
auto db = _proxy.data_dictionary();
try {
auto cf = db.find_column_family(table_id(stream_arn));
auto cf = db.find_column_family(stream_arn);
schema = cf.schema();
bs = cdc::get_base_table(db.real_database(), *schema);
} catch (...) {
@@ -720,7 +718,7 @@ future<executor::request_return_type> executor::get_shard_iterator(client_state&
std::optional<shard_id> sid;
try {
auto cf = db.find_column_family(table_id(stream_arn));
auto cf = db.find_column_family(stream_arn);
schema = cf.schema();
sid = rjson::get<shard_id>(request, "ShardId");
} catch (...) {
@@ -805,7 +803,7 @@ future<executor::request_return_type> executor::get_records(client_state& client
auto db = _proxy.data_dictionary();
schema_ptr schema, base;
try {
auto log_table = db.find_column_family(table_id(iter.table));
auto log_table = db.find_column_family(iter.table);
schema = log_table.schema();
base = cdc::get_base_table(db.real_database(), *schema);
} catch (...) {
@@ -835,14 +833,14 @@ future<executor::request_return_type> executor::get_records(client_state& client
static const bytes op_column_name = cdc::log_meta_column_name_bytes("operation");
static const bytes eor_column_name = cdc::log_meta_column_name_bytes("end_of_batch");
std::optional<attrs_to_get> key_names = boost::copy_range<attrs_to_get>(
auto key_names = boost::copy_range<attrs_to_get>(
boost::range::join(std::move(base->partition_key_columns()), std::move(base->clustering_key_columns()))
| boost::adaptors::transformed([&] (const column_definition& cdef) {
return std::make_pair<std::string, attrs_to_get_node>(cdef.name_as_text(), {}); })
);
// Include all base table columns as values (in case pre or post is enabled).
// This will include attributes not stored in the frozen map column
std::optional<attrs_to_get> attr_names = boost::copy_range<attrs_to_get>(base->regular_columns()
auto attr_names = boost::copy_range<attrs_to_get>(base->regular_columns()
// this will include the :attrs column, which we will also force evaluating.
// But not having this set empty forces out any cdc columns from actual result
| boost::adaptors::transformed([] (const column_definition& cdef) {
@@ -879,7 +877,7 @@ future<executor::request_return_type> executor::get_records(client_state& client
++mul;
}
auto command = ::make_lw_shared<query::read_command>(schema->id(), schema->version(), partition_slice, _proxy.get_max_result_size(partition_slice),
query::tombstone_limit(_proxy.get_tombstone_limit()), query::row_limit(limit * mul));
query::row_limit(limit * mul));
return _proxy.query(schema, std::move(command), std::move(partition_ranges), cl, service::storage_proxy::coordinator_query_options(default_timeout(), std::move(permit), client_state)).then(
[this, schema, partition_slice = std::move(partition_slice), selection = std::move(selection), start_time = std::move(start_time), limit, key_names = std::move(key_names), attr_names = std::move(attr_names), type, iter, high_ts] (service::storage_proxy::coordinator_query_result qr) mutable {
@@ -1047,10 +1045,10 @@ void executor::add_stream_options(const rjson::value& stream_specification, sche
if (stream_enabled->GetBool()) {
auto db = sp.data_dictionary();
if (!db.features().cdc) {
if (!db.features().cluster_supports_cdc()) {
throw api_error::validation("StreamSpecification: streams (CDC) feature not enabled in cluster.");
}
if (!db.features().alternator_streams) {
if (!db.features().cluster_supports_alternator_streams()) {
throw api_error::validation("StreamSpecification: alternator streams feature not enabled in cluster.");
}

View File

@@ -12,7 +12,7 @@
#include "schema.hh"
#include "db/extensions.hh"
namespace db {
namespace alternator {
class tags_extension : public schema_extension {
public:
@@ -37,9 +37,4 @@ private:
std::map<sstring, sstring> _tags;
};
// Information whether the view updates are synchronous is stored using the
// SYNCHRONOUS_VIEW_UPDATES_TAG_KEY tag. Value of this tag is a stored as a
// serialized boolean value ("true" or "false")
static const sstring SYNCHRONOUS_VIEW_UPDATES_TAG_KEY("system:synchronous_view_updates");
}

View File

@@ -13,7 +13,6 @@
#include <seastar/core/coroutine.hh>
#include <seastar/core/sleep.hh>
#include <seastar/core/future.hh>
#include <seastar/core/lowres_clock.hh>
#include <seastar/coroutine/maybe_yield.hh>
#include <boost/multiprecision/cpp_int.hpp>
@@ -45,8 +44,6 @@
#include "alternator/controller.hh"
#include "alternator/serialization.hh"
#include "dht/sharder.hh"
#include "db/config.hh"
#include "db/tags/utils.hh"
#include "ttl.hh"
@@ -65,7 +62,7 @@ static const sstring TTL_TAG_KEY("system:ttl_attribute");
future<executor::request_return_type> executor::update_time_to_live(client_state& client_state, service_permit permit, rjson::value request) {
_stats.api_operations.update_time_to_live++;
if (!_proxy.data_dictionary().features().alternator_ttl) {
if (!_proxy.data_dictionary().features().cluster_supports_alternator_ttl()) {
co_return api_error::unknown_operation("UpdateTimeToLive not yet supported. Experimental support is available if the 'alternator-ttl' experimental feature is enabled on all nodes.");
}
@@ -92,7 +89,7 @@ future<executor::request_return_type> executor::update_time_to_live(client_state
}
sstring attribute_name(v->GetString(), v->GetStringLength());
std::map<sstring, sstring> tags_map = get_tags_of_table_or_throw(schema);
std::map<sstring, sstring> tags_map = get_tags_of_table(schema);
if (enabled) {
if (tags_map.contains(TTL_TAG_KEY)) {
co_return api_error::validation("TTL is already enabled");
@@ -109,7 +106,7 @@ future<executor::request_return_type> executor::update_time_to_live(client_state
}
tags_map.erase(TTL_TAG_KEY);
}
co_await db::update_tags(_mm, schema, std::move(tags_map));
co_await update_tags(_mm, schema, std::move(tags_map));
// Prepare the response, which contains a TimeToLiveSpecification
// basically identical to the request's
rjson::value response = rjson::empty_object();
@@ -120,7 +117,7 @@ future<executor::request_return_type> executor::update_time_to_live(client_state
future<executor::request_return_type> executor::describe_time_to_live(client_state& client_state, service_permit permit, rjson::value request) {
_stats.api_operations.describe_time_to_live++;
schema_ptr schema = get_table(_proxy, request);
std::map<sstring, sstring> tags_map = get_tags_of_table_or_throw(schema);
std::map<sstring, sstring> tags_map = get_tags_of_table(schema);
rjson::value desc = rjson::empty_object();
auto i = tags_map.find(TTL_TAG_KEY);
if (i == tags_map.end()) {
@@ -136,7 +133,7 @@ future<executor::request_return_type> executor::describe_time_to_live(client_sta
// expiration_service is a sharded service responsible for cleaning up expired
// items in all tables with per-item expiration enabled. Currently, this means
// Alternator tables with TTL configured via a UpdateTimeToLive request.
// Alternator tables with TTL configured via a UpdateTimeToLeave request.
//
// Here is a brief overview of how the expiration service works:
//
@@ -150,26 +147,28 @@ future<executor::request_return_type> executor::describe_time_to_live(client_sta
// To avoid scanning the same items RF times in RF replicas, only one node is
// responsible for scanning a token range at a time. Normally, this is the
// node owning this range as a "primary range" (the first node in the ring
// with this range), but when this node is down, the secondary owner (the
// second in the ring) may take over.
// with this range), but when this node is down, other nodes may take over
// (FIXME: this is not implemented yet).
// An expiration thread is responsible for all tables which need expiration
// scans. Currently, the different tables are scanned sequentially (not in
// parallel).
// scans. FIXME: explain how this is done with multiple tables - parallel,
// staggered, or what?
// The expiration thread scans items using CL=QUORUM to ensure that it reads
// a consistent expiration-time attribute. This means that the items are read
// locally and in addition QUORUM-1 additional nodes (one additional node
// when RF=3) need to read the data and send digests.
// FIXME: explain if we can read the exact attribute or the entire map.
// When the expiration thread decides that an item has expired and wants
// to delete it, it does it using a CL=QUORUM write. This allows this
// deletion to be visible for consistent (quorum) reads. The deletion,
// like user deletions, will also appear on the CDC log and therefore
// Alternator Streams if enabled - currently as ordinary deletes (the
// userIdentity flag is currently missing; this is issue #11523).
expiration_service::expiration_service(data_dictionary::database db, service::storage_proxy& proxy, gms::gossiper& g)
// Alternator Streams if enabled (FIXME: explain how we mark the
// deletion different from user deletes. We don't do it yet.).
expiration_service::expiration_service(data_dictionary::database db, service::storage_proxy& proxy)
: _db(db)
, _proxy(proxy)
, _gossiper(g)
{
//FIXME: add metrics for the service
//setup_metrics();
}
// Convert the big_decimal used to represent expiration time to an integer.
@@ -282,13 +281,10 @@ static future<> expire_item(service::storage_proxy& proxy,
auto ck = clustering_key::from_exploded(exploded_ck);
m.partition().clustered_row(*schema, ck).apply(tombstone(ts, gc_clock::now()));
}
std::vector<mutation> mutations;
mutations.push_back(std::move(m));
return proxy.mutate(std::move(mutations),
return proxy.mutate(std::vector<mutation>{std::move(m)},
db::consistency_level::LOCAL_QUORUM,
executor::default_timeout(), // FIXME - which timeout?
qs.get_trace_state(), qs.get_permit(),
db::allow_per_partition_rate_limit::no);
qs.get_trace_state(), qs.get_permit());
}
static size_t random_offset(size_t min, size_t max) {
@@ -367,7 +363,7 @@ static std::vector<std::pair<dht::token_range, gms::inet_address>> get_secondary
// 2. The primary replica for this token is currently marked down.
// 3. In this node, this shard is responsible for this token.
// We use the <secondary> case to handle the possibility that some of the
// nodes in the system are down. A dead node will not be expiring
// nodes in the system are down. A dead node will not be expiring expiring
// the tokens owned by it, so we want the secondary owner to take over its
// primary ranges.
//
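A hedged standalone sketch of the takeover rule described above (types and names are invented stand-ins for dht::token_range and gms::inet_address; the real check is the should_skip() referenced in the next hunk): a node scans one of its secondary ranges only while that range's primary owner is down.

#include <functional>
#include <utility>
#include <vector>

struct token_range { long start, end; };
struct node_address { int id; };

static std::vector<token_range> secondary_ranges_to_scan(
        const std::vector<std::pair<token_range, node_address>>& secondary_ranges,
        const std::function<bool(node_address)>& is_alive) {
    std::vector<token_range> out;
    for (const auto& [range, primary_owner] : secondary_ranges) {
        if (!is_alive(primary_owner)) {  // only take over ranges whose primary owner is dead
            out.push_back(range);
        }
    }
    return out;
}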
@@ -380,11 +376,12 @@ static std::vector<std::pair<dht::token_range, gms::inet_address>> get_secondary
enum primary_or_secondary_t {primary, secondary};
template<primary_or_secondary_t primary_or_secondary>
class token_ranges_owned_by_this_shard {
// ranges_holder_primary holds just the primary ranges themselves
class ranges_holder_primary {
template<primary_or_secondary_t> class ranges_holder;
// ranges_holder<primary> holds just the primary ranges themselves
template<> class ranges_holder<primary> {
const dht::token_range_vector _token_ranges;
public:
ranges_holder_primary(const locator::effective_replication_map_ptr& erm, gms::gossiper& g, gms::inet_address ep)
ranges_holder(const locator::effective_replication_map_ptr& erm, gms::inet_address ep)
: _token_ranges(erm->get_primary_ranges(ep)) {}
std::size_t size() const { return _token_ranges.size(); }
const dht::token_range& operator[](std::size_t i) const {
@@ -396,13 +393,13 @@ class token_ranges_owned_by_this_shard {
};
// ranges_holder<secondary> holds the secondary token ranges plus each
// range's primary owner, needed to implement should_skip().
class ranges_holder_secondary {
template<> class ranges_holder<secondary> {
std::vector<std::pair<dht::token_range, gms::inet_address>> _token_ranges;
gms::gossiper& _gossiper;
public:
ranges_holder_secondary(const locator::effective_replication_map_ptr& erm, gms::gossiper& g, gms::inet_address ep)
ranges_holder(const locator::effective_replication_map_ptr& erm, gms::inet_address ep)
: _token_ranges(get_secondary_ranges(erm, ep))
, _gossiper(g) {}
, _gossiper(gms::get_local_gossiper()) {}
std::size_t size() const { return _token_ranges.size(); }
const dht::token_range& operator[](std::size_t i) const {
return _token_ranges[i].first;
@@ -417,21 +414,17 @@ class token_ranges_owned_by_this_shard {
// _token_ranges will contain a list of token ranges owned by this node.
// We'll further need to split each such range to the pieces owned by
// the current shard, using _intersecter.
using ranges_holder = std::conditional_t<
primary_or_secondary == primary_or_secondary_t::primary,
ranges_holder_primary,
ranges_holder_secondary>;
const ranges_holder _token_ranges;
const ranges_holder<primary_or_secondary> _token_ranges;
// NOTICE: _range_idx is used modulo _token_ranges size when accessing
// the data to ensure that it doesn't go out of bounds
size_t _range_idx;
size_t _end_idx;
std::optional<dht::selective_token_range_sharder> _intersecter;
public:
token_ranges_owned_by_this_shard(replica::database& db, gms::gossiper& g, schema_ptr s)
token_ranges_owned_by_this_shard(replica::database& db, schema_ptr s)
: _s(s)
, _token_ranges(db.find_keyspace(s->ks_name()).get_effective_replication_map(),
g, utils::fb_utilities::get_broadcast_address())
utils::fb_utilities::get_broadcast_address())
, _range_idx(random_offset(0, _token_ranges.size() - 1))
, _end_idx(_range_idx + _token_ranges.size())
{
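One side of this hunk declares ranges_holder as a nested class template with explicit specializations at class scope; the other replaces it with two plainly named classes selected through std::conditional_t (explicit specializations nested inside a class are not portable C++, which is presumably the motivation). A standalone sketch of the std::conditional_t selection pattern:

#include <cstddef>
#include <type_traits>

enum primary_or_secondary_t { primary, secondary };

struct primary_ranges   { std::size_t size() const { return 1; } };
struct secondary_ranges { std::size_t size() const { return 2; } };

template <primary_or_secondary_t Kind>
class range_scanner {
    // Pick the holder type at compile time instead of specializing a nested template.
    using holder = std::conditional_t<Kind == primary, primary_ranges, secondary_ranges>;
    holder _ranges;
public:
    std::size_t count() const { return _ranges.size(); }
};

// range_scanner<primary>{}.count() == 1, range_scanner<secondary>{}.count() == 2.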
@@ -509,11 +502,9 @@ struct scan_ranges_context {
selection = cql3::selection::selection::wildcard(s);
query::partition_slice::option_set opts = selection->get_query_options();
opts.set<query::partition_slice::option::allow_short_read>();
// It is important that the scan bypass cache to avoid polluting it:
opts.set<query::partition_slice::option::bypass_cache>();
std::vector<query::clustering_range> ck_bounds{query::clustering_range::make_open_ended_both_sides()};
auto partition_slice = query::partition_slice(std::move(ck_bounds), {}, std::move(regular_columns), opts);
command = ::make_lw_shared<query::read_command>(s->id(), s->version(), partition_slice, proxy.get_max_result_size(partition_slice), query::tombstone_limit(proxy.get_tombstone_limit()));
command = ::make_lw_shared<query::read_command>(s->id(), s->version(), partition_slice, proxy.get_max_result_size(partition_slice));
executor::client_state client_state{executor::client_state::internal_tag()};
tracing::trace_state_ptr trace_state;
// NOTICE: empty_service_permit is used because the TTL service has fixed parallelism
@@ -530,14 +521,13 @@ struct scan_ranges_context {
// Scan data in a list of token ranges in one table, looking for expired
// items and deleting them.
// Because of issue #9167, partition_ranges must have a single partition
// range for this code to work correctly.
// for this code to work correctly.
static future<> scan_table_ranges(
service::storage_proxy& proxy,
const scan_ranges_context& scan_ctx,
dht::partition_range_vector&& partition_ranges,
abort_source& abort_source,
named_semaphore& page_sem,
expiration_service::stats& expiration_stats)
named_semaphore& page_sem)
{
const schema_ptr& s = scan_ctx.s;
assert (partition_ranges.size() == 1); // otherwise issue #9167 will cause incorrect results.
@@ -605,7 +595,6 @@ static future<> scan_table_ranges(
expired = is_expired(n, now);
}
if (expired) {
expiration_stats.items_deleted++;
// FIXME: maybe don't recalculate new_timestamp() all the time
// FIXME: if expire_item() throws on timeout, we need to retry it.
auto ts = api::new_timestamp();
@@ -617,7 +606,7 @@ static future<> scan_table_ranges(
}
}
// scan_table() scans, in one table, data "owned" by this shard, looking for
// scan_table() scans data in one table "owned" by this shard, looking for
// expired items and deleting them.
// We consider each node to "own" its primary token ranges, i.e., the tokens
// that this node is their first replica in the ring. Inside the node, each
@@ -639,16 +628,13 @@ static future<> scan_table_ranges(
static future<bool> scan_table(
service::storage_proxy& proxy,
data_dictionary::database db,
gms::gossiper& gossiper,
schema_ptr s,
abort_source& abort_source,
named_semaphore& page_sem,
expiration_service::stats& expiration_stats)
named_semaphore& page_sem)
{
// Check if an expiration-time attribute is enabled for this table.
// If not, just return false immediately.
// FIXME: the setting of the TTL may change in the middle of a long scan!
std::optional<std::string> attribute_name = db::find_tag(*s, TTL_TAG_KEY);
std::optional<std::string> attribute_name = find_tag(*s, TTL_TAG_KEY);
if (!attribute_name) {
co_return false;
}
@@ -689,10 +675,11 @@ static future<bool> scan_table(
tlogger.info("table {} TTL column has unsupported type, not scanning", s->cf_name());
co_return false;
}
expiration_stats.scan_table++;
// FIXME: need to pace the scan, not do it all at once.
// FIXME: consider if we should ask the scan without caching?
// can we use cache but not fill it?
scan_ranges_context scan_ctx{s, proxy, std::move(column_name), std::move(member)};
token_ranges_owned_by_this_shard<primary> my_ranges(db.real_database(), gossiper, s);
token_ranges_owned_by_this_shard<primary> my_ranges(db.real_database(), s);
while (std::optional<dht::partition_range> range = my_ranges.next_partition_range()) {
// Note that because of issue #9167 we need to run a separate
// query on each partition range, and can't pass several of
@@ -703,7 +690,7 @@ static future<bool> scan_table(
// we fail the entire scan (and rescan from the beginning). Need to
// reconsider this. Saving the scan position might be a good enough
// solution for this problem.
co_await scan_table_ranges(proxy, scan_ctx, std::move(partition_ranges), abort_source, page_sem, expiration_stats);
co_await scan_table_ranges(proxy, scan_ctx, std::move(partition_ranges), abort_source, page_sem);
}
// If each node only scans its own primary ranges, then when any node is
// down part of the token range will not get scanned. This can be viewed
@@ -712,12 +699,11 @@ static future<bool> scan_table(
// by tasking another node to take over scanning of the dead node's primary
// ranges. What we do here is that this node will also check expiration
// on its *secondary* ranges - but only those whose primary owner is down.
token_ranges_owned_by_this_shard<secondary> my_secondary_ranges(db.real_database(), gossiper, s);
token_ranges_owned_by_this_shard<secondary> my_secondary_ranges(db.real_database(), s);
while (std::optional<dht::partition_range> range = my_secondary_ranges.next_partition_range()) {
expiration_stats.secondary_ranges_scanned++;
dht::partition_range_vector partition_ranges;
partition_ranges.push_back(std::move(*range));
co_await scan_table_ranges(proxy, scan_ctx, std::move(partition_ranges), abort_source, page_sem, expiration_stats);
co_await scan_table_ranges(proxy, scan_ctx, std::move(partition_ranges), abort_source, page_sem);
}
co_return true;
}
@@ -730,7 +716,6 @@ future<> expiration_service::run() {
// also need to notice when a new table is added, a table is
// deleted or when ttl is enabled or disabled for a table!
for (;;) {
auto start = lowres_clock::now();
// _db.tables() may change under our feet during a
// long-living loop, so we must keep our own copy of the list of
// schemas.
@@ -744,7 +729,7 @@ future<> expiration_service::run() {
co_return;
}
try {
co_await scan_table(_proxy, _db, _gossiper, s, _abort_source, _page_sem, _expiration_stats);
co_await scan_table(_proxy, _db, s, _abort_source, _page_sem);
} catch (...) {
// The scan of a table may fail in the middle for many
// reasons, including network failure and even the table
@@ -763,30 +748,17 @@ future<> expiration_service::run() {
}
}
}
_expiration_stats.scan_passes++;
// The TTL scanner above runs once over all tables, at full steam.
// After completing such a scan, we sleep until it's time to start
// another scan. TODO: If the scan went too fast, we can slow it down
// in the next iteration by reducing the scanner's scheduling-group
// share (if using a separate scheduling group), or introduce
// finer-grain sleeps into the scanning code.
std::chrono::milliseconds scan_duration(std::chrono::duration_cast<std::chrono::milliseconds>(lowres_clock::now() - start));
std::chrono::milliseconds period(long(_db.get_config().alternator_ttl_period_in_seconds() * 1000));
if (scan_duration < period) {
try {
tlogger.info("sleeping {} seconds until next period", (period - scan_duration).count()/1000.0);
co_await seastar::sleep_abortable(period - scan_duration, _abort_source);
} catch(seastar::sleep_aborted&) {}
} else {
tlogger.warn("scan took {} seconds, longer than period - not sleeping", scan_duration.count()/1000.0);
}
// FIXME: replace this silly 1-second sleep by something smarter.
try {
co_await seastar::sleep_abortable(std::chrono::seconds(1), _abort_source);
} catch(seastar::sleep_aborted&) {}
}
}
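A worked example of the period-based variant shown above (numbers are illustrative only):

// alternator_ttl_period_in_seconds = 86400  ->  period        = 86,400,000 ms
// a full pass over all tables took 2 hours  ->  scan_duration =  7,200,000 ms
// scan_duration < period, so the service sleeps 79,200,000 ms (~22 hours) before the
// next pass; had the pass taken longer than 24 hours, it would log a warning and start
// the next pass immediately.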
future<> expiration_service::start() {
// Called by main() on each shard to start the expiration-service
// thread. Just runs run() in the background and allows stop().
if (_db.features().alternator_ttl) {
if (_db.features().cluster_supports_alternator_ttl()) {
if (!shutting_down()) {
_end = run().handle_exception([] (std::exception_ptr ep) {
tlogger.error("expiration_service failed: {}", ep);
@@ -808,18 +780,4 @@ future<> expiration_service::stop() {
return std::move(*_end);
}
expiration_service::stats::stats() {
_metrics.add_group("expiration", {
seastar::metrics::make_total_operations("scan_passes", scan_passes,
seastar::metrics::description("number of passes over the database")),
seastar::metrics::make_total_operations("scan_table", scan_table,
seastar::metrics::description("number of table scans (counting each scan of each table that enabled expiration)")),
seastar::metrics::make_total_operations("items_deleted", items_deleted,
seastar::metrics::description("number of items deleted after expiration")),
seastar::metrics::make_total_operations("secondary_ranges_scanned", secondary_ranges_scanned,
seastar::metrics::description("number of token ranges scanned by this node while their primary owner was down")),
});
}
} // namespace alternator

View File

@@ -14,10 +14,6 @@
#include <seastar/core/semaphore.hh>
#include "data_dictionary/data_dictionary.hh"
namespace gms {
class gossiper;
}
namespace replica {
class database;
}
@@ -32,26 +28,8 @@ namespace alternator {
// items in all tables with per-item expiration enabled. Currently, this means
// Alternator tables with TTL configured via a UpdateTimeToLive request.
class expiration_service final : public seastar::peering_sharded_service<expiration_service> {
public:
// Object holding per-shard statistics related to the expiration service.
// While this object is alive, these metrics are also registered to be
// visible by the metrics REST API, with the "expiration_" prefix.
class stats {
public:
stats();
uint64_t scan_passes = 0;
uint64_t scan_table = 0;
uint64_t items_deleted = 0;
uint64_t secondary_ranges_scanned = 0;
private:
// The metric_groups object holds this stat object's metrics registered
// as long as the stats object is alive.
seastar::metrics::metric_groups _metrics;
};
private:
data_dictionary::database _db;
service::storage_proxy& _proxy;
gms::gossiper& _gossiper;
// _end is set by start(), and resolves when the background service
// started by it ends. To ask the background service to end, _abort_source
// should be triggered. stop() below uses both _abort_source and _end.
@@ -60,12 +38,11 @@ private:
// Ensures that at most 1 page of scan results at a time is processed by the TTL service
named_semaphore _page_sem{1, named_semaphore_exception_factory{"alternator_ttl"}};
bool shutting_down() { return _abort_source.abort_requested(); }
stats _expiration_stats;
public:
// sharded_service<expiration_service>::start() creates this object on
// all shards, so calls this constructor on each shard. Later, the
// additional start() function should be invoked on all shards.
expiration_service(data_dictionary::database, service::storage_proxy&, gms::gossiper&);
expiration_service(data_dictionary::database, service::storage_proxy&);
future<> start();
future<> run();
// sharded_service<expiration_service>::stop() calls the following stop()

View File

@@ -1,29 +0,0 @@
{
"apiVersion":"0.0.1",
"swaggerVersion":"1.2",
"basePath":"{{Protocol}}://{{Host}}",
"resourcePath":"/authorization_cache",
"produces":[
"application/json"
],
"apis":[
{
"path":"/authorization_cache/reset",
"operations":[
{
"method":"POST",
"summary":"Reset cache",
"type":"void",
"nickname":"authorization_cache_reset",
"produces":[
"application/json"
],
"parameters":[
]
}
]
}
],
"models":{
}
}

View File

@@ -134,7 +134,7 @@
},
{
"name":"tables",
"description":"Comma-separated tables to stop compaction in",
"description":"Comma-seperated tables to stop compaction in",
"required":false,
"allowMultiple":false,
"type":"string",

View File

@@ -667,7 +667,7 @@
},
{
"name":"kn",
"description":"Comma-separated keyspaces name that their snapshot will be deleted",
"description":"Comma seperated keyspaces name that their snapshot will be deleted",
"required":false,
"allowMultiple":false,
"type":"string",
@@ -723,7 +723,7 @@
},
{
"name":"cf",
"description":"Comma-separated column family names",
"description":"Comma seperated column family names",
"required":false,
"allowMultiple":false,
"type":"string",
@@ -755,7 +755,7 @@
},
{
"name":"cf",
"description":"Comma-separated column family names",
"description":"Comma seperated column family names",
"required":false,
"allowMultiple":false,
"type":"string",
@@ -787,7 +787,7 @@
},
{
"name":"cf",
"description":"Comma-separated table names",
"description":"Comma-seperated table names",
"required":false,
"allowMultiple":false,
"type":"string",
@@ -862,7 +862,7 @@
},
{
"name":"cf",
"description":"Comma-separated column family names",
"description":"Comma seperated column family names",
"required":false,
"allowMultiple":false,
"type":"string",
@@ -902,7 +902,7 @@
},
{
"name":"cf",
"description":"Comma-separated column family names",
"description":"Comma seperated column family names",
"required":false,
"allowMultiple":false,
"type":"string",
@@ -934,7 +934,7 @@
},
{
"name":"cf",
"description":"Comma-separated column family names",
"description":"Comma seperated column family names",
"required":false,
"allowMultiple":false,
"type":"string",
@@ -1228,7 +1228,7 @@
"operations":[
{
"method":"POST",
"summary":"Removes a node from the cluster. Replicated data that logically belonged to this node is redistributed among the remaining nodes.",
"summary":"Removes token (and all data associated with enpoint that had it) from the ring",
"type":"void",
"nickname":"remove_node",
"produces":[
@@ -1245,7 +1245,7 @@
},
{
"name":"ignore_nodes",
"description":"Comma-separated list of dead nodes to ignore in removenode operation. Use the same method for all nodes to ignore: either Host IDs or ip addresses.",
"description":"List of dead nodes to ingore in removenode operation",
"required":false,
"allowMultiple":false,
"type":"string",
@@ -2073,7 +2073,7 @@
},
{
"name":"cf",
"description":"Comma-separated column family names",
"description":"Comma seperated column family names",
"required":false,
"allowMultiple":false,
"type":"string",
@@ -2100,7 +2100,7 @@
},
{
"name":"cf",
"description":"Comma-separated column family names",
"description":"Comma seperated column family names",
"required":false,
"allowMultiple":false,
"type":"string",
@@ -2641,7 +2641,7 @@
"version":{
"type":"string",
"enum":[
"ka", "la", "mc", "md", "me"
"ka", "la", "mc", "md"
],
"description":"SSTable version"
},

View File

@@ -52,45 +52,6 @@
}
]
},
{
"path":"/system/log",
"operations":[
{
"method":"POST",
"summary":"Write a message to the Scylla log",
"type":"void",
"nickname":"write_log_message",
"produces":[
"application/json"
],
"parameters":[
{
"name":"message",
"description":"The message to write to the log",
"required":true,
"allowMultiple":false,
"type":"string",
"paramType":"query"
},
{
"name":"level",
"description":"The logging level to use",
"required":true,
"allowMultiple":false,
"type":"string",
"enum":[
"error",
"warn",
"info",
"debug",
"trace"
],
"paramType":"query"
}
]
}
]
},
{
"path":"/system/drop_sstable_caches",
"operations":[

View File

@@ -1,251 +0,0 @@
{
"apiVersion":"0.0.1",
"swaggerVersion":"1.2",
"basePath":"{{Protocol}}://{{Host}}",
"resourcePath":"/task_manager",
"produces":[
"application/json"
],
"apis":[
{
"path":"/task_manager/list_modules",
"operations":[
{
"method":"GET",
"summary":"Get all modules names",
"type":"array",
"items":{
"type":"string"
},
"nickname":"get_modules",
"produces":[
"application/json"
],
"parameters":[
]
}
]
},
{
"path":"/task_manager/list_module_tasks/{module}",
"operations":[
{
"method":"GET",
"summary":"Get a list of tasks",
"type":"array",
"items":{
"type":"task_stats"
},
"nickname":"get_tasks",
"produces":[
"application/json"
],
"parameters":[
{
"name":"module",
"description":"The module to query about",
"required":true,
"allowMultiple":false,
"type":"string",
"paramType":"path"
},
{
"name":"internal",
"description":"Boolean flag indicating whether internal tasks should be shown (false by default)",
"required":false,
"allowMultiple":false,
"type":"boolean",
"paramType":"query"
},
{
"name":"keyspace",
"description":"The keyspace to query about",
"required":false,
"allowMultiple":false,
"type":"string",
"paramType":"query"
},
{
"name":"table",
"description":"The table to query about",
"required":false,
"allowMultiple":false,
"type":"string",
"paramType":"query"
}
]
}
]
},
{
"path":"/task_manager/task_status/{task_id}",
"operations":[
{
"method":"GET",
"summary":"Get task status",
"type":"task_status",
"nickname":"get_task_status",
"produces":[
"application/json"
],
"parameters":[
{
"name":"task_id",
"description":"The uuid of a task to query about",
"required":true,
"allowMultiple":false,
"type":"string",
"paramType":"path"
}
]
}
]
},
{
"path":"/task_manager/abort_task/{task_id}",
"operations":[
{
"method":"POST",
"summary":"Abort running task and its descendants",
"type":"void",
"nickname":"abort_task",
"produces":[
"application/json"
],
"parameters":[
{
"name":"task_id",
"description":"The uuid of a task to abort",
"required":true,
"allowMultiple":false,
"type":"string",
"paramType":"path"
}
]
}
]
},
{
"path":"/task_manager/wait_task/{task_id}",
"operations":[
{
"method":"GET",
"summary":"Wait for a task to complete",
"type":"task_status",
"nickname":"wait_task",
"produces":[
"application/json"
],
"parameters":[
{
"name":"task_id",
"description":"The uuid of a task to wait for",
"required":true,
"allowMultiple":false,
"type":"string",
"paramType":"path"
}
]
}
]
}
],
"models":{
"task_stats" :{
"id": "task_stats",
"description":"A task statistics object",
"properties":{
"task_id":{
"type":"string",
"description":"The uuid of a task"
},
"state":{
"type":"string",
"enum":[
"created",
"running",
"done",
"failed"
],
"description":"The state of a task"
}
}
},
"task_status":{
"id":"task_status",
"description":"A task status object",
"properties":{
"id":{
"type":"string",
"description":"The uuid of the task"
},
"type":{
"type":"string",
"description":"The description of the task"
},
"state":{
"type":"string",
"enum":[
"created",
"running",
"done",
"failed"
],
"description":"The state of the task"
},
"is_abortable":{
"type":"boolean",
"description":"Boolean flag indicating whether the task can be aborted"
},
"start_time":{
"type":"datetime",
"description":"The start time of the task"
},
"end_time":{
"type":"datetime",
"description":"The end time of the task (unspecified when the task is not completed)"
},
"error":{
"type":"string",
"description":"Error string, if the task failed"
},
"parent_id":{
"type":"string",
"description":"The uuid of the parent task"
},
"sequence_number":{
"type":"long",
"description":"The running sequence number of the task"
},
"shard":{
"type":"long",
"description":"The number of a shard the task is running on"
},
"keyspace":{
"type":"string",
"description":"The keyspace the task is working on (if applicable)"
},
"table":{
"type":"string",
"description":"The table the task is working on (if applicable)"
},
"entity":{
"type":"string",
"description":"Task-specific entity description"
},
"progress_units":{
"type":"string",
"description":"A description of the progress units"
},
"progress_total":{
"type":"double",
"description":"The total number of units to complete for the task"
},
"progress_completed":{
"type":"double",
"description":"The number of units completed so far"
}
}
}
}
}

View File

@@ -1,185 +0,0 @@
{
"apiVersion":"0.0.1",
"swaggerVersion":"1.2",
"basePath":"{{Protocol}}://{{Host}}",
"resourcePath":"/task_manager_test",
"produces":[
"application/json"
],
"apis":[
{
"path":"/task_manager_test/test_module",
"operations":[
{
"method":"POST",
"summary":"Register test module in task manager",
"type":"void",
"nickname":"register_test_module",
"produces":[
"application/json"
],
"parameters":[
]
},
{
"method":"DELETE",
"summary":"Unregister test module in task manager",
"type":"void",
"nickname":"unregister_test_module",
"produces":[
"application/json"
],
"parameters":[
]
}
]
},
{
"path":"/task_manager_test/test_task",
"operations":[
{
"method":"POST",
"summary":"Register test task",
"type":"string",
"nickname":"register_test_task",
"produces":[
"application/json"
],
"parameters":[
{
"name":"task_id",
"description":"The uuid of a task to register",
"required":false,
"allowMultiple":false,
"type":"string",
"paramType":"query"
},
{
"name":"shard",
"description":"The shard of the task",
"required":false,
"allowMultiple":false,
"type":"long",
"paramType":"query"
},
{
"name":"parent_id",
"description":"The uuid of a parent task",
"required":false,
"allowMultiple":false,
"type":"string",
"paramType":"query"
},
{
"name":"keyspace",
"description":"The keyspace the task is working on",
"required":false,
"allowMultiple":false,
"type":"string",
"paramType":"query"
},
{
"name":"table",
"description":"The table the task is working on",
"required":false,
"allowMultiple":false,
"type":"string",
"paramType":"query"
},
{
"name":"type",
"description":"The type of the task",
"required":false,
"allowMultiple":false,
"type":"string",
"paramType":"query"
},
{
"name":"entity",
"description":"Task-specific entity description",
"required":false,
"allowMultiple":false,
"type":"string",
"paramType":"query"
}
]
},
{
"method":"DELETE",
"summary":"Unregister test task",
"type":"void",
"nickname":"unregister_test_task",
"produces":[
"application/json"
],
"parameters":[
{
"name":"task_id",
"description":"The uuid of a task to register",
"required":true,
"allowMultiple":false,
"type":"string",
"paramType":"query"
}
]
}
]
},
{
"path":"/task_manager_test/finish_test_task/{task_id}",
"operations":[
{
"method":"POST",
"summary":"Finish test task",
"type":"void",
"nickname":"finish_test_task",
"produces":[
"application/json"
],
"parameters":[
{
"name":"task_id",
"description":"The uuid of a task to finish",
"required":true,
"allowMultiple":false,
"type":"string",
"paramType":"path"
},
{
"name":"error",
"description":"The error with which task fails (if it does)",
"required":false,
"allowMultiple":false,
"type":"string",
"paramType":"query"
}
]
}
]
},
{
"path":"/task_manager_test/ttl",
"operations":[
{
"method":"POST",
"summary":"Set ttl in seconds and get last value",
"type":"long",
"nickname":"get_and_update_ttl",
"produces":[
"application/json"
],
"parameters":[
{
"name":"ttl",
"description":"The number of seconds for which the tasks will be kept in memory after it finishes",
"required":true,
"allowMultiple":false,
"type":"long",
"paramType":"query"
}
]
}
]
}
]
}

View File

@@ -24,13 +24,10 @@
#include "compaction_manager.hh"
#include "hinted_handoff.hh"
#include "error_injection.hh"
#include "authorization_cache.hh"
#include <seastar/http/exception.hh>
#include "stream_manager.hh"
#include "system.hh"
#include "api/config.hh"
#include "task_manager.hh"
#include "task_manager_test.hh"
logging::logger apilog("api");
@@ -99,9 +96,9 @@ future<> unset_rpc_controller(http_context& ctx) {
return ctx.http_server.set_routes([&ctx] (routes& r) { unset_rpc_controller(ctx, r); });
}
future<> set_server_storage_service(http_context& ctx, sharded<service::storage_service>& ss, sharded<gms::gossiper>& g, sharded<cdc::generation_service>& cdc_gs, sharded<db::system_keyspace>& sys_ks) {
return register_api(ctx, "storage_service", "The storage service API", [&ss, &g, &cdc_gs, &sys_ks] (http_context& ctx, routes& r) {
set_storage_service(ctx, r, ss, g.local(), cdc_gs, sys_ks);
future<> set_server_storage_service(http_context& ctx, sharded<service::storage_service>& ss, sharded<gms::gossiper>& g, sharded<cdc::generation_service>& cdc_gs) {
return register_api(ctx, "storage_service", "The storage service API", [&ss, &g, &cdc_gs] (http_context& ctx, routes& r) {
set_storage_service(ctx, r, ss, g.local(), cdc_gs);
});
}
@@ -129,17 +126,6 @@ future<> unset_server_repair(http_context& ctx) {
return ctx.http_server.set_routes([&ctx] (routes& r) { unset_repair(ctx, r); });
}
future<> set_server_authorization_cache(http_context &ctx, sharded<auth::service> &auth_service) {
return register_api(ctx, "authorization_cache",
"The authorization cache API", [&auth_service] (http_context &ctx, routes &r) {
set_authorization_cache(ctx, r, auth_service);
});
}
future<> unset_server_authorization_cache(http_context& ctx) {
return ctx.http_server.set_routes([&ctx] (routes& r) { unset_authorization_cache(ctx, r); });
}
future<> set_server_snapshot(http_context& ctx, sharded<db::snapshot_ctl>& snap_ctl) {
return ctx.http_server.set_routes([&ctx, &snap_ctl] (routes& r) { set_snapshot(ctx, r, snap_ctl); });
}
@@ -148,14 +134,8 @@ future<> unset_server_snapshot(http_context& ctx) {
return ctx.http_server.set_routes([&ctx] (routes& r) { unset_snapshot(ctx, r); });
}
future<> set_server_snitch(http_context& ctx, sharded<locator::snitch_ptr>& snitch) {
return register_api(ctx, "endpoint_snitch_info", "The endpoint snitch info API", [&snitch] (http_context& ctx, routes& r) {
set_endpoint_snitch(ctx, r, snitch);
});
}
future<> unset_server_snitch(http_context& ctx) {
return ctx.http_server.set_routes([&ctx] (routes& r) { unset_endpoint_snitch(ctx, r); });
future<> set_server_snitch(http_context& ctx) {
return register_api(ctx, "endpoint_snitch_info", "The endpoint snitch info API", set_endpoint_snitch);
}
future<> set_server_gossip(http_context& ctx, sharded<gms::gossiper>& g) {
@@ -253,56 +233,5 @@ future<> set_server_done(http_context& ctx) {
});
}
future<> set_server_task_manager(http_context& ctx) {
auto rb = std::make_shared < api_registry_builder > (ctx.api_doc);
return ctx.http_server.set_routes([rb, &ctx](routes& r) {
rb->register_function(r, "task_manager",
"The task manager API");
set_task_manager(ctx, r);
});
}
#ifndef SCYLLA_BUILD_MODE_RELEASE
future<> set_server_task_manager_test(http_context& ctx, lw_shared_ptr<db::config> cfg) {
auto rb = std::make_shared < api_registry_builder > (ctx.api_doc);
return ctx.http_server.set_routes([rb, &ctx, &cfg = *cfg](routes& r) mutable {
rb->register_function(r, "task_manager_test",
"The task manager test API");
set_task_manager_test(ctx, r, cfg);
});
}
#endif
void req_params::process(const request& req) {
// Process mandatory parameters
for (auto& [name, ent] : params) {
if (!ent.is_mandatory) {
continue;
}
try {
ent.value = req.param[name];
} catch (std::out_of_range&) {
throw httpd::bad_param_exception(fmt::format("Mandatory parameter '{}' was not provided", name));
}
}
// Process optional parameters
for (auto& [name, value] : req.query_parameters) {
try {
auto& ent = params.at(name);
if (ent.is_mandatory) {
throw httpd::bad_param_exception(fmt::format("Parameter '{}' is expected to be provided as part of the request url", name));
}
ent.value = value;
} catch (std::out_of_range&) {
throw httpd::bad_param_exception(fmt::format("Unsupported optional parameter '{}'", name));
}
}
}
}
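A hedged usage sketch of the req_params helper above (the parameter names and handler context are invented; only the req_params interface, shown in the header hunk further down, comes from this diff). Mandatory parameters are read from req.param (path parameters), optional ones from the query string; a pre-seeded value acts as a default:

req_params params({
    {"keyspace", {mandatory::yes}},
    {"table",    {std::nullopt, mandatory::no}},
    {"internal", {sstring("false")}},              // optional, pre-seeded default
});
params.process(req);                               // throws bad_param_exception on bad input
auto keyspace = params.get("keyspace");            // std::optional<sstring>, engaged here
auto internal = params.get_as<bool>("internal");   // "true"/"yes"/"1" -> true, etc.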

View File

@@ -137,14 +137,6 @@ future<json::json_return_type> sum_timer_stats(distributed<T>& d, utils::timed_
});
}
template<class T, class F>
future<json::json_return_type> sum_timer_stats(distributed<T>& d, utils::timed_rate_moving_average_summary_and_histogram F::*f) {
return d.map_reduce0([f](const T& p) {return (p.get_stats().*f).rate();}, utils::rate_moving_average_and_histogram(),
std::plus<utils::rate_moving_average_and_histogram>()).then([](const utils::rate_moving_average_and_histogram& val) {
return make_ready_future<json::json_return_type>(timer_to_json(val));
});
}
inline int64_t min_int64(int64_t a, int64_t b) {
return std::min(a,b);
}
@@ -245,67 +237,6 @@ public:
operator T() const { return value; }
};
using mandatory = bool_class<struct mandatory_tag>;
class req_params {
public:
struct def {
std::optional<sstring> value;
mandatory is_mandatory = mandatory::no;
def(std::optional<sstring> value_ = std::nullopt, mandatory is_mandatory_ = mandatory::no)
: value(std::move(value_))
, is_mandatory(is_mandatory_)
{ }
def(mandatory is_mandatory_)
: is_mandatory(is_mandatory_)
{ }
};
private:
std::unordered_map<sstring, def> params;
public:
req_params(std::initializer_list<std::pair<sstring, def>> l) {
for (const auto& [name, ent] : l) {
add(std::move(name), std::move(ent));
}
}
void add(sstring name, def ent) {
params.emplace(std::move(name), std::move(ent));
}
void process(const request& req);
const std::optional<sstring>& get(const char* name) const {
return params.at(name).value;
}
template <typename T = sstring>
const std::optional<T> get_as(const char* name) const {
return get(name);
}
template <typename T = sstring>
requires std::same_as<T, bool>
const std::optional<bool> get_as(const char* name) const {
auto value = get(name);
if (!value) {
return std::nullopt;
}
std::transform(value->begin(), value->end(), value->begin(), ::tolower);
if (value == "true" || value == "yes" || value == "1") {
return true;
}
if (value == "false" || value == "no" || value == "0") {
return false;
}
throw boost::bad_lexical_cast{};
}
};
utils_json::estimated_histogram time_to_json_histogram(const utils::time_estimated_histogram& val);
}

View File

@@ -11,7 +11,6 @@
#include <seastar/core/future.hh>
#include "replica/database_fwd.hh"
#include "tasks/task_manager.hh"
#include "seastarx.hh"
namespace service {
@@ -32,7 +31,6 @@ namespace locator {
class token_metadata;
class shared_token_metadata;
class snitch_ptr;
} // namespace locator
@@ -44,7 +42,6 @@ class config;
namespace view {
class view_builder;
}
class system_keyspace;
}
namespace netw { class messaging_service; }
class repair_service;
@@ -56,8 +53,6 @@ class gossiper;
}
namespace auth { class service; }
namespace api {
struct http_context {
@@ -68,12 +63,11 @@ struct http_context {
distributed<service::storage_proxy>& sp;
service::load_meter& lmeter;
const sharded<locator::shared_token_metadata>& shared_token_metadata;
sharded<tasks::task_manager>& tm;
http_context(distributed<replica::database>& _db,
distributed<service::storage_proxy>& _sp,
service::load_meter& _lm, const sharded<locator::shared_token_metadata>& _stm, sharded<tasks::task_manager>& _tm)
: db(_db), sp(_sp), lmeter(_lm), shared_token_metadata(_stm), tm(_tm) {
service::load_meter& _lm, const sharded<locator::shared_token_metadata>& _stm)
: db(_db), sp(_sp), lmeter(_lm), shared_token_metadata(_stm) {
}
const locator::token_metadata& get_token_metadata();
@@ -81,9 +75,8 @@ struct http_context {
future<> set_server_init(http_context& ctx);
future<> set_server_config(http_context& ctx, const db::config& cfg);
future<> set_server_snitch(http_context& ctx, sharded<locator::snitch_ptr>& snitch);
future<> unset_server_snitch(http_context& ctx);
future<> set_server_storage_service(http_context& ctx, sharded<service::storage_service>& ss, sharded<gms::gossiper>& g, sharded<cdc::generation_service>& cdc_gs, sharded<db::system_keyspace>& sys_ks);
future<> set_server_snitch(http_context& ctx);
future<> set_server_storage_service(http_context& ctx, sharded<service::storage_service>& ss, sharded<gms::gossiper>& g, sharded<cdc::generation_service>& cdc_gs);
future<> set_server_sstables_loader(http_context& ctx, sharded<sstables_loader>& sst_loader);
future<> unset_server_sstables_loader(http_context& ctx);
future<> set_server_view_builder(http_context& ctx, sharded<db::view::view_builder>& vb);
@@ -94,8 +87,6 @@ future<> set_transport_controller(http_context& ctx, cql_transport::controller&
future<> unset_transport_controller(http_context& ctx);
future<> set_rpc_controller(http_context& ctx, thrift_controller& ctl);
future<> unset_rpc_controller(http_context& ctx);
future<> set_server_authorization_cache(http_context& ctx, sharded<auth::service> &auth_service);
future<> unset_server_authorization_cache(http_context& ctx);
future<> set_server_snapshot(http_context& ctx, sharded<db::snapshot_ctl>& snap_ctl);
future<> unset_server_snapshot(http_context& ctx);
future<> set_server_gossip(http_context& ctx, sharded<gms::gossiper>& g);
@@ -111,7 +102,5 @@ future<> set_server_gossip_settle(http_context& ctx, sharded<gms::gossiper>& g);
future<> set_server_cache(http_context& ctx);
future<> set_server_compaction_manager(http_context& ctx);
future<> set_server_done(http_context& ctx);
future<> set_server_task_manager(http_context& ctx);
future<> set_server_task_manager_test(http_context& ctx, lw_shared_ptr<db::config> cfg);
}

View File

@@ -1,33 +0,0 @@
/*
* Copyright (C) 2022-present ScyllaDB
*/
/*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
#include "api/api-doc/authorization_cache.json.hh"
#include "api/authorization_cache.hh"
#include "api/api.hh"
#include "auth/common.hh"
namespace api {
using namespace json;
void set_authorization_cache(http_context& ctx, routes& r, sharded<auth::service> &auth_service) {
httpd::authorization_cache_json::authorization_cache_reset.set(r, [&auth_service] (std::unique_ptr<request> req) -> future<json::json_return_type> {
co_await auth_service.invoke_on_all([] (auth::service& auth) -> future<> {
auth.reset_authorization_cache();
return make_ready_future<>();
});
co_return json_void();
});
}
void unset_authorization_cache(http_context& ctx, routes& r) {
httpd::authorization_cache_json::authorization_cache_reset.unset(r);
}
}

View File

@@ -1,18 +0,0 @@
/*
* Copyright (C) 2022-present ScyllaDB
*/
/*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
#pragma once
#include "api.hh"
namespace api {
void set_authorization_cache(http_context& ctx, routes& r, sharded<auth::service> &auth_service);
void unset_authorization_cache(http_context& ctx, routes& r);
}

View File

@@ -29,11 +29,8 @@ static auto transformer(const std::vector<collectd_value>& values) {
case scollectd::data_type::GAUGE:
collected_value.values.push(v.d());
break;
case scollectd::data_type::COUNTER:
collected_value.values.push(v.ui());
break;
case scollectd::data_type::REAL_COUNTER:
collected_value.values.push(v.d());
case scollectd::data_type::DERIVE:
collected_value.values.push(v.i());
break;
default:
collected_value.values.push(v.ui());

View File

@@ -14,7 +14,7 @@
#include "sstables/metadata_collector.hh"
#include "utils/estimated_histogram.hh"
#include <algorithm>
#include "db/system_keyspace.hh"
#include "db/system_keyspace_view_types.hh"
#include "db/data_listeners.hh"
#include "storage_service.hh"
#include "unimplemented.hh"
@@ -43,7 +43,7 @@ std::tuple<sstring, sstring> parse_fully_qualified_cf_name(sstring name) {
return std::make_tuple(name.substr(0, pos), name.substr(end));
}
const table_id& get_uuid(const sstring& ks, const sstring& cf, const replica::database& db) {
const utils::UUID& get_uuid(const sstring& ks, const sstring& cf, const replica::database& db) {
try {
return db.find_uuid(ks, cf);
} catch (replica::no_such_column_family& e) {
@@ -51,7 +51,7 @@ const table_id& get_uuid(const sstring& ks, const sstring& cf, const replica::da
}
}
const table_id& get_uuid(const sstring& name, const replica::database& db) {
const utils::UUID& get_uuid(const sstring& name, const replica::database& db) {
auto [ks, cf] = parse_fully_qualified_cf_name(name);
return get_uuid(ks, cf, db);
}
@@ -79,14 +79,14 @@ future<json::json_return_type> get_cf_stats(http_context& ctx,
}
static future<json::json_return_type> get_cf_stats_count(http_context& ctx, const sstring& name,
utils::timed_rate_moving_average_summary_and_histogram replica::column_family_stats::*f) {
utils::timed_rate_moving_average_and_histogram replica::column_family_stats::*f) {
return map_reduce_cf(ctx, name, int64_t(0), [f](const replica::column_family& cf) {
return (cf.get_stats().*f).hist.count;
}, std::plus<int64_t>());
}
static future<json::json_return_type> get_cf_stats_sum(http_context& ctx, const sstring& name,
utils::timed_rate_moving_average_summary_and_histogram replica::column_family_stats::*f) {
utils::timed_rate_moving_average_and_histogram replica::column_family_stats::*f) {
auto uuid = get_uuid(name, ctx.db.local());
return ctx.db.map_reduce0([uuid, f](replica::database& db) {
// Histogram information is a sample of the actual load
@@ -102,7 +102,7 @@ static future<json::json_return_type> get_cf_stats_sum(http_context& ctx, const
static future<json::json_return_type> get_cf_stats_count(http_context& ctx,
utils::timed_rate_moving_average_summary_and_histogram replica::column_family_stats::*f) {
utils::timed_rate_moving_average_and_histogram replica::column_family_stats::*f) {
return map_reduce_cf(ctx, int64_t(0), [f](const replica::column_family& cf) {
return (cf.get_stats().*f).hist.count;
}, std::plus<int64_t>());
@@ -110,7 +110,7 @@ static future<json::json_return_type> get_cf_stats_count(http_context& ctx,
static future<json::json_return_type> get_cf_histogram(http_context& ctx, const sstring& name,
utils::timed_rate_moving_average_and_histogram replica::column_family_stats::*f) {
auto uuid = get_uuid(name, ctx.db.local());
utils::UUID uuid = get_uuid(name, ctx.db.local());
return ctx.db.map_reduce0([f, uuid](const replica::database& p) {
return (p.find_column_family(uuid).get_stats().*f).hist;},
utils::ihistogram(),
@@ -120,19 +120,7 @@ static future<json::json_return_type> get_cf_histogram(http_context& ctx, const
});
}
static future<json::json_return_type> get_cf_histogram(http_context& ctx, const sstring& name,
utils::timed_rate_moving_average_summary_and_histogram replica::column_family_stats::*f) {
auto uuid = get_uuid(name, ctx.db.local());
return ctx.db.map_reduce0([f, uuid](const replica::database& p) {
return (p.find_column_family(uuid).get_stats().*f).hist;},
utils::ihistogram(),
std::plus<utils::ihistogram>())
.then([](const utils::ihistogram& val) {
return make_ready_future<json::json_return_type>(to_json(val));
});
}
static future<json::json_return_type> get_cf_histogram(http_context& ctx, utils::timed_rate_moving_average_summary_and_histogram replica::column_family_stats::*f) {
static future<json::json_return_type> get_cf_histogram(http_context& ctx, utils::timed_rate_moving_average_and_histogram replica::column_family_stats::*f) {
std::function<utils::ihistogram(const replica::database&)> fun = [f] (const replica::database& db) {
utils::ihistogram res;
for (auto i : db.get_column_families()) {
@@ -148,8 +136,8 @@ static future<json::json_return_type> get_cf_histogram(http_context& ctx, utils:
}
static future<json::json_return_type> get_cf_rate_and_histogram(http_context& ctx, const sstring& name,
utils::timed_rate_moving_average_summary_and_histogram replica::column_family_stats::*f) {
auto uuid = get_uuid(name, ctx.db.local());
utils::timed_rate_moving_average_and_histogram replica::column_family_stats::*f) {
utils::UUID uuid = get_uuid(name, ctx.db.local());
return ctx.db.map_reduce0([f, uuid](const replica::database& p) {
return (p.find_column_family(uuid).get_stats().*f).rate();},
utils::rate_moving_average_and_histogram(),
@@ -159,7 +147,7 @@ static future<json::json_return_type> get_cf_rate_and_histogram(http_context& c
});
}
static future<json::json_return_type> get_cf_rate_and_histogram(http_context& ctx, utils::timed_rate_moving_average_summary_and_histogram replica::column_family_stats::*f) {
static future<json::json_return_type> get_cf_rate_and_histogram(http_context& ctx, utils::timed_rate_moving_average_and_histogram replica::column_family_stats::*f) {
std::function<utils::rate_moving_average_and_histogram(const replica::database&)> fun = [f] (const replica::database& db) {
utils::rate_moving_average_and_histogram res;
for (auto i : db.get_column_families()) {
@@ -394,7 +382,7 @@ void set_column_family(http_context& ctx, routes& r) {
cf::get_all_cf_all_memtables_off_heap_size.set(r, [&ctx] (std::unique_ptr<request> req) {
warn(unimplemented::cause::INDEXES);
return ctx.db.map_reduce0([](const replica::database& db){
return db.dirty_memory_region_group().real_memory_used();
return db.dirty_memory_region_group().memory_used();
}, int64_t(0), std::plus<int64_t>()).then([](int res) {
return make_ready_future<json::json_return_type>(res);
});
@@ -815,19 +803,19 @@ void set_column_family(http_context& ctx, routes& r) {
cf::get_cas_prepare.set(r, [&ctx] (std::unique_ptr<request> req) {
return map_reduce_cf_time_histogram(ctx, req->param["name"], [](const replica::column_family& cf) {
return cf.get_stats().cas_prepare.histogram();
return cf.get_stats().estimated_cas_prepare;
});
});
cf::get_cas_propose.set(r, [&ctx] (std::unique_ptr<request> req) {
return map_reduce_cf_time_histogram(ctx, req->param["name"], [](const replica::column_family& cf) {
return cf.get_stats().cas_accept.histogram();
return cf.get_stats().estimated_cas_accept;
});
});
cf::get_cas_commit.set(r, [&ctx] (std::unique_ptr<request> req) {
return map_reduce_cf_time_histogram(ctx, req->param["name"], [](const replica::column_family& cf) {
return cf.get_stats().cas_learn.histogram();
return cf.get_stats().estimated_cas_learn;
});
});
@@ -855,7 +843,7 @@ void set_column_family(http_context& ctx, routes& r) {
});
cf::get_auto_compaction.set(r, [&ctx] (const_req req) {
auto uuid = get_uuid(req.param["name"], ctx.db.local());
const utils::UUID& uuid = get_uuid(req.param["name"], ctx.db.local());
replica::column_family& cf = ctx.db.local().find_column_family(uuid);
return !cf.is_auto_compaction_disabled_by_user();
});
@@ -933,13 +921,13 @@ void set_column_family(http_context& ctx, routes& r) {
cf::get_read_latency_estimated_histogram.set(r, [&ctx](std::unique_ptr<request> req) {
return map_reduce_cf_time_histogram(ctx, req->param["name"], [](const replica::column_family& cf) {
return cf.get_stats().reads.histogram();
return cf.get_stats().estimated_read;
});
});
cf::get_write_latency_estimated_histogram.set(r, [&ctx](std::unique_ptr<request> req) {
return map_reduce_cf_time_histogram(ctx, req->param["name"], [](const replica::column_family& cf) {
return cf.get_stats().writes.histogram();
return cf.get_stats().estimated_write;
});
});
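The get_cf_stats_count/sum helpers above select one statistic with a pointer-to-data-member and fold it over every table. A minimal self-contained sketch of that projection pattern; table_stats and histogram are hypothetical stand-ins for replica::column_family_stats and its histogram type:
#include <cstdint>
#include <iostream>
#include <numeric>
#include <vector>
// Hypothetical stand-ins for the per-table stats used above.
struct histogram { int64_t count = 0; };
struct table_stats { histogram reads; histogram writes; };
// Same idea as get_cf_stats_count(): pick one stat via a
// pointer-to-data-member and sum it across all tables.
int64_t total_count(const std::vector<table_stats>& tables,
                    histogram table_stats::*field) {
    return std::accumulate(tables.begin(), tables.end(), int64_t(0),
        [field] (int64_t acc, const table_stats& s) {
            return acc + (s.*field).count;
        });
}
int main() {
    std::vector<table_stats> tables(3);
    tables[0].reads.count = 10;
    tables[1].reads.count = 5;
    tables[2].writes.count = 7;
    std::cout << total_count(tables, &table_stats::reads) << "\n";  // 15
    std::cout << total_count(tables, &table_stats::writes) << "\n"; // 7
}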

View File

@@ -18,7 +18,7 @@ namespace api {
void set_column_family(http_context& ctx, routes& r);
const table_id& get_uuid(const sstring& name, const replica::database& db);
const utils::UUID& get_uuid(const sstring& name, const replica::database& db);
future<> foreach_column_family(http_context& ctx, const sstring& name, std::function<void(replica::column_family&)> f);
@@ -63,7 +63,7 @@ struct map_reduce_column_families_locally {
std::function<std::unique_ptr<std::any>(std::unique_ptr<std::any>, std::unique_ptr<std::any>)> reducer;
future<std::unique_ptr<std::any>> operator()(replica::database& db) const {
auto res = seastar::make_lw_shared<std::unique_ptr<std::any>>(std::make_unique<std::any>(init));
return do_for_each(db.get_column_families(), [res, this](const std::pair<table_id, seastar::lw_shared_ptr<replica::table>>& i) {
return do_for_each(db.get_column_families(), [res, this](const std::pair<utils::UUID, seastar::lw_shared_ptr<replica::table>>& i) {
*res = reducer(std::move(*res), mapper(*i.second.get()));
}).then([res] {
return std::move(*res);

View File

@@ -68,7 +68,7 @@ void set_compaction_manager(http_context& ctx, routes& r) {
cm::get_pending_tasks_by_table.set(r, [&ctx] (std::unique_ptr<request> req) {
return ctx.db.map_reduce0([&ctx](replica::database& db) {
return do_with(std::unordered_map<std::pair<sstring, sstring>, uint64_t, utils::tuple_hash>(), [&ctx, &db](std::unordered_map<std::pair<sstring, sstring>, uint64_t, utils::tuple_hash>& tasks) {
return do_for_each(db.get_column_families(), [&tasks](const std::pair<table_id, seastar::lw_shared_ptr<replica::table>>& i) {
return do_for_each(db.get_column_families(), [&tasks](const std::pair<utils::UUID, seastar::lw_shared_ptr<replica::table>>& i) {
replica::table& cf = *i.second.get();
tasks[std::make_pair(cf.schema()->ks_name(), cf.schema()->cf_name())] = cf.get_compaction_strategy().estimated_pending_compactions(cf.as_table_state());
return make_ready_future<>();
@@ -119,7 +119,7 @@ void set_compaction_manager(http_context& ctx, routes& r) {
auto& cm = db.get_compaction_manager();
return parallel_for_each(table_names, [&db, &cm, &ks_name, type] (sstring& table_name) {
auto& t = db.find_column_family(ks_name, table_name);
return cm.stop_compaction(type, &t.as_table_state());
return cm.stop_compaction(type, &t);
});
});
co_return json_void();

View File

@@ -6,63 +6,30 @@
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
#include "locator/token_metadata.hh"
#include "locator/snitch_base.hh"
#include "locator/production_snitch_base.hh"
#include "endpoint_snitch.hh"
#include "api/api-doc/endpoint_snitch_info.json.hh"
#include "api/api-doc/storage_service.json.hh"
#include "utils/fb_utilities.hh"
namespace api {
void set_endpoint_snitch(http_context& ctx, routes& r, sharded<locator::snitch_ptr>& snitch) {
void set_endpoint_snitch(http_context& ctx, routes& r) {
static auto host_or_broadcast = [](const_req req) {
auto host = req.get_query_param("host");
return host.empty() ? gms::inet_address(utils::fb_utilities::get_broadcast_address()) : gms::inet_address(host);
};
httpd::endpoint_snitch_info_json::get_datacenter.set(r, [&ctx](const_req req) {
auto& topology = ctx.shared_token_metadata.local().get()->get_topology();
auto ep = host_or_broadcast(req);
if (!topology.has_endpoint(ep, locator::topology::pending::yes)) {
// Cannot return error here, nodetool status can race, request
// info about just-left node and not handle it nicely
return sstring(locator::production_snitch_base::default_dc);
}
return topology.get_datacenter(ep);
httpd::endpoint_snitch_info_json::get_datacenter.set(r, [](const_req req) {
return locator::i_endpoint_snitch::get_local_snitch_ptr()->get_datacenter(host_or_broadcast(req));
});
httpd::endpoint_snitch_info_json::get_rack.set(r, [&ctx](const_req req) {
auto& topology = ctx.shared_token_metadata.local().get()->get_topology();
auto ep = host_or_broadcast(req);
if (!topology.has_endpoint(ep, locator::topology::pending::yes)) {
// Cannot return error here, nodetool status can race, request
// info about just-left node and not handle it nicely
return sstring(locator::production_snitch_base::default_rack);
}
return topology.get_rack(ep);
httpd::endpoint_snitch_info_json::get_rack.set(r, [](const_req req) {
return locator::i_endpoint_snitch::get_local_snitch_ptr()->get_rack(host_or_broadcast(req));
});
httpd::endpoint_snitch_info_json::get_snitch_name.set(r, [&snitch] (const_req req) {
return snitch.local()->get_name();
httpd::endpoint_snitch_info_json::get_snitch_name.set(r, [] (const_req req) {
return locator::i_endpoint_snitch::get_local_snitch_ptr()->get_name();
});
httpd::storage_service_json::update_snitch.set(r, [&snitch](std::unique_ptr<request> req) {
locator::snitch_config cfg;
cfg.name = req->get_query_param("ep_snitch_class_name");
return locator::i_endpoint_snitch::reset_snitch(snitch, cfg).then([] {
return make_ready_future<json::json_return_type>(json::json_void());
});
});
}
void unset_endpoint_snitch(http_context& ctx, routes& r) {
httpd::endpoint_snitch_info_json::get_datacenter.unset(r);
httpd::endpoint_snitch_info_json::get_rack.unset(r);
httpd::endpoint_snitch_info_json::get_snitch_name.unset(r);
httpd::storage_service_json::update_snitch.unset(r);
}
}
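One side of the diff above answers get_datacenter/get_rack from the shared token metadata topology and falls back to a default value when the endpoint is unknown, so a racing nodetool status request about a just-removed node does not turn into an error. A tiny sketch of that lookup-with-fallback, with a plain map standing in for the topology:
#include <iostream>
#include <string>
#include <unordered_map>
// Stand-in for the default returned when an endpoint is not in the topology.
static const std::string default_dc = "UNKNOWN_DC";
std::string get_datacenter(const std::unordered_map<std::string, std::string>& dc_by_endpoint,
                           const std::string& endpoint) {
    auto it = dc_by_endpoint.find(endpoint);
    return it == dc_by_endpoint.end() ? default_dc : it->second;
}
int main() {
    std::unordered_map<std::string, std::string> dc_by_endpoint{
        {"10.0.0.1", "dc1"},
        {"10.0.0.2", "dc2"},
    };
    std::cout << get_datacenter(dc_by_endpoint, "10.0.0.1") << "\n"; // dc1
    std::cout << get_datacenter(dc_by_endpoint, "10.0.0.9") << "\n"; // UNKNOWN_DC
}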

View File

@@ -10,13 +10,8 @@
#include "api.hh"
namespace locator {
class snitch_ptr;
}
namespace api {
void set_endpoint_snitch(http_context& ctx, routes& r, sharded<locator::snitch_ptr>&);
void unset_endpoint_snitch(http_context& ctx, routes& r);
void set_endpoint_snitch(http_context& ctx, routes& r);
}

View File

@@ -12,7 +12,7 @@
#include <seastar/http/exception.hh>
#include "log.hh"
#include "utils/error_injection.hh"
#include <seastar/core/future-util.hh>
#include "seastar/core/future-util.hh"
namespace api {

View File

@@ -18,7 +18,7 @@ namespace fd = httpd::failure_detector_json;
void set_failure_detector(http_context& ctx, routes& r, gms::gossiper& g) {
fd::get_all_endpoint_states.set(r, [&g](std::unique_ptr<request> req) {
std::vector<fd::endpoint_state> res;
for (auto i : g.get_endpoint_states()) {
for (auto i : g.endpoint_state_map) {
fd::endpoint_state val;
val.addrs = boost::lexical_cast<std::string>(i.first);
val.is_alive = i.second.is_alive();
@@ -40,53 +40,54 @@ void set_failure_detector(http_context& ctx, routes& r, gms::gossiper& g) {
});
fd::get_up_endpoint_count.set(r, [&g](std::unique_ptr<request> req) {
int res = g.get_up_endpoint_count();
return make_ready_future<json::json_return_type>(res);
return gms::get_up_endpoint_count(g).then([](int res) {
return make_ready_future<json::json_return_type>(res);
});
});
fd::get_down_endpoint_count.set(r, [&g](std::unique_ptr<request> req) {
int res = g.get_down_endpoint_count();
return make_ready_future<json::json_return_type>(res);
return gms::get_down_endpoint_count(g).then([](int res) {
return make_ready_future<json::json_return_type>(res);
});
});
fd::get_phi_convict_threshold.set(r, [] (std::unique_ptr<request> req) {
return make_ready_future<json::json_return_type>(8);
return gms::get_phi_convict_threshold().then([](double res) {
return make_ready_future<json::json_return_type>(res);
});
});
fd::get_simple_states.set(r, [&g] (std::unique_ptr<request> req) {
std::map<sstring, sstring> nodes_status;
for (auto& entry : g.get_endpoint_states()) {
nodes_status.emplace(entry.first.to_sstring(), entry.second.is_alive() ? "UP" : "DOWN");
}
return make_ready_future<json::json_return_type>(map_to_key_value<fd::mapper>(nodes_status));
return gms::get_simple_states(g).then([](const std::map<sstring, sstring>& map) {
return make_ready_future<json::json_return_type>(map_to_key_value<fd::mapper>(map));
});
});
fd::set_phi_convict_threshold.set(r, [](std::unique_ptr<request> req) {
double phi = atof(req->get_query_param("phi").c_str());
return make_ready_future<json::json_return_type>("");
return gms::set_phi_convict_threshold(phi).then([]() {
return make_ready_future<json::json_return_type>("");
});
});
fd::get_endpoint_state.set(r, [&g] (std::unique_ptr<request> req) {
auto* state = g.get_endpoint_state_for_endpoint_ptr(gms::inet_address(req->param["addr"]));
if (!state) {
return make_ready_future<json::json_return_type>(format("unknown endpoint {}", req->param["addr"]));
}
std::stringstream ss;
g.append_endpoint_state(ss, *state);
return make_ready_future<json::json_return_type>(sstring(ss.str()));
return get_endpoint_state(g, req->param["addr"]).then([](const sstring& state) {
return make_ready_future<json::json_return_type>(state);
});
});
fd::get_endpoint_phi_values.set(r, [](std::unique_ptr<request> req) {
std::map<gms::inet_address, gms::arrival_window> map;
std::vector<fd::endpoint_phi_value> res;
auto now = gms::arrival_window::clk::now();
for (auto& p : map) {
fd::endpoint_phi_value val;
val.endpoint = p.first.to_sstring();
val.phi = p.second.phi(now);
res.emplace_back(std::move(val));
}
return make_ready_future<json::json_return_type>(res);
return gms::get_arrival_samples().then([](std::map<gms::inet_address, gms::arrival_window> map) {
std::vector<fd::endpoint_phi_value> res;
auto now = gms::arrival_window::clk::now();
for (auto& p : map) {
fd::endpoint_phi_value val;
val.endpoint = p.first.to_sstring();
val.phi = p.second.phi(now);
res.emplace_back(std::move(val));
}
return make_ready_future<json::json_return_type>(res);
});
});
}
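get_endpoint_phi_values above reports gms::arrival_window::phi() per endpoint, and get_phi_convict_threshold shows the conventional default of 8. A minimal sketch of the phi-accrual idea behind such a window, under the usual exponential inter-arrival approximation; this class is illustrative only, not Scylla's gms::arrival_window:
#include <cmath>
#include <cstddef>
#include <deque>
#include <iostream>
#include <numeric>
class arrival_window {
    std::deque<double> _intervals;     // seconds between heartbeats
    double _last = -1;                 // time of the last heartbeat, seconds
    static constexpr std::size_t max_samples = 1000;
public:
    void report(double now) {
        if (_last >= 0) {
            _intervals.push_back(now - _last);
            if (_intervals.size() > max_samples) {
                _intervals.pop_front();
            }
        }
        _last = now;
    }
    double phi(double now) const {
        if (_intervals.empty()) {
            return 0.0;
        }
        double mean = std::accumulate(_intervals.begin(), _intervals.end(), 0.0)
                      / _intervals.size();
        double t = now - _last;
        // P(still no heartbeat after t) ~ exp(-t/mean); phi = -log10 of that.
        return (t / mean) / std::log(10.0);
    }
};
int main() {
    arrival_window w;
    for (double t = 0; t <= 10; t += 1.0) {
        w.report(t);                       // one heartbeat per second
    }
    std::cout << w.phi(11.0) << "\n";      // ~0.43 after 1s of silence
    std::cout << w.phi(20.0) << "\n";      // ~4.3 after 10s of silence
}
With one heartbeat per second, phi stays well below the default convict threshold of 8 after a single missed beat and crosses it only after a much longer silence (roughly 18-19 seconds in this toy setup).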

View File

@@ -14,7 +14,7 @@
#include "db/config.hh"
#include "utils/histogram.hh"
#include "replica/database.hh"
#include <seastar/core/scheduling_specific.hh>
#include "seastar/core/scheduling_specific.hh"
namespace api {
@@ -22,9 +22,6 @@ namespace sp = httpd::storage_proxy_json;
using proxy = service::storage_proxy;
using namespace json;
utils::time_estimated_histogram timed_rate_moving_average_summary_merge(utils::time_estimated_histogram a, const utils::timed_rate_moving_average_summary_and_histogram& b) {
return a.merge(b.histogram());
}
/**
* This function implements a two dimensional map reduce where
@@ -58,10 +55,10 @@ future<V> two_dimensional_map_reduce(distributed<service::storage_proxy>& d,
* @param initial_value - the initial value to use for both aggregations
* @return A future that resolves to the result of the aggregation.
*/
template<typename V, typename Reducer, typename F, typename C>
template<typename V, typename Reducer, typename F>
future<V> two_dimensional_map_reduce(distributed<service::storage_proxy>& d,
C F::*f, Reducer reducer, V initial_value) {
return two_dimensional_map_reduce(d, [f] (F& stats) -> V {
V F::*f, Reducer reducer, V initial_value) {
return two_dimensional_map_reduce(d, [f] (F& stats) {
return stats.*f;
}, reducer, initial_value);
}
@@ -115,10 +112,10 @@ utils_json::estimated_histogram time_to_json_histogram(const utils::time_estimat
return res;
}
static future<json::json_return_type> sum_estimated_histogram(http_context& ctx, utils::timed_rate_moving_average_summary_and_histogram service::storage_proxy_stats::stats::*f) {
return two_dimensional_map_reduce(ctx.sp, [f] (service::storage_proxy_stats::stats& stats) {
return (stats.*f).histogram();
}, utils::time_estimated_histogram_merge, utils::time_estimated_histogram()).then([](const utils::time_estimated_histogram& val) {
static future<json::json_return_type> sum_estimated_histogram(http_context& ctx, utils::time_estimated_histogram service::storage_proxy_stats::stats::*f) {
return two_dimensional_map_reduce(ctx.sp, f, utils::time_estimated_histogram_merge,
utils::time_estimated_histogram()).then([](const utils::time_estimated_histogram& val) {
return make_ready_future<json::json_return_type>(time_to_json_histogram(val));
});
}
@@ -133,7 +130,7 @@ static future<json::json_return_type> sum_estimated_histogram(http_context& ctx
});
}
static future<json::json_return_type> total_latency(http_context& ctx, utils::timed_rate_moving_average_summary_and_histogram service::storage_proxy_stats::stats::*f) {
static future<json::json_return_type> total_latency(http_context& ctx, utils::timed_rate_moving_average_and_histogram service::storage_proxy_stats::stats::*f) {
return two_dimensional_map_reduce(ctx.sp, [f] (service::storage_proxy_stats::stats& stats) {
return (stats.*f).hist.mean * (stats.*f).hist.count;
}, std::plus<double>(), 0.0).then([](double val) {
@@ -153,7 +150,7 @@ static future<json::json_return_type> total_latency(http_context& ctx, utils::t
template<typename F>
future<json::json_return_type>
sum_histogram_stats_storage_proxy(distributed<proxy>& d,
utils::timed_rate_moving_average_summary_and_histogram F::*f) {
utils::timed_rate_moving_average_and_histogram F::*f) {
return two_dimensional_map_reduce(d, [f] (service::storage_proxy_stats::stats& stats) {
return (stats.*f).hist;
}, std::plus<utils::ihistogram>(), utils::ihistogram()).
@@ -173,7 +170,7 @@ sum_histogram_stats_storage_proxy(distributed<proxy>& d,
template<typename F>
future<json::json_return_type>
sum_timer_stats_storage_proxy(distributed<proxy>& d,
utils::timed_rate_moving_average_summary_and_histogram F::*f) {
utils::timed_rate_moving_average_and_histogram F::*f) {
return two_dimensional_map_reduce(d, [f] (service::storage_proxy_stats::stats& stats) {
return (stats.*f).rate();
@@ -494,14 +491,14 @@ void set_storage_proxy(http_context& ctx, routes& r, sharded<service::storage_se
});
sp::get_read_estimated_histogram.set(r, [&ctx](std::unique_ptr<request> req) {
return sum_estimated_histogram(ctx, &service::storage_proxy_stats::stats::read);
return sum_estimated_histogram(ctx, &service::storage_proxy_stats::stats::estimated_read);
});
sp::get_read_latency.set(r, [&ctx](std::unique_ptr<request> req) {
return total_latency(ctx, &service::storage_proxy_stats::stats::read);
});
sp::get_write_estimated_histogram.set(r, [&ctx](std::unique_ptr<request> req) {
return sum_estimated_histogram(ctx, &service::storage_proxy_stats::stats::write);
return sum_estimated_histogram(ctx, &service::storage_proxy_stats::stats::estimated_write);
});
sp::get_write_latency.set(r, [&ctx](std::unique_ptr<request> req) {

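The two_dimensional_map_reduce helper above reduces a projection of the per-scheduling-group stats first within each shard and then across shards. A self-contained sketch of that two-level reduce; stats below is a hypothetical stand-in for service::storage_proxy_stats::stats, and the nested vectors stand for shards and scheduling groups:
#include <cstdint>
#include <functional>
#include <iostream>
#include <vector>
// Hypothetical per-scheduling-group counters.
struct stats { uint64_t reads = 0; uint64_t writes = 0; };
// Reduce mapper(s) over both dimensions with the given reducer.
template <typename V, typename Mapper, typename Reducer>
V two_dimensional_map_reduce(const std::vector<std::vector<stats>>& shards,
                             Mapper mapper, Reducer reducer, V initial) {
    V result = initial;
    for (const auto& groups : shards) {          // dimension 1: shards
        for (const auto& s : groups) {           // dimension 2: scheduling groups
            result = reducer(result, mapper(s));
        }
    }
    return result;
}
int main() {
    std::vector<std::vector<stats>> shards{
        {{10, 1}, {5, 2}},   // shard 0: two scheduling groups
        {{7, 3}},            // shard 1: one scheduling group
    };
    auto total_reads = two_dimensional_map_reduce<uint64_t>(
        shards, [] (const stats& s) { return s.reads; },
        std::plus<uint64_t>(), uint64_t(0));
    std::cout << total_reads << "\n";    // 22
}
The pointer-to-member overload in the diff is just a convenience wrapper around the same shape: it turns the member pointer into the projection lambda shown here.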
View File

@@ -11,7 +11,6 @@
#include "db/config.hh"
#include "db/schema_tables.hh"
#include "utils/hash.hh"
#include <optional>
#include <sstream>
#include <time.h>
#include <algorithm>
@@ -25,9 +24,8 @@
#include "db/commitlog/commitlog.hh"
#include "gms/gossiper.hh"
#include "db/system_keyspace.hh"
#include <seastar/http/exception.hh>
#include "seastar/http/exception.hh"
#include <seastar/core/coroutine.hh>
#include <seastar/coroutine/parallel_for_each.hh>
#include "repair/row_level.hh"
#include "locator/snitch_base.hh"
#include "column_family.hh"
@@ -58,30 +56,23 @@ const locator::token_metadata& http_context::get_token_metadata() {
namespace ss = httpd::storage_service_json;
using namespace json;
sstring validate_keyspace(http_context& ctx, sstring ks_name) {
sstring validate_keyspace(http_context& ctx, const parameters& param) {
const auto& ks_name = param["keyspace"];
if (ctx.db.local().has_keyspace(ks_name)) {
return ks_name;
}
throw bad_param_exception(replica::no_such_keyspace(ks_name).what());
}
sstring validate_keyspace(http_context& ctx, const parameters& param) {
return validate_keyspace(ctx, param["keyspace"]);
}
locator::host_id validate_host_id(const sstring& param) {
auto hoep = locator::host_id_or_endpoint(param, locator::host_id_or_endpoint::param_type::host_id);
return hoep.id;
}
// splits a request parameter assumed to hold a comma-separated list of table names
// verifies that the tables exist, otherwise a bad_param_exception is thrown
// containing the description of the respective no_such_column_family error.
std::vector<sstring> parse_tables(const sstring& ks_name, http_context& ctx, sstring value) {
if (value.empty()) {
return map_keys(ctx.db.local().find_keyspace(ks_name).metadata().get()->cf_meta_data());
std::vector<sstring> parse_tables(const sstring& ks_name, http_context& ctx, const std::unordered_map<sstring, sstring>& query_params, sstring param_name) {
auto it = query_params.find(param_name);
if (it == query_params.end()) {
return {};
}
std::vector<sstring> names = split(value, ",");
std::vector<sstring> names = split(it->second, ",");
try {
for (const auto& table_name : names) {
ctx.db.local().find_column_family(ks_name, table_name);
@@ -92,14 +83,6 @@ std::vector<sstring> parse_tables(const sstring& ks_name, http_context& ctx, sst
return names;
}
std::vector<sstring> parse_tables(const sstring& ks_name, http_context& ctx, const std::unordered_map<sstring, sstring>& query_params, sstring param_name) {
auto it = query_params.find(param_name);
if (it == query_params.end()) {
return {};
}
return parse_tables(ks_name, ctx, it->second);
}
static ss::token_range token_range_endpoints_to_json(const dht::token_range_endpoints& d) {
ss::token_range r;
r.start_token = d._start_token;
@@ -162,7 +145,7 @@ seastar::future<json::json_return_type> run_toppartitions_query(db::toppartition
});
}
future<json::json_return_type> set_tables_autocompaction(http_context& ctx, const sstring &keyspace, std::vector<sstring> tables, bool enabled) {
future<json::json_return_type> set_tables_autocompaction(http_context& ctx, service::storage_service& ss, const sstring &keyspace, std::vector<sstring> tables, bool enabled) {
if (tables.empty()) {
tables = map_keys(ctx.db.local().find_keyspace(keyspace).metadata().get()->cf_meta_data());
}
@@ -401,10 +384,11 @@ static future<json::json_return_type> describe_ring_as_json(sharded<service::sto
co_return json::json_return_type(stream_range_as_array(co_await ss.local().describe_ring(keyspace), token_range_endpoints_to_json));
}
void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_service>& ss, gms::gossiper& g, sharded<cdc::generation_service>& cdc_gs, sharded<db::system_keyspace>& sys_ks) {
ss::local_hostid.set(r, [&ctx](std::unique_ptr<request> req) {
auto id = ctx.db.local().get_config().host_id;
return make_ready_future<json::json_return_type>(id.to_sstring());
void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_service>& ss, gms::gossiper& g, sharded<cdc::generation_service>& cdc_gs) {
ss::local_hostid.set(r, [](std::unique_ptr<request> req) {
return db::system_keyspace::load_local_host_id().then([](const utils::UUID& id) {
return make_ready_future<json::json_return_type>(id.to_sstring());
});
});
ss::get_tokens.set(r, [&ctx] (std::unique_ptr<request> req) {
@@ -520,10 +504,10 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
return ctx.db.local().get_config().saved_caches_directory();
});
ss::get_range_to_endpoint_map.set(r, [&ctx, &ss](std::unique_ptr<request> req) -> future<json::json_return_type> {
ss::get_range_to_endpoint_map.set(r, [&ctx, &ss](std::unique_ptr<request> req) {
auto keyspace = validate_keyspace(ctx, req->param);
std::vector<ss::maplist_mapper> res;
co_return stream_range_as_array(co_await ss.local().get_range_to_address_map(keyspace),
return make_ready_future<json::json_return_type>(stream_range_as_array(ss.local().get_range_to_address_map(keyspace),
[](const std::pair<dht::token_range, inet_address_vector_replica_set>& entry){
ss::maplist_mapper m;
if (entry.first.start()) {
@@ -540,7 +524,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
m.value.push(address.to_sstring());
}
return m;
});
}));
});
ss::get_pending_range_to_endpoint_map.set(r, [&ctx](std::unique_ptr<request> req) {
@@ -552,13 +536,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
});
ss::describe_any_ring.set(r, [&ctx, &ss](std::unique_ptr<request> req) {
// Find an arbitrary non-system keyspace.
auto keyspaces = ctx.db.local().get_non_local_strategy_keyspaces();
if (keyspaces.empty()) {
throw std::runtime_error("No keyspace provided and no non system keyspace exists");
}
auto ks = keyspaces[0];
return describe_ring_as_json(ss, ks);
return describe_ring_as_json(ss, "");
});
ss::describe_ring.set(r, [&ctx, &ss](std::unique_ptr<request> req) {
@@ -616,11 +594,11 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
column_families = map_keys(ctx.db.local().find_keyspace(keyspace).metadata().get()->cf_meta_data());
}
return ctx.db.invoke_on_all([keyspace, column_families] (replica::database& db) -> future<> {
auto table_ids = boost::copy_range<std::vector<table_id>>(column_families | boost::adaptors::transformed([&] (auto& cf_name) {
auto table_ids = boost::copy_range<std::vector<utils::UUID>>(column_families | boost::adaptors::transformed([&] (auto& cf_name) {
return db.find_uuid(keyspace, cf_name);
}));
// major compact smaller tables first, to increase chances of success if low on space.
std::ranges::sort(table_ids, std::less<>(), [&] (const table_id& id) {
std::ranges::sort(table_ids, std::less<>(), [&] (const utils::UUID& id) {
return db.find_column_family(id).get_stats().live_disk_space_used;
});
// as a table can be dropped during loop below, let's find it before issuing major compaction request.
@@ -646,19 +624,18 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
std::runtime_error("Can not perform cleanup operation when topology changes"));
}
return ctx.db.invoke_on_all([keyspace, column_families] (replica::database& db) -> future<> {
auto table_ids = boost::copy_range<std::vector<table_id>>(column_families | boost::adaptors::transformed([&] (auto& table_name) {
auto table_ids = boost::copy_range<std::vector<utils::UUID>>(column_families | boost::adaptors::transformed([&] (auto& table_name) {
return db.find_uuid(keyspace, table_name);
}));
// cleanup smaller tables first, to increase chances of success if low on space.
std::ranges::sort(table_ids, std::less<>(), [&] (const table_id& id) {
std::ranges::sort(table_ids, std::less<>(), [&] (const utils::UUID& id) {
return db.find_column_family(id).get_stats().live_disk_space_used;
});
auto& cm = db.get_compaction_manager();
auto owned_ranges_ptr = compaction::make_owned_ranges_ptr(db.get_keyspace_local_ranges(keyspace));
// as a table can be dropped during loop below, let's find it before issuing the cleanup request.
for (auto& id : table_ids) {
replica::table& t = db.find_column_family(id);
co_await cm.perform_cleanup(owned_ranges_ptr, t.as_table_state());
co_await cm.perform_cleanup(db, &t);
}
co_return;
}).then([]{
@@ -682,11 +659,10 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
bool exclude_current_version = req_param<bool>(*req, "exclude_current_version", false);
return ctx.db.invoke_on_all([=] (replica::database& db) {
auto owned_ranges_ptr = compaction::make_owned_ranges_ptr(db.get_keyspace_local_ranges(keyspace));
return do_for_each(column_families, [=, &db](sstring cfname) {
auto& cm = db.get_compaction_manager();
auto& cf = db.find_column_family(keyspace, cfname);
return cm.perform_sstable_upgrade(owned_ranges_ptr, cf.as_table_state(), exclude_current_version);
return cm.perform_sstable_upgrade(db, &cf, exclude_current_version);
});
}).then([]{
return make_ready_future<json::json_return_type>(0);
@@ -696,11 +672,11 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
ss::force_keyspace_flush.set(r, [&ctx](std::unique_ptr<request> req) -> future<json::json_return_type> {
auto keyspace = validate_keyspace(ctx, req->param);
auto column_families = parse_tables(keyspace, ctx, req->query_parameters, "cf");
auto& db = ctx.db;
auto &db = ctx.db.local();
if (column_families.empty()) {
co_await replica::database::flush_keyspace_on_all_shards(db, keyspace);
co_await db.flush_on_all(keyspace);
} else {
co_await replica::database::flush_tables_on_all_shards(db, keyspace, std::move(column_families));
co_await db.flush_on_all(keyspace, std::move(column_families));
}
co_return json_void();
});
@@ -720,23 +696,20 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
});
ss::remove_node.set(r, [&ss](std::unique_ptr<request> req) {
auto host_id = validate_host_id(req->get_query_param("host_id"));
auto host_id = req->get_query_param("host_id");
std::vector<sstring> ignore_nodes_strs= split(req->get_query_param("ignore_nodes"), ",");
auto ignore_nodes = std::list<locator::host_id_or_endpoint>();
auto ignore_nodes = std::list<gms::inet_address>();
for (std::string n : ignore_nodes_strs) {
try {
std::replace(n.begin(), n.end(), '\"', ' ');
std::replace(n.begin(), n.end(), '\'', ' ');
boost::trim_all(n);
if (!n.empty()) {
auto hoep = locator::host_id_or_endpoint(n);
if (!ignore_nodes.empty() && hoep.has_host_id() != ignore_nodes.front().has_host_id()) {
throw std::runtime_error("All nodes should be identified using the same method: either Host IDs or ip addresses.");
}
ignore_nodes.push_back(std::move(hoep));
auto node = gms::inet_address(n);
ignore_nodes.push_back(node);
}
} catch (...) {
throw std::runtime_error(format("Failed to parse ignore_nodes parameter: ignore_nodes={}, node={}: {}", ignore_nodes_strs, n, std::current_exception()));
throw std::runtime_error(format("Failed to parse ignore_nodes parameter: ignore_nodes={}, node={}", ignore_nodes_strs, n));
}
}
return ss.local().removenode(host_id, std::move(ignore_nodes)).then([] {
@@ -777,13 +750,13 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
ss::get_operation_mode.set(r, [&ss](std::unique_ptr<request> req) {
return ss.local().get_operation_mode().then([] (auto mode) {
return make_ready_future<json::json_return_type>(format("{}", mode));
return make_ready_future<json::json_return_type>(mode);
});
});
ss::is_starting.set(r, [&ss](std::unique_ptr<request> req) {
return ss.local().get_operation_mode().then([] (auto mode) {
return make_ready_future<json::json_return_type>(mode <= service::storage_service::mode::STARTING);
return ss.local().is_starting().then([] (auto starting) {
return make_ready_future<json::json_return_type>(starting);
});
});
@@ -812,13 +785,22 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
ss::get_keyspaces.set(r, [&ctx](const_req req) {
auto type = req.get_query_param("type");
if (type == "user") {
return ctx.db.local().get_user_keyspaces();
return ctx.db.local().get_non_system_keyspaces();
} else if (type == "non_local_strategy") {
return ctx.db.local().get_non_local_strategy_keyspaces();
return map_keys(ctx.db.local().get_keyspaces() | boost::adaptors::filtered([](const auto& p) {
return p.second.get_replication_strategy().get_type() != locator::replication_strategy_type::local;
}));
}
return map_keys(ctx.db.local().get_keyspaces());
});
ss::update_snitch.set(r, [](std::unique_ptr<request> req) {
auto ep_snitch_class_name = req->get_query_param("ep_snitch_class_name");
return locator::i_endpoint_snitch::reset_snitch(ep_snitch_class_name).then([] {
return make_ready_future<json::json_return_type>(json_void());
});
});
ss::stop_gossiping.set(r, [&ss](std::unique_ptr<request> req) {
return ss.local().stop_gossiping().then([] {
return make_ready_future<json::json_return_type>(json_void());
@@ -844,13 +826,9 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
return make_ready_future<json::json_return_type>(json_void());
});
ss::is_initialized.set(r, [&ss, &g](std::unique_ptr<request> req) {
return ss.local().get_operation_mode().then([&g] (auto mode) {
bool is_initialized = mode >= service::storage_service::mode::STARTING;
if (mode == service::storage_service::mode::NORMAL) {
is_initialized = g.is_enabled();
}
return make_ready_future<json::json_return_type>(is_initialized);
ss::is_initialized.set(r, [&ss](std::unique_ptr<request> req) {
return ss.local().is_initialized().then([] (bool initialized) {
return make_ready_future<json::json_return_type>(initialized);
});
});
@@ -859,9 +837,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
});
ss::is_joined.set(r, [&ss] (std::unique_ptr<request> req) {
return ss.local().get_operation_mode().then([] (auto mode) {
return make_ready_future<json::json_return_type>(mode >= service::storage_service::mode::JOINING);
});
return make_ready_future<json::json_return_type>(ss.local().is_joined());
});
ss::set_stream_throughput_mb_per_sec.set(r, [](std::unique_ptr<request> req) {
@@ -960,11 +936,11 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
return make_ready_future<json::json_return_type>(res);
});
ss::reset_local_schema.set(r, [&sys_ks](std::unique_ptr<request> req) {
ss::reset_local_schema.set(r, [](std::unique_ptr<request> req) {
// FIXME: We should truncate schema tables if more than one node in the cluster.
auto& sp = service::get_storage_proxy();
auto& fs = sp.local().features();
return db::schema_tables::recalculate_schema_version(sys_ks, sp, fs).then([] {
return db::schema_tables::recalculate_schema_version(sp, fs).then([] {
return make_ready_future<json::json_return_type>(json_void());
});
});
@@ -1030,18 +1006,18 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
}
});
ss::enable_auto_compaction.set(r, [&ctx](std::unique_ptr<request> req) {
ss::enable_auto_compaction.set(r, [&ctx, &ss](std::unique_ptr<request> req) {
auto keyspace = validate_keyspace(ctx, req->param);
auto tables = parse_tables(keyspace, ctx, req->query_parameters, "cf");
return set_tables_autocompaction(ctx, keyspace, tables, true);
return set_tables_autocompaction(ctx, ss.local(), keyspace, tables, true);
});
ss::disable_auto_compaction.set(r, [&ctx](std::unique_ptr<request> req) {
ss::disable_auto_compaction.set(r, [&ctx, &ss](std::unique_ptr<request> req) {
auto keyspace = validate_keyspace(ctx, req->param);
auto tables = parse_tables(keyspace, ctx, req->query_parameters, "cf");
return set_tables_autocompaction(ctx, keyspace, tables, false);
return set_tables_autocompaction(ctx, ss.local(), keyspace, tables, false);
});
ss::deliver_hints.set(r, [](std::unique_ptr<request> req) {
@@ -1190,7 +1166,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
ss::sstable info;
info.timestamp = t;
info.generation = sstables::generation_value(sstable->generation());
info.generation = sstable->generation();
info.level = sstable->get_sstable_level();
info.size = sstable->bytes_on_disk();
info.data_size = sstable->ondisk_data_size();
@@ -1267,13 +1243,6 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
});
}
enum class scrub_status {
successful = 0,
aborted,
unable_to_cancel, // Not used in Scylla, included to ensure compatibility with nodetool api.
validation_errors,
};
void set_snapshot(http_context& ctx, routes& r, sharded<db::snapshot_ctl>& snap_ctl) {
ss::get_snapshot_details.set(r, [&snap_ctl](std::unique_ptr<request> req) {
return snap_ctl.local().get_snapshot_details().then([] (std::unordered_map<sstring, std::vector<db::snapshot_ctl::snapshot_details>>&& result) {
@@ -1312,7 +1281,7 @@ void set_snapshot(http_context& ctx, routes& r, sharded<db::snapshot_ctl>& snap_
});
});
ss::take_snapshot.set(r, [&ctx, &snap_ctl](std::unique_ptr<request> req) -> future<json::json_return_type> {
ss::take_snapshot.set(r, [&snap_ctl](std::unique_ptr<request> req) -> future<json::json_return_type> {
apilog.info("take_snapshot: {}", req->query_parameters);
auto tag = req->get_query_param("tag");
auto column_families = split(req->get_query_param("cf"), ",");
@@ -1330,13 +1299,7 @@ void set_snapshot(http_context& ctx, routes& r, sharded<db::snapshot_ctl>& snap_
if (keynames.size() > 1) {
throw httpd::bad_param_exception("Only one keyspace allowed when specifying a column family");
}
for (const auto& table_name : column_families) {
auto& t = ctx.db.local().find_column_family(keynames[0], table_name);
if (t.schema()->is_view()) {
throw std::invalid_argument("Do not take a snapshot of a materialized view or a secondary index by itself. Run snapshot on the base table instead.");
}
}
co_await snap_ctl.local().take_column_family_snapshot(keynames[0], column_families, tag, db::snapshot_ctl::snap_views::yes, sf);
co_await snap_ctl.local().take_column_family_snapshot(keynames[0], column_families, tag, sf);
}
co_return json_void();
} catch (...) {
@@ -1366,29 +1329,17 @@ void set_snapshot(http_context& ctx, routes& r, sharded<db::snapshot_ctl>& snap_
});
});
ss::scrub.set(r, [&ctx, &snap_ctl] (std::unique_ptr<request> req) {
auto rp = req_params({
{"keyspace", {mandatory::yes}},
{"cf", {""}},
{"scrub_mode", {}},
{"skip_corrupted", {}},
{"disable_snapshot", {}},
{"quarantine_mode", {}},
});
rp.process(*req);
auto keyspace = validate_keyspace(ctx, *rp.get("keyspace"));
auto column_families = parse_tables(keyspace, ctx, *rp.get("cf"));
auto scrub_mode_opt = rp.get("scrub_mode");
ss::scrub.set(r, wrap_ks_cf(ctx, [&snap_ctl] (http_context& ctx, std::unique_ptr<request> req, sstring keyspace, std::vector<sstring> column_families) {
auto scrub_mode = sstables::compaction_type_options::scrub::mode::abort;
if (!scrub_mode_opt) {
const auto skip_corrupted = rp.get_as<bool>("skip_corrupted").value_or(false);
const sstring scrub_mode_str = req_param<sstring>(*req, "scrub_mode", "");
if (scrub_mode_str == "") {
const auto skip_corrupted = req_param<bool>(*req, "skip_corrupted", false);
if (skip_corrupted) {
scrub_mode = sstables::compaction_type_options::scrub::mode::skip;
}
} else {
auto scrub_mode_str = *scrub_mode_opt;
if (scrub_mode_str == "ABORT") {
scrub_mode = sstables::compaction_type_options::scrub::mode::abort;
} else if (scrub_mode_str == "SKIP") {
@@ -1398,7 +1349,7 @@ void set_snapshot(http_context& ctx, routes& r, sharded<db::snapshot_ctl>& snap_
} else if (scrub_mode_str == "VALIDATE") {
scrub_mode = sstables::compaction_type_options::scrub::mode::validate;
} else {
throw httpd::bad_param_exception(fmt::format("Unknown argument for 'scrub_mode' parameter: {}", scrub_mode_str));
throw std::invalid_argument(fmt::format("Unknown argument for 'scrub_mode' parameter: {}", scrub_mode_str));
}
}
@@ -1406,10 +1357,7 @@ void set_snapshot(http_context& ctx, routes& r, sharded<db::snapshot_ctl>& snap_
if (!req_param<bool>(*req, "disable_snapshot", false)) {
auto tag = format("pre-scrub-{:d}", db_clock::now().time_since_epoch().count());
f = parallel_for_each(column_families, [&snap_ctl, keyspace, tag](sstring cf) {
// We always pass here db::snapshot_ctl::snap_views::no since:
// 1. When scrubbing particular tables, there's no need to auto-snapshot their views.
// 2. When scrubbing the whole keyspace, column_families will contain both base tables and views.
return snap_ctl.local().take_column_family_snapshot(keyspace, cf, tag, db::snapshot_ctl::snap_views::no, db::snapshot_ctl::skip_flush::no);
return snap_ctl.local().take_column_family_snapshot(keyspace, cf, tag);
});
}
@@ -1424,39 +1372,20 @@ void set_snapshot(http_context& ctx, routes& r, sharded<db::snapshot_ctl>& snap_
} else if (quarantine_mode_str == "ONLY") {
opts.quarantine_operation_mode = sstables::compaction_type_options::scrub::quarantine_mode::only;
} else {
throw httpd::bad_param_exception(fmt::format("Unknown argument for 'quarantine_mode' parameter: {}", quarantine_mode_str));
throw std::invalid_argument(fmt::format("Unknown argument for 'quarantine_mode' parameter: {}", quarantine_mode_str));
}
const auto& reduce_compaction_stats = [] (const compaction_manager::compaction_stats_opt& lhs, const compaction_manager::compaction_stats_opt& rhs) {
sstables::compaction_stats stats{};
stats += lhs.value();
stats += rhs.value();
return stats;
};
return f.then([&ctx, keyspace, column_families, opts, &reduce_compaction_stats] {
return ctx.db.map_reduce0([=] (replica::database& db) {
return map_reduce(column_families, [=, &db] (sstring cfname) {
return f.then([&ctx, keyspace, column_families, opts] {
return ctx.db.invoke_on_all([=] (replica::database& db) {
return do_for_each(column_families, [=, &db](sstring cfname) {
auto& cm = db.get_compaction_manager();
auto& cf = db.find_column_family(keyspace, cfname);
return cm.perform_sstable_scrub(cf.as_table_state(), opts);
}, std::make_optional(sstables::compaction_stats{}), reduce_compaction_stats);
}, std::make_optional(sstables::compaction_stats{}), reduce_compaction_stats);
}).then_wrapped([] (auto f) {
if (f.failed()) {
auto ex = f.get_exception();
if (try_catch<sstables::compaction_aborted_exception>(ex)) {
return make_ready_future<json::json_return_type>(static_cast<int>(scrub_status::aborted));
} else {
return make_exception_future<json::json_return_type>(std::move(ex));
}
} else if (f.get()->validation_errors) {
return make_ready_future<json::json_return_type>(static_cast<int>(scrub_status::validation_errors));
} else {
return make_ready_future<json::json_return_type>(static_cast<int>(scrub_status::successful));
}
return cm.perform_sstable_scrub(&cf, opts);
});
});
}).then([]{
return make_ready_future<json::json_return_type>(0);
});
});
}));
}
void unset_snapshot(http_context& ctx, routes& r) {

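One of the two parse_tables() variants above treats a missing or empty "cf" parameter as "all tables in the keyspace" and otherwise splits the value on commas, rejecting unknown names. A small standalone sketch of that contract, with a plain list of names standing in for the schema; this is not Scylla's API:
#include <algorithm>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
std::vector<std::string> parse_tables(const std::string& param,
                                      const std::vector<std::string>& all_tables) {
    if (param.empty()) {
        return all_tables;                         // empty selection means every table
    }
    std::vector<std::string> names;
    std::stringstream ss(param);
    std::string name;
    while (std::getline(ss, name, ',')) {
        if (std::find(all_tables.begin(), all_tables.end(), name) == all_tables.end()) {
            throw std::invalid_argument("no such table: " + name);
        }
        names.push_back(name);
    }
    return names;
}
int main() {
    std::vector<std::string> schema{"users", "events", "sessions"};
    for (const auto& t : parse_tables("users,events", schema)) {
        std::cout << t << "\n";                    // users, events
    }
    std::cout << parse_tables("", schema).size() << "\n"; // 3: all tables
}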
View File

@@ -19,7 +19,6 @@ class snapshot_ctl;
namespace view {
class view_builder;
}
class system_keyspace;
}
namespace netw { class messaging_service; }
class repair_service;
@@ -43,7 +42,7 @@ sstring validate_keyspace(http_context& ctx, const parameters& param);
// containing the description of the respective no_such_column_family error.
std::vector<sstring> parse_tables(const sstring& ks_name, http_context& ctx, const std::unordered_map<sstring, sstring>& query_params, sstring param_name);
void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_service>& ss, gms::gossiper& g, sharded<cdc::generation_service>& cdc_gs, sharded<db::system_keyspace>& sys_ks);
void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_service>& ss, gms::gossiper& g, sharded<cdc::generation_service>& cdc_gs);
void set_sstables_loader(http_context& ctx, routes& r, sharded<sstables_loader>& sst_loader);
void unset_sstables_loader(http_context& ctx, routes& r);
void set_view_builder(http_context& ctx, routes& r, sharded<db::view::view_builder>& vb);

View File

@@ -61,16 +61,6 @@ void set_system(http_context& ctx, routes& r) {
return json::json_void();
});
hs::write_log_message.set(r, [](const_req req) {
try {
logging::log_level level = boost::lexical_cast<logging::log_level>(std::string(req.get_query_param("level")));
apilog.log(level, "/system/log: {}", std::string(req.get_query_param("message")));
} catch (boost::bad_lexical_cast& e) {
throw bad_param_exception("Unknown logging level " + req.get_query_param("level"));
}
return json::json_void();
});
hs::drop_sstable_caches.set(r, [&ctx](std::unique_ptr<request> req) {
apilog.info("Dropping sstable caches");
return ctx.db.invoke_on_all([] (replica::database& db) {

View File

@@ -1,164 +0,0 @@
/*
* Copyright (C) 2022-present ScyllaDB
*/
/*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
#include <seastar/core/coroutine.hh>
#include "task_manager.hh"
#include "api/api-doc/task_manager.json.hh"
#include "db/system_keyspace.hh"
#include "column_family.hh"
#include "unimplemented.hh"
#include "storage_service.hh"
#include <utility>
#include <boost/range/adaptors.hpp>
namespace api {
namespace tm = httpd::task_manager_json;
using namespace json;
inline bool filter_tasks(tasks::task_manager::task_ptr task, std::unordered_map<sstring, sstring>& query_params) {
return (!query_params.contains("keyspace") || query_params["keyspace"] == task->get_status().keyspace) &&
(!query_params.contains("table") || query_params["table"] == task->get_status().table);
}
struct full_task_status {
tasks::task_manager::task::status task_status;
tasks::task_manager::task::progress progress;
std::string module;
tasks::task_id parent_id;
tasks::is_abortable abortable;
};
struct task_stats {
task_stats(tasks::task_manager::task_ptr task) : task_id(task->id().to_sstring()), state(task->get_status().state) {}
sstring task_id;
tasks::task_manager::task_state state;
};
tm::task_status make_status(full_task_status status) {
auto start_time = db_clock::to_time_t(status.task_status.start_time);
auto end_time = db_clock::to_time_t(status.task_status.end_time);
::tm st, et;
::gmtime_r(&end_time, &et);
::gmtime_r(&start_time, &st);
tm::task_status res{};
res.id = status.task_status.id.to_sstring();
res.type = status.task_status.type;
res.state = status.task_status.state;
res.is_abortable = bool(status.abortable);
res.start_time = st;
res.end_time = et;
res.error = status.task_status.error;
res.parent_id = status.parent_id.to_sstring();
res.sequence_number = status.task_status.sequence_number;
res.shard = status.task_status.shard;
res.keyspace = status.task_status.keyspace;
res.table = status.task_status.table;
res.entity = status.task_status.entity;
res.progress_units = status.task_status.progress_units;
res.progress_total = status.progress.total;
res.progress_completed = status.progress.completed;
return res;
}
future<json::json_return_type> retrieve_status(tasks::task_manager::foreign_task_ptr task) {
if (task.get() == nullptr) {
co_return coroutine::return_exception(httpd::bad_param_exception("Task not found"));
}
auto progress = co_await task->get_progress();
full_task_status s;
s.task_status = task->get_status();
s.parent_id = task->get_parent_id();
s.abortable = task->is_abortable();
s.module = task->get_module_name();
s.progress.completed = progress.completed;
s.progress.total = progress.total;
co_return make_status(s);
}
void set_task_manager(http_context& ctx, routes& r) {
tm::get_modules.set(r, [&ctx] (std::unique_ptr<request> req) -> future<json::json_return_type> {
std::vector<std::string> v = boost::copy_range<std::vector<std::string>>(ctx.tm.local().get_modules() | boost::adaptors::map_keys);
co_return v;
});
tm::get_tasks.set(r, [&ctx] (std::unique_ptr<request> req) -> future<json::json_return_type> {
using chunked_stats = utils::chunked_vector<task_stats>;
auto internal = tasks::is_internal{req_param<bool>(*req, "internal", false)};
std::vector<chunked_stats> res = co_await ctx.tm.map([&req, internal] (tasks::task_manager& tm) {
chunked_stats local_res;
auto module = tm.find_module(req->param["module"]);
const auto& filtered_tasks = module->get_tasks() | boost::adaptors::filtered([&params = req->query_parameters, internal] (const auto& task) {
return (internal || !task.second->is_internal()) && filter_tasks(task.second, params);
});
for (auto& [task_id, task] : filtered_tasks) {
local_res.push_back(task_stats{task});
}
return local_res;
});
std::function<future<>(output_stream<char>&&)> f = [r = std::move(res)] (output_stream<char>&& os) -> future<> {
auto s = std::move(os);
auto res = std::move(r);
co_await s.write("[");
std::string delim = "";
for (auto& v: res) {
for (auto& stats: v) {
co_await s.write(std::exchange(delim, ", "));
tm::task_stats ts;
ts = stats;
co_await formatter::write(s, ts);
}
}
co_await s.write("]");
co_await s.close();
};
co_return std::move(f);
});
tm::get_task_status.set(r, [&ctx] (std::unique_ptr<request> req) -> future<json::json_return_type> {
auto id = tasks::task_id{utils::UUID{req->param["task_id"]}};
auto task = co_await tasks::task_manager::invoke_on_task(ctx.tm, id, std::function([] (tasks::task_manager::task_ptr task) -> future<tasks::task_manager::foreign_task_ptr> {
auto state = task->get_status().state;
if (state == tasks::task_manager::task_state::done || state == tasks::task_manager::task_state::failed) {
task->unregister_task();
}
co_return std::move(task);
}));
co_return co_await retrieve_status(std::move(task));
});
tm::abort_task.set(r, [&ctx] (std::unique_ptr<request> req) -> future<json::json_return_type> {
auto id = tasks::task_id{utils::UUID{req->param["task_id"]}};
co_await tasks::task_manager::invoke_on_task(ctx.tm, id, [] (tasks::task_manager::task_ptr task) -> future<> {
if (!task->is_abortable()) {
co_await coroutine::return_exception(std::runtime_error("Requested task cannot be aborted"));
}
co_await task->abort();
});
co_return json_void();
});
tm::wait_task.set(r, [&ctx] (std::unique_ptr<request> req) -> future<json::json_return_type> {
auto id = tasks::task_id{utils::UUID{req->param["task_id"]}};
auto task = co_await tasks::task_manager::invoke_on_task(ctx.tm, id, std::function([] (tasks::task_manager::task_ptr task) {
return task->done().then_wrapped([task] (auto f) {
task->unregister_task();
f.get();
return make_foreign(task);
});
}));
co_return co_await retrieve_status(std::move(task));
});
}
}
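get_tasks above streams the collected task stats as a single JSON array: it writes "[", then each element preceded by a delimiter that std::exchange swaps from "" to ", " after the first write, then "]". The same pattern in isolation:
#include <iostream>
#include <string>
#include <utility>
#include <vector>
int main() {
    std::vector<std::string> ids{"t1", "t2", "t3"};
    std::string delim = "";
    std::cout << "[";
    for (const auto& id : ids) {
        // The first iteration prints nothing before the element, later ones ", ".
        std::cout << std::exchange(delim, ", ") << "\"" << id << "\"";
    }
    std::cout << "]\n";    // ["t1", "t2", "t3"]
}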

View File

@@ -1,17 +0,0 @@
/*
* Copyright (C) 2022-present ScyllaDB
*/
/*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
#pragma once
#include "api.hh"
namespace api {
void set_task_manager(http_context& ctx, routes& r);
}

View File

@@ -1,109 +0,0 @@
/*
* Copyright (C) 2022-present ScyllaDB
*/
/*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
#ifndef SCYLLA_BUILD_MODE_RELEASE
#include <seastar/core/coroutine.hh>
#include "task_manager_test.hh"
#include "api/api-doc/task_manager_test.json.hh"
#include "tasks/test_module.hh"
namespace api {
namespace tmt = httpd::task_manager_test_json;
using namespace json;
void set_task_manager_test(http_context& ctx, routes& r, db::config& cfg) {
tmt::register_test_module.set(r, [&ctx] (std::unique_ptr<request> req) -> future<json::json_return_type> {
co_await ctx.tm.invoke_on_all([] (tasks::task_manager& tm) {
auto m = make_shared<tasks::test_module>(tm);
tm.register_module("test", m);
});
co_return json_void();
});
tmt::unregister_test_module.set(r, [&ctx] (std::unique_ptr<request> req) -> future<json::json_return_type> {
co_await ctx.tm.invoke_on_all([] (tasks::task_manager& tm) -> future<> {
auto module_name = "test";
auto module = tm.find_module(module_name);
co_await module->stop();
});
co_return json_void();
});
tmt::register_test_task.set(r, [&ctx] (std::unique_ptr<request> req) -> future<json::json_return_type> {
sharded<tasks::task_manager>& tms = ctx.tm;
auto it = req->query_parameters.find("task_id");
auto id = it != req->query_parameters.end() ? tasks::task_id{utils::UUID{it->second}} : tasks::task_id::create_null_id();
it = req->query_parameters.find("shard");
unsigned shard = it != req->query_parameters.end() ? boost::lexical_cast<unsigned>(it->second) : 0;
it = req->query_parameters.find("keyspace");
std::string keyspace = it != req->query_parameters.end() ? it->second : "";
it = req->query_parameters.find("table");
std::string table = it != req->query_parameters.end() ? it->second : "";
it = req->query_parameters.find("type");
std::string type = it != req->query_parameters.end() ? it->second : "";
it = req->query_parameters.find("entity");
std::string entity = it != req->query_parameters.end() ? it->second : "";
it = req->query_parameters.find("parent_id");
tasks::task_info data;
if (it != req->query_parameters.end()) {
data.id = tasks::task_id{utils::UUID{it->second}};
auto parent_ptr = co_await tasks::task_manager::lookup_task_on_all_shards(ctx.tm, data.id);
data.shard = parent_ptr->get_status().shard;
}
auto module = tms.local().find_module("test");
id = co_await module->make_task<tasks::test_task_impl>(shard, id, keyspace, table, type, entity, data);
co_await tms.invoke_on(shard, [id] (tasks::task_manager& tm) {
auto it = tm.get_all_tasks().find(id);
if (it != tm.get_all_tasks().end()) {
it->second->start();
}
});
co_return id.to_sstring();
});
tmt::unregister_test_task.set(r, [&ctx] (std::unique_ptr<request> req) -> future<json::json_return_type> {
auto id = tasks::task_id{utils::UUID{req->query_parameters["task_id"]}};
co_await tasks::task_manager::invoke_on_task(ctx.tm, id, [] (tasks::task_manager::task_ptr task) -> future<> {
tasks::test_task test_task{task};
co_await test_task.unregister_task();
});
co_return json_void();
});
tmt::finish_test_task.set(r, [&ctx] (std::unique_ptr<request> req) -> future<json::json_return_type> {
auto id = tasks::task_id{utils::UUID{req->param["task_id"]}};
auto it = req->query_parameters.find("error");
bool fail = it != req->query_parameters.end();
std::string error = fail ? it->second : "";
co_await tasks::task_manager::invoke_on_task(ctx.tm, id, [fail, error = std::move(error)] (tasks::task_manager::task_ptr task) {
tasks::test_task test_task{task};
if (fail) {
test_task.finish_failed(std::make_exception_ptr(std::runtime_error(error)));
} else {
test_task.finish();
}
return make_ready_future<>();
});
co_return json_void();
});
tmt::get_and_update_ttl.set(r, [&ctx, &cfg] (std::unique_ptr<request> req) -> future<json::json_return_type> {
uint32_t ttl = cfg.task_ttl_seconds();
cfg.task_ttl_seconds.set(boost::lexical_cast<uint32_t>(req->query_parameters["ttl"]));
co_return json::json_return_type(ttl);
});
}
}
#endif

View File

@@ -1,22 +0,0 @@
/*
* Copyright (C) 2022-present ScyllaDB
*/
/*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
#ifndef SCYLLA_BUILD_MODE_RELEASE
#pragma once
#include "api.hh"
#include "db/config.hh"
namespace api {
void set_task_manager_test(http_context& ctx, routes& r, db::config& cfg);
}
#endif

View File

@@ -1,3 +1,6 @@
/*
*/
/*
* Copyright (C) 2016-present ScyllaDB
*

View File

@@ -1,3 +1,6 @@
/*
*/
/*
* Copyright (C) 2016-present ScyllaDB
*

View File

@@ -1,3 +1,6 @@
/*
*/
/*
* Copyright (C) 2016-present ScyllaDB
*

View File

@@ -1,3 +1,6 @@
/*
*/
/*
* Copyright (C) 2016-present ScyllaDB
*

View File

@@ -1,3 +1,6 @@
/*
*/
/*
* Copyright (C) 2016-present ScyllaDB
*

View File

@@ -1,3 +1,6 @@
/*
*/
/*
* Copyright (C) 2016-present ScyllaDB
*
@@ -74,7 +77,7 @@ future<bool> default_authorizer::any_granted() const {
query,
db::consistency_level::LOCAL_ONE,
{},
cql3::query_processor::cache_internal::yes).then([this](::shared_ptr<cql3::untyped_result_set> results) {
true).then([this](::shared_ptr<cql3::untyped_result_set> results) {
return !results->empty();
});
}
@@ -85,8 +88,7 @@ future<> default_authorizer::migrate_legacy_metadata() const {
return _qp.execute_internal(
query,
db::consistency_level::LOCAL_ONE,
cql3::query_processor::cache_internal::no).then([this](::shared_ptr<cql3::untyped_result_set> results) {
db::consistency_level::LOCAL_ONE).then([this](::shared_ptr<cql3::untyped_result_set> results) {
return do_for_each(*results, [this](const cql3::untyped_result_set_row& row) {
return do_with(
row.get_as<sstring>("username"),
@@ -166,8 +168,7 @@ default_authorizer::authorize(const role_or_anonymous& maybe_role, const resourc
return _qp.execute_internal(
query,
db::consistency_level::LOCAL_ONE,
{*maybe_role.name, r.name()},
cql3::query_processor::cache_internal::yes).then([](::shared_ptr<cql3::untyped_result_set> results) {
{*maybe_role.name, r.name()}).then([](::shared_ptr<cql3::untyped_result_set> results) {
if (results->empty()) {
return permissions::NONE;
}
@@ -196,8 +197,7 @@ default_authorizer::modify(
query,
db::consistency_level::ONE,
internal_distributed_query_state(),
{permissions::to_strings(set), sstring(role_name), resource.name()},
cql3::query_processor::cache_internal::no).discard_result();
{permissions::to_strings(set), sstring(role_name), resource.name()}).discard_result();
});
}
@@ -223,7 +223,7 @@ future<std::vector<permission_details>> default_authorizer::list_all() const {
db::consistency_level::ONE,
internal_distributed_query_state(),
{},
cql3::query_processor::cache_internal::yes).then([](::shared_ptr<cql3::untyped_result_set> results) {
true).then([](::shared_ptr<cql3::untyped_result_set> results) {
std::vector<permission_details> all_details;
for (const auto& row : *results) {
@@ -249,8 +249,7 @@ future<> default_authorizer::revoke_all(std::string_view role_name) const {
query,
db::consistency_level::ONE,
internal_distributed_query_state(),
{sstring(role_name)},
cql3::query_processor::cache_internal::no).discard_result().handle_exception([role_name](auto ep) {
{sstring(role_name)}).discard_result().handle_exception([role_name](auto ep) {
try {
std::rethrow_exception(ep);
} catch (exceptions::request_execution_exception& e) {
@@ -269,8 +268,7 @@ future<> default_authorizer::revoke_all(const resource& resource) const {
return _qp.execute_internal(
query,
db::consistency_level::LOCAL_ONE,
{resource.name()},
cql3::query_processor::cache_internal::no).then_wrapped([this, resource](future<::shared_ptr<cql3::untyped_result_set>> f) {
{resource.name()}).then_wrapped([this, resource](future<::shared_ptr<cql3::untyped_result_set>> f) {
try {
auto res = f.get0();
return parallel_for_each(
@@ -286,8 +284,7 @@ future<> default_authorizer::revoke_all(const resource& resource) const {
return _qp.execute_internal(
query,
db::consistency_level::LOCAL_ONE,
{r.get_as<sstring>(ROLE_NAME), resource.name()},
cql3::query_processor::cache_internal::no).discard_result().handle_exception(
{r.get_as<sstring>(ROLE_NAME), resource.name()}).discard_result().handle_exception(
[resource](auto ep) {
try {
std::rethrow_exception(ep);
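The change repeated throughout this file (and in the other auth files below) replaces the trailing cql3::query_processor::cache_internal::yes / cache_internal::no argument of execute_internal() with a plain bool or drops it. The named flag exists to keep such call sites self-documenting; a minimal sketch of that style, not the actual Scylla definition, is:

// Sketch only: a bool-like named flag for statement caching.
enum class cache_internal : bool { no = false, yes = true };

// A call written as
//     qp.execute_internal(query, cl, state, values, cache_internal::yes);
// states its intent, while a bare `true` in the same position does not.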

View File

@@ -1,3 +1,6 @@
/*
*/
/*
* Copyright (C) 2016-present ScyllaDB
*

View File

@@ -1,3 +1,6 @@
/*
*/
/*
* Copyright (C) 2016-present ScyllaDB
*
@@ -84,8 +87,7 @@ future<> password_authenticator::migrate_legacy_metadata() const {
return _qp.execute_internal(
query,
db::consistency_level::QUORUM,
internal_distributed_query_state(),
cql3::query_processor::cache_internal::no).then([this](::shared_ptr<cql3::untyped_result_set> results) {
internal_distributed_query_state()).then([this](::shared_ptr<cql3::untyped_result_set> results) {
return do_for_each(*results, [this](const cql3::untyped_result_set_row& row) {
auto username = row.get_as<sstring>("username");
auto salted_hash = row.get_as<sstring>(SALTED_HASH);
@@ -94,8 +96,7 @@ future<> password_authenticator::migrate_legacy_metadata() const {
update_row_query(),
consistency_for_user(username),
internal_distributed_query_state(),
{std::move(salted_hash), username},
cql3::query_processor::cache_internal::no).discard_result();
{std::move(salted_hash), username}).discard_result();
}).finally([results] {});
}).then([] {
plogger.info("Finished migrating legacy authentication metadata.");
@@ -112,8 +113,7 @@ future<> password_authenticator::create_default_if_missing() const {
update_row_query(),
db::consistency_level::QUORUM,
internal_distributed_query_state(),
{passwords::hash(DEFAULT_USER_PASSWORD, rng_for_salt), DEFAULT_USER_NAME},
cql3::query_processor::cache_internal::no).then([](auto&&) {
{passwords::hash(DEFAULT_USER_PASSWORD, rng_for_salt), DEFAULT_USER_NAME}).then([](auto&&) {
plogger.info("Created default superuser authentication record.");
});
}
@@ -211,7 +211,7 @@ future<authenticated_user> password_authenticator::authenticate(
consistency_for_user(username),
internal_distributed_query_state(),
{username},
cql3::query_processor::cache_internal::yes);
true);
}).then_wrapped([=](future<::shared_ptr<cql3::untyped_result_set>> f) {
try {
auto res = f.get0();
@@ -244,8 +244,7 @@ future<> password_authenticator::create(std::string_view role_name, const authen
update_row_query(),
consistency_for_user(role_name),
internal_distributed_query_state(),
{passwords::hash(*options.password, rng_for_salt), sstring(role_name)},
cql3::query_processor::cache_internal::no).discard_result();
{passwords::hash(*options.password, rng_for_salt), sstring(role_name)}).discard_result();
}
future<> password_authenticator::alter(std::string_view role_name, const authentication_options& options) const {
@@ -262,8 +261,7 @@ future<> password_authenticator::alter(std::string_view role_name, const authent
query,
consistency_for_user(role_name),
internal_distributed_query_state(),
{passwords::hash(*options.password, rng_for_salt), sstring(role_name)},
cql3::query_processor::cache_internal::no).discard_result();
{passwords::hash(*options.password, rng_for_salt), sstring(role_name)}).discard_result();
}
future<> password_authenticator::drop(std::string_view name) const {
@@ -275,8 +273,7 @@ future<> password_authenticator::drop(std::string_view name) const {
return _qp.execute_internal(
query, consistency_for_user(name),
internal_distributed_query_state(),
{sstring(name)},
cql3::query_processor::cache_internal::no).discard_result();
{sstring(name)}).discard_result();
}
future<custom_options> password_authenticator::query_custom_options(std::string_view role_name) const {

View File

@@ -1,3 +1,6 @@
/*
*/
/*
* Copyright (C) 2016-present ScyllaDB
*

View File

@@ -1,3 +1,6 @@
/*
*/
/*
* Copyright (C) 2016-present ScyllaDB
*

View File

@@ -1,3 +1,6 @@
/*
*/
/*
* Copyright (C) 2016-present ScyllaDB
*

View File

@@ -14,21 +14,13 @@
namespace auth {
permissions_cache::permissions_cache(const utils::loading_cache_config& c, service& ser, logging::logger& log)
: _cache(c, log, [&ser, &log](const key_type& k) {
permissions_cache::permissions_cache(const permissions_cache_config& c, service& ser, logging::logger& log)
: _cache(c.max_entries, c.validity_period, c.update_period, log, [&ser, &log](const key_type& k) {
log.debug("Refreshing permissions for {}", k.first);
return ser.get_uncached_permissions(k.first, k.second);
}) {
}
bool permissions_cache::update_config(utils::loading_cache_config c) {
return _cache.update_config(std::move(c));
}
void permissions_cache::reset() {
_cache.reset();
}
future<permission_set> permissions_cache::get(const role_or_anonymous& maybe_role, const resource& r) {
return do_with(key_type(maybe_role, r), [this](const auto& k) {
return _cache.get(k);

View File

@@ -44,6 +44,12 @@ namespace auth {
class service;
struct permissions_cache_config final {
std::size_t max_entries;
std::chrono::milliseconds validity_period;
std::chrono::milliseconds update_period;
};
class permissions_cache final {
using cache_type = utils::loading_cache<
std::pair<role_or_anonymous, resource>,
@@ -58,14 +64,12 @@ class permissions_cache final {
cache_type _cache;
public:
explicit permissions_cache(const utils::loading_cache_config&, service&, logging::logger&);
explicit permissions_cache(const permissions_cache_config&, service&, logging::logger&);
future <> stop() {
return _cache.stop();
}
bool update_config(utils::loading_cache_config);
void reset();
future<permission_set> get(const role_or_anonymous&, const resource&);
};
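For illustration, the struct introduced in this hunk can be mirrored standalone; the values below are hypothetical defaults, while the real ones come from the permissions_cache_max_entries, permissions_validity_in_ms and permissions_update_interval_in_ms options visible in the auth::service diff further down.

#include <chrono>
#include <cstddef>

// Standalone mirror of the permissions_cache_config struct above, for illustration only.
struct permissions_cache_config {
    std::size_t max_entries;
    std::chrono::milliseconds validity_period;
    std::chrono::milliseconds update_period;
};

// Hypothetical values; the actual defaults are defined in db::config.
permissions_cache_config make_example_permissions_cache_config() {
    return {
        .max_entries = 1000,
        .validity_period = std::chrono::milliseconds(2000),
        .update_period = std::chrono::milliseconds(2000),
    };
}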

View File

@@ -1,3 +1,6 @@
/*
*/
/*
* Copyright (C) 2016-present ScyllaDB
*

View File

@@ -1,3 +1,6 @@
/*
*/
/*
* Copyright (C) 2016-present ScyllaDB
*

View File

@@ -56,14 +56,14 @@ future<bool> default_role_row_satisfies(
query,
db::consistency_level::ONE,
{meta::DEFAULT_SUPERUSER_NAME},
cql3::query_processor::cache_internal::yes).then([&qp, &p](::shared_ptr<cql3::untyped_result_set> results) {
true).then([&qp, &p](::shared_ptr<cql3::untyped_result_set> results) {
if (results->empty()) {
return qp.execute_internal(
query,
db::consistency_level::QUORUM,
internal_distributed_query_state(),
{meta::DEFAULT_SUPERUSER_NAME},
cql3::query_processor::cache_internal::yes).then([&p](::shared_ptr<cql3::untyped_result_set> results) {
true).then([&p](::shared_ptr<cql3::untyped_result_set> results) {
if (results->empty()) {
return make_ready_future<bool>(false);
}
@@ -86,8 +86,7 @@ future<bool> any_nondefault_role_row_satisfies(
return qp.execute_internal(
query,
db::consistency_level::QUORUM,
internal_distributed_query_state(),
cql3::query_processor::cache_internal::no).then([&p](::shared_ptr<cql3::untyped_result_set> results) {
internal_distributed_query_state()).then([&p](::shared_ptr<cql3::untyped_result_set> results) {
if (results->empty()) {
return false;
}

View File

@@ -1,3 +1,6 @@
/*
*/
/*
* Copyright (C) 2019-present ScyllaDB
*

View File

@@ -1,3 +1,6 @@
/*
*/
/*
* Copyright (C) 2019-present ScyllaDB
*

View File

@@ -22,7 +22,6 @@
#include "auth/role_or_anonymous.hh"
#include "cql3/query_processor.hh"
#include "cql3/untyped_result_set.hh"
#include "db/config.hh"
#include "db/consistency_level_type.hh"
#include "exceptions/exceptions.hh"
#include "log.hh"
@@ -101,28 +100,23 @@ static future<> validate_role_exists(const service& ser, std::string_view role_n
}
service::service(
utils::loading_cache_config c,
permissions_cache_config c,
cql3::query_processor& qp,
::service::migration_notifier& mn,
std::unique_ptr<authorizer> z,
std::unique_ptr<authenticator> a,
std::unique_ptr<role_manager> r)
: _loading_cache_config(std::move(c))
: _permissions_cache_config(std::move(c))
, _permissions_cache(nullptr)
, _qp(qp)
, _mnotifier(mn)
, _authorizer(std::move(z))
, _authenticator(std::move(a))
, _role_manager(std::move(r))
, _migration_listener(std::make_unique<auth_migration_listener>(*_authorizer))
, _permissions_cache_cfg_cb([this] (uint32_t) { (void) _permissions_cache_config_action.trigger_later(); })
, _permissions_cache_config_action([this] { update_cache_config(); return make_ready_future<>(); })
, _permissions_cache_max_entries_observer(_qp.db().get_config().permissions_cache_max_entries.observe(_permissions_cache_cfg_cb))
, _permissions_cache_update_interval_in_ms_observer(_qp.db().get_config().permissions_update_interval_in_ms.observe(_permissions_cache_cfg_cb))
, _permissions_cache_validity_in_ms_observer(_qp.db().get_config().permissions_validity_in_ms.observe(_permissions_cache_cfg_cb)) {}
, _migration_listener(std::make_unique<auth_migration_listener>(*_authorizer)) {}
service::service(
utils::loading_cache_config c,
permissions_cache_config c,
cql3::query_processor& qp,
::service::migration_notifier& mn,
::service::migration_manager& mm,
@@ -166,7 +160,7 @@ future<> service::start(::service::migration_manager& mm) {
return when_all_succeed(_authorizer->start(), _authenticator->start()).discard_result();
});
}).then([this] {
_permissions_cache = std::make_unique<permissions_cache>(_loading_cache_config, *this, log);
_permissions_cache = std::make_unique<permissions_cache>(_permissions_cache_config, *this, log);
}).then([this] {
return once_among_shards([this] {
_mnotifier.register_listener(_migration_listener.get());
@@ -188,24 +182,6 @@ future<> service::stop() {
});
}
void service::update_cache_config() {
auto db = _qp.db();
utils::loading_cache_config perm_cache_config;
perm_cache_config.max_size = db.get_config().permissions_cache_max_entries();
perm_cache_config.expiry = std::chrono::milliseconds(db.get_config().permissions_validity_in_ms());
perm_cache_config.refresh = std::chrono::milliseconds(db.get_config().permissions_update_interval_in_ms());
if (!_permissions_cache->update_config(std::move(perm_cache_config))) {
log.error("Failed to apply permissions cache changes. Please read the documentation of these parameters");
}
}
void service::reset_authorization_cache() {
_permissions_cache->reset();
_qp.reset_cache();
}
future<bool> service::has_existing_legacy_users() const {
if (!_qp.db().has_schema(meta::AUTH_KS, meta::USERS_CF)) {
return make_ready_future<bool>(false);
@@ -227,7 +203,7 @@ future<bool> service::has_existing_legacy_users() const {
default_user_query,
db::consistency_level::ONE,
{meta::DEFAULT_SUPERUSER_NAME},
cql3::query_processor::cache_internal::yes).then([this](auto results) {
true).then([this](auto results) {
if (!results->empty()) {
return make_ready_future<bool>(true);
}
@@ -236,15 +212,14 @@ future<bool> service::has_existing_legacy_users() const {
default_user_query,
db::consistency_level::QUORUM,
{meta::DEFAULT_SUPERUSER_NAME},
cql3::query_processor::cache_internal::yes).then([this](auto results) {
true).then([this](auto results) {
if (!results->empty()) {
return make_ready_future<bool>(true);
}
return _qp.execute_internal(
all_users_query,
db::consistency_level::QUORUM,
cql3::query_processor::cache_internal::no).then([](auto results) {
db::consistency_level::QUORUM).then([](auto results) {
return make_ready_future<bool>(!results->empty());
});
});
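The hunks above also drop the live-reconfiguration path (update_cache_config() plus the per-option observers wired to a serialized_action). Stripped of the Scylla-specific types, the observe-and-rebuild shape looks roughly like the sketch below; this illustrates the pattern only, not the removed implementation.

#include <functional>
#include <vector>

// Generic sketch: each live-updatable option notifies registered callbacks,
// which can then rebuild the dependent configuration as a whole.
template <typename T>
class observable_option {
    T _value;
    std::vector<std::function<void(const T&)>> _observers;
public:
    explicit observable_option(T v) : _value(std::move(v)) {}
    const T& get() const { return _value; }
    void set(T v) {
        _value = std::move(v);
        for (auto& cb : _observers) {
            cb(_value);
        }
    }
    void observe(std::function<void(const T&)> cb) { _observers.push_back(std::move(cb)); }
};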

View File

@@ -23,8 +23,6 @@
#include "auth/permissions_cache.hh"
#include "auth/role_manager.hh"
#include "seastarx.hh"
#include "utils/observable.hh"
#include "utils/serialized_action.hh"
namespace cql3 {
class query_processor;
@@ -70,7 +68,7 @@ public:
/// peering_sharded_service inheritance is needed to be able to access shard local authentication service
/// given an object from another shard. Used for bouncing lwt requests to correct shard.
class service final : public seastar::peering_sharded_service<service> {
utils::loading_cache_config _loading_cache_config;
permissions_cache_config _permissions_cache_config;
std::unique_ptr<permissions_cache> _permissions_cache;
cql3::query_processor& _qp;
@@ -86,16 +84,9 @@ class service final : public seastar::peering_sharded_service<service> {
// Only one of these should be registered, so we end up with some unused instances. Not the end of the world.
std::unique_ptr<::service::migration_listener> _migration_listener;
std::function<void(uint32_t)> _permissions_cache_cfg_cb;
serialized_action _permissions_cache_config_action;
utils::observer<uint32_t> _permissions_cache_max_entries_observer;
utils::observer<uint32_t> _permissions_cache_update_interval_in_ms_observer;
utils::observer<uint32_t> _permissions_cache_validity_in_ms_observer;
public:
service(
utils::loading_cache_config,
permissions_cache_config,
cql3::query_processor&,
::service::migration_notifier&,
std::unique_ptr<authorizer>,
@@ -108,7 +99,7 @@ public:
/// of the instances themselves.
///
service(
utils::loading_cache_config,
permissions_cache_config,
cql3::query_processor&,
::service::migration_notifier&,
::service::migration_manager&,
@@ -118,10 +109,6 @@ public:
future<> stop();
void update_cache_config();
void reset_authorization_cache();
///
/// \returns an exceptional future with \ref nonexistant_role if the named role does not exist.
///

View File

@@ -95,7 +95,7 @@ static future<std::optional<record>> find_record(cql3::query_processor& qp, std:
consistency_for_role(role_name),
internal_distributed_query_state(),
{sstring(role_name)},
cql3::query_processor::cache_internal::yes).then([](::shared_ptr<cql3::untyped_result_set> results) {
true).then([](::shared_ptr<cql3::untyped_result_set> results) {
if (results->empty()) {
return std::optional<record>();
}
@@ -178,8 +178,7 @@ future<> standard_role_manager::create_default_role_if_missing() const {
query,
db::consistency_level::QUORUM,
internal_distributed_query_state(),
{meta::DEFAULT_SUPERUSER_NAME},
cql3::query_processor::cache_internal::no).then([](auto&&) {
{meta::DEFAULT_SUPERUSER_NAME}).then([](auto&&) {
log.info("Created default superuser role '{}'.", meta::DEFAULT_SUPERUSER_NAME);
return make_ready_future<>();
});
@@ -205,8 +204,7 @@ future<> standard_role_manager::migrate_legacy_metadata() const {
return _qp.execute_internal(
query,
db::consistency_level::QUORUM,
internal_distributed_query_state(),
cql3::query_processor::cache_internal::no).then([this](::shared_ptr<cql3::untyped_result_set> results) {
internal_distributed_query_state()).then([this](::shared_ptr<cql3::untyped_result_set> results) {
return do_for_each(*results, [this](const cql3::untyped_result_set_row& row) {
role_config config;
config.is_superuser = row.get_or<bool>("super", false);
@@ -269,7 +267,7 @@ future<> standard_role_manager::create_or_replace(std::string_view role_name, co
consistency_for_role(role_name),
internal_distributed_query_state(),
{sstring(role_name), c.is_superuser, c.can_login},
cql3::query_processor::cache_internal::yes).discard_result();
true).discard_result();
}
future<>
@@ -311,8 +309,7 @@ standard_role_manager::alter(std::string_view role_name, const role_config_updat
meta::roles_table::role_col_name),
consistency_for_role(role_name),
internal_distributed_query_state(),
{sstring(role_name)},
cql3::query_processor::cache_internal::no).discard_result();
{sstring(role_name)}).discard_result();
});
}
@@ -331,8 +328,7 @@ future<> standard_role_manager::drop(std::string_view role_name) {
query,
consistency_for_role(role_name),
internal_distributed_query_state(),
{sstring(role_name)},
cql3::query_processor::cache_internal::no).then([this, role_name](::shared_ptr<cql3::untyped_result_set> members) {
{sstring(role_name)}).then([this, role_name](::shared_ptr<cql3::untyped_result_set> members) {
return parallel_for_each(
members->begin(),
members->end(),
@@ -364,7 +360,7 @@ future<> standard_role_manager::drop(std::string_view role_name) {
// Delete all attributes for that role
const auto remove_attributes_of = [this, role_name] {
static const sstring query = format("DELETE FROM {} WHERE role = ?", meta::role_attributes_table::qualified_name());
return _qp.execute_internal(query, {sstring(role_name)}, cql3::query_processor::cache_internal::yes).discard_result();
return _qp.execute_internal(query, {sstring(role_name)}).discard_result();
};
// Finally, delete the role itself.
@@ -377,8 +373,7 @@ future<> standard_role_manager::drop(std::string_view role_name) {
query,
consistency_for_role(role_name),
internal_distributed_query_state(),
{sstring(role_name)},
cql3::query_processor::cache_internal::no).discard_result();
{sstring(role_name)}).discard_result();
};
return when_all_succeed(revoke_from_members(), revoke_members_of(),
@@ -406,8 +401,7 @@ standard_role_manager::modify_membership(
query,
consistency_for_role(grantee_name),
internal_distributed_query_state(),
{role_set{sstring(role_name)}, sstring(grantee_name)},
cql3::query_processor::cache_internal::no).discard_result();
{role_set{sstring(role_name)}, sstring(grantee_name)}).discard_result();
};
const auto modify_role_members = [this, role_name, grantee_name, ch] {
@@ -418,8 +412,7 @@ standard_role_manager::modify_membership(
meta::role_members_table::qualified_name),
consistency_for_role(role_name),
internal_distributed_query_state(),
{sstring(role_name), sstring(grantee_name)},
cql3::query_processor::cache_internal::no).discard_result();
{sstring(role_name), sstring(grantee_name)}).discard_result();
case membership_change::remove:
return _qp.execute_internal(
@@ -427,8 +420,7 @@ standard_role_manager::modify_membership(
meta::role_members_table::qualified_name),
consistency_for_role(role_name),
internal_distributed_query_state(),
{sstring(role_name), sstring(grantee_name)},
cql3::query_processor::cache_internal::no).discard_result();
{sstring(role_name), sstring(grantee_name)}).discard_result();
}
return make_ready_future<>();
@@ -530,8 +522,7 @@ future<role_set> standard_role_manager::query_all() {
return _qp.execute_internal(
query,
db::consistency_level::QUORUM,
internal_distributed_query_state(),
cql3::query_processor::cache_internal::yes).then([](::shared_ptr<cql3::untyped_result_set> results) {
internal_distributed_query_state()).then([](::shared_ptr<cql3::untyped_result_set> results) {
role_set roles;
std::transform(
@@ -566,7 +557,7 @@ future<bool> standard_role_manager::can_login(std::string_view role_name) {
future<std::optional<sstring>> standard_role_manager::get_attribute(std::string_view role_name, std::string_view attribute_name) {
static const sstring query = format("SELECT name, value FROM {} WHERE role = ? AND name = ?", meta::role_attributes_table::qualified_name());
return _qp.execute_internal(query, {sstring(role_name), sstring(attribute_name)}, cql3::query_processor::cache_internal::yes).then([] (shared_ptr<cql3::untyped_result_set> result_set) {
return _qp.execute_internal(query, {sstring(role_name), sstring(attribute_name)}).then([] (shared_ptr<cql3::untyped_result_set> result_set) {
if (!result_set->empty()) {
const cql3::untyped_result_set_row &row = result_set->one();
return std::optional<sstring>(row.get_as<sstring>("value"));
@@ -599,7 +590,7 @@ future<> standard_role_manager::set_attribute(std::string_view role_name, std::s
if (!role_exists) {
throw auth::nonexistant_role(role_name);
}
return _qp.execute_internal(query, {sstring(role_name), sstring(attribute_name), sstring(attribute_value)}, cql3::query_processor::cache_internal::yes).discard_result();
return _qp.execute_internal(query, {sstring(role_name), sstring(attribute_name), sstring(attribute_value)}).discard_result();
});
});
@@ -612,7 +603,7 @@ future<> standard_role_manager::remove_attribute(std::string_view role_name, std
if (!role_exists) {
throw auth::nonexistant_role(role_name);
}
return _qp.execute_internal(query, {sstring(role_name), sstring(attribute_name)}, cql3::query_processor::cache_internal::yes).discard_result();
return _qp.execute_internal(query, {sstring(role_name), sstring(attribute_name)}).discard_result();
});
});
}

View File

@@ -1,3 +1,6 @@
/*
*/
/*
* Copyright (C) 2017-present ScyllaDB
*

View File

@@ -37,27 +37,19 @@
// The constants q1 and q2 are used to determine the proportional factor at each stage.
class backlog_controller {
public:
struct scheduling_group {
seastar::scheduling_group cpu = default_scheduling_group();
seastar::io_priority_class io = default_priority_class();
};
future<> shutdown() {
_update_timer.cancel();
return std::move(_inflight_update);
}
future<> update_static_shares(float static_shares) {
_static_shares = static_shares;
return make_ready_future<>();
}
protected:
struct control_point {
float input;
float output;
};
scheduling_group _scheduling_group;
seastar::scheduling_group _scheduling_group;
const ::io_priority_class& _io_priority;
std::chrono::milliseconds _interval;
timer<> _update_timer;
std::vector<control_point> _control_points;
@@ -66,36 +58,41 @@ protected:
// Updating shares for an I/O class may contact another shard and return a future.
future<> _inflight_update;
// Used when the controllers are disabled and a static share is used.
// When that option is deprecated, we should remove this.
float _static_shares;
virtual void update_controller(float quota);
bool controller_disabled() const noexcept {
return _static_shares > 0;
}
void adjust();
backlog_controller(scheduling_group sg, std::chrono::milliseconds interval,
std::vector<control_point> control_points, std::function<float()> backlog,
float static_shares = 0)
: _scheduling_group(std::move(sg))
backlog_controller(seastar::scheduling_group sg, const ::io_priority_class& iop, std::chrono::milliseconds interval,
std::vector<control_point> control_points, std::function<float()> backlog)
: _scheduling_group(sg)
, _io_priority(iop)
, _interval(interval)
, _update_timer([this] { adjust(); })
, _control_points()
, _current_backlog(std::move(backlog))
, _inflight_update(make_ready_future<>())
, _static_shares(static_shares)
{
_control_points.insert(_control_points.end(), control_points.begin(), control_points.end());
_update_timer.arm_periodic(interval);
_update_timer.arm_periodic(_interval);
}
// Used when the controllers are disabled and a static share is used.
// When that option is deprecated, we should remove this.
backlog_controller(seastar::scheduling_group sg, const ::io_priority_class& iop, float static_shares)
: _scheduling_group(sg)
, _io_priority(iop)
, _inflight_update(make_ready_future<>())
{
update_controller(static_shares);
}
virtual ~backlog_controller() {}
public:
backlog_controller(backlog_controller&&) = default;
float backlog_of_shares(float shares) const;
seastar::scheduling_group sg() {
return _scheduling_group;
}
};
// memtable flush CPU controller.
@@ -116,11 +113,11 @@ public:
class flush_controller : public backlog_controller {
static constexpr float hard_dirty_limit = 1.0f;
public:
flush_controller(backlog_controller::scheduling_group sg, float static_shares, std::chrono::milliseconds interval, float soft_limit, std::function<float()> current_dirty)
: backlog_controller(std::move(sg), std::move(interval),
flush_controller(seastar::scheduling_group sg, const ::io_priority_class& iop, float static_shares) : backlog_controller(sg, iop, static_shares) {}
flush_controller(seastar::scheduling_group sg, const ::io_priority_class& iop, std::chrono::milliseconds interval, float soft_limit, std::function<float()> current_dirty)
: backlog_controller(sg, iop, std::move(interval),
std::vector<backlog_controller::control_point>({{0.0, 0.0}, {soft_limit, 10}, {soft_limit + (hard_dirty_limit - soft_limit) / 2, 200} , {hard_dirty_limit, 1000}}),
std::move(current_dirty),
static_shares
std::move(current_dirty)
)
{}
};
@@ -130,11 +127,11 @@ public:
static constexpr unsigned normalization_factor = 30;
static constexpr float disable_backlog = std::numeric_limits<double>::infinity();
static constexpr float backlog_disabled(float backlog) { return std::isinf(backlog); }
compaction_controller(backlog_controller::scheduling_group sg, float static_shares, std::chrono::milliseconds interval, std::function<float()> current_backlog)
: backlog_controller(std::move(sg), std::move(interval),
compaction_controller(seastar::scheduling_group sg, const ::io_priority_class& iop, float static_shares) : backlog_controller(sg, iop, static_shares) {}
compaction_controller(seastar::scheduling_group sg, const ::io_priority_class& iop, std::chrono::milliseconds interval, std::function<float()> current_backlog)
: backlog_controller(sg, iop, std::move(interval),
std::vector<backlog_controller::control_point>({{0.0, 50}, {1.5, 100} , {normalization_factor, 1000}}),
std::move(current_backlog),
static_shares
std::move(current_backlog)
)
{}
};
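The controllers above map a backlog measurement to CPU shares through a piecewise-linear curve defined by control points (for example {0.0, 50}, {1.5, 100}, {normalization_factor, 1000} for compaction). The actual interpolation lives in backlog_controller::adjust() in the .cc file, which is not part of this diff; the following is only a sketch of the idea under that assumption.

#include <algorithm>
#include <vector>

struct control_point { float input; float output; };

// Sketch: clamp outside the first/last control point, interpolate linearly in between.
float interpolate_shares(const std::vector<control_point>& cps, float backlog) {
    if (backlog <= cps.front().input) { return cps.front().output; }
    if (backlog >= cps.back().input) { return cps.back().output; }
    auto hi = std::find_if(cps.begin(), cps.end(), [&] (const control_point& cp) { return cp.input >= backlog; });
    auto lo = std::prev(hi);
    float t = (backlog - lo->input) / (hi->input - lo->input);
    return lo->output + t * (hi->output - lo->output);
}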

View File

@@ -1,59 +0,0 @@
/*
* Copyright (C) 2022-present ScyllaDB
*/
/*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
#pragma once
#ifndef SCYLLA_BUILD_MODE
#error SCYLLA_BUILD_MODE must be defined
#endif
#ifndef STRINGIFY
// We need two levels of indirection
// to make a string out of the macro name.
// The outer level expands the macro
// and the inner level makes a string out of the expanded macro.
#define STRINGIFY_VALUE(x) #x
#define STRINGIFY_MACRO(x) STRINGIFY_VALUE(x)
#endif
#define SCYLLA_BUILD_MODE_STR STRINGIFY_MACRO(SCYLLA_BUILD_MODE)
// We use plain macro definitions
// so the preprocessor can expand them
// inline in the #if directives below
#define SCYLLA_BUILD_MODE_CODE_debug 0
#define SCYLLA_BUILD_MODE_CODE_release 1
#define SCYLLA_BUILD_MODE_CODE_dev 2
#define SCYLLA_BUILD_MODE_CODE_sanitize 3
#define SCYLLA_BUILD_MODE_CODE_coverage 4
#define _SCYLLA_BUILD_MODE_CODE(sbm) SCYLLA_BUILD_MODE_CODE_ ## sbm
#define SCYLLA_BUILD_MODE_CODE(sbm) _SCYLLA_BUILD_MODE_CODE(sbm)
#if SCYLLA_BUILD_MODE_CODE(SCYLLA_BUILD_MODE) == SCYLLA_BUILD_MODE_CODE_debug
#define SCYLLA_BUILD_MODE_DEBUG
#elif SCYLLA_BUILD_MODE_CODE(SCYLLA_BUILD_MODE) == SCYLLA_BUILD_MODE_CODE_release
#define SCYLLA_BUILD_MODE_RELEASE
#elif SCYLLA_BUILD_MODE_CODE(SCYLLA_BUILD_MODE) == SCYLLA_BUILD_MODE_CODE_dev
#define SCYLLA_BUILD_MODE_DEV
#elif SCYLLA_BUILD_MODE_CODE(SCYLLA_BUILD_MODE) == SCYLLA_BUILD_MODE_CODE_sanitize
#define SCYLLA_BUILD_MODE_SANITIZE
#elif SCYLLA_BUILD_MODE_CODE(SCYLLA_BUILD_MODE) == SCYLLA_BUILD_MODE_CODE_coverage
#define SCYLLA_BUILD_MODE_COVERAGE
#else
#error unrecognized SCYLLA_BUILD_MODE
#endif
#if (defined(SCYLLA_BUILD_MODE_RELEASE) || defined(SCYLLA_BUILD_MODE_DEV)) && defined(SEASTAR_DEBUG)
#error SEASTAR_DEBUG is not expected to be defined when SCYLLA_BUILD_MODE is "release" or "dev"
#endif
#if (defined(SCYLLA_BUILD_MODE_DEBUG) || defined(SCYLLA_BUILD_MODE_SANITIZE)) && !defined(SEASTAR_DEBUG)
#error SEASTAR_DEBUG is expected to be defined when SCYLLA_BUILD_MODE is "debug" or "sanitize"
#endif
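The two-level STRINGIFY trick in the removed header is a standard preprocessor idiom: stringizing directly captures the macro's name, while routing through one extra expansion step captures its value. A tiny self-contained demonstration, with names invented here rather than taken from the Scylla build:

#include <cstdio>

#define BUILD_MODE release
#define STR_VALUE(x) #x            // stringizes the argument as written
#define STR_MACRO(x) STR_VALUE(x)  // expands the argument first, then stringizes

int main() {
    std::puts(STR_VALUE(BUILD_MODE)); // prints "BUILD_MODE"
    std::puts(STR_MACRO(BUILD_MODE)); // prints "release"
}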

View File

@@ -11,11 +11,9 @@
#include <boost/range/iterator_range.hpp>
#include "bytes.hh"
#include "utils/managed_bytes.hh"
#include "hashing.hh"
#include <seastar/core/simple-stream.hh>
#include <seastar/core/loop.hh>
#include <bit>
#include <concepts>
/**
@@ -33,15 +31,26 @@ public:
static constexpr size_type max_chunk_size() { return max_alloc_size() - sizeof(chunk); }
private:
static_assert(sizeof(value_type) == 1, "value_type is assumed to be one byte long");
// Note: while appending data, chunk::size refers to the allocated space in the chunk,
// and chunk::frag_size refers to the currently occupied space in the chunk.
// After building, the first chunk::size is the whole object size, and chunk::frag_size
// doesn't change. This fits with managed_bytes interpretation.
using chunk = blob_storage;
struct chunk {
// FIXME: group fragment pointers to reduce pointer chasing when packetizing
std::unique_ptr<chunk> next;
~chunk() {
auto p = std::move(next);
while (p) {
// Avoid recursion when freeing chunks
auto p_next = std::move(p->next);
p = std::move(p_next);
}
}
size_type offset; // Also means "size" after chunk is closed
size_type size;
value_type data[0];
void operator delete(void* ptr) { free(ptr); }
};
static constexpr size_type default_chunk_size{512};
static constexpr size_type max_alloc_size() { return 128 * 1024; }
private:
blob_storage::ref_type _begin;
std::unique_ptr<chunk> _begin;
chunk* _current;
size_type _size;
size_type _initial_chunk_size = default_chunk_size;
@@ -61,13 +70,13 @@ public:
fragment_iterator(const fragment_iterator&) = default;
fragment_iterator& operator=(const fragment_iterator&) = default;
bytes_view operator*() const {
return { _current->data, _current->frag_size };
return { _current->data, _current->offset };
}
bytes_view operator->() const {
return *(*this);
}
fragment_iterator& operator++() {
_current = _current->next;
_current = _current->next.get();
return *this;
}
fragment_iterator operator++(int) {
@@ -110,21 +119,19 @@ private:
if (!_current) {
return 0;
}
return _current->size - _current->frag_size;
return _current->size - _current->offset;
}
// Figure out next chunk size.
// - must be enough for data_size + sizeof(chunk)
// - must be at least _initial_chunk_size
// - try to double each time to prevent too many allocations
// - should not exceed max_alloc_size, unless data_size requires it
// - will be power-of-two so the allocated memory can be fully utilized.
size_type next_alloc_size(size_t data_size) const {
auto next_size = _current
? _current->size * 2
: _initial_chunk_size;
next_size = std::min(next_size, max_alloc_size());
auto r = std::max<size_type>(next_size, data_size + sizeof(chunk));
return std::bit_ceil(r);
return std::max<size_type>(next_size, data_size + sizeof(chunk));
}
// Makes room for a contiguous region of given size.
// The region is accounted for as already written.
@@ -132,8 +139,8 @@ private:
[[gnu::always_inline]]
value_type* alloc(size_type size) {
if (__builtin_expect(size <= current_space_left(), true)) {
auto ret = _current->data + _current->frag_size;
_current->frag_size += size;
auto ret = _current->data + _current->offset;
_current->offset += size;
_size += size;
return ret;
} else {
@@ -147,21 +154,19 @@ private:
if (!space) {
throw std::bad_alloc();
}
auto backref = _current ? &_current->next : &_begin;
auto new_chunk = new (space) chunk(backref, alloc_size - sizeof(chunk), size);
_current = new_chunk;
auto new_chunk = std::unique_ptr<chunk>(new (space) chunk());
new_chunk->offset = size;
new_chunk->size = alloc_size - sizeof(chunk);
if (_current) {
_current->next = std::move(new_chunk);
_current = _current->next.get();
} else {
_begin = std::move(new_chunk);
_current = _begin.get();
}
_size += size;
return _current->data;
}
[[gnu::noinline]]
void free_chain(chunk* c) noexcept {
while (c) {
auto n = c->next;
c->~chunk();
::free(c);
c = n;
}
}
public:
explicit bytes_ostream(size_t initial_chunk_size) noexcept
: _begin()
@@ -173,7 +178,7 @@ public:
bytes_ostream() noexcept : bytes_ostream(default_chunk_size) {}
bytes_ostream(bytes_ostream&& o) noexcept
: _begin(std::exchange(o._begin, {}))
: _begin(std::move(o._begin))
, _current(o._current)
, _size(o._size)
, _initial_chunk_size(o._initial_chunk_size)
@@ -191,10 +196,6 @@ public:
append(o);
}
~bytes_ostream() {
free_chain(_begin.ptr);
}
bytes_ostream& operator=(const bytes_ostream& o) {
if (this != &o) {
auto x = bytes_ostream(o);
@@ -242,8 +243,8 @@ public:
auto this_size = std::min(v.size(), size_t(current_space_left()));
if (__builtin_expect(this_size, true)) {
memcpy(_current->data + _current->frag_size, v.begin(), this_size);
_current->frag_size += this_size;
memcpy(_current->data + _current->offset, v.begin(), this_size);
_current->offset += this_size;
_size += this_size;
v.remove_prefix(this_size);
}
@@ -286,20 +287,19 @@ public:
throw std::bad_alloc();
}
auto old_begin = _begin;
auto new_chunk = new (space) chunk(&_begin, _size, _size);
auto new_chunk = std::unique_ptr<chunk>(new (space) chunk());
new_chunk->offset = _size;
new_chunk->size = _size;
auto dst = new_chunk->data;
auto r = old_begin.ptr;
auto r = _begin.get();
while (r) {
auto next = r->next;
dst = std::copy_n(r->data, r->frag_size, dst);
r->~chunk();
::free(r);
auto next = r->next.get();
dst = std::copy_n(r->data, r->offset, dst);
r = next;
}
_current = new_chunk;
_current = new_chunk.get();
_begin = std::move(new_chunk);
return bytes_view(_current->data, _size);
}
@@ -333,23 +333,22 @@ public:
void remove_suffix(size_t n) {
_size -= n;
auto left = _size;
auto current = _begin.ptr;
auto current = _begin.get();
while (current) {
if (current->frag_size >= left) {
current->frag_size = left;
if (current->offset >= left) {
current->offset = left;
_current = current;
free_chain(current->next);
current->next = nullptr;
current->next.reset();
return;
}
left -= current->frag_size;
current = current->next;
left -= current->offset;
current = current->next.get();
}
}
// begin() and end() form an input range to bytes_view representing fragments.
// Any modification of this instance invalidates iterators.
fragment_iterator begin() const { return { _begin.ptr }; }
fragment_iterator begin() const { return { _begin.get() }; }
fragment_iterator end() const { return { nullptr }; }
output_iterator write_begin() { return output_iterator(*this); }
@@ -364,7 +363,7 @@ public:
};
position pos() const {
return { _current, _current ? _current->frag_size : 0 };
return { _current, _current ? _current->offset : 0 };
}
// Returns the amount of bytes written since given position.
@@ -374,11 +373,11 @@ public:
if (!c) {
return _size;
}
size_type total = c->frag_size - pos._offset;
c = c->next;
size_type total = c->offset - pos._offset;
c = c->next.get();
while (c) {
total += c->frag_size;
c = c->next;
total += c->offset;
c = c->next.get();
}
return total;
}
@@ -392,9 +391,8 @@ public:
}
_size -= written_since(pos);
_current = pos._chunk;
free_chain(_current->next);
_current->next = nullptr;
_current->frag_size = pos._offset;
_current->offset = pos._offset;
}
void reduce_chunk_count() {
@@ -443,23 +441,11 @@ public:
// the clear() calls then writes will not involve any memory allocations,
// except for the first write made on this instance.
void clear() {
if (_begin.ptr) {
_begin.ptr->frag_size = 0;
if (_begin) {
_begin->offset = 0;
_size = 0;
free_chain(_begin.ptr->next);
_begin.ptr->next = nullptr;
_current = _begin.ptr;
}
}
managed_bytes to_managed_bytes() && {
if (_size) {
_begin.ptr->size = _size;
_current = nullptr;
_size = 0;
return managed_bytes(std::exchange(_begin.ptr, {}));
} else {
return managed_bytes();
_current = _begin.get();
_begin->next.reset();
}
}
@@ -470,17 +456,15 @@ public:
// the clear() calls then writes will not involve any memory allocations,
// except for the first write made on this instance.
future<> clear_gently() noexcept {
if (!_begin.ptr) {
if (!_begin) {
return make_ready_future<>();
}
_begin->frag_size = 0;
_current = _begin.ptr;
_begin->offset = 0;
_size = 0;
return do_until([this] { return !_begin.ptr->next; }, [this] {
auto second_chunk = _begin.ptr->next;
auto next = second_chunk->next;
second_chunk->~chunk();
::free(second_chunk);
return do_until([this] { return !_begin->next; }, [this] {
// move next->next first to avoid it being recursively destroyed
// in ~chunk when _begin->next is move-assigned.
auto next = std::move(_begin->next->next);
_begin->next = std::move(next);
return make_ready_future<>();
});
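Both ~chunk() and clear_gently() in this file tear the singly-linked chunk chain down iteratively, because letting std::unique_ptr destructors recurse one node at a time can overflow the stack on long chains. The same pattern in isolation (a sketch, not the Scylla type):

#include <memory>

struct node {
    std::unique_ptr<node> next;
    ~node() {
        // Detach the tail and destroy it one node per loop iteration,
        // so destruction never recurses deeper than one level.
        auto p = std::move(next);
        while (p) {
            auto p_next = std::move(p->next);
            p = std::move(p_next);
        }
    }
};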

View File

@@ -10,19 +10,19 @@
#include <vector>
#include "row_cache.hh"
#include "mutation_reader.hh"
#include "mutation_fragment.hh"
#include "query-request.hh"
#include "partition_snapshot_row_cursor.hh"
#include "range_tombstone_assembler.hh"
#include "read_context.hh"
#include "readers/delegating_v2.hh"
#include "flat_mutation_reader.hh"
#include "clustering_key_filter.hh"
namespace cache {
extern logging::logger clogger;
class cache_flat_mutation_reader final : public flat_mutation_reader_v2::impl {
class cache_flat_mutation_reader final : public flat_mutation_reader::impl {
enum class state {
before_static_row,
@@ -51,46 +51,6 @@ class cache_flat_mutation_reader final : public flat_mutation_reader_v2::impl {
end_of_stream
};
enum class source {
cache = 0,
underlying = 1,
};
// Merges range tombstone change streams coming from underlying and the cache.
// Ensures no range tombstone change fragment is emitted when there is no
// actual change in the effective tombstone.
class range_tombstone_change_merger {
const schema& _schema;
position_in_partition _pos;
tombstone _current_tombstone;
std::array<tombstone, 2> _tombstones;
private:
std::optional<range_tombstone_change> do_flush(position_in_partition pos, bool end_of_range) {
std::optional<range_tombstone_change> ret;
position_in_partition::tri_compare cmp(_schema);
const auto res = cmp(_pos, pos);
const auto should_flush = end_of_range ? res <= 0 : res < 0;
if (should_flush) {
auto merged_tomb = std::max(_tombstones.front(), _tombstones.back());
if (merged_tomb != _current_tombstone) {
_current_tombstone = merged_tomb;
ret.emplace(_pos, _current_tombstone);
}
_pos = std::move(pos);
}
return ret;
}
public:
range_tombstone_change_merger(const schema& s) : _schema(s), _pos(position_in_partition::before_all_clustered_rows()), _tombstones{}
{ }
std::optional<range_tombstone_change> apply(source src, range_tombstone_change&& rtc) {
auto ret = do_flush(rtc.position(), false);
_tombstones[static_cast<size_t>(src)] = rtc.tombstone();
return ret;
}
std::optional<range_tombstone_change> flush(position_in_partition_view pos, bool end_of_range) {
return do_flush(position_in_partition(pos), end_of_range);
}
};
partition_snapshot_ptr _snp;
query::clustering_key_filter_ranges _ck_ranges; // Query schema domain, reversed reads use native order
@@ -106,7 +66,6 @@ class cache_flat_mutation_reader final : public flat_mutation_reader_v2::impl {
// range_tombstones with positions <= _lower_bound.
position_in_partition _lower_bound; // Query schema domain
position_in_partition_view _upper_bound; // Query schema domain
std::optional<position_in_partition> _underlying_upper_bound; // Query schema domain
// cache_flat_mutation_reader may be constructed either
// with a read_context&, where it knows that the read_context
@@ -121,19 +80,6 @@ class cache_flat_mutation_reader final : public flat_mutation_reader_v2::impl {
read_context& _read_context;
partition_snapshot_row_cursor _next_row;
range_tombstone_change_generator _rt_gen; // cache -> reader
range_tombstone_assembler _rt_assembler; // underlying -> cache
range_tombstone_change_merger _rt_merger; // {cache, underlying} -> reader
// When the read moves to the underlying, the read range will be
// (_lower_bound, x], where x is either _next_row.position() or _upper_bound.
// In the former case (x is _next_row.position()), underlying can emit
// a range tombstone change for after_key(x), which is outside the range.
// We can't push this fragment into the buffer straight away, because the cache
// may have fragments with a smaller position. So we save it here and flush it when
// a fragment with a larger position is seen.
std::optional<mutation_fragment_v2> _queued_underlying_fragment;
state _state = state::before_static_row;
bool _next_row_in_range = false;
@@ -152,8 +98,8 @@ class cache_flat_mutation_reader final : public flat_mutation_reader_v2::impl {
// Points to the underlying reader conforming to _schema,
// either to *_underlying_holder or _read_context.underlying().underlying().
flat_mutation_reader_v2* _underlying = nullptr;
flat_mutation_reader_v2_opt _underlying_holder;
flat_mutation_reader* _underlying = nullptr;
flat_mutation_reader_opt _underlying_holder;
future<> do_fill_buffer();
future<> ensure_underlying();
@@ -164,13 +110,11 @@ class cache_flat_mutation_reader final : public flat_mutation_reader_v2::impl {
void move_to_range(query::clustering_row_ranges::const_iterator);
void move_to_next_entry();
void maybe_drop_last_entry() noexcept;
void flush_tombstones(position_in_partition_view, bool end_of_range = false);
void add_to_buffer(const partition_snapshot_row_cursor&);
void add_clustering_row_to_buffer(mutation_fragment_v2&&);
void add_to_buffer(range_tombstone_change&&, source);
void do_add_to_buffer(range_tombstone_change&&);
void add_clustering_row_to_buffer(mutation_fragment&&);
void add_to_buffer(range_tombstone&&);
void add_range_tombstone_to_buffer(range_tombstone&&);
void add_to_buffer(mutation_fragment_v2&&);
void add_to_buffer(mutation_fragment&&);
future<> read_from_underlying();
void start_reading_from_underlying();
bool after_current_range(position_in_partition_view position);
@@ -187,9 +131,9 @@ class cache_flat_mutation_reader final : public flat_mutation_reader_v2::impl {
// if !_read_context.is_reversed() then _last_row is valid after this or the population lower bound
// is before all rows (so _last_row doesn't point at any entry).
bool ensure_population_lower_bound();
void maybe_add_to_cache(const mutation_fragment_v2& mf);
void maybe_add_to_cache(const mutation_fragment& mf);
void maybe_add_to_cache(const clustering_row& cr);
void maybe_add_to_cache(const range_tombstone_change& rtc);
void maybe_add_to_cache(const range_tombstone& rt);
void maybe_add_to_cache(const static_row& sr);
void maybe_set_static_row_continuous();
void finish_reader() {
@@ -233,7 +177,7 @@ public:
read_context& ctx,
partition_snapshot_ptr snp,
row_cache& cache)
: flat_mutation_reader_v2::impl(std::move(s), ctx.permit())
: flat_mutation_reader::impl(std::move(s), ctx.permit())
, _snp(std::move(snp))
, _ck_ranges(std::move(crr))
, _ck_ranges_curr(_ck_ranges.begin())
@@ -244,8 +188,6 @@ public:
, _read_context_holder()
, _read_context(ctx) // ctx is owned by the caller, who's responsible for closing it.
, _next_row(*_schema, *_snp, false, _read_context.is_reversed())
, _rt_gen(*_schema)
, _rt_merger(*_schema)
{
clogger.trace("csm {}: table={}.{}, reversed={}, snap={}", fmt::ptr(this), _schema->ks_name(), _schema->cf_name(), _read_context.is_reversed(),
fmt::ptr(&*_snp));
@@ -296,13 +238,13 @@ future<> cache_flat_mutation_reader::process_static_row() {
return _snp->static_row(_read_context.digest_requested());
});
if (!sr.empty()) {
push_mutation_fragment(*_schema, _permit, std::move(sr));
push_mutation_fragment(mutation_fragment(*_schema, _permit, std::move(sr)));
}
return make_ready_future<>();
} else {
_read_context.cache().on_row_miss();
return ensure_underlying().then([this] {
return (*_underlying)().then([this] (mutation_fragment_v2_opt&& sr) {
return (*_underlying)().then([this] (mutation_fragment_opt&& sr) {
if (sr) {
assert(sr->is_static_row());
maybe_add_to_cache(sr->as_static_row());
@@ -352,7 +294,7 @@ future<> cache_flat_mutation_reader::ensure_underlying() {
return make_ready_future<>();
}
return _read_context.ensure_underlying().then([this] {
flat_mutation_reader_v2& ctx_underlying = _read_context.underlying().underlying();
flat_mutation_reader& ctx_underlying = _read_context.underlying().underlying();
if (ctx_underlying.schema() != _schema) {
_underlying_holder = make_delegating_reader(ctx_underlying);
_underlying_holder->upgrade_schema(_schema);
@@ -376,9 +318,9 @@ future<> cache_flat_mutation_reader::do_fill_buffer() {
if (!_read_context.partition_exists()) {
return read_from_underlying();
}
_underlying_upper_bound = _next_row_in_range ? position_in_partition(_next_row.position())
auto end = _next_row_in_range ? position_in_partition(_next_row.position())
: position_in_partition(_upper_bound);
return _underlying->fast_forward_to(position_range{_lower_bound, *_underlying_upper_bound}).then([this] {
return _underlying->fast_forward_to(position_range{_lower_bound, std::move(end)}).then([this] {
return read_from_underlying();
});
}
@@ -421,13 +363,12 @@ inline
future<> cache_flat_mutation_reader::read_from_underlying() {
return consume_mutation_fragments_until(*_underlying,
[this] { return _state != state::reading_from_underlying || is_buffer_full(); },
[this] (mutation_fragment_v2 mf) {
[this] (mutation_fragment mf) {
_read_context.cache().on_row_miss();
maybe_add_to_cache(mf);
add_to_buffer(std::move(mf));
},
[this] {
_underlying_upper_bound.reset();
_state = state::reading_from_cache;
_lsa_manager.run_in_update_section([this] {
auto same_pos = _next_row.maybe_refresh();
@@ -554,9 +495,9 @@ void cache_flat_mutation_reader::maybe_update_continuity() {
}
inline
void cache_flat_mutation_reader::maybe_add_to_cache(const mutation_fragment_v2& mf) {
if (mf.is_range_tombstone_change()) {
maybe_add_to_cache(mf.as_range_tombstone_change());
void cache_flat_mutation_reader::maybe_add_to_cache(const mutation_fragment& mf) {
if (mf.is_range_tombstone()) {
maybe_add_to_cache(mf.as_range_tombstone());
} else {
assert(mf.is_clustering_row());
const clustering_row& cr = mf.as_clustering_row();
@@ -572,16 +513,9 @@ void cache_flat_mutation_reader::maybe_add_to_cache(const clustering_row& cr) {
_read_context.cache().on_mispopulate();
return;
}
auto rt_opt = _rt_assembler.flush(*_schema, position_in_partition::after_key(cr.key()));
clogger.trace("csm {}: populate({})", fmt::ptr(this), clustering_row::printer(*_schema, cr));
_lsa_manager.run_in_update_section_with_allocator([this, &cr, &rt_opt] {
_lsa_manager.run_in_update_section_with_allocator([this, &cr] {
mutation_partition& mp = _snp->version()->partition();
if (rt_opt) {
clogger.trace("csm {}: populate flushed rt({})", fmt::ptr(this), *rt_opt);
mp.mutable_row_tombstones().apply_monotonically(table_schema(), to_table_domain(range_tombstone(*rt_opt)));
}
rows_entry::tri_compare cmp(table_schema());
if (_read_context.digest_requested()) {
@@ -637,6 +571,11 @@ void cache_flat_mutation_reader::copy_from_cache_to_buffer() {
position_in_partition_view next_lower_bound = _next_row.dummy() ? _next_row.position() : position_in_partition_view::after_key(_next_row.key());
auto upper_bound = _next_row_in_range ? next_lower_bound : _upper_bound;
if (_snp->range_tombstones(_lower_bound, upper_bound, [&] (range_tombstone rts) {
position_in_partition::less_compare less(*_schema);
// Avoid emitting overlapping range tombstones for performance reasons.
if (less(upper_bound, rts.end_position())) {
rts.set_end(*_schema, upper_bound);
}
add_range_tombstone_to_buffer(std::move(rts));
return stop_iteration(_lower_bound_changed && is_buffer_full());
}, _read_context.is_reversed()) == stop_iteration::no) {
@@ -660,10 +599,6 @@ void cache_flat_mutation_reader::move_to_end() {
inline
void cache_flat_mutation_reader::move_to_next_range() {
if (_queued_underlying_fragment) {
add_to_buffer(*std::exchange(_queued_underlying_fragment, {}));
}
flush_tombstones(position_in_partition::for_range_end(*_ck_ranges_curr), true);
auto next_it = std::next(_ck_ranges_curr);
if (next_it == _ck_ranges_end) {
move_to_end();
@@ -680,7 +615,6 @@ void cache_flat_mutation_reader::move_to_range(query::clustering_row_ranges::con
_last_row = nullptr;
_lower_bound = std::move(lb);
_upper_bound = std::move(ub);
_rt_gen.trim(_lower_bound);
_lower_bound_changed = true;
_ck_ranges_curr = next_it;
auto adjacent = _next_row.advance_to(_lower_bound);
@@ -727,7 +661,7 @@ void cache_flat_mutation_reader::maybe_drop_last_entry() noexcept {
// This prevents unnecessary dummy entries from accumulating in cache and slowing down scans.
//
// Eviction can happen only from oldest versions to preserve the continuity non-overlapping rule
// (See docs/dev/row_cache.md)
// (See docs/design-notes/row_cache.md)
//
if (_last_row
&& !_read_context.is_reversed() // FIXME
@@ -737,9 +671,7 @@ void cache_flat_mutation_reader::maybe_drop_last_entry() noexcept {
&& _snp->at_oldest_version()) {
with_allocator(_snp->region().allocator(), [&] {
cache_tracker& tracker = _read_context.cache()._tracker;
tracker.get_lru().remove(*_last_row);
_last_row->on_evicted(tracker);
_last_row->on_evicted(_read_context.cache()._tracker);
});
_last_row = nullptr;
@@ -774,49 +706,27 @@ void cache_flat_mutation_reader::move_to_next_entry() {
}
}
void cache_flat_mutation_reader::flush_tombstones(position_in_partition_view pos, bool end_of_range) {
// Ensure position is appropriate for range tombstone bound
pos = position_in_partition_view::after_key(pos);
clogger.trace("csm {}: flush_tombstones({}) end_of_range: {}", fmt::ptr(this), pos, end_of_range);
_rt_gen.flush(pos, [this] (range_tombstone_change&& rtc) {
add_to_buffer(std::move(rtc), source::cache);
}, end_of_range);
if (auto rtc_opt = _rt_merger.flush(pos, end_of_range)) {
do_add_to_buffer(std::move(*rtc_opt));
}
}
inline
void cache_flat_mutation_reader::add_to_buffer(mutation_fragment_v2&& mf) {
clogger.trace("csm {}: add_to_buffer({})", fmt::ptr(this), mutation_fragment_v2::printer(*_schema, mf));
position_in_partition::less_compare less(*_schema);
if (_underlying_upper_bound && less(*_underlying_upper_bound, mf.position())) {
_queued_underlying_fragment = std::move(mf);
return;
}
flush_tombstones(mf.position());
void cache_flat_mutation_reader::add_to_buffer(mutation_fragment&& mf) {
clogger.trace("csm {}: add_to_buffer({})", fmt::ptr(this), mutation_fragment::printer(*_schema, mf));
if (mf.is_clustering_row()) {
add_clustering_row_to_buffer(std::move(mf));
} else {
assert(mf.is_range_tombstone_change());
add_to_buffer(std::move(mf).as_range_tombstone_change(), source::underlying);
assert(mf.is_range_tombstone());
add_to_buffer(std::move(mf).as_range_tombstone());
}
}
inline
void cache_flat_mutation_reader::add_to_buffer(const partition_snapshot_row_cursor& row) {
position_in_partition::less_compare less(*_schema);
if (_queued_underlying_fragment && less(_queued_underlying_fragment->position(), row.position())) {
add_to_buffer(*std::exchange(_queued_underlying_fragment, {}));
}
if (!row.dummy()) {
_read_context.cache().on_row_hit();
if (_read_context.digest_requested()) {
row.latest_row().cells().prepare_hash(table_schema(), column_kind::regular_column);
}
flush_tombstones(position_in_partition_view::for_key(row.key()));
add_clustering_row_to_buffer(mutation_fragment_v2(*_schema, _permit, row.row()));
add_clustering_row_to_buffer(mutation_fragment(*_schema, _permit, row.row()));
} else {
position_in_partition::less_compare less(*_schema);
if (less(_lower_bound, row.position())) {
_lower_bound = row.position();
_lower_bound_changed = true;
@@ -829,8 +739,8 @@ void cache_flat_mutation_reader::add_to_buffer(const partition_snapshot_row_curs
// (1) no fragment with position >= _lower_bound was pushed yet
// (2) If _lower_bound > mf.position(), mf was emitted
inline
void cache_flat_mutation_reader::add_clustering_row_to_buffer(mutation_fragment_v2&& mf) {
clogger.trace("csm {}: add_clustering_row_to_buffer({})", fmt::ptr(this), mutation_fragment_v2::printer(*_schema, mf));
void cache_flat_mutation_reader::add_clustering_row_to_buffer(mutation_fragment&& mf) {
clogger.trace("csm {}: add_clustering_row_to_buffer({})", fmt::ptr(this), mutation_fragment::printer(*_schema, mf));
auto& row = mf.as_clustering_row();
auto new_lower_bound = position_in_partition::after_key(row.key());
push_mutation_fragment(std::move(mf));
@@ -842,46 +752,32 @@ void cache_flat_mutation_reader::add_clustering_row_to_buffer(mutation_fragment_
}
inline
void cache_flat_mutation_reader::add_to_buffer(range_tombstone_change&& rtc, source src) {
clogger.trace("csm {}: add_to_buffer({})", fmt::ptr(this), rtc);
if (auto rtc_opt = _rt_merger.apply(src, std::move(rtc))) {
do_add_to_buffer(std::move(*rtc_opt));
}
}
inline
void cache_flat_mutation_reader::do_add_to_buffer(range_tombstone_change&& rtc) {
clogger.trace("csm {}: push({})", fmt::ptr(this), rtc);
void cache_flat_mutation_reader::add_to_buffer(range_tombstone&& rt) {
clogger.trace("csm {}: add_to_buffer({})", fmt::ptr(this), rt);
// This guarantees that rt starts after any emitted clustering_row
// and not before any emitted range tombstone.
position_in_partition::less_compare less(*_schema);
auto lower_bound_changed = less(_lower_bound, rtc.position());
_lower_bound = position_in_partition(rtc.position());
_lower_bound_changed = lower_bound_changed;
push_mutation_fragment(*_schema, _permit, std::move(rtc));
_read_context.cache()._tracker.on_range_tombstone_read();
if (less(_lower_bound, rt.end_position())) {
add_range_tombstone_to_buffer(std::move(rt));
}
}
inline
void cache_flat_mutation_reader::add_range_tombstone_to_buffer(range_tombstone&& rt) {
position_in_partition::less_compare less(*_schema);
if (_queued_underlying_fragment && less(_queued_underlying_fragment->position(), rt.position())) {
add_to_buffer(*std::exchange(_queued_underlying_fragment, {}));
}
clogger.trace("csm {}: add_to_buffer({})", fmt::ptr(this), rt);
if (!less(_lower_bound, rt.position())) {
rt.set_start(_lower_bound);
} else {
_lower_bound = position_in_partition(rt.position());
_lower_bound_changed = true;
}
flush_tombstones(rt.position());
_rt_gen.consume(std::move(rt));
clogger.trace("csm {}: push({})", fmt::ptr(this), rt);
push_mutation_fragment(*_schema, _permit, std::move(rt));
_read_context.cache()._tracker.on_range_tombstone_read();
}
inline
void cache_flat_mutation_reader::maybe_add_to_cache(const range_tombstone_change& rtc) {
clogger.trace("csm {}: maybe_add_to_cache({})", fmt::ptr(this), rtc);
auto rt_opt = _rt_assembler.consume(*_schema, range_tombstone_change(rtc));
if (!rt_opt) {
return;
}
const auto& rt = *rt_opt;
void cache_flat_mutation_reader::maybe_add_to_cache(const range_tombstone& rt) {
if (can_populate()) {
clogger.trace("csm {}: maybe_add_to_cache({})", fmt::ptr(this), rt);
_lsa_manager.run_in_update_section_with_allocator([&] {
@@ -929,25 +825,25 @@ bool cache_flat_mutation_reader::can_populate() const {
// pass a reference to ctx to cache_flat_mutation_reader
// keeping its ownership at caller's.
inline flat_mutation_reader_v2 make_cache_flat_mutation_reader(schema_ptr s,
inline flat_mutation_reader make_cache_flat_mutation_reader(schema_ptr s,
dht::decorated_key dk,
query::clustering_key_filter_ranges crr,
row_cache& cache,
cache::read_context& ctx,
partition_snapshot_ptr snp)
{
return make_flat_mutation_reader_v2<cache::cache_flat_mutation_reader>(
return make_flat_mutation_reader<cache::cache_flat_mutation_reader>(
std::move(s), std::move(dk), std::move(crr), ctx, std::move(snp), cache);
}
// transfer ownership of ctx to cache_flat_mutation_reader
inline flat_mutation_reader_v2 make_cache_flat_mutation_reader(schema_ptr s,
inline flat_mutation_reader make_cache_flat_mutation_reader(schema_ptr s,
dht::decorated_key dk,
query::clustering_key_filter_ranges crr,
row_cache& cache,
std::unique_ptr<cache::read_context> unique_ctx,
partition_snapshot_ptr snp)
{
return make_flat_mutation_reader_v2<cache::cache_flat_mutation_reader>(
return make_flat_mutation_reader<cache::cache_flat_mutation_reader>(
std::move(s), std::move(dk), std::move(crr), std::move(unique_ctx), std::move(snp), cache);
}


@@ -15,7 +15,14 @@
#include "converting_mutation_partition_applier.hh"
#include "hashing_partition_visitor.hh"
#include "utils/UUID.hh"
#include "serializer.hh"
#include "idl/uuid.dist.hh"
#include "idl/keys.dist.hh"
#include "idl/mutation.dist.hh"
#include "serializer_impl.hh"
#include "serialization_visitors.hh"
#include "idl/uuid.dist.impl.hh"
#include "idl/keys.dist.impl.hh"
#include "idl/mutation.dist.impl.hh"
#include <iostream>
@@ -37,7 +44,7 @@ canonical_mutation::canonical_mutation(const mutation& m)
}).end_canonical_mutation();
}
table_id canonical_mutation::column_family_id() const {
utils::UUID canonical_mutation::column_family_id() const {
auto in = ser::as_input_stream(_data);
auto mv = ser::deserialize(in, boost::type<ser::canonical_mutation_view>());
return mv.table_id();
@@ -113,19 +120,17 @@ std::ostream& operator<<(std::ostream& os, const canonical_mutation& cm) {
auto&& entry = _cm.static_column_at(id);
fmt::print(_os, "static column {} {}", bytes_to_text(entry.name()), collection_mutation_view::printer(*entry.type(), cmv));
}
virtual stop_iteration accept_row_tombstone(range_tombstone rt) override {
virtual void accept_row_tombstone(range_tombstone rt) override {
print_separator();
fmt::print(_os, "row tombstone {}", rt);
return stop_iteration::no;
}
virtual stop_iteration accept_row(position_in_partition_view pipv, row_tombstone rt, row_marker rm, is_dummy, is_continuous) override {
virtual void accept_row(position_in_partition_view pipv, row_tombstone rt, row_marker rm, is_dummy, is_continuous) override {
if (_in_row) {
fmt::print(_os, "}}, ");
}
fmt::print(_os, "{{row {} tombstone {} marker {}", pipv, rt, rm);
_in_row = true;
_first = false;
return stop_iteration::no;
}
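
In the hunk above, one side has the printer's accept_row_tombstone/accept_row callbacks return stop_iteration while the other returns void; returning stop_iteration lets the visited structure cut the walk short. A small self-contained sketch of that idiom (all names hypothetical, not the real mutation_partition visitor API):

#include <iostream>
#include <vector>

// Hypothetical stand-in for Scylla's stop_iteration bool-class.
enum class stop_iteration { no, yes };

struct row { int key; };

// A visitor that asks the walker to stop after `limit` rows.
struct limited_printer {
    int limit;
    int seen = 0;
    stop_iteration accept_row(const row& r) {
        std::cout << "row " << r.key << '\n';
        return ++seen >= limit ? stop_iteration::yes : stop_iteration::no;
    }
};

// The walk honours the visitor's return value instead of always running to the end.
void walk(const std::vector<row>& rows, limited_printer& v) {
    for (const auto& r : rows) {
        if (v.accept_row(r) == stop_iteration::yes) {
            break;
        }
    }
}

int main() {
    std::vector<row> rows{{1}, {2}, {3}, {4}};
    limited_printer p{.limit = 2};
    walk(rows, p);   // prints rows 1 and 2, then stops
}
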
virtual void accept_row_cell(column_id id, atomic_cell ac) override {
print_separator();


@@ -14,6 +14,10 @@
#include "bytes_ostream.hh"
#include <iosfwd>
namespace utils {
class UUID;
} // namespace utils
// Immutable mutation form which can be read using any schema version of the same table.
// Safe to access from other shards via const&.
// Safe to pass serialized across nodes.
@@ -35,7 +39,7 @@ public:
// is not intended, user should sync the schema first.
mutation to_mutation(schema_ptr) const;
table_id column_family_id() const;
utils::UUID column_family_id() const;
const bytes_ostream& representation() const { return _data; }


@@ -14,12 +14,12 @@
#include "cdc/generation.hh"
#include "keys.hh"
static const sstring cdc_partitioner_name = "com.scylladb.dht.CDCPartitioner";
namespace cdc {
const sstring cdc_partitioner::classname = "com.scylladb.dht.CDCPartitioner";
const sstring cdc_partitioner::name() const {
return classname;
return cdc_partitioner_name;
}
static dht::token to_token(int64_t value) {
@@ -48,7 +48,7 @@ cdc_partitioner::get_token(const schema& s, partition_key_view key) const {
}
using registry = class_registrator<dht::i_partitioner, cdc_partitioner>;
static registry registrator(cdc::cdc_partitioner::classname);
static registry registrator(cdc_partitioner_name);
static registry registrator_short_name("CDCPartitioner");
}
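
class_registrator above binds the partitioner implementation to one or more lookup names in a global registry. A much-simplified standalone analogue of that registration pattern (the registry, registrator and partitioner types below are illustrative stand-ins, not the real class_registrator API):

#include <cassert>
#include <functional>
#include <map>
#include <memory>
#include <string>

// Hypothetical base class standing in for dht::i_partitioner.
struct partitioner {
    virtual ~partitioner() = default;
    virtual std::string name() const = 0;
};

// Minimal global name -> factory registry.
std::map<std::string, std::function<std::unique_ptr<partitioner>()>>& registry() {
    static std::map<std::string, std::function<std::unique_ptr<partitioner>()>> r;
    return r;
}

// Registers a factory under a name at static-initialization time.
struct registrator {
    registrator(std::string name, std::function<std::unique_ptr<partitioner>()> f) {
        registry().emplace(std::move(name), std::move(f));
    }
};

struct cdc_partitioner final : partitioner {
    std::string name() const override { return "com.scylladb.dht.CDCPartitioner"; }
};

// The same implementation is reachable under its long and short names.
static registrator reg_long("com.scylladb.dht.CDCPartitioner", [] { return std::make_unique<cdc_partitioner>(); });
static registrator reg_short("CDCPartitioner", [] { return std::make_unique<cdc_partitioner>(); });

int main() {
    auto p = registry().at("CDCPartitioner")();
    assert(p->name() == "com.scylladb.dht.CDCPartitioner");
}
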


@@ -25,8 +25,6 @@ class key_view;
namespace cdc {
struct cdc_partitioner final : public dht::i_partitioner {
static const sstring classname;
cdc_partitioner() = default;
virtual const sstring name() const override;
virtual dht::token get_token(const schema& s, partition_key_view key) const override;


@@ -340,7 +340,7 @@ future<cdc::generation_id> generation_service::make_new_generation(const std::un
auto normal_token_owners = tmptr->count_normal_token_owners();
assert(normal_token_owners);
if (_feature_service.cdc_generations_v2) {
if (_feature_service.cluster_supports_cdc_generations_v2()) {
auto uuid = utils::make_random_uuid();
cdc_log.info("Inserting new generation data at UUID {}", uuid);
// This may take a while.
@@ -455,12 +455,7 @@ static future<> update_streams_description(
noncopyable_function<unsigned()> get_num_token_owners,
abort_source& abort_src) -> future<> {
while (true) {
try {
co_await sleep_abortable(std::chrono::seconds(60), abort_src);
} catch (seastar::sleep_aborted&) {
cdc_log.warn( "Aborted update CDC description table with generation {}", gen_id);
co_return;
}
co_await sleep_abortable(std::chrono::seconds(60), abort_src);
try {
co_await do_update_streams_description(gen_id, *sys_dist_ks, { get_num_token_owners() });
co_return;
@@ -583,7 +578,7 @@ future<> generation_service::maybe_rewrite_streams_descriptions() {
co_return;
}
if (co_await _sys_ks.local().cdc_is_rewritten()) {
if (co_await db::system_keyspace::cdc_is_rewritten()) {
co_return;
}
@@ -607,13 +602,13 @@ future<> generation_service::maybe_rewrite_streams_descriptions() {
continue;
}
times_and_ttls.push_back(time_and_ttl{as_timepoint(s.id().uuid()), cdc_opts.ttl()});
times_and_ttls.push_back(time_and_ttl{as_timepoint(s.id()), cdc_opts.ttl()});
}
if (times_and_ttls.empty()) {
// There's no point in rewriting old generations' streams (they don't contain any data).
cdc_log.info("No CDC log tables present, not rewriting stream tables.");
co_return co_await _sys_ks.local().cdc_set_rewritten(std::nullopt);
co_return co_await db::system_keyspace::cdc_set_rewritten(std::nullopt);
}
auto get_num_token_owners = [tm = _token_metadata.get()] { return tm->count_normal_token_owners(); };
@@ -631,7 +626,7 @@ future<> generation_service::maybe_rewrite_streams_descriptions() {
std::move(get_num_token_owners),
_abort_src);
co_await _sys_ks.local().cdc_set_rewritten(last_rewritten);
co_await db::system_keyspace::cdc_set_rewritten(last_rewritten);
}
static void assert_shard_zero(const sstring& where) {
@@ -675,13 +670,11 @@ constexpr char could_not_retrieve_msg_template[]
generation_service::generation_service(
config cfg, gms::gossiper& g, sharded<db::system_distributed_keyspace>& sys_dist_ks,
sharded<db::system_keyspace>& sys_ks,
abort_source& abort_src, const locator::shared_token_metadata& stm, gms::feature_service& f,
replica::database& db)
: _cfg(std::move(cfg))
, _gossiper(g)
, _sys_dist_ks(sys_dist_ks)
, _sys_ks(sys_ks)
, _abort_src(abort_src)
, _token_metadata(stm)
, _feature_service(f)
@@ -709,7 +702,7 @@ generation_service::~generation_service() {
future<> generation_service::after_join(std::optional<cdc::generation_id>&& startup_gen_id) {
assert_shard_zero(__PRETTY_FUNCTION__);
assert(_sys_ks.local().bootstrap_complete());
assert(db::system_keyspace::bootstrap_complete());
_gen_id = std::move(startup_gen_id);
_gossiper.register_(shared_from_this());
@@ -783,7 +776,7 @@ future<> generation_service::check_and_repair_cdc_streams() {
cdc_log.warn("check_and_repair_cdc_streams: no generation observed in gossip");
should_regenerate = true;
} else if (std::holds_alternative<cdc::generation_id_v1>(*latest)
&& _feature_service.cdc_generations_v2) {
&& _feature_service.cluster_supports_cdc_generations_v2()) {
cdc_log.info(
"Cluster still using CDC generation storage format V1 (id: {}), even though it already understands the V2 format."
" Creating a new generation using V2.", *latest);
@@ -875,7 +868,7 @@ future<> generation_service::check_and_repair_cdc_streams() {
{ gms::application_state::CDC_GENERATION_ID, gms::versioned_value::cdc_generation_id(new_gen_id) },
{ gms::application_state::STATUS, *status }
});
co_await _sys_ks.local().update_cdc_generation_id(new_gen_id);
co_await db::system_keyspace::update_cdc_generation_id(new_gen_id);
}
future<> generation_service::handle_cdc_generation(std::optional<cdc::generation_id> gen_id) {
@@ -885,7 +878,7 @@ future<> generation_service::handle_cdc_generation(std::optional<cdc::generation
co_return;
}
if (!_sys_ks.local().bootstrap_complete() || !_sys_dist_ks.local_is_initialized()
if (!db::system_keyspace::bootstrap_complete() || !_sys_dist_ks.local_is_initialized()
|| !_sys_dist_ks.local().started()) {
// The service should not be listening for generation changes until after the node
// is bootstrapped. Therefore we would previously assume that this condition
@@ -1018,7 +1011,7 @@ future<bool> generation_service::do_handle_cdc_generation(cdc::generation_id gen
// The assumption follows from the requirement of bootstrapping nodes sequentially.
if (!_gen_id || get_ts(*_gen_id) < get_ts(gen_id)) {
_gen_id = gen_id;
co_await _sys_ks.local().update_cdc_generation_id(gen_id);
co_await db::system_keyspace::update_cdc_generation_id(gen_id);
co_await _gossiper.add_local_application_state(
gms::application_state::CDC_GENERATION_ID, gms::versioned_value::cdc_generation_id(gen_id));
}
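
update_streams_description, earlier in this file's diff, is at heart a retry loop that sleeps between attempts and must exit promptly when the abort_source fires. A rough standalone analogue of that shape using std::jthread and std::stop_token in place of seastar's sleep_abortable (entirely illustrative; the real code is coroutine-based):

#include <chrono>
#include <iostream>
#include <stop_token>
#include <thread>

// Sleep for `d`, waking early if a stop was requested; returns false when aborted.
bool sleep_abortable(std::chrono::milliseconds d, std::stop_token st) {
    using namespace std::chrono;
    const auto deadline = steady_clock::now() + d;
    while (steady_clock::now() < deadline) {
        if (st.stop_requested()) {
            return false;
        }
        std::this_thread::sleep_for(milliseconds(10));   // poll in small slices
    }
    return !st.stop_requested();
}

int main() {
    std::jthread worker([] (std::stop_token st) {
        int attempt = 0;
        while (true) {
            if (!sleep_abortable(std::chrono::milliseconds(200), st)) {
                std::cout << "aborted, giving up\n";
                return;
            }
            std::cout << "attempt " << ++attempt << '\n';
            if (attempt == 3) {                          // pretend the update finally succeeded
                std::cout << "done\n";
                return;
            }
        }
    });
    std::this_thread::sleep_for(std::chrono::milliseconds(800));
    // ~jthread requests stop and joins, so an unfinished worker exits cleanly.
}
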


@@ -15,7 +15,6 @@
namespace db {
class system_distributed_keyspace;
class system_keyspace;
}
namespace gms {
@@ -52,7 +51,6 @@ private:
config _cfg;
gms::gossiper& _gossiper;
sharded<db::system_distributed_keyspace>& _sys_dist_ks;
sharded<db::system_keyspace>& _sys_ks;
abort_source& _abort_src;
const locator::shared_token_metadata& _token_metadata;
gms::feature_service& _feature_service;
@@ -79,9 +77,7 @@ private:
future<> _cdc_streams_rewrite_complete = make_ready_future<>();
public:
generation_service(config cfg, gms::gossiper&,
sharded<db::system_distributed_keyspace>&,
sharded<db::system_keyspace>& sys_ks,
abort_source&, const locator::shared_token_metadata&,
sharded<db::system_distributed_keyspace>&, abort_source&, const locator::shared_token_metadata&,
gms::feature_service&, replica::database& db);
future<> stop();


@@ -20,7 +20,6 @@
#include "cdc/cdc_options.hh"
#include "cdc/change_visitor.hh"
#include "cdc/metadata.hh"
#include "cdc/cdc_partitioner.hh"
#include "bytes.hh"
#include "replica/database.hh"
#include "db/schema_tables.hh"
@@ -31,6 +30,7 @@
#include "service/storage_proxy.hh"
#include "types/tuple.hh"
#include "cql3/statements/select_statement.hh"
#include "cql3/multi_column_relation.hh"
#include "cql3/untyped_result_set.hh"
#include "log.hh"
#include "utils/rjson.hh"
@@ -59,7 +59,7 @@ using namespace std::chrono_literals;
logging::logger cdc_log("cdc");
namespace cdc {
static schema_ptr create_log_schema(const schema&, std::optional<table_id> = {}, schema_ptr = nullptr);
static schema_ptr create_log_schema(const schema&, std::optional<utils::UUID> = {}, schema_ptr = nullptr);
}
static constexpr auto cdc_group_name = "cdc";
@@ -169,8 +169,9 @@ public:
// in seastar thread
auto log_schema = create_log_schema(schema);
auto& keyspace = db.find_keyspace(schema.ks_name());
auto log_mut = db::schema_tables::make_create_table_mutations(log_schema, timestamp);
auto log_mut = db::schema_tables::make_create_table_mutations(keyspace.metadata(), log_schema, timestamp);
mutations.insert(mutations.end(), std::make_move_iterator(log_mut.begin()), std::make_move_iterator(log_mut.end()));
}
@@ -180,36 +181,36 @@ public:
bool is_cdc = new_schema.cdc_options().enabled();
bool was_cdc = old_schema.cdc_options().enabled();
// if we are turning off cdc we can skip this, since even if columns change etc.,
// any writer should see cdc -> off together with any actual schema changes to the
// base table, so it should never try to write to a non-existent log column etc.
// note that if the user has set ttl=0 in cdc options, they are still responsible
// for emptying the log.
if (is_cdc) {
// we need to create or modify the log & stream schemas iff either we changed cdc status (was != is)
// or if cdc is on now unconditionally, since then any actual base schema changes will affect the column
// etc.
if (was_cdc || is_cdc) {
auto& db = _ctxt._proxy.get_db().local();
auto logname = log_name(old_schema.cf_name());
auto& keyspace = db.find_keyspace(old_schema.ks_name());
auto has_cdc_log = db.has_schema(old_schema.ks_name(), logname);
auto log_schema = has_cdc_log ? db.find_schema(old_schema.ks_name(), logname) : nullptr;
if (!was_cdc && has_cdc_log) {
// make sure the apparent log table really is a cdc log (not a user table).
// we just check the partitioner - since user tables should _not_ be able to
// set/use this.
if (log_schema->get_partitioner().name() != cdc::cdc_partitioner::classname) {
// will throw
check_that_cdc_log_table_does_not_exist(db, old_schema, logname);
}
if (!was_cdc) {
check_that_cdc_log_table_does_not_exist(db, new_schema, log_name(new_schema.cf_name()));
}
if (is_cdc) {
check_for_attempt_to_create_nested_cdc_log(db, new_schema);
ensure_that_table_has_no_counter_columns(new_schema);
}
check_for_attempt_to_create_nested_cdc_log(db, new_schema);
ensure_that_table_has_no_counter_columns(new_schema);
auto logname = log_name(old_schema.cf_name());
auto& keyspace = db.find_keyspace(old_schema.ks_name());
auto log_schema = was_cdc ? db.find_column_family(old_schema.ks_name(), logname).schema() : nullptr;
if (!is_cdc) {
auto log_mut = db::schema_tables::make_drop_table_mutations(keyspace.metadata(), log_schema, timestamp);
mutations.insert(mutations.end(), std::make_move_iterator(log_mut.begin()), std::make_move_iterator(log_mut.end()));
return;
}
auto new_log_schema = create_log_schema(new_schema, log_schema ? std::make_optional(log_schema->id()) : std::nullopt, log_schema);
auto log_mut = log_schema
? db::schema_tables::make_update_table_mutations(db, keyspace.metadata(), log_schema, new_log_schema, timestamp, false)
: db::schema_tables::make_create_table_mutations(new_log_schema, timestamp)
: db::schema_tables::make_create_table_mutations(keyspace.metadata(), new_log_schema, timestamp)
;
mutations.insert(mutations.end(), std::make_move_iterator(log_mut.begin()), std::make_move_iterator(log_mut.end()));
@@ -217,16 +218,14 @@ public:
}
void on_before_drop_column_family(const schema& schema, std::vector<mutation>& mutations, api::timestamp_type timestamp) override {
auto logname = log_name(schema.cf_name());
auto& db = _ctxt._proxy.get_db().local();
auto has_cdc_log = db.has_schema(schema.ks_name(), logname);
if (has_cdc_log) {
auto log_schema = db.find_schema(schema.ks_name(), logname);
if (log_schema->get_partitioner().name() != cdc::cdc_partitioner::classname) {
return;
}
if (schema.cdc_options().enabled()) {
auto logname = log_name(schema.cf_name());
auto& db = _ctxt._proxy.get_db().local();
auto& keyspace = db.find_keyspace(schema.ks_name());
auto log_schema = db.find_column_family(schema.ks_name(), logname).schema();
auto log_mut = db::schema_tables::make_drop_table_mutations(keyspace.metadata(), log_schema, timestamp);
mutations.insert(mutations.end(), std::make_move_iterator(log_mut.begin()), std::make_move_iterator(log_mut.end()));
}
}
@@ -408,7 +407,7 @@ static const sstring cdc_meta_column_prefix = "cdc$";
static const sstring cdc_deleted_column_prefix = cdc_meta_column_prefix + "deleted_";
static const sstring cdc_deleted_elements_column_prefix = cdc_meta_column_prefix + "deleted_elements_";
bool is_log_name(const std::string_view& table_name) {
static bool is_log_name(const std::string_view& table_name) {
return boost::ends_with(table_name, cdc_log_suffix);
}
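
is_log_name above simply tests the table name for the CDC log suffix. A standalone sketch of the same check using C++20 string_view::ends_with; the suffix value below is assumed for illustration only, the real constant is defined alongside log_name in cdc/log.cc:

#include <cassert>
#include <string>
#include <string_view>

// Assumed suffix, for illustration; not necessarily the exact production value.
constexpr std::string_view cdc_log_suffix = "_scylla_cdc_log";

bool is_log_name(std::string_view table_name) {
    return table_name.ends_with(cdc_log_suffix);
}

std::string log_name(std::string_view base_table) {
    return std::string(base_table) + std::string(cdc_log_suffix);
}

int main() {
    assert(is_log_name(log_name("orders")));
    assert(!is_log_name("orders"));
}
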
@@ -485,9 +484,9 @@ bytes log_data_column_deleted_elements_name_bytes(const bytes& column_name) {
return to_bytes(cdc_deleted_elements_column_prefix) + column_name;
}
static schema_ptr create_log_schema(const schema& s, std::optional<table_id> uuid, schema_ptr old) {
static schema_ptr create_log_schema(const schema& s, std::optional<utils::UUID> uuid, schema_ptr old) {
schema_builder b(s.ks_name(), log_name(s.cf_name()));
b.with_partitioner(cdc::cdc_partitioner::classname);
b.with_partitioner("com.scylladb.dht.CDCPartitioner");
b.set_compaction_strategy(sstables::compaction_strategy_type::time_window);
b.set_comment(fmt::format("CDC log for {}.{}", s.ks_name(), s.cf_name()));
auto ttl_seconds = s.cdc_options().ttl();
@@ -1655,8 +1654,7 @@ public:
auto partition_slice = query::partition_slice(std::move(bounds), std::move(static_columns), std::move(regular_columns), std::move(opts));
const auto max_result_size = _ctx._proxy.get_max_result_size(partition_slice);
const auto tombstone_limit = query::tombstone_limit(_ctx._proxy.get_tombstone_limit());
auto command = ::make_lw_shared<query::read_command>(_schema->id(), _schema->version(), partition_slice, query::max_result_size(max_result_size), tombstone_limit, query::row_limit(row_limit));
auto command = ::make_lw_shared<query::read_command>(_schema->id(), _schema->version(), partition_slice, query::max_result_size(max_result_size), query::row_limit(row_limit));
const auto select_cl = adjust_cl(write_cl);


@@ -60,8 +60,6 @@ struct operation_result_tracker;
class db_context;
class metadata;
bool is_log_name(const std::string_view& table_name);
/// \brief CDC service, responsible for schema listeners
///
/// CDC service will listen for schema changes and iff CDC is enabled/changed


@@ -8,8 +8,8 @@
#pragma once
#include <seastar/core/file.hh>
#include <seastar/core/seastar.hh>
#include "seastar/core/file.hh"
#include "seastar/core/seastar.hh"
#include "utils/disk-error-handler.hh"
#include "seastarx.hh"


@@ -1,28 +0,0 @@
/*
* Copyright (C) 2019-present ScyllaDB
*/
/*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
#include "client_data.hh"
#include <stdexcept>
sstring to_string(client_type ct) {
switch (ct) {
case client_type::cql: return "cql";
case client_type::thrift: return "thrift";
case client_type::alternator: return "alternator";
}
throw std::runtime_error("Invalid client_type");
}
sstring to_string(client_connection_stage ccs) {
switch (ccs) {
case client_connection_stage::established: return "ESTABLISHED";
case client_connection_stage::authenticating: return "AUTHENTICATING";
case client_connection_stage::ready: return "READY";
}
throw std::runtime_error("Invalid client_connection_stage");
}


@@ -1,51 +0,0 @@
/*
* Copyright (C) 2019-present ScyllaDB
*/
/*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
#pragma once
#include <seastar/net/inet_address.hh>
#include <seastar/core/sstring.hh>
#include "seastarx.hh"
#include <optional>
enum class client_type {
cql = 0,
thrift,
alternator,
};
sstring to_string(client_type ct);
enum class client_connection_stage {
established = 0,
authenticating,
ready,
};
sstring to_string(client_connection_stage ct);
// Representation of a row in `system.clients'. std::optionals are for nullable cells.
struct client_data {
net::inet_address ip;
int32_t port;
client_type ct;
client_connection_stage connection_stage = client_connection_stage::established;
int32_t shard_id; /// ID of server-side shard which is processing the connection.
std::optional<sstring> driver_name;
std::optional<sstring> driver_version;
std::optional<sstring> hostname;
std::optional<int32_t> protocol_version;
std::optional<sstring> ssl_cipher_suite;
std::optional<bool> ssl_enabled;
std::optional<sstring> ssl_protocol;
std::optional<sstring> username;
sstring stage_str() const { return to_string(connection_stage); }
sstring client_type_str() const { return to_string(ct); }
};


@@ -13,22 +13,16 @@
class schema;
class partition_key;
class clustering_row;
struct atomic_cell_view;
struct tombstone;
namespace db::view {
struct view_key_and_action;
}
class column_computation;
using column_computation_ptr = std::unique_ptr<column_computation>;
/*
* Column computation represents a computation performed in order to obtain a value for a computed column.
* Computed columns description is also available at docs/dev/system_schema_keyspace.md. They hold values
* Computed columns description is also available at docs/system_schema_keyspace.md. They hold values
* not provided directly by the user, but rather computed: from other column values and possibly other sources.
* This class is able to serialize/deserialize column computations and perform the computation itself,
* based on given schema, and partition key. Responsibility for providing enough data
* based on given schema, partition key and clustering row. Responsibility for providing enough data
* in the clustering row in order for computation to succeed belongs to the caller. In particular,
* generating a value might involve performing a read-before-write if the computation is performed
* on more values than are present in the update request.
@@ -42,19 +36,7 @@ public:
virtual column_computation_ptr clone() const = 0;
virtual bytes serialize() const = 0;
virtual bytes compute_value(const schema& schema, const partition_key& key) const = 0;
/*
* depends_on_non_primary_key_column for a column computation is needed to
* detect a case where the primary key of a materialized view depends on a
* non primary key column from the base table, but at the same time, the view
* itself doesn't have non-primary key columns. This is an issue, since as
* for now, it was assumed that no non-primary key columns in view schema
* meant that the update cannot change the primary key of the view, and
* therefore the update path can be simplified.
*/
virtual bool depends_on_non_primary_key_column() const {
return false;
}
virtual bytes_opt compute_value(const schema& schema, const partition_key& key, const clustering_row& row) const = 0;
};
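
The comment block above describes column_computation as an interface that derives a column value from the schema, the partition key and (in one variant) the clustering row, with the caller responsible for supplying enough data. A heavily simplified standalone model of that contract, computing over a plain map instead of real schema objects (every type here is hypothetical):

#include <cassert>
#include <functional>
#include <map>
#include <optional>
#include <string>

// Rough stand-ins, for illustration only.
using bytes = std::string;
using row_values = std::map<std::string, bytes>;   // column name -> value

// Abstract computation deriving a value from supplied row data, like column_computation.
struct computation {
    virtual ~computation() = default;
    virtual std::optional<bytes> compute(const row_values& row) const = 0;
};

// Example implementation: a token-like hash of the partition key column.
struct key_hash_computation final : computation {
    std::optional<bytes> compute(const row_values& row) const override {
        auto it = row.find("pk");
        if (it == row.end()) {
            return std::nullopt;   // the caller did not provide enough data
        }
        return std::to_string(std::hash<std::string>{}(it->second));
    }
};

int main() {
    key_hash_computation c;
    assert(c.compute({{"pk", "alice"}}).has_value());
    assert(!c.compute({}).has_value());   // supplying the inputs is the caller's job
}
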
/*
@@ -72,7 +54,7 @@ public:
return std::make_unique<legacy_token_column_computation>(*this);
}
virtual bytes serialize() const override;
virtual bytes compute_value(const schema& schema, const partition_key& key) const override;
virtual bytes_opt compute_value(const schema& schema, const partition_key& key, const clustering_row& row) const override;
};
@@ -93,53 +75,5 @@ public:
return std::make_unique<token_column_computation>(*this);
}
virtual bytes serialize() const override;
virtual bytes compute_value(const schema& schema, const partition_key& key) const override;
};
/*
* collection_column_computation is used for a secondary index on a collection
* column. In this case we don't have a single value to compute, but rather we
* want to return multiple values (e.g., all the keys in the collection).
* So this class does not implement the base class's compute_value() -
* instead it implements a new method compute_collection_values(), which
* can return multiple values. This new method is currently called only from
* the materialized-view code which uses collection_column_computation.
*/
class collection_column_computation final : public column_computation {
enum class kind {
keys,
values,
entries,
};
const bytes _collection_name;
const kind _kind;
collection_column_computation(const bytes& collection_name, kind kind) : _collection_name(collection_name), _kind(kind) {}
using collection_kv = std::pair<bytes_view, atomic_cell_view>;
void operate_on_collection_entries(
std::invocable<collection_kv*, collection_kv*, tombstone> auto&& old_and_new_row_func, const schema& schema,
const partition_key& key, const clustering_row& update, const std::optional<clustering_row>& existing) const;
public:
static collection_column_computation for_keys(const bytes& collection_name) {
return {collection_name, kind::keys};
}
static collection_column_computation for_values(const bytes& collection_name) {
return {collection_name, kind::values};
}
static collection_column_computation for_entries(const bytes& collection_name) {
return {collection_name, kind::entries};
}
static column_computation_ptr for_target_type(std::string_view type, const bytes& collection_name);
virtual bytes serialize() const override;
virtual bytes compute_value(const schema& schema, const partition_key& key) const override;
virtual column_computation_ptr clone() const override {
return std::make_unique<collection_column_computation>(*this);
}
virtual bool depends_on_non_primary_key_column() const override {
return true;
}
std::vector<db::view::view_key_and_action> compute_values_with_action(const schema& schema, const partition_key& key, const clustering_row& row, const std::optional<clustering_row>& existing) const;
virtual bytes_opt compute_value(const schema& schema, const partition_key& key, const clustering_row& row) const override;
};
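
collection_column_computation distinguishes the keys, values and entries of a collection column. The snippet below only illustrates what those three projections mean over an ordinary std::map; it does not model the real serialization or view-update machinery:

#include <cassert>
#include <map>
#include <string>
#include <vector>

enum class kind { keys, values, entries };

// Project a collection into the items a keys/values/entries index would see.
std::vector<std::string> project(const std::map<std::string, std::string>& collection, kind k) {
    std::vector<std::string> out;
    for (const auto& [key, value] : collection) {
        switch (k) {
        case kind::keys:    out.push_back(key); break;
        case kind::values:  out.push_back(value); break;
        case kind::entries: out.push_back(key + ":" + value); break;
        }
    }
    return out;
}

int main() {
    std::map<std::string, std::string> m{{"a", "1"}, {"b", "2"}};
    assert(project(m, kind::keys) == (std::vector<std::string>{"a", "b"}));
    assert(project(m, kind::values) == (std::vector<std::string>{"1", "2"}));
    assert(project(m, kind::entries).front() == "a:1");
}
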


@@ -26,7 +26,6 @@
#include <seastar/core/scheduling.hh>
#include <seastar/core/coroutine.hh>
#include <seastar/util/closeable.hh>
#include <seastar/core/shared_ptr.hh>
#include "sstables/sstables.hh"
#include "sstables/sstable_writer.hh"
@@ -34,6 +33,7 @@
#include "sstables/sstables_manager.hh"
#include "compaction.hh"
#include "compaction_manager.hh"
#include "mutation_reader.hh"
#include "schema.hh"
#include "db/system_keyspace.hh"
#include "service/priority_manager.hh"
@@ -48,11 +48,7 @@
#include "utils/UUID_gen.hh"
#include "utils/utf8.hh"
#include "utils/fmt-compat.hh"
#include "utils/error_injection.hh"
#include "readers/filtering.hh"
#include "readers/compacting.hh"
#include "tombstone_gc.hh"
#include "keys.hh"
namespace sstables {
@@ -90,7 +86,7 @@ compaction_type to_compaction_type(sstring type_name) {
throw std::runtime_error("Invalid Compaction Type Name");
}
std::string_view to_string(compaction_type type) {
static std::string_view to_string(compaction_type type) {
switch (type) {
case compaction_type::Compaction: return "Compact";
case compaction_type::Cleanup: return "Cleanup";
@@ -184,7 +180,7 @@ static api::timestamp_type get_max_purgeable_timestamp(const table_state& table_
}
static std::vector<shared_sstable> get_uncompacting_sstables(const table_state& table_s, std::vector<shared_sstable> sstables) {
auto all_sstables = boost::copy_range<std::vector<shared_sstable>>(*table_s.main_sstable_set().all());
auto all_sstables = boost::copy_range<std::vector<shared_sstable>>(*table_s.get_sstable_set().all());
auto& compacted_undeleted = table_s.compacted_undeleted_sstables();
all_sstables.insert(all_sstables.end(), compacted_undeleted.begin(), compacted_undeleted.end());
boost::sort(all_sstables, [] (const shared_sstable& x, const shared_sstable& y) {
@@ -278,18 +274,6 @@ class compacted_fragments_writer {
stop_func_t _stop_compaction_writer;
std::optional<utils::observer<>> _stop_request_observer;
bool _unclosed_partition = false;
struct partition_state {
dht::decorated_key_opt dk;
// Partition tombstone is saved for the purpose of replicating it to every fragment storing a piece of the partition.
// Then when reading from the SSTable run, we won't unnecessarily have to open >= 2 fragments: the one which
// contains the tombstone and another one (or more) that holds the partition slice being queried.
::tombstone tombstone;
// Used to determine whether any active tombstones need closing at EOS.
::tombstone current_emitted_tombstone;
// Track last emitted clustering row, which will be used to close active tombstone if splitting partition
position_in_partition last_pos = position_in_partition::before_all_clustered_rows();
bool is_splitting_partition = false;
} _current_partition;
private:
inline void maybe_abort_compaction();
@@ -299,13 +283,6 @@ private:
consume_end_of_stream();
});
}
void stop_current_writer();
bool can_split_large_partition() const;
void track_last_position(position_in_partition_view pos);
void split_large_partition();
void do_consume_new_partition(const dht::decorated_key& dk);
stop_iteration do_consume_end_of_partition();
public:
explicit compacted_fragments_writer(compaction& c, creator_func_t cpw, stop_func_t scw)
: _c(c)
@@ -324,7 +301,7 @@ public:
void consume_new_partition(const dht::decorated_key& dk);
void consume(tombstone t);
void consume(tombstone t) { _compaction_writer->writer.consume(t); }
stop_iteration consume(static_row&& sr, tombstone, bool) {
maybe_abort_compaction();
return _compaction_writer->writer.consume(std::move(sr));
@@ -332,11 +309,17 @@ public:
stop_iteration consume(static_row&& sr) {
return consume(std::move(sr), tombstone{}, bool{});
}
stop_iteration consume(clustering_row&& cr, row_tombstone, bool);
stop_iteration consume(clustering_row&& cr, row_tombstone, bool) {
maybe_abort_compaction();
return _compaction_writer->writer.consume(std::move(cr));
}
stop_iteration consume(clustering_row&& cr) {
return consume(std::move(cr), row_tombstone{}, bool{});
}
stop_iteration consume(range_tombstone_change&& rtc);
stop_iteration consume(range_tombstone&& rt) {
maybe_abort_compaction();
return _compaction_writer->writer.consume(std::move(rt));
}
stop_iteration consume_end_of_partition();
void consume_end_of_stream();
@@ -407,7 +390,7 @@ struct compaction_read_monitor_generator final : public read_monitor_generator {
}
private:
table_state& _table_s;
std::unordered_map<generation_type, compaction_read_monitor> _generated_monitors;
std::unordered_map<int64_t, compaction_read_monitor> _generated_monitors;
};
class formatted_sstables_list {
@@ -447,7 +430,7 @@ protected:
schema_ptr _schema;
reader_permit _permit;
std::vector<shared_sstable> _sstables;
std::vector<generation_type> _input_sstable_generations;
std::vector<unsigned long> _input_sstable_generations;
// Unused sstables are tracked because if compaction is interrupted we can only delete them.
// Deleting used sstables could potentially result in data loss.
std::unordered_set<shared_sstable> _new_partial_sstables;
@@ -462,11 +445,10 @@ protected:
uint64_t _estimated_partitions = 0;
db::replay_position _rp;
encoding_stats_collector _stats_collector;
bool _can_split_large_partition = false;
bool _contains_multi_fragment_runs = false;
mutation_source_metadata _ms_metadata = {};
compaction_sstable_replacer_fn _replacer;
run_id _run_identifier;
utils::UUID _run_identifier;
::io_priority_class _io_priority;
// optional clone of sstable set to be used for expiration purposes, so it will be set if expiration is enabled.
std::optional<sstable_set> _sstable_set;
@@ -494,7 +476,6 @@ protected:
, _type(descriptor.options.type())
, _max_sstable_size(descriptor.max_sstable_bytes)
, _sstable_level(descriptor.level)
, _can_split_large_partition(descriptor.can_split_large_partition)
, _replacer(std::move(descriptor.replacer))
, _run_identifier(descriptor.run_identifier)
, _io_priority(descriptor.io_priority)
@@ -505,7 +486,7 @@ protected:
for (auto& sst : _sstables) {
_stats_collector.update(sst->get_encoding_stats_for_compaction());
}
std::unordered_set<run_id> ssts_run_ids;
std::unordered_set<utils::UUID> ssts_run_ids;
_contains_multi_fragment_runs = std::any_of(_sstables.begin(), _sstables.end(), [&ssts_run_ids] (shared_sstable& sst) {
return !ssts_run_ids.insert(sst->run_identifier()).second;
});
@@ -615,10 +596,6 @@ protected:
const std::vector<shared_sstable>& used_garbage_collected_sstables() const {
return _used_garbage_collected_sstables;
}
bool enable_garbage_collected_sstable_writer() const noexcept {
return _contains_multi_fragment_runs && _max_sstable_size != std::numeric_limits<uint64_t>::max();
}
public:
compaction& operator=(const compaction&) = delete;
compaction(const compaction&) = delete;
@@ -636,15 +613,13 @@ private:
return _table_s.get_compaction_strategy().make_sstable_set(_schema);
}
future<> setup() {
void setup() {
auto ssts = make_lw_shared<sstables::sstable_set>(make_sstable_set_for_input());
formatted_sstables_list formatted_msg;
auto fully_expired = _table_s.fully_expired_sstables(_sstables, gc_clock::now());
min_max_tracker<api::timestamp_type> timestamp_tracker;
_input_sstable_generations.reserve(_sstables.size());
for (auto& sst : _sstables) {
co_await coroutine::maybe_yield();
auto& sst_stats = sst->get_stats_metadata();
timestamp_tracker.update(sst_stats.min_timestamp);
timestamp_tracker.update(sst_stats.max_timestamp);
@@ -694,14 +669,13 @@ private:
// to be compacted together.
future<> consume_without_gc_writer(gc_clock::time_point compaction_time) {
auto consumer = make_interposer_consumer([this] (flat_mutation_reader_v2 reader) mutable {
return seastar::async([this, reader = std::move(reader)] () mutable {
return seastar::async([this, reader = downgrade_to_v1(std::move(reader))] () mutable {
auto close_reader = deferred_close(reader);
auto cfc = compacted_fragments_writer(get_compacted_fragments_writer());
reader.consume_in_thread(std::move(cfc));
});
});
const auto& gc_state = _table_s.get_tombstone_gc_state();
return consumer(make_compacting_reader(make_sstable_reader(), compaction_time, max_purgeable_func(), gc_state));
return consumer(upgrade_to_v2(make_compacting_reader(downgrade_to_v1(make_sstable_reader()), compaction_time, max_purgeable_func())));
}
future<> consume() {
@@ -718,20 +692,18 @@ private:
auto close_reader = deferred_close(reader);
if (enable_garbage_collected_sstable_writer()) {
using compact_mutations = compact_for_compaction_v2<compacted_fragments_writer, compacted_fragments_writer>;
using compact_mutations = compact_for_compaction<compacted_fragments_writer, compacted_fragments_writer>;
auto cfc = compact_mutations(*schema(), now,
max_purgeable_func(),
_table_s.get_tombstone_gc_state(),
get_compacted_fragments_writer(),
get_gc_compacted_fragments_writer());
reader.consume_in_thread(std::move(cfc));
return;
}
using compact_mutations = compact_for_compaction_v2<compacted_fragments_writer, noop_compacted_fragments_consumer>;
using compact_mutations = compact_for_compaction<compacted_fragments_writer, noop_compacted_fragments_consumer>;
auto cfc = compact_mutations(*schema(), now,
max_purgeable_func(),
_table_s.get_tombstone_gc_state(),
get_compacted_fragments_writer(),
noop_compacted_fragments_consumer());
reader.consume_in_thread(std::move(cfc));
@@ -747,15 +719,12 @@ private:
virtual bool use_interposer_consumer() const {
return _table_s.get_compaction_strategy().use_interposer_consumer();
}
protected:
virtual compaction_result finish(std::chrono::time_point<db_clock> started_at, std::chrono::time_point<db_clock> ended_at) {
compaction_result finish(std::chrono::time_point<db_clock> started_at, std::chrono::time_point<db_clock> ended_at) {
compaction_result ret {
.new_sstables = std::move(_all_new_sstables),
.stats {
.ended_at = ended_at,
.start_size = _start_size,
.end_size = _end_size,
},
.ended_at = ended_at,
.end_size = _end_size,
};
auto ratio = double(_end_size) / double(_start_size);
@@ -779,11 +748,6 @@ protected:
return ret;
}
private:
void on_interrupt(std::exception_ptr ex) {
log_info("{} of {} sstables interrupted due to: {}", report_start_desc(), _input_sstable_generations.size(), ex);
delete_sstables_for_interrupted_compaction();
}
virtual std::string_view report_start_desc() const = 0;
virtual std::string_view report_finish_desc() const = 0;
@@ -861,6 +825,10 @@ protected:
log(log_level::trace, std::move(fmt), std::forward<Args>(args)...);
}
public:
bool enable_garbage_collected_sstable_writer() const noexcept {
return _contains_multi_fragment_runs && _max_sstable_size != std::numeric_limits<uint64_t>::max();
}
static future<compaction_result> run(std::unique_ptr<compaction> c);
friend class compacted_fragments_writer;
@@ -883,51 +851,7 @@ void compacted_fragments_writer::maybe_abort_compaction() {
}
}
void compacted_fragments_writer::stop_current_writer() {
// stop sstable writer being currently used.
_stop_compaction_writer(&*_compaction_writer);
_compaction_writer = std::nullopt;
}
bool compacted_fragments_writer::can_split_large_partition() const {
return _c._can_split_large_partition;
}
void compacted_fragments_writer::track_last_position(position_in_partition_view pos) {
if (can_split_large_partition()) {
_current_partition.last_pos = pos;
}
}
void compacted_fragments_writer::split_large_partition() {
// Closes the active range tombstone if needed, before emitting partition end.
// after_key(last_pos) is used for both closing and re-opening the active tombstone, which
// will result in the current fragment storing an inclusive end bound for last pos, and the
// next fragment storing an exclusive start bound for last pos. This is very important
// for not losing information on the range tombstone.
auto after_last_pos = position_in_partition::after_key(_current_partition.last_pos.key());
if (_current_partition.current_emitted_tombstone) {
auto rtc = range_tombstone_change(after_last_pos, tombstone{});
_c.log_debug("Closing active tombstone {} with {} for partition {}", _current_partition.current_emitted_tombstone, rtc, *_current_partition.dk);
_compaction_writer->writer.consume(std::move(rtc));
}
_c.log_debug("Splitting large partition {} in order to respect SSTable size limit of {}", *_current_partition.dk, pretty_printed_data_size(_c._max_sstable_size));
// Close partition in current writer, and open it again in a new writer.
do_consume_end_of_partition();
stop_current_writer();
do_consume_new_partition(*_current_partition.dk);
// Replicate partition tombstone to every fragment, allowing the SSTable run reader
// to open a single fragment during the read.
if (_current_partition.tombstone) {
consume(_current_partition.tombstone);
}
if (_current_partition.current_emitted_tombstone) {
_compaction_writer->writer.consume(range_tombstone_change(after_last_pos, _current_partition.current_emitted_tombstone));
}
_current_partition.is_splitting_partition = false;
}
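
The comments in split_large_partition spell out the invariant: before switching writers, an active range tombstone is closed at after(last_pos) in the current fragment and re-opened at the same position in the next one, so no deletion information is lost across the split. A toy standalone model of that bookkeeping (fragments are just lists of strings, positions are ints; none of this is the real writer API):

#include <cassert>
#include <optional>
#include <string>
#include <vector>

// Toy event stream: a fragment is a list of printable events.
using fragment = std::vector<std::string>;

struct writer_state {
    std::optional<int> active_tombstone;   // deletion time of the currently open range tombstone
    int last_pos = 0;                      // last clustering position written
};

// Close the current fragment and open the next one, preserving the active tombstone:
// close it at after(last_pos) in the old fragment and re-open it at the same position in the new one.
fragment split_fragment(fragment current, std::vector<fragment>& done, const writer_state& st) {
    if (st.active_tombstone) {
        current.push_back("close tombstone at after(" + std::to_string(st.last_pos) + ")");
    }
    done.push_back(std::move(current));
    fragment next{"partition start"};
    if (st.active_tombstone) {
        next.push_back("reopen tombstone " + std::to_string(*st.active_tombstone) +
                       " at after(" + std::to_string(st.last_pos) + ")");
    }
    return next;
}

int main() {
    std::vector<fragment> done;
    fragment current{"partition start", "open tombstone 7 at 1", "row 3"};
    writer_state st{.active_tombstone = 7, .last_pos = 3};
    fragment next = split_fragment(std::move(current), done, st);
    assert(done.back().back() == "close tombstone at after(3)");
    assert(next.back() == "reopen tombstone 7 at after(3)");
}
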
void compacted_fragments_writer::do_consume_new_partition(const dht::decorated_key& dk) {
void compacted_fragments_writer::consume_new_partition(const dht::decorated_key& dk) {
maybe_abort_compaction();
if (!_compaction_writer) {
_compaction_writer = _create_compaction_writer(dk);
@@ -935,55 +859,17 @@ void compacted_fragments_writer::do_consume_new_partition(const dht::decorated_k
_c.on_new_partition();
_compaction_writer->writer.consume_new_partition(dk);
_c._cdata.total_keys_written++;
_unclosed_partition = true;
}
stop_iteration compacted_fragments_writer::do_consume_end_of_partition() {
_unclosed_partition = false;
return _compaction_writer->writer.consume_end_of_partition();
}
void compacted_fragments_writer::consume_new_partition(const dht::decorated_key& dk) {
_current_partition = {
.dk = dk,
.tombstone = tombstone(),
.current_emitted_tombstone = tombstone(),
.last_pos = position_in_partition(position_in_partition::partition_start_tag_t()),
.is_splitting_partition = false
};
do_consume_new_partition(dk);
_c._cdata.total_keys_written++;
}
void compacted_fragments_writer::consume(tombstone t) {
_current_partition.tombstone = t;
_compaction_writer->writer.consume(t);
}
stop_iteration compacted_fragments_writer::consume(clustering_row&& cr, row_tombstone, bool) {
maybe_abort_compaction();
if (_current_partition.is_splitting_partition) [[unlikely]] {
split_large_partition();
}
track_last_position(cr.position());
auto ret = _compaction_writer->writer.consume(std::move(cr));
if (can_split_large_partition() && ret == stop_iteration::yes) [[unlikely]] {
_current_partition.is_splitting_partition = true;
}
return stop_iteration::no;
}
stop_iteration compacted_fragments_writer::consume(range_tombstone_change&& rtc) {
maybe_abort_compaction();
_current_partition.current_emitted_tombstone = rtc.tombstone();
track_last_position(rtc.position());
return _compaction_writer->writer.consume(std::move(rtc));
}
stop_iteration compacted_fragments_writer::consume_end_of_partition() {
auto ret = do_consume_end_of_partition();
auto ret = _compaction_writer->writer.consume_end_of_partition();
_unclosed_partition = false;
if (ret == stop_iteration::yes) {
stop_current_writer();
// stop sstable writer being currently used.
_stop_compaction_writer(&*_compaction_writer);
_compaction_writer = std::nullopt;
}
return ret;
}
@@ -1002,7 +888,7 @@ public:
}
virtual sstables::sstable_set make_sstable_set_for_input() const override {
return sstables::make_partitioned_sstable_set(_schema, false);
return sstables::make_partitioned_sstable_set(_schema, make_lw_shared<sstable_list>(sstable_list{}), false);
}
flat_mutation_reader_v2 make_sstable_reader() const override {
@@ -1128,7 +1014,7 @@ private:
_new_unused_sstables.insert(_new_unused_sstables.end(), unused_gc_sstables.begin(), unused_gc_sstables.end());
auto exhausted_ssts = std::vector<shared_sstable>(exhausted, _sstables.end());
log_debug("Replacing earlier exhausted sstable(s) {} by new sstable(s) {}", formatted_sstables_list(exhausted_ssts, false), formatted_sstables_list(_new_unused_sstables, true));
log_debug("Replacing earlier exhausted sstable(s) {} by new sstable {}", formatted_sstables_list(exhausted_ssts, false), sst->get_filename());
_replacer(get_compaction_completion_desc(exhausted_ssts, std::move(_new_unused_sstables)));
_sstables.erase(exhausted, _sstables.end());
_monitor_generator.remove_exhausted_sstables(exhausted_ssts);
@@ -1195,13 +1081,13 @@ class cleanup_compaction final : public regular_compaction {
}
};
owned_ranges_ptr _owned_ranges;
const dht::token_range_vector _owned_ranges;
incremental_owned_ranges_checker _owned_ranges_checker;
private:
// Called in a seastar thread
dht::partition_range_vector
get_ranges_for_invalidation(const std::vector<shared_sstable>& sstables) {
auto owned_ranges = dht::to_partition_ranges(*_owned_ranges, utils::can_yield::yes);
auto owned_ranges = dht::to_partition_ranges(_owned_ranges, utils::can_yield::yes);
auto non_owned_ranges = boost::copy_range<dht::partition_range_vector>(sstables
| boost::adaptors::transformed([] (const shared_sstable& sst) {
@@ -1233,10 +1119,10 @@ protected:
}
private:
cleanup_compaction(table_state& table_s, compaction_descriptor descriptor, compaction_data& cdata, owned_ranges_ptr owned_ranges)
cleanup_compaction(table_state& table_s, compaction_descriptor descriptor, compaction_data& cdata, dht::token_range_vector owned_ranges)
: regular_compaction(table_s, std::move(descriptor), cdata)
, _owned_ranges(std::move(owned_ranges))
, _owned_ranges_checker(*_owned_ranges)
, _owned_ranges_checker(_owned_ranges)
{
}
@@ -1258,7 +1144,7 @@ public:
return "Cleaned";
}
flat_mutation_reader_v2::filter make_partition_filter() const {
flat_mutation_reader::filter make_partition_filter() const {
return [this] (const dht::decorated_key& dk) {
#ifdef SEASTAR_DEBUG
// sstables should never be shared with other shards at this point.
@@ -1284,9 +1170,9 @@ public:
type,
schema.ks_name(),
schema.cf_name(),
new_key.key().with_schema(schema),
partition_key_to_string(new_key.key(), schema),
new_key,
current_key.key().with_schema(schema),
partition_key_to_string(current_key.key(), schema),
current_key,
action.empty() ? "" : "; ",
action);
@@ -1299,9 +1185,9 @@ public:
type,
schema.ks_name(),
schema.cf_name(),
new_key.key().with_schema(schema),
partition_key_to_string(new_key.key(), schema),
new_key,
current_key.key().with_schema(schema),
partition_key_to_string(current_key.key(), schema),
current_key,
action.empty() ? "" : "; ",
action);
@@ -1319,7 +1205,7 @@ public:
mf.mutation_fragment_kind(),
mf.has_key() ? format(" with key {}", mf.key().with_schema(schema)) : "",
mf.position(),
key.key().with_schema(schema),
partition_key_to_string(key.key(), schema),
key,
prev_pos.region(),
prev_pos.has_key() ? format(" with key {}", prev_pos.key().with_schema(schema)) : "",
@@ -1331,10 +1217,14 @@ public:
const auto& schema = validator.schema();
const auto& key = validator.previous_partition_key();
clogger.error("[{} compaction {}.{}] Invalid end-of-stream, last partition {} ({}) didn't end with a partition-end fragment{}{}",
type, schema.ks_name(), schema.cf_name(), key.key().with_schema(schema), key, action.empty() ? "" : "; ", action);
type, schema.ks_name(), schema.cf_name(), partition_key_to_string(key.key(), schema), key, action.empty() ? "" : "; ", action);
}
private:
static sstring partition_key_to_string(const partition_key& key, const ::schema& s) {
sstring ret = format("{}", key.with_schema(s));
return utils::utf8::validate((const uint8_t*)ret.data(), ret.size()) ? ret : "<non-utf8-key>";
}
class reader : public flat_mutation_reader_v2::impl {
using skip = bool_class<class skip_tag>;
@@ -1343,23 +1233,18 @@ private:
flat_mutation_reader_v2 _reader;
mutation_fragment_stream_validator _validator;
bool _skip_to_next_partition = false;
uint64_t& _validation_errors;
private:
void maybe_abort_scrub(std::function<void()> report_error) {
void maybe_abort_scrub() {
if (_scrub_mode == compaction_type_options::scrub::mode::abort) {
report_error();
throw compaction_aborted_exception(_schema->ks_name(), _schema->cf_name(), "scrub compaction found invalid data");
}
++_validation_errors;
}
void on_unexpected_partition_start(const mutation_fragment_v2& ps) {
auto report_fn = [this, &ps] (std::string_view action = "") {
report_invalid_partition_start(compaction_type::Scrub, _validator, ps.as_partition_start().key(), action);
};
maybe_abort_scrub(report_fn);
report_fn("Rectifying by adding assumed missing partition-end");
maybe_abort_scrub();
report_invalid_partition_start(compaction_type::Scrub, _validator, ps.as_partition_start().key(),
"Rectifying by adding assumed missing partition-end");
auto pe = mutation_fragment_v2(*_schema, _permit, partition_end{});
if (!_validator(pe)) {
@@ -1379,26 +1264,20 @@ private:
}
skip on_invalid_partition(const dht::decorated_key& new_key) {
auto report_fn = [this, &new_key] (std::string_view action = "") {
report_invalid_partition(compaction_type::Scrub, _validator, new_key, action);
};
maybe_abort_scrub(report_fn);
maybe_abort_scrub();
if (_scrub_mode == compaction_type_options::scrub::mode::segregate) {
report_fn("Detected");
report_invalid_partition(compaction_type::Scrub, _validator, new_key, "Detected");
_validator.reset(new_key);
// Let the segregating interposer consumer handle this.
return skip::no;
}
report_fn("Skipping");
report_invalid_partition(compaction_type::Scrub, _validator, new_key, "Skipping");
_skip_to_next_partition = true;
return skip::yes;
}
skip on_invalid_mutation_fragment(const mutation_fragment_v2& mf) {
auto report_fn = [this, &mf] (std::string_view action = "") {
report_invalid_mutation_fragment(compaction_type::Scrub, _validator, mf, "");
};
maybe_abort_scrub(report_fn);
maybe_abort_scrub();
const auto& key = _validator.previous_partition_key();
@@ -1413,7 +1292,8 @@ private:
// The only case a partition end is invalid is when it comes after
// another partition end, and we can just drop it in that case.
if (!mf.is_end_of_partition() && _scrub_mode == compaction_type_options::scrub::mode::segregate) {
report_fn("Injecting partition start/end to segregate out-of-order fragment");
report_invalid_mutation_fragment(compaction_type::Scrub, _validator, mf,
"Injecting partition start/end to segregate out-of-order fragment");
push_mutation_fragment(*_schema, _permit, partition_end{});
// We lose the partition tombstone, if any, but it will be
@@ -1426,23 +1306,19 @@ private:
return skip::no;
}
report_fn("Skipping");
report_invalid_mutation_fragment(compaction_type::Scrub, _validator, mf, "Skipping");
return skip::yes;
}
void on_invalid_end_of_stream() {
auto report_fn = [this] (std::string_view action = "") {
report_invalid_end_of_stream(compaction_type::Scrub, _validator, action);
};
maybe_abort_scrub(report_fn);
maybe_abort_scrub();
// Handle missing partition_end
push_mutation_fragment(mutation_fragment_v2(*_schema, _permit, partition_end{}));
report_fn("Rectifying by adding missing partition-end to the end of the stream");
report_invalid_end_of_stream(compaction_type::Scrub, _validator, "Rectifying by adding missing partition-end to the end of the stream");
}
void fill_buffer_from_underlying() {
utils::get_local_injector().inject("rest_api_keyspace_scrub_abort", [] { throw compaction_aborted_exception("", "", "scrub compaction found invalid data"); });
while (!_reader.is_buffer_empty() && !is_buffer_full()) {
auto mf = _reader.pop_mutation_fragment();
if (mf.is_partition_start()) {
@@ -1482,12 +1358,11 @@ private:
}
public:
reader(flat_mutation_reader_v2 underlying, compaction_type_options::scrub::mode scrub_mode, uint64_t& validation_errors)
reader(flat_mutation_reader_v2 underlying, compaction_type_options::scrub::mode scrub_mode)
: impl(underlying.schema(), underlying.permit())
, _scrub_mode(scrub_mode)
, _reader(std::move(underlying))
, _validator(*_schema)
, _validation_errors(validation_errors)
{ }
virtual future<> fill_buffer() override {
if (_end_of_stream) {
@@ -1535,7 +1410,6 @@ private:
std::string _scrub_start_description;
mutable std::string _scrub_finish_description;
uint64_t _bucket_count = 0;
mutable uint64_t _validation_errors = 0;
public:
scrub_compaction(table_state& table_s, compaction_descriptor descriptor, compaction_data& cdata, compaction_type_options::scrub options)
@@ -1558,7 +1432,7 @@ public:
flat_mutation_reader_v2 make_sstable_reader() const override {
auto crawling_reader = _compacting->make_crawling_reader(_schema, _permit, _io_priority, nullptr);
return make_flat_mutation_reader_v2<reader>(std::move(crawling_reader), _options.operation_mode, _validation_errors);
return make_flat_mutation_reader_v2<reader>(std::move(crawling_reader), _options.operation_mode);
}
uint64_t partitions_per_sstable() const override {
@@ -1589,17 +1463,12 @@ public:
return _options.operation_mode == compaction_type_options::scrub::mode::segregate;
}
compaction_result finish(std::chrono::time_point<db_clock> started_at, std::chrono::time_point<db_clock> ended_at) override {
auto ret = compaction::finish(started_at, ended_at);
ret.stats.validation_errors = _validation_errors;
return ret;
}
friend flat_mutation_reader_v2 make_scrubbing_reader(flat_mutation_reader_v2 rd, compaction_type_options::scrub::mode scrub_mode, uint64_t& validation_errors);
friend flat_mutation_reader_v2 make_scrubbing_reader(flat_mutation_reader_v2 rd, compaction_type_options::scrub::mode scrub_mode);
friend flat_mutation_reader make_scrubbing_reader(flat_mutation_reader rd, compaction_type_options::scrub::mode scrub_mode);
};
flat_mutation_reader_v2 make_scrubbing_reader(flat_mutation_reader_v2 rd, compaction_type_options::scrub::mode scrub_mode, uint64_t& validation_errors) {
return make_flat_mutation_reader_v2<scrub_compaction::reader>(std::move(rd), scrub_mode, validation_errors);
flat_mutation_reader_v2 make_scrubbing_reader(flat_mutation_reader_v2 rd, compaction_type_options::scrub::mode scrub_mode) {
return make_flat_mutation_reader_v2<scrub_compaction::reader>(std::move(rd), scrub_mode);
}
class resharding_compaction final : public compaction {
@@ -1617,7 +1486,7 @@ class resharding_compaction final : public compaction {
uint64_t estimated_partitions = 0;
};
std::vector<estimated_values> _estimation_per_shard;
std::vector<run_id> _run_identifiers;
std::vector<utils::UUID> _run_identifiers;
private:
// return estimated partitions per sstable for a given shard
uint64_t partitions_per_sstable(shard_id s) const {
@@ -1641,7 +1510,7 @@ public:
}
}
for (auto i : boost::irange(0u, smp::count)) {
_run_identifiers[i] = run_id::create_random_id();
_run_identifiers[i] = utils::make_random_uuid();
}
}
@@ -1698,14 +1567,14 @@ public:
future<compaction_result> compaction::run(std::unique_ptr<compaction> c) {
return seastar::async([c = std::move(c)] () mutable {
c->setup().get();
c->setup();
auto consumer = c->consume();
auto start_time = db_clock::now();
try {
consumer.get();
} catch (...) {
c->on_interrupt(std::current_exception());
c->delete_sstables_for_interrupted_compaction();
c = nullptr; // make sure writers are stopped while running in thread context. This is because of calls to file.close().get();
throw;
}
@@ -1757,10 +1626,10 @@ static std::unique_ptr<compaction> make_compaction(table_state& table_s, sstable
return descriptor.options.visit(visitor_factory);
}
future<uint64_t> scrub_validate_mode_validate_reader(flat_mutation_reader_v2 reader, const compaction_data& cdata) {
future<bool> scrub_validate_mode_validate_reader(flat_mutation_reader_v2 reader, const compaction_data& cdata) {
auto schema = reader.schema();
uint64_t errors = 0;
bool valid = true;
std::exception_ptr ex;
try {
@@ -1779,24 +1648,24 @@ future<uint64_t> scrub_validate_mode_validate_reader(flat_mutation_reader_v2 rea
if (!validator(mf)) {
scrub_compaction::report_invalid_partition_start(compaction_type::Scrub, validator, ps.key());
validator.reset(mf);
++errors;
valid = false;
}
if (!validator(ps.key())) {
scrub_compaction::report_invalid_partition(compaction_type::Scrub, validator, ps.key());
validator.reset(ps.key());
++errors;
valid = false;
}
} else {
if (!validator(mf)) {
scrub_compaction::report_invalid_mutation_fragment(compaction_type::Scrub, validator, mf);
validator.reset(mf);
++errors;
valid = false;
}
}
}
if (!validator.on_end_of_stream()) {
scrub_compaction::report_invalid_end_of_stream(compaction_type::Scrub, validator);
++errors;
valid = false;
}
} catch (...) {
ex = std::current_exception();
@@ -1808,14 +1677,14 @@ future<uint64_t> scrub_validate_mode_validate_reader(flat_mutation_reader_v2 rea
co_return coroutine::exception(std::move(ex));
}
co_return errors;
co_return valid;
}
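
The hunks above trade a single bool `valid` flag for a uint64_t error counter: the counter still yields the old yes/no answer via `errors == 0` while also reporting how many problems were found. A tiny standalone illustration of the pattern over an unrelated, made-up invariant:

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Count how many values violate a simple ordering invariant (values must not decrease).
uint64_t count_validation_errors(const std::vector<int>& values) {
    uint64_t errors = 0;
    for (std::size_t i = 1; i < values.size(); ++i) {
        if (values[i] < values[i - 1]) {
            ++errors;   // keep scanning instead of bailing out on the first problem
        }
    }
    return errors;
}

int main() {
    assert(count_validation_errors({1, 2, 3}) == 0);
    const auto errors = count_validation_errors({1, 3, 2, 5, 4});
    assert(errors == 2);
    const bool valid = (errors == 0);   // the old bool answer is still derivable
    assert(!valid);
}
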
static future<compaction_result> scrub_sstables_validate_mode(sstables::compaction_descriptor descriptor, compaction_data& cdata, table_state& table_s) {
auto schema = table_s.schema();
formatted_sstables_list sstables_list_msg;
auto sstables = make_lw_shared<sstables::sstable_set>(sstables::make_partitioned_sstable_set(schema, false));
auto sstables = make_lw_shared<sstables::sstable_set>(sstables::make_partitioned_sstable_set(schema, make_lw_shared<sstable_list>(sstable_list{}), false));
for (const auto& sst : descriptor.sstables) {
sstables_list_msg += sst;
sstables->insert(sst);
@@ -1826,11 +1695,11 @@ static future<compaction_result> scrub_sstables_validate_mode(sstables::compacti
auto permit = table_s.make_compaction_reader_permit();
auto reader = sstables->make_crawling_reader(schema, permit, descriptor.io_priority, nullptr);
const auto validation_errors = co_await scrub_validate_mode_validate_reader(std::move(reader), cdata);
const auto valid = co_await scrub_validate_mode_validate_reader(std::move(reader), cdata);
clogger.info("Finished scrubbing in validate mode {} - sstable(s) are {}", sstables_list_msg, validation_errors == 0 ? "valid" : "invalid");
clogger.info("Finished scrubbing in validate mode {} - sstable(s) are {}", sstables_list_msg, valid ? "valid" : "invalid");
if (validation_errors != 0) {
if (!valid) {
for (auto& sst : *sstables->all()) {
co_await sst->move_to_quarantine();
}
@@ -1838,10 +1707,7 @@ static future<compaction_result> scrub_sstables_validate_mode(sstables::compacti
co_return compaction_result {
.new_sstables = {},
.stats = {
.ended_at = db_clock::now(),
.validation_errors = validation_errors,
},
.ended_at = db_clock::now(),
};
}
@@ -1874,26 +1740,26 @@ get_fully_expired_sstables(const table_state& table_s, const std::vector<sstable
int64_t min_timestamp = std::numeric_limits<int64_t>::max();
for (auto& sstable : overlapping) {
auto gc_before = sstable->get_gc_before_for_fully_expire(compaction_time, table_s.get_tombstone_gc_state());
auto gc_before = sstable->get_gc_before_for_fully_expire(compaction_time);
if (sstable->get_max_local_deletion_time() >= gc_before) {
min_timestamp = std::min(min_timestamp, sstable->get_stats_metadata().min_timestamp);
}
}
auto compacted_undeleted_gens = boost::copy_range<std::unordered_set<generation_type>>(table_s.compacted_undeleted_sstables()
auto compacted_undeleted_gens = boost::copy_range<std::unordered_set<int64_t>>(table_s.compacted_undeleted_sstables()
| boost::adaptors::transformed(std::mem_fn(&sstables::sstable::generation)));
auto has_undeleted_ancestor = [&compacted_undeleted_gens] (auto& candidate) {
// Get ancestors from the sstable, which is empty after restart. It works for this purpose because
// we only need to check that an sstable compacted *in this instance* doesn't have an undeleted ancestor.
// We don't get it from sstable metadata because the mc format doesn't have it available.
return boost::algorithm::any_of(candidate->compaction_ancestors(), [&compacted_undeleted_gens] (const generation_type& gen) {
return boost::algorithm::any_of(candidate->compaction_ancestors(), [&compacted_undeleted_gens] (auto gen) {
return compacted_undeleted_gens.contains(gen);
});
};
// SSTables that do not contain live data are added to the list of possibly expired sstables.
for (auto& candidate : compacting) {
auto gc_before = candidate->get_gc_before_for_fully_expire(compaction_time, table_s.get_tombstone_gc_state());
auto gc_before = candidate->get_gc_before_for_fully_expire(compaction_time);
clogger.debug("Checking if candidate of generation {} and max_deletion_time {} is expired, gc_before is {}",
candidate->generation(), candidate->get_stats_metadata().max_local_deletion_time, gc_before);
// A fully expired sstable which has an ancestor undeleted shouldn't be compacted because
@@ -1924,11 +1790,7 @@ get_fully_expired_sstables(const table_state& table_s, const std::vector<sstable
}
unsigned compaction_descriptor::fan_in() const {
return boost::copy_range<std::unordered_set<run_id>>(sstables | boost::adaptors::transformed(std::mem_fn(&sstables::sstable::run_identifier))).size();
}
uint64_t compaction_descriptor::sstables_size() const {
return boost::accumulate(sstables | boost::adaptors::transformed(std::mem_fn(&sstables::sstable::data_size)), uint64_t(0));
return boost::copy_range<std::unordered_set<utils::UUID>>(sstables | boost::adaptors::transformed(std::mem_fn(&sstables::sstable::run_identifier))).size();
}
}
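The get_fully_expired_sstables hunks above lean on two small idioms: building a set of the generations of sstables compacted in this instance but not yet deleted, and rejecting any expiration candidate whose compaction ancestors appear in that set. A rough, self-contained sketch of that filter with plain integers standing in for generations (hypothetical types, not the real sstable API):

#include <algorithm>
#include <cstdint>
#include <unordered_set>
#include <vector>

// Hypothetical candidate: its own generation, the generations it was compacted from,
// and whether the gc_before / max_local_deletion_time check already deemed it expired.
struct candidate {
    int64_t generation;
    std::vector<int64_t> compaction_ancestors;
    bool fully_expired;
};

// Keep only candidates that are fully expired *and* have no undeleted ancestor,
// mirroring the has_undeleted_ancestor lambda in the hunk above.
std::vector<int64_t> expired_without_undeleted_ancestor(
        const std::vector<candidate>& compacting,
        const std::unordered_set<int64_t>& compacted_undeleted_gens) {
    std::vector<int64_t> result;
    for (const auto& c : compacting) {
        bool has_undeleted_ancestor = std::any_of(
                c.compaction_ancestors.begin(), c.compaction_ancestors.end(),
                [&] (int64_t gen) { return compacted_undeleted_gens.contains(gen); });
        if (c.fully_expired && !has_undeleted_ancestor) {
            result.push_back(c.generation);
        }
    }
    return result;
}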

@@ -17,8 +17,8 @@
#include "utils/UUID.hh"
#include "table_state.hh"
#include <seastar/core/thread.hh>
#include <seastar/core/abort_source.hh>
class flat_mutation_reader;
using namespace compaction;
namespace sstables {
@@ -40,19 +40,9 @@ public:
friend std::ostream& operator<<(std::ostream&, pretty_printed_throughput);
};
// Return the name of the compaction type
// as used over the REST api, e.g. "COMPACTION" or "CLEANUP".
sstring compaction_name(compaction_type type);
// Reverse map the name of the compaction type
// as used over the REST api, e.g. "COMPACTION" or "CLEANUP",
// to the compaction_type enum code.
compaction_type to_compaction_type(sstring type_name);
// Return a string representing the compaction type
// as a verb for logging purposes, e.g. "Compact" or "Cleanup".
std::string_view to_string(compaction_type type);
struct compaction_info {
utils::UUID compaction_uuid;
compaction_type type = compaction_type::Compaction;
@@ -66,7 +56,6 @@ struct compaction_data {
uint64_t total_partitions = 0;
uint64_t total_keys_written = 0;
sstring stop_requested;
abort_source abort;
utils::UUID compaction_uuid;
unsigned compaction_fan_in = 0;
struct replacement {
@@ -81,33 +70,13 @@ struct compaction_data {
void stop(sstring reason) {
stop_requested = std::move(reason);
abort.request_abort();
}
};
struct compaction_stats {
std::chrono::time_point<db_clock> ended_at;
uint64_t start_size = 0;
uint64_t end_size = 0;
uint64_t validation_errors = 0;
compaction_stats& operator+=(const compaction_stats& r) {
ended_at = std::max(ended_at, r.ended_at);
start_size += r.start_size;
end_size += r.end_size;
validation_errors += r.validation_errors;
return *this;
}
friend compaction_stats operator+(const compaction_stats& l, const compaction_stats& r) {
auto tmp = l;
tmp += r;
return tmp;
}
};
struct compaction_result {
std::vector<sstables::shared_sstable> new_sstables;
compaction_stats stats;
std::chrono::time_point<db_clock> ended_at;
uint64_t end_size = 0;
};
// Compact a list of N sstables into M sstables.
@@ -126,9 +95,9 @@ std::unordered_set<sstables::shared_sstable>
get_fully_expired_sstables(const table_state& table_s, const std::vector<sstables::shared_sstable>& compacting, gc_clock::time_point gc_before);
// For tests, can drop after we virtualize sstables.
flat_mutation_reader_v2 make_scrubbing_reader(flat_mutation_reader_v2 rd, compaction_type_options::scrub::mode scrub_mode, uint64_t& validation_errors);
flat_mutation_reader_v2 make_scrubbing_reader(flat_mutation_reader_v2 rd, compaction_type_options::scrub::mode scrub_mode);
// For tests, can drop after we virtualize sstables.
future<uint64_t> scrub_validate_mode_validate_reader(flat_mutation_reader_v2 rd, const compaction_data& info);
future<bool> scrub_validate_mode_validate_reader(flat_mutation_reader_v2 rd, const compaction_data& info);
}
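The compaction_stats struct shown in the hunk above is designed to be merged across sub-compactions: operator+= keeps the latest ended_at and adds up the size and validation-error counters, and operator+ is defined in terms of it. A minimal self-contained sketch of that merge behaviour (field names taken from the hunk, everything else hypothetical):

#include <algorithm>
#include <chrono>
#include <cstdint>

// Trimmed-down analogue of compaction_stats from the header above.
struct stats {
    std::chrono::system_clock::time_point ended_at{};
    uint64_t start_size = 0;
    uint64_t end_size = 0;
    uint64_t validation_errors = 0;

    stats& operator+=(const stats& r) {
        ended_at = std::max(ended_at, r.ended_at);   // keep the latest completion time
        start_size += r.start_size;                  // byte and error counters just add up
        end_size += r.end_size;
        validation_errors += r.validation_errors;
        return *this;
    }
    friend stats operator+(stats l, const stats& r) {
        l += r;
        return l;
    }
};

int main() {
    auto now = std::chrono::system_clock::now();
    stats a{now, 100, 60, 0};
    stats b{now, 200, 150, 2};
    stats total = a + b;   // start_size == 300, end_size == 210, validation_errors == 2
    return total.validation_errors == 2 ? 0 : 1;
}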

@@ -60,7 +60,8 @@ public:
using ongoing_compactions = std::unordered_map<sstables::shared_sstable, backlog_read_progress_manager*>;
struct impl {
virtual void replace_sstables(std::vector<sstables::shared_sstable> old_ssts, std::vector<sstables::shared_sstable> new_ssts) = 0;
virtual void add_sstable(sstables::shared_sstable sst) = 0;
virtual void remove_sstable(sstables::shared_sstable sst) = 0;
virtual double backlog(const ongoing_writes& ow, const ongoing_compactions& oc) const = 0;
virtual ~impl() { }
};
@@ -71,21 +72,22 @@ public:
~compaction_backlog_tracker();
double backlog() const;
void replace_sstables(const std::vector<sstables::shared_sstable>& old_ssts, const std::vector<sstables::shared_sstable>& new_ssts);
void add_sstable(sstables::shared_sstable sst);
void remove_sstable(sstables::shared_sstable sst);
void register_partially_written_sstable(sstables::shared_sstable sst, backlog_write_progress_manager& wp);
void register_compacting_sstable(sstables::shared_sstable sst, backlog_read_progress_manager& rp);
void transfer_ongoing_charges(compaction_backlog_tracker& new_bt, bool move_read_charges = true);
void revert_charges(sstables::shared_sstable sst);
void disable() {
_impl = {};
_ongoing_writes = {};
_ongoing_compactions = {};
}
private:
// Returns true if this SSTable can be added to or removed from the tracker.
bool sstable_belongs_to_tracker(const sstables::shared_sstable& sst);
bool disabled() const noexcept { return !_impl; }
void disable() {
_disabled = true;
_ongoing_writes = {};
_ongoing_compactions = {};
}
bool _disabled = false;
std::unique_ptr<impl> _impl;
// We keep track of this so that we can transfer to a new tracker if the compaction strategy is
// changed in the middle of a compaction.
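The compaction_backlog_tracker hunks above show two ways of modelling a disabled tracker: one side keeps a separate _disabled flag next to _impl, the other resets _impl and treats an empty pointer as the disabled state (disabled() is just !_impl). A tiny sketch of the pointer-only variant, with hypothetical names rather than the real tracker interface:

#include <memory>

// Minimal strategy interface, standing in for compaction_backlog_tracker::impl.
struct backlog_impl {
    virtual double backlog() const = 0;
    virtual ~backlog_impl() = default;
};

class tracker {
    std::unique_ptr<backlog_impl> _impl;
public:
    explicit tracker(std::unique_ptr<backlog_impl> impl) : _impl(std::move(impl)) {}

    // An empty _impl doubles as the "disabled" state, so no separate flag can
    // ever fall out of sync with the pointer it is supposed to describe.
    bool disabled() const noexcept { return !_impl; }
    void disable() { _impl = {}; }

    double backlog() const { return disabled() ? 0.0 : _impl->backlog(); }
};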

@@ -14,22 +14,12 @@
#include <variant>
#include <seastar/core/smp.hh>
#include <seastar/core/file.hh>
#include "sstables/types_fwd.hh"
#include "sstables/shared_sstable.hh"
#include "sstables/sstable_set.hh"
#include "utils/UUID.hh"
#include "dht/i_partitioner.hh"
#include "compaction_weight_registration.hh"
namespace compaction {
using owned_ranges_ptr = lw_shared_ptr<const dht::token_range_vector>;
inline owned_ranges_ptr make_owned_ranges_ptr(dht::token_range_vector&& ranges) {
return make_lw_shared<const dht::token_range_vector>(std::move(ranges));
}
} // namespace compaction
namespace sstables {
enum class compaction_type {
@@ -64,10 +54,10 @@ public:
struct regular {
};
struct cleanup {
compaction::owned_ranges_ptr owned_ranges;
dht::token_range_vector owned_ranges;
};
struct upgrade {
compaction::owned_ranges_ptr owned_ranges;
dht::token_range_vector owned_ranges;
};
struct scrub {
enum class mode {
@@ -112,11 +102,11 @@ public:
return compaction_type_options(regular{});
}
static compaction_type_options make_cleanup(compaction::owned_ranges_ptr owned_ranges) {
static compaction_type_options make_cleanup(dht::token_range_vector&& owned_ranges) {
return compaction_type_options(cleanup{std::move(owned_ranges)});
}
static compaction_type_options make_upgrade(compaction::owned_ranges_ptr owned_ranges) {
static compaction_type_options make_upgrade(dht::token_range_vector&& owned_ranges) {
return compaction_type_options(upgrade{std::move(owned_ranges)});
}
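In the make_cleanup/make_upgrade hunk above, one signature moves a dht::token_range_vector into each options object while the other takes compaction::owned_ranges_ptr, a shared pointer to an immutable range vector, presumably so several concurrent jobs can reference one copy of the local ranges instead of each holding their own. A self-contained sketch of that shared-ownership pattern, using std::shared_ptr and a plain vector as stand-ins:

#include <memory>
#include <utility>
#include <vector>

// Stand-ins for dht::token_range_vector and compaction::owned_ranges_ptr.
using range = std::pair<long, long>;
using range_vector = std::vector<range>;
using owned_ranges_ptr = std::shared_ptr<const range_vector>;

owned_ranges_ptr make_owned_ranges_ptr(range_vector&& ranges) {
    return std::make_shared<const range_vector>(std::move(ranges));
}

// A cleanup job only reads the ranges, so it holds the shared pointer rather than a copy.
struct cleanup_job {
    owned_ranges_ptr owned_ranges;
};

int main() {
    auto owned = make_owned_ranges_ptr({{0, 100}, {200, 300}});
    cleanup_job a{owned};   // both jobs share one immutable range list
    cleanup_job b{owned};
    return a.owned_ranges->size() == b.owned_ranges->size() ? 0 : 1;
}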
@@ -153,10 +143,10 @@ struct compaction_descriptor {
int level;
// Threshold size for sstable(s) to be created.
uint64_t max_sstable_bytes;
// Can split large partitions at clustering boundary.
bool can_split_large_partition = false;
// Run identifier of output sstables.
sstables::run_id run_identifier;
utils::UUID run_identifier;
// Calls compaction manager's task for this compaction to release reference to exhausted sstables.
std::function<void(const std::vector<shared_sstable>& exhausted_sstables)> release_exhausted;
// The options passed down to the compaction code.
// This also selects the kind of compaction to do.
compaction_type_options options = compaction_type_options::make_regular();
@@ -175,12 +165,14 @@ struct compaction_descriptor {
static constexpr uint64_t default_max_sstable_bytes = std::numeric_limits<uint64_t>::max();
explicit compaction_descriptor(std::vector<sstables::shared_sstable> sstables,
std::optional<sstables::sstable_set> all_sstables_snapshot,
::io_priority_class io_priority,
int level = default_level,
uint64_t max_sstable_bytes = default_max_sstable_bytes,
run_id run_identifier = run_id::create_random_id(),
utils::UUID run_identifier = utils::make_random_uuid(),
compaction_type_options options = compaction_type_options::make_regular())
: sstables(std::move(sstables))
, all_sstables_snapshot(std::move(all_sstables_snapshot))
, level(level)
, max_sstable_bytes(max_sstable_bytes)
, run_identifier(run_identifier)
@@ -190,11 +182,13 @@ struct compaction_descriptor {
explicit compaction_descriptor(sstables::has_only_fully_expired has_only_fully_expired,
std::vector<sstables::shared_sstable> sstables,
std::optional<sstables::sstable_set> all_sstables_snapshot,
::io_priority_class io_priority)
: sstables(std::move(sstables))
, all_sstables_snapshot(std::move(all_sstables_snapshot))
, level(default_level)
, max_sstable_bytes(default_max_sstable_bytes)
, run_identifier(run_id::create_random_id())
, run_identifier(utils::make_random_uuid())
, options(compaction_type_options::make_regular())
, io_priority(io_priority)
, has_only_fully_expired(has_only_fully_expired)
@@ -202,10 +196,6 @@ struct compaction_descriptor {
// Return fan-in of this job, which is equal to its number of runs.
unsigned fan_in() const;
// Enables garbage collection for this descriptor, meaning that compaction will be able to purge expired data
void enable_garbage_collection(sstables::sstable_set snapshot) { all_sstables_snapshot = std::move(snapshot); }
// Returns total size of all sstables contained in this descriptor
uint64_t sstables_size() const;
};
}
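fan_in() and, on the side of the diff that still has it, sstables_size() are small reductions over the descriptor's sstable list: fan-in is the number of distinct run identifiers among the inputs, and the size is simply the sum of per-sstable data sizes. A self-contained sketch of both with hypothetical plain structs:

#include <cstdint>
#include <string>
#include <unordered_set>
#include <vector>

// Hypothetical stand-in for an input sstable: its run identifier and on-disk data size.
struct sst {
    std::string run_identifier;
    uint64_t data_size;
};

// Fan-in equals the number of distinct runs among the input sstables.
unsigned fan_in(const std::vector<sst>& sstables) {
    std::unordered_set<std::string> runs;
    for (const auto& s : sstables) {
        runs.insert(s.run_identifier);
    }
    return static_cast<unsigned>(runs.size());
}

// Total input size is the sum of the per-sstable data sizes.
uint64_t sstables_size(const std::vector<sst>& sstables) {
    uint64_t total = 0;
    for (const auto& s : sstables) {
        total += s.data_size;
    }
    return total;
}

int main() {
    std::vector<sst> in{{"run-a", 100}, {"run-a", 50}, {"run-b", 25}};
    return fan_in(in) == 2 && sstables_size(in) == 175 ? 0 : 1;
}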

Some files were not shown because too many files have changed in this diff.