schema/schema_registry: reformat code style

Reformat constructor initializer lists, brace placement, and line wrapping for consistency. The seastar logger already checks is_enabled() before formatting arguments, so explicit guards around trace calls with simple variable arguments are unnecessary. AI-assisted: OpenCode / Claude Opus 4.6 Signed-off-by: Yaniv Kaul <yaniv.kaul@scylladb.com>
utils/gcp/object_storage: fix dead code and format string bug
2026-03-24 18:30:41 +02:00 · 2026-03-24 18:30:41 +02:00 · 2026-03-24 18:30:40 +02:00 · 2026-03-24 18:30:40 +02:00 · 2026-03-24 18:30:40 +02:00 · 2026-03-24 18:30:40 +02:00
2762 changed files with 25904 additions and 17858 deletions
--- a/.github/copilot-instructions.md
+++ b/.github/copilot-instructions.md
@@ -55,22 +55,26 @@ ninja build/<mode>/test/boost/<test_name>
 ninja build/<mode>/scylla

 # Run all tests in a file
-./test.py --mode=<mode> <test_path>
+./test.py --mode=<mode> test/<suite>/<test_name>.py

 # Run a single test case from a file
-./test.py --mode=<mode> <test_path>::<test_function_name>
+./test.py --mode=<mode> test/<suite>/<test_name>.py::<test_function_name>
+
+# Run all tests in a directory
+./test.py --mode=<mode> test/<suite>/

 # Examples
-./test.py --mode=dev alternator/
-./test.py --mode=dev cluster/test_raft_voters::test_raft_limited_voters_retain_coordinator
+./test.py --mode=dev test/alternator/
+./test.py --mode=dev test/cluster/test_raft_voters.py::test_raft_limited_voters_retain_coordinator
+./test.py --mode=dev test/cqlpy/test_json.py

 # Optional flags
-./test.py --mode=dev cluster/test_raft_no_quorum -v  # Verbose output
-./test.py --mode=dev cluster/test_raft_no_quorum --repeat 5  # Repeat test 5 times
+./test.py --mode=dev test/cluster/test_raft_no_quorum.py -v  # Verbose output
+./test.py --mode=dev test/cluster/test_raft_no_quorum.py --repeat 5  # Repeat test 5 times
 ```

 **Important:**
- Use path without `.py` extension (e.g., `cluster/test_raft_no_quorum`, not `cluster/test_raft_no_quorum.py`)
+- Use full path with `.py` extension (e.g., `test/cluster/test_raft_no_quorum.py`, not `cluster/test_raft_no_quorum`)
 - To run a single test case, append `::<test_function_name>` to the file path
 - Add `-v` for verbose output
 - Add `--repeat <num>` to repeat a test multiple times
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -1,6 +1,6 @@
 version: 2
 updates:
- package-ecosystem: "pip"
+- package-ecosystem: "uv"
  directory: "/docs"
  schedule:
    interval: "daily"
--- a/.github/workflows/backport-pr-fixes-validation.yaml
+++ b/.github/workflows/backport-pr-fixes-validation.yaml
@@ -8,6 +8,9 @@ on:
 jobs:
  check-fixes-prefix:
    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      issues: write
    steps:
      - name: Check PR body for "Fixes" prefix patterns
        uses: actions/github-script@v7
--- a/.github/workflows/call_jira_sync.yml
+++ b/.github/workflows/call_jira_sync.yml
@@ -1,8 +1,8 @@
-name: Sync Jira Based on PR Events
+name: Sync Jira Based on PR Events

 on:
  pull_request_target:
-    types: [opened, ready_for_review, review_requested, labeled, unlabeled, closed]
+    types: [opened, edited, ready_for_review, review_requested, labeled, unlabeled, closed]

 permissions:
  contents: read
@@ -10,32 +10,9 @@ permissions:
  issues: write

 jobs:
-  jira-sync-pr-opened:
-    if: github.event.action == 'opened'
-    uses: scylladb/github-automation/.github/workflows/main_jira_sync_pr_opened.yml@main
-    secrets:
-      caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
-
-  jira-sync-in-review:
-    if: github.event.action == 'ready_for_review' || github.event.action == 'review_requested'
-    uses: scylladb/github-automation/.github/workflows/main_jira_sync_in_review.yml@main
-    secrets:
-      caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
-
-  jira-sync-add-label:
-    if: github.event.action == 'labeled'
-    uses: scylladb/github-automation/.github/workflows/main_jira_sync_add_label.yml@main
-    secrets:
-      caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
-
-  jira-status-remove-label:
-    if: github.event.action == 'unlabeled'
-    uses: scylladb/github-automation/.github/workflows/main_jira_sync_remove_label.yml@main
-    secrets:
-      caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
-
-  jira-status-pr-closed:
-    if: github.event.action == 'closed' 
-    uses: scylladb/github-automation/.github/workflows/main_jira_sync_pr_closed.yml@main
+  jira-sync:
+    uses: scylladb/github-automation/.github/workflows/main_pr_events_jira_sync.yml@main
+    with:
+      caller_action: ${{ github.event.action }}
    secrets:
      caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
--- a/.github/workflows/call_sync_milestone_to_jira.yml
+++ b/.github/workflows/call_sync_milestone_to_jira.yml
@@ -1,14 +1,14 @@
-name: Call Jira release creation for new milestone
+name: Call Jira release creation for new milestone

 on:
  milestone:
-    types: [created]
+    types: [created, closed]

 jobs:
  sync-milestone-to-jira:
    uses: scylladb/github-automation/.github/workflows/main_sync_milestone_to_jira_release.yml@main
    with:
      # Comma-separated list of Jira project keys
-      jira_project_keys: "SCYLLADB,CUSTOMER,SMI"
+      jira_project_keys: "SCYLLADB,CUSTOMER,SMI,RELENG,VECTOR"
    secrets:
      caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
--- a/.github/workflows/docs-pages.yaml
+++ b/.github/workflows/docs-pages.yaml
@@ -19,6 +19,8 @@ on:
 jobs:
  release:
    permissions:
+      pages: write
+      id-token: write
      contents: write
    runs-on: ubuntu-latest
    steps:
@@ -31,7 +33,9 @@ jobs:
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
-          python-version: "3.10"
+          python-version: "3.12"
+      - name: Install uv
+        uses: astral-sh/setup-uv@v6
      - name: Set up env
        run: make -C docs FLAG="${{ env.FLAG }}" setupenv
      - name: Build docs
--- a/.github/workflows/docs-pr.yaml
+++ b/.github/workflows/docs-pr.yaml
@@ -29,7 +29,9 @@ jobs:
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
-          python-version: "3.10"
+          python-version: "3.12"
+      - name: Install uv
+        uses: astral-sh/setup-uv@v6
      - name: Set up env
        run: make -C docs FLAG="${{ env.FLAG }}" setupenv
      - name: Build docs
--- a/.github/workflows/trigger-scylla-ci.yaml
+++ b/.github/workflows/trigger-scylla-ci.yaml
@@ -1,4 +1,6 @@
 name: Trigger Scylla CI Route
+permissions:
+  contents: read

 on:
  issue_comment:
@@ -14,14 +16,20 @@ jobs:
    steps:
      - name: Verify Org Membership
        id: verify_author
+        env:
+          EVENT_NAME: ${{ github.event_name }}
+          PR_AUTHOR: ${{ github.event.pull_request.user.login }}
+          PR_ASSOCIATION: ${{ github.event.pull_request.author_association }}
+          COMMENT_AUTHOR: ${{ github.event.comment.user.login }}
+          COMMENT_ASSOCIATION: ${{ github.event.comment.author_association }}
        shell: bash
        run: |
-          if [[ "${{ github.event_name }}" == "pull_request_target" ]]; then
-            AUTHOR="${{ github.event.pull_request.user.login }}"
-            ASSOCIATION="${{ github.event.pull_request.author_association }}"
+          if [[ "$EVENT_NAME" == "pull_request_target" ]]; then
+            AUTHOR="$PR_AUTHOR"
+            ASSOCIATION="$PR_ASSOCIATION"
          else
-            AUTHOR="${{ github.event.comment.user.login }}"
-            ASSOCIATION="${{ github.event.comment.author_association }}"
+            AUTHOR="$COMMENT_AUTHOR"
+            ASSOCIATION="$COMMENT_ASSOCIATION"
          fi
          if [[ "$ASSOCIATION" == "MEMBER" || "$ASSOCIATION" == "OWNER" ]]; then
            echo "member=true" >> $GITHUB_OUTPUT
@@ -33,13 +41,11 @@ jobs:
      - name: Validate Comment Trigger
        if: github.event_name == 'issue_comment'
        id: verify_comment
+        env:
+          COMMENT_BODY: ${{ github.event.comment.body }}
        shell: bash
        run: |
-          BODY=$(cat << 'EOF'
-          ${{ github.event.comment.body }}
-          EOF
-          )
-          CLEAN_BODY=$(echo "$BODY" | grep -v '^[[:space:]]*>')
+          CLEAN_BODY=$(echo "$COMMENT_BODY" | grep -v '^[[:space:]]*>')

          if echo "$CLEAN_BODY" | grep -qi '@scylladbbot' && echo "$CLEAN_BODY" | grep -qi 'trigger-ci'; then
            echo "trigger=true" >> $GITHUB_OUTPUT
--- a/.github/workflows/trigger_jenkins.yaml
+++ b/.github/workflows/trigger_jenkins.yaml
@@ -1,5 +1,8 @@
 name: Trigger next gating

+permissions:
+  contents: read
+
 on:
  push:
    branches:
--- a/alternator/auth.cc
+++ b/alternator/auth.cc
@@ -13,7 +13,8 @@
 #include <string_view>
 #include "alternator/auth.hh"
 #include <fmt/format.h>
-#include "auth/password_authenticator.hh"
+#include "db/consistency_level_type.hh"
+#include "db/system_keyspace.hh"
 #include "service/storage_proxy.hh"
 #include "alternator/executor.hh"
 #include "cql3/selection/selection.hh"
@@ -25,8 +26,8 @@ namespace alternator {

 static logging::logger alogger("alternator-auth");

-future<std::string> get_key_from_roles(service::storage_proxy& proxy, auth::service& as, std::string username) {
-    schema_ptr schema = proxy.data_dictionary().find_schema(auth::get_auth_ks_name(as.query_processor()), "roles");
+future<std::string> get_key_from_roles(service::storage_proxy& proxy, std::string username) {
+    schema_ptr schema = proxy.data_dictionary().find_schema(db::system_keyspace::NAME, "roles");
    partition_key pk = partition_key::from_single_value(*schema, utf8_type->decompose(username));
    dht::partition_range_vector partition_ranges{dht::partition_range(dht::decorate_key(*schema, pk))};
    std::vector<query::clustering_range> bounds{query::clustering_range::make_open_ended_both_sides()};
@@ -39,7 +40,7 @@ future<std::string> get_key_from_roles(service::storage_proxy& proxy, auth::serv
    auto partition_slice = query::partition_slice(std::move(bounds), {}, query::column_id_vector{salted_hash_col->id, can_login_col->id}, selection->get_query_options());
    auto command = ::make_lw_shared<query::read_command>(schema->id(), schema->version(), partition_slice,
            proxy.get_max_result_size(partition_slice), query::tombstone_limit(proxy.get_tombstone_limit()));
-    auto cl = auth::password_authenticator::consistency_for_user(username);
+    auto cl = db::consistency_level::LOCAL_ONE;

    service::client_state client_state{service::client_state::internal_tag()};
    service::storage_proxy::coordinator_query_result qr = co_await proxy.query(schema, std::move(command), std::move(partition_ranges), cl,
--- a/alternator/auth.hh
+++ b/alternator/auth.hh
@@ -20,6 +20,6 @@ namespace alternator {

 using key_cache = utils::loading_cache<std::string, std::string, 1>;

-future<std::string> get_key_from_roles(service::storage_proxy& proxy, auth::service& as, std::string username);
+future<std::string> get_key_from_roles(service::storage_proxy& proxy, std::string username);

 }
--- a/alternator/executor.cc
+++ b/alternator/executor.cc
@@ -3463,7 +3463,11 @@ future<executor::request_return_type> executor::batch_write_item(client_state& c
    if (should_add_wcu) {
        rjson::add(ret, "ConsumedCapacity", std::move(consumed_capacity));
    }
-    _stats.api_operations.batch_write_item_latency.mark(std::chrono::steady_clock::now() - start_time);
+    auto duration = std::chrono::steady_clock::now() - start_time;
+    _stats.api_operations.batch_write_item_latency.mark(duration);
+    for (const auto& w : per_table_wcu) {
+        w.first->api_operations.batch_write_item_latency.mark(duration);
+    }
    co_return rjson::print(std::move(ret));
 }

@@ -4974,7 +4978,12 @@ future<executor::request_return_type> executor::batch_get_item(client_state& cli
    if (!some_succeeded && eptr) {
        co_await coroutine::return_exception_ptr(std::move(eptr));
    }
-    _stats.api_operations.batch_get_item_latency.mark(std::chrono::steady_clock::now() - start_time);
+    auto duration = std::chrono::steady_clock::now() - start_time;
+    _stats.api_operations.batch_get_item_latency.mark(duration);
+    for (const table_requests& rs : requests) {
+        lw_shared_ptr<stats> per_table_stats = get_stats_from_schema(_proxy, *rs.schema);
+        per_table_stats->api_operations.batch_get_item_latency.mark(duration);
+    }
    if (is_big(response)) {
        co_return make_streamed(std::move(response));
    } else {
--- a/alternator/server.cc
+++ b/alternator/server.cc
@@ -411,8 +411,8 @@ future<std::string> server::verify_signature(const request& req, const chunked_c
        }
    }

-    auto cache_getter = [&proxy = _proxy, &as = _auth_service] (std::string username) {
-        return get_key_from_roles(proxy, as, std::move(username));
+    auto cache_getter = [&proxy = _proxy] (std::string username) {
+        return get_key_from_roles(proxy, std::move(username));
    };
    return _key_cache.get_ptr(user, cache_getter).then_wrapped([this, &req, &content,
                                                    user = std::move(user),
@@ -771,7 +771,7 @@ future<executor::request_return_type> server::handle_api_request(std::unique_ptr
    if (!username.empty()) {
        client_state.set_login(auth::authenticated_user(username));
    }
-    co_await client_state.maybe_update_per_service_level_params();
+    client_state.maybe_update_per_service_level_params();

    tracing::trace_state_ptr trace_state = maybe_trace_query(client_state, username, op, content, _max_users_query_size_in_trace_output.get());
    tracing::trace(trace_state, "{}", op);
--- a/alternator/stats.cc
+++ b/alternator/stats.cc
@@ -14,20 +14,6 @@
 namespace alternator {

 const char* ALTERNATOR_METRICS = "alternator";
-static seastar::metrics::histogram estimated_histogram_to_metrics(const utils::estimated_histogram& histogram) {
-    seastar::metrics::histogram res;
-    res.buckets.resize(histogram.bucket_offsets.size());
-    uint64_t cumulative_count = 0;
-    res.sample_count = histogram._count;
-    res.sample_sum = histogram._sample_sum;
-    for (size_t i = 0; i < res.buckets.size(); i++) {
-        auto& v = res.buckets[i];
-        v.upper_bound = histogram.bucket_offsets[i];
-        cumulative_count += histogram.buckets[i];
-        v.count = cumulative_count;
-    }
-    return res;
-}

 static seastar::metrics::label column_family_label("cf");
 static seastar::metrics::label keyspace_label("ks");
@@ -151,21 +137,21 @@ static void register_metrics_with_optional_table(seastar::metrics::metric_groups
            seastar::metrics::make_counter("batch_item_count", seastar::metrics::description("The total number of items processed across all batches"), labels,
                    stats.api_operations.batch_get_item_batch_total)(op("BatchGetItem")).aggregate(aggregate_labels).set_skip_when_empty(),
            seastar::metrics::make_histogram("batch_item_count_histogram", seastar::metrics::description("Histogram of the number of items in a batch request"), labels,
-                    [&stats]{ return estimated_histogram_to_metrics(stats.api_operations.batch_get_item_histogram);})(op("BatchGetItem")).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
+                    [&stats]{ return to_metrics_histogram(stats.api_operations.batch_get_item_histogram);})(op("BatchGetItem")).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
            seastar::metrics::make_histogram("batch_item_count_histogram", seastar::metrics::description("Histogram of the number of items in a batch request"), labels,
-                    [&stats]{ return estimated_histogram_to_metrics(stats.api_operations.batch_write_item_histogram);})(op("BatchWriteItem")).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
+                    [&stats]{ return to_metrics_histogram(stats.api_operations.batch_write_item_histogram);})(op("BatchWriteItem")).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
            seastar::metrics::make_histogram("operation_size_kb", seastar::metrics::description("Histogram of item sizes involved in a request"), labels,
-                    [&stats]{ return estimated_histogram_to_metrics(stats.operation_sizes.get_item_op_size_kb);})(op("GetItem")).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
+                    [&stats]{ return to_metrics_histogram(stats.operation_sizes.get_item_op_size_kb);})(op("GetItem")).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
            seastar::metrics::make_histogram("operation_size_kb", seastar::metrics::description("Histogram of item sizes involved in a request"), labels,
-                    [&stats]{ return estimated_histogram_to_metrics(stats.operation_sizes.put_item_op_size_kb);})(op("PutItem")).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
+                    [&stats]{ return to_metrics_histogram(stats.operation_sizes.put_item_op_size_kb);})(op("PutItem")).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
            seastar::metrics::make_histogram("operation_size_kb", seastar::metrics::description("Histogram of item sizes involved in a request"), labels,
-                    [&stats]{ return estimated_histogram_to_metrics(stats.operation_sizes.delete_item_op_size_kb);})(op("DeleteItem")).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
+                    [&stats]{ return to_metrics_histogram(stats.operation_sizes.delete_item_op_size_kb);})(op("DeleteItem")).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
            seastar::metrics::make_histogram("operation_size_kb", seastar::metrics::description("Histogram of item sizes involved in a request"), labels,
-                    [&stats]{ return estimated_histogram_to_metrics(stats.operation_sizes.update_item_op_size_kb);})(op("UpdateItem")).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
+                    [&stats]{ return to_metrics_histogram(stats.operation_sizes.update_item_op_size_kb);})(op("UpdateItem")).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
            seastar::metrics::make_histogram("operation_size_kb", seastar::metrics::description("Histogram of item sizes involved in a request"), labels,
-                    [&stats]{ return estimated_histogram_to_metrics(stats.operation_sizes.batch_get_item_op_size_kb);})(op("BatchGetItem")).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
+                    [&stats]{ return to_metrics_histogram(stats.operation_sizes.batch_get_item_op_size_kb);})(op("BatchGetItem")).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
            seastar::metrics::make_histogram("operation_size_kb", seastar::metrics::description("Histogram of item sizes involved in a request"), labels,
-                    [&stats]{ return estimated_histogram_to_metrics(stats.operation_sizes.batch_write_item_op_size_kb);})(op("BatchWriteItem")).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
+                    [&stats]{ return to_metrics_histogram(stats.operation_sizes.batch_write_item_op_size_kb);})(op("BatchWriteItem")).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
    });

    seastar::metrics::label expression_label("expression");
--- a/alternator/stats.hh
+++ b/alternator/stats.hh
@@ -16,6 +16,8 @@
 #include "cql3/stats.hh"

 namespace alternator {
+using batch_histogram = utils::estimated_histogram_with_max<128>;
+using op_size_histogram = utils::estimated_histogram_with_max<512>;

 // Object holding per-shard statistics related to Alternator.
 // While this object is alive, these metrics are also registered to be
@@ -76,34 +78,34 @@ public:
        utils::timed_rate_moving_average_summary_and_histogram batch_get_item_latency;
        utils::timed_rate_moving_average_summary_and_histogram get_records_latency;

-        utils::estimated_histogram batch_get_item_histogram{22}; // a histogram that covers the range 1 - 100
-        utils::estimated_histogram batch_write_item_histogram{22}; // a histogram that covers the range 1 - 100
+        batch_histogram batch_get_item_histogram;
+        batch_histogram batch_write_item_histogram;
    } api_operations;
    // Operation size metrics
    struct {
        // Item size statistics collected per table and aggregated per node.
-        // Each histogram covers the range 0 - 446. Resolves #25143.
+        // Each histogram covers the range 0 - 512. Resolves #25143.
        // A size is the retrieved item's size.
-        utils::estimated_histogram get_item_op_size_kb{30};
+        op_size_histogram get_item_op_size_kb;
        // A size is the maximum of the new item's size and the old item's size.
-        utils::estimated_histogram put_item_op_size_kb{30};
+        op_size_histogram put_item_op_size_kb;
        // A size is the deleted item's size. If the deleted item's size is
        // unknown (i.e. read-before-write wasn't necessary and it wasn't
        // forced by a configuration option), it won't be recorded on the
        // histogram.
-        utils::estimated_histogram delete_item_op_size_kb{30};
+        op_size_histogram delete_item_op_size_kb;
        // A size is the maximum of existing item's size and the estimated size
        // of the update. This will be changed to the maximum of the existing item's
        // size and the new item's size in a subsequent PR.
-        utils::estimated_histogram update_item_op_size_kb{30};
+        op_size_histogram update_item_op_size_kb;

        // A size is the sum of the sizes of all items per table. This means
        // that a single BatchGetItem / BatchWriteItem updates the histogram
        // for each table that it has items in.
        // The sizes are the retrieved items' sizes grouped per table.
-        utils::estimated_histogram batch_get_item_op_size_kb{30};
+        op_size_histogram batch_get_item_op_size_kb;
         // The sizes are the written items' sizes grouped per table.
-        utils::estimated_histogram batch_write_item_op_size_kb{30};
+        op_size_histogram batch_write_item_op_size_kb;
    } operation_sizes;
    // Count of authentication and authorization failures, counted if either
    // alternator_enforce_authorization or alternator_warn_authorization are
@@ -140,7 +142,7 @@ public:
    cql3::cql_stats cql_stats;

    // Enumeration of expression types only for stats
-    // if needed it can be extended e.g. per operation 
+    // if needed it can be extended e.g. per operation
    enum expression_types {
        UPDATE_EXPRESSION,
        CONDITION_EXPRESSION,
@@ -164,7 +166,7 @@ struct table_stats {
 void register_metrics(seastar::metrics::metric_groups& metrics, const stats& stats);

 inline uint64_t bytes_to_kb_ceil(uint64_t bytes) {
     return (bytes + 1023) / 1024; // round up: any partial KB counts as a whole KB (0 stays 0)
 }

 }
--- a/alternator/streams.cc
+++ b/alternator/streams.cc
@@ -33,6 +33,8 @@
 #include "data_dictionary/data_dictionary.hh"
 #include "utils/rjson.hh"

+static logging::logger elogger("alternator-streams");
+
 /**
 * Base template type to implement  rapidjson::internal::TypeHelper<...>:s
 * for types that are ostreamable/string constructible/castable.
@@ -428,6 +430,25 @@ using namespace std::chrono_literals;
 // Dynamo docs says no data shall live longer than 24h.
 static constexpr auto dynamodb_streams_max_window = 24h;

+// Returns the parent of `child` in the previous generation: the first stream in prev_streams whose token is not less than child's token.
+// If child's token is greater than every token in prev_streams, the search wraps around and the first stream is returned.
+// prev_streams must be sorted by token (the lookup uses std::lower_bound); prev_timestamp is only used to build the error message.
+const cdc::stream_id& find_parent_shard_in_previous_generation(db_clock::time_point prev_timestamp, const utils::chunked_vector<cdc::stream_id> &prev_streams, const cdc::stream_id &child) {
+    if (prev_streams.empty()) {
+        // An empty previous generation has no stream that could be returned as the parent,
+        // so report it through on_internal_error, including the generation timestamp, in the hope it gets noticed and fixed.
+        on_internal_error(elogger, fmt::format("streams are empty for cdc generation at {} ({})", prev_timestamp, prev_timestamp.time_since_epoch().count()));
+    }
+    auto it = std::lower_bound(prev_streams.begin(), prev_streams.end(), child.token(), [](const cdc::stream_id& id, const dht::token& t) {
+        return id.token() < t;
+    });
+    if (it == prev_streams.end()) {
+        // wrap-around case: no stream has a token >= child's token, so take the first stream
+        it = prev_streams.begin();
+    }
+    return *it;
+}
+
 future<executor::request_return_type> executor::describe_stream(client_state& client_state, service_permit permit, rjson::value request) {
    _stats.api_operations.describe_stream++;

@@ -578,16 +599,8 @@ future<executor::request_return_type> executor::describe_stream(client_state& cl
            auto shard = rjson::empty_object();

            if (prev != e) {
-                auto& pids = prev->second.streams;
-                auto pid = std::upper_bound(pids.begin(), pids.end(), id.token(), [](const dht::token& t, const cdc::stream_id& id) {
-                    return t < id.token();
-                });
-                if (pid != pids.begin()) {
-                    pid = std::prev(pid);
-                }
-                if (pid != pids.end()) {
-                    rjson::add(shard, "ParentShardId", shard_id(prev->first, *pid));
-                }
+                auto &pid = find_parent_shard_in_previous_generation(prev->first, prev->second.streams, id);
+                rjson::add(shard, "ParentShardId", shard_id(prev->first, pid));
            }

            last.emplace(ts, id);
--- a/api/api-doc/messaging_service.json
+++ b/api/api-doc/messaging_service.json
@@ -243,7 +243,7 @@
                 "GOSSIP_DIGEST_SYN",
                 "GOSSIP_DIGEST_ACK2",
                 "GOSSIP_SHUTDOWN",
-                 "DEFINITIONS_UPDATE",
+                 "UNUSED__DEFINITIONS_UPDATE",
                 "TRUNCATE",
                 "UNUSED__REPLICATION_FINISHED",
                 "MIGRATION_REQUEST",
--- a/api/api-doc/storage_service.json
+++ b/api/api-doc/storage_service.json
@@ -1295,6 +1295,45 @@
            }
         ]
      },
+      {
+         "path":"/storage_service/logstor_compaction",
+         "operations":[
+            {
+               "method":"POST",
+               "summary":"Trigger compaction of the key-value storage",
+               "type":"void",
+               "nickname":"logstor_compaction",
+               "produces":[
+                  "application/json"
+               ],
+               "parameters":[
+                  {
+                     "name":"major",
+                     "description":"When true, perform a major compaction",
+                     "required":false,
+                     "allowMultiple":false,
+                     "type":"boolean",
+                     "paramType":"query"
+                  }
+               ]
+            }
+         ]
+      },
+      {
+         "path":"/storage_service/logstor_flush",
+         "operations":[
+            {
+               "method":"POST",
+               "summary":"Trigger flush of logstor storage",
+               "type":"void",
+               "nickname":"logstor_flush",
+               "produces":[
+                  "application/json"
+               ],
+               "parameters":[]
+            }
+         ]
+      },
      {
         "path":"/storage_service/active_repair/",
         "operations":[
@@ -3229,6 +3268,38 @@
            }
         ]
      },
+      {
+         "path":"/storage_service/logstor_info",
+         "operations":[
+            {
+               "method":"GET",
+               "summary":"Logstor segment information for one table",
+               "type":"table_logstor_info",
+               "nickname":"logstor_info",
+               "produces":[
+                  "application/json"
+               ],
+               "parameters":[
+                  {
+                     "name":"keyspace",
+                     "description":"The keyspace",
+                     "required":true,
+                     "allowMultiple":false,
+                     "type":"string",
+                     "paramType":"query"
+                  },
+                  {
+                     "name":"table",
+                     "description":"table name",
+                     "required":true,
+                     "allowMultiple":false,
+                     "type":"string",
+                     "paramType":"query"
+                  }
+               ]
+            }
+         ]
+      },
      {
         "path":"/storage_service/retrain_dict",
         "operations":[
@@ -3637,6 +3708,47 @@
            }
        }
      },
+        "logstor_hist_bucket":{
+         "id":"logstor_hist_bucket",
+         "properties":{
+            "bucket":{
+               "type":"long"
+            },
+            "count":{
+               "type":"long"
+            },
+            "min_data_size":{
+               "type":"long"
+            },
+            "max_data_size":{
+               "type":"long"
+            }
+         }
+        },
+        "table_logstor_info":{
+         "id":"table_logstor_info",
+         "description":"Per-table logstor segment distribution",
+         "properties":{
+            "keyspace":{
+               "type":"string"
+            },
+            "table":{
+               "type":"string"
+            },
+            "compaction_groups":{
+               "type":"long"
+            },
+            "segments":{
+               "type":"long"
+            },
+            "data_size_histogram":{
+               "type":"array",
+               "items":{
+                  "$ref":"logstor_hist_bucket"
+               }
+            }
+         }
+        },
      "tablet_repair_result":{
        "id":"tablet_repair_result",
        "description":"Tablet repair result",
--- a/api/api-doc/system.json
+++ b/api/api-doc/system.json
@@ -209,6 +209,21 @@
               "parameters":[]
            }
         ]
+      },
+      {
+         "path":"/system/chosen_sstable_version",
+         "operations":[
+            {
+               "method":"GET",
+               "summary":"Get sstable version currently chosen for use in new sstables",
+               "type":"string",
+               "nickname":"get_chosen_sstable_version",
+               "produces":[
+                  "application/json"
+               ],
+               "parameters":[]
+            }
+         ]
      }
   ]
 }
--- a/api/api.cc
+++ b/api/api.cc
@@ -122,9 +122,9 @@ future<> unset_thrift_controller(http_context& ctx) {
    return ctx.http_server.set_routes([&ctx] (routes& r) { unset_thrift_controller(ctx, r); });
 }

-future<> set_server_storage_service(http_context& ctx, sharded<service::storage_service>& ss, service::raft_group0_client& group0_client) {
-    return ctx.http_server.set_routes([&ctx, &ss, &group0_client] (routes& r) {
-            set_storage_service(ctx, r, ss, group0_client);
+future<> set_server_storage_service(http_context& ctx, sharded<service::storage_service>& ss, sharded<db::snapshot_ctl>& ssc, service::raft_group0_client& group0_client) {
+    return ctx.http_server.set_routes([&ctx, &ss, &ssc, &group0_client] (routes& r) {
+            set_storage_service(ctx, r, ss, ssc, group0_client);
        });
 }

--- a/api/api_init.hh
+++ b/api/api_init.hh
@@ -98,7 +98,7 @@ future<> set_server_config(http_context& ctx, db::config& cfg);
 future<> unset_server_config(http_context& ctx);
 future<> set_server_snitch(http_context& ctx, sharded<locator::snitch_ptr>& snitch);
 future<> unset_server_snitch(http_context& ctx);
-future<> set_server_storage_service(http_context& ctx, sharded<service::storage_service>& ss, service::raft_group0_client&);
+future<> set_server_storage_service(http_context& ctx, sharded<service::storage_service>& ss, sharded<db::snapshot_ctl>&, service::raft_group0_client&);
 future<> unset_server_storage_service(http_context& ctx);
 future<> set_server_client_routes(http_context& ctx, sharded<service::client_routes_service>& cr);
 future<> unset_server_client_routes(http_context& ctx);
--- a/api/column_family.cc
+++ b/api/column_family.cc
@@ -18,7 +18,9 @@
 #include "utils/assert.hh"
 #include "utils/estimated_histogram.hh"
 #include <algorithm>
+#include <sstream>
 #include "db/data_listeners.hh"
+#include "utils/hash.hh"
 #include "storage_service.hh"
 #include "compaction/compaction_manager.hh"
 #include "unimplemented.hh"
@@ -342,6 +344,56 @@ uint64_t accumulate_on_active_memtables(replica::table& t, noncopyable_function<
    return ret;
 }

+static
+future<json::json_return_type>
+rest_toppartitions_generic(sharded<replica::database>& db, std::unique_ptr<http::request> req) { // REST handler: builds and runs a toppartitions query, optionally narrowed by table/keyspace filters
+        bool filters_provided = false;
+        // "table_filters" is split on ',' and every token goes through parse_fully_qualified_cf_name().
+        std::unordered_set<std::tuple<sstring, sstring>, utils::tuple_hash> table_filters {};
+        if (auto filters = req->get_query_param("table_filters"); !filters.empty()) {
+            filters_provided = true;
+            std::stringstream ss { filters };
+            std::string filter;
+            while (!filters.empty() && ss.good()) { // filters is known non-empty here, so ss.good() alone ends the loop
+                std::getline(ss, filter, ','); // NOTE(review): an input ending in ',' appears to yield one extra empty token - confirm that is intended
+                table_filters.emplace(parse_fully_qualified_cf_name(filter));
+            }
+        }
+        // "keyspace_filters" is split on ',' the same way; tokens are stored as-is.
+        std::unordered_set<sstring> keyspace_filters {};
+        if (auto filters = req->get_query_param("keyspace_filters"); !filters.empty()) {
+            filters_provided = true;
+            std::stringstream ss { filters };
+            std::string filter;
+            while (!filters.empty() && ss.good()) {
+                std::getline(ss, filter, ',');
+                keyspace_filters.emplace(std::move(filter));
+            }
+        }
+        // NOTE(review): each filter branch above inserts at least one element unless it throws, so the check below looks unreachable - confirm
+        // when the query is empty return immediately
+        if (filters_provided && table_filters.empty() && keyspace_filters.empty()) {
+            apilog.debug("toppartitions query: processing results");
+            cf::toppartitions_query_results results;
+
+            results.read_cardinality = 0;
+            results.write_cardinality = 0;
+
+            return make_ready_future<json::json_return_type>(results);
+        }
+        // Query knobs; the last req_param argument is presumably the fallback used when the parameter is absent - confirm
+        api::req_param<std::chrono::milliseconds, unsigned> duration{*req, "duration", 1000ms};
+        api::req_param<unsigned> capacity(*req, "capacity", 256);
+        api::req_param<unsigned> list_size(*req, "list_size", 10);
+
+        apilog.info("toppartitions query: #table_filters={} #keyspace_filters={} duration={} list_size={} capacity={}",
+            !table_filters.empty() ? std::to_string(table_filters.size()) : "all", !keyspace_filters.empty() ? std::to_string(keyspace_filters.size()) : "all", duration.value, list_size.value, capacity.value);
+        // do_with owns the query object while run_toppartitions_query() works on it by reference.
+        return seastar::do_with(db::toppartitions_query(db, std::move(table_filters), std::move(keyspace_filters), duration.value, list_size, capacity), [] (db::toppartitions_query& q) {
+            return run_toppartitions_query(q);
+        });
+}
+
 void set_column_family(http_context& ctx, routes& r, sharded<replica::database>& db) {
    cf::get_column_family_name.set(r, [&db] (const_req req){
        std::vector<sstring> res;
@@ -1047,6 +1099,10 @@ void set_column_family(http_context& ctx, routes& r, sharded<replica::database>&
        });
    });

+    ss::toppartitions_generic.set(r, [&db] (std::unique_ptr<http::request> req) {
+        return rest_toppartitions_generic(db, std::move(req));
+    });
+
    cf::force_major_compaction.set(r, [&ctx, &db](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
        if (!req->get_query_param("split_output").empty()) {
            fail(unimplemented::cause::API);
@@ -1213,6 +1269,7 @@ void unset_column_family(http_context& ctx, routes& r) {
    cf::get_sstable_count_per_level.unset(r);
    cf::get_sstables_for_key.unset(r);
    cf::toppartitions.unset(r);
+    ss::toppartitions_generic.unset(r);
    cf::force_major_compaction.unset(r);
    ss::get_load.unset(r);
    ss::get_metrics_load.unset(r);
--- a/api/storage_service.cc
+++ b/api/storage_service.cc
@@ -17,9 +17,7 @@
 #include "gms/feature_service.hh"
 #include "schema/schema_builder.hh"
 #include "sstables/sstables_manager.hh"
-#include "utils/hash.hh"
 #include <optional>
-#include <sstream>
 #include <stdexcept>
 #include <time.h>
 #include <algorithm>
@@ -612,56 +610,6 @@ rest_get_token_endpoint(http_context& ctx, sharded<service::storage_service>& ss
        co_return json::json_return_type(stream_range_as_array(token_endpoints, &map_to_json<dht::token, gms::inet_address>));
 }

-static
-future<json::json_return_type>
-rest_toppartitions_generic(http_context& ctx, std::unique_ptr<http::request> req) {
-        bool filters_provided = false;
-
-        std::unordered_set<std::tuple<sstring, sstring>, utils::tuple_hash> table_filters {};
-        if (auto filters = req->get_query_param("table_filters"); !filters.empty()) {
-            filters_provided = true;
-            std::stringstream ss { filters };
-            std::string filter;
-            while (!filters.empty() && ss.good()) {
-                std::getline(ss, filter, ',');
-                table_filters.emplace(parse_fully_qualified_cf_name(filter));
-            }
-        }
-
-        std::unordered_set<sstring> keyspace_filters {};
-        if (auto filters = req->get_query_param("keyspace_filters"); !filters.empty()) {
-            filters_provided = true;
-            std::stringstream ss { filters };
-            std::string filter;
-            while (!filters.empty() && ss.good()) {
-                std::getline(ss, filter, ',');
-                keyspace_filters.emplace(std::move(filter));
-            }
-        }
-
-        // when the query is empty return immediately
-        if (filters_provided && table_filters.empty() && keyspace_filters.empty()) {
-            apilog.debug("toppartitions query: processing results");
-            httpd::column_family_json::toppartitions_query_results results;
-
-            results.read_cardinality = 0;
-            results.write_cardinality = 0;
-
-            return make_ready_future<json::json_return_type>(results);
-        }
-
-        api::req_param<std::chrono::milliseconds, unsigned> duration{*req, "duration", 1000ms};
-        api::req_param<unsigned> capacity(*req, "capacity", 256);
-        api::req_param<unsigned> list_size(*req, "list_size", 10);
-
-        apilog.info("toppartitions query: #table_filters={} #keyspace_filters={} duration={} list_size={} capacity={}",
-            !table_filters.empty() ? std::to_string(table_filters.size()) : "all", !keyspace_filters.empty() ? std::to_string(keyspace_filters.size()) : "all", duration.value, list_size.value, capacity.value);
-
-        return seastar::do_with(db::toppartitions_query(ctx.db, std::move(table_filters), std::move(keyspace_filters), duration.value, list_size, capacity), [] (db::toppartitions_query& q) {
-            return run_toppartitions_query(q);
-        });
-}
-
 static
 json::json_return_type
 rest_get_release_version(sharded<service::storage_service>& ss, const_req& req) {
@@ -835,9 +783,31 @@ rest_force_keyspace_flush(http_context& ctx, std::unique_ptr<http::request> req)

 static
 future<json::json_return_type>
-rest_decommission(sharded<service::storage_service>& ss, std::unique_ptr<http::request> req) {
+rest_logstor_compaction(http_context& ctx, std::unique_ptr<http::request> req) {
+        // REST handler: triggers logstor compaction on all shards.
+        // The optional "major" query parameter is checked with validate_bool()
+        // and passed on; when it is absent or empty the flag stays false.
+        auto major_param = req->get_query_param("major");
+        const bool major = !major_param.empty() && validate_bool(major_param);
+        apilog.info("logstor_compaction: major={}", major);
+        co_await replica::database::trigger_logstor_compaction_on_all_shards(ctx.db, major);
+        co_return json_void();
+}
+
+static
+future<json::json_return_type>
+rest_logstor_flush(http_context& ctx, std::unique_ptr<http::request> req) {
+        // REST handler: flushes the logstor separator on all shards; req is not inspected.
+        apilog.info("logstor_flush");
+        co_await replica::database::flush_logstor_separator_on_all_shards(ctx.db);
+        co_return json_void();
+}
+
+static
+future<json::json_return_type>
+rest_decommission(sharded<service::storage_service>& ss, sharded<db::snapshot_ctl>& ssc, std::unique_ptr<http::request> req) { // REST handler: runs decommission() on the local storage_service instance
         apilog.info("decommission");
-        return ss.local().decommission().then([] {
+        return ss.local().decommission(ssc).then([] { // snapshot_ctl is handed to decommission(); the reply is json_void() once it resolves
             return make_ready_future<json::json_return_type>(json_void());
         });
 }
@@ -1553,6 +1523,54 @@ rest_sstable_info(http_context& ctx, std::unique_ptr<http::request> req) {
        });
 }

+static
+future<json::json_return_type>
+rest_logstor_info(http_context& ctx, std::unique_ptr<http::request> req) { // REST handler: reports logstor segment statistics for one table, merged across shards
+        auto keyspace = api::req_param<sstring>(*req, "keyspace", {}).value;
+        auto table = api::req_param<sstring>(*req, "table", {}).value;
+        if (table.empty()) {
+            table = api::req_param<sstring>(*req, "cf", {}).value; // "cf" is read as a fallback when "table" is empty
+        }
+        // Both names are mandatory; a missing one is rejected with bad_param_exception.
+        if (keyspace.empty()) {
+            throw bad_param_exception("The query parameter 'keyspace' is required");
+        }
+        if (table.empty()) {
+            throw bad_param_exception("The query parameter 'table' is required");
+        }
+
+        keyspace = validate_keyspace(ctx, keyspace);
+        auto tid = validate_table(ctx.db.local(), keyspace, table);
+        // Tables that do not use logstor are rejected with bad_param_exception as well.
+        auto& cf = ctx.db.local().find_column_family(tid);
+        if (!cf.uses_logstor()) {
+            throw bad_param_exception(fmt::format("Table {}.{} does not use logstor", keyspace, table));
+        }
+        // The outer lambda is mutable so keyspace/table are moved, not copied, into the continuation below.
+        return do_with(replica::logstor::table_segment_stats{}, [keyspace = std::move(keyspace), table = std::move(table), tid, &ctx] (replica::logstor::table_segment_stats& merged_stats) mutable {
+            return ctx.db.map_reduce([&merged_stats](replica::logstor::table_segment_stats&& shard_stats) {
+                merged_stats += shard_stats; // reducer: fold each shard's stats into the accumulator held by do_with
+            }, [tid](const replica::database& db) {
+                return db.get_logstor_table_segment_stats(tid); // mapper: per-shard stats for the table
+            }).then([&merged_stats, keyspace = std::move(keyspace), table = std::move(table)] {
+                ss::table_logstor_info result;
+                result.keyspace = keyspace;
+                result.table = table;
+                result.compaction_groups = merged_stats.compaction_group_count;
+                result.segments = merged_stats.segment_count;
+                // Copy every histogram bucket of the merged stats into the JSON result.
+                for (const auto& bucket : merged_stats.histogram) {
+                    ss::logstor_hist_bucket hist;
+                    hist.count = bucket.count;
+                    hist.max_data_size = bucket.max_data_size;
+                    result.data_size_histogram.push(std::move(hist));
+                }
+
+                return make_ready_future<json::json_return_type>(stream_object(result));
+            });
+        });
+}
+
 static
 future<json::json_return_type>
 rest_reload_raft_topology_state(sharded<service::storage_service>& ss, service::raft_group0_client& group0_client, std::unique_ptr<http::request> req) {
@@ -1572,10 +1590,7 @@ rest_upgrade_to_raft_topology(sharded<service::storage_service>& ss, std::unique
 static
 future<json::json_return_type>
 rest_raft_topology_upgrade_status(sharded<service::storage_service>& ss, std::unique_ptr<http::request> req) {
-        const auto ustate = co_await ss.invoke_on(0, [] (auto& ss) {
-            return ss.get_topology_upgrade_state();
-        });
-        co_return sstring(format("{}", ustate));
+        co_return sstring("done"); // the reply is now a fixed string; ss and req are no longer consulted
 }

 static
@@ -1785,9 +1800,8 @@ rest_bind(FuncType func, BindArgs&... args) {
    return std::bind_front(func, std::ref(args)...);
 }

-void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_service>& ss, service::raft_group0_client& group0_client) {
+void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_service>& ss, sharded<db::snapshot_ctl>& ssc, service::raft_group0_client& group0_client) {
    ss::get_token_endpoint.set(r, rest_bind(rest_get_token_endpoint, ctx, ss));
-    ss::toppartitions_generic.set(r, rest_bind(rest_toppartitions_generic, ctx));
    ss::get_release_version.set(r, rest_bind(rest_get_release_version, ss));
    ss::get_scylla_release_version.set(r, rest_bind(rest_get_scylla_release_version, ss));
    ss::get_schema_version.set(r, rest_bind(rest_get_schema_version, ss));
@@ -1802,7 +1816,9 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
    ss::reset_cleanup_needed.set(r, rest_bind(rest_reset_cleanup_needed, ctx, ss));
    ss::force_flush.set(r, rest_bind(rest_force_flush, ctx));
    ss::force_keyspace_flush.set(r, rest_bind(rest_force_keyspace_flush, ctx));
-    ss::decommission.set(r, rest_bind(rest_decommission, ss));
+    ss::decommission.set(r, rest_bind(rest_decommission, ss, ssc));
+    ss::logstor_compaction.set(r, rest_bind(rest_logstor_compaction, ctx));
+    ss::logstor_flush.set(r, rest_bind(rest_logstor_flush, ctx));
    ss::move.set(r, rest_bind(rest_move, ss));
    ss::remove_node.set(r, rest_bind(rest_remove_node, ss));
    ss::exclude_node.set(r, rest_bind(rest_exclude_node, ss));
@@ -1851,6 +1867,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
    ss::retrain_dict.set(r, rest_bind(rest_retrain_dict, ctx, ss, group0_client));
    ss::estimate_compression_ratios.set(r, rest_bind(rest_estimate_compression_ratios, ctx, ss));
    ss::sstable_info.set(r, rest_bind(rest_sstable_info, ctx));
+    ss::logstor_info.set(r, rest_bind(rest_logstor_info, ctx));
    ss::reload_raft_topology_state.set(r, rest_bind(rest_reload_raft_topology_state, ss, group0_client));
    ss::upgrade_to_raft_topology.set(r, rest_bind(rest_upgrade_to_raft_topology, ss));
    ss::raft_topology_upgrade_status.set(r, rest_bind(rest_raft_topology_upgrade_status, ss));
@@ -1867,7 +1884,6 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_

 void unset_storage_service(http_context& ctx, routes& r) {
    ss::get_token_endpoint.unset(r);
-    ss::toppartitions_generic.unset(r);
    ss::get_release_version.unset(r);
    ss::get_scylla_release_version.unset(r);
    ss::get_schema_version.unset(r);
@@ -1881,6 +1897,8 @@ void unset_storage_service(http_context& ctx, routes& r) {
    ss::reset_cleanup_needed.unset(r);
    ss::force_flush.unset(r);
    ss::force_keyspace_flush.unset(r);
+    ss::logstor_compaction.unset(r);
+    ss::logstor_flush.unset(r);
    ss::decommission.unset(r);
    ss::move.unset(r);
    ss::remove_node.unset(r);
@@ -1928,6 +1946,7 @@ void unset_storage_service(http_context& ctx, routes& r) {
    ss::get_ownership.unset(r);
    ss::get_effective_ownership.unset(r);
    ss::sstable_info.unset(r);
+    ss::logstor_info.unset(r);
    ss::reload_raft_topology_state.unset(r);
    ss::upgrade_to_raft_topology.unset(r);
    ss::raft_topology_upgrade_status.unset(r);
@@ -2144,6 +2163,7 @@ void unset_snapshot(http_context& ctx, routes& r) {
    ss::start_backup.unset(r);
    cf::get_true_snapshots_size.unset(r);
    cf::get_all_true_snapshots_size.unset(r);
+    ss::decommission.unset(r);
 }

 }
--- a/api/storage_service.hh
+++ b/api/storage_service.hh
@@ -66,7 +66,7 @@ struct scrub_info {

 scrub_info parse_scrub_options(const http_context& ctx, std::unique_ptr<http::request> req);

-void set_storage_service(http_context& ctx, httpd::routes& r, sharded<service::storage_service>& ss, service::raft_group0_client&);
+void set_storage_service(http_context& ctx, httpd::routes& r, sharded<service::storage_service>& ss, sharded<db::snapshot_ctl>&, service::raft_group0_client&);
 void unset_storage_service(http_context& ctx, httpd::routes& r);
 void set_sstables_loader(http_context& ctx, httpd::routes& r, sharded<sstables_loader>& sst_loader);
 void unset_sstables_loader(http_context& ctx, httpd::routes& r);
--- a/api/system.cc
+++ b/api/system.cc
@@ -190,6 +190,13 @@ void set_system(http_context& ctx, routes& r) {
            return make_ready_future<json::json_return_type>(seastar::to_sstring(format));
        });
    });
+
+    hs::get_chosen_sstable_version.set(r, [&ctx] (std::unique_ptr<request> req) {
+        return smp::submit_to(0, [&ctx] {
+            auto format = ctx.db.local().get_user_sstables_manager().get_preferred_sstable_version();
+            return make_ready_future<json::json_return_type>(seastar::to_sstring(format));
+        });
+    });
 }

 }
--- a/auth/CMakeLists.txt
+++ b/auth/CMakeLists.txt
@@ -19,12 +19,12 @@ target_sources(scylla_auth
    permission.cc
    resource.cc
    role_or_anonymous.cc
-    roles-metadata.cc
    sasl_challenge.cc
    saslauthd_authenticator.cc
    service.cc
    standard_role_manager.cc
    transitional.cc
+    maintenance_socket_authenticator.cc
    maintenance_socket_role_manager.cc)
 target_include_directories(scylla_auth
  PUBLIC
@@ -48,4 +48,4 @@ if (Scylla_USE_PRECOMPILED_HEADER_USE)
  target_precompile_headers(scylla_auth REUSE_FROM scylla-precompiled-header)
 endif()
 check_headers(check-headers scylla_auth
-  GLOB_RECURSE ${CMAKE_CURRENT_SOURCE_DIR}/*.hh)
+  GLOB_RECURSE ${CMAKE_CURRENT_SOURCE_DIR}/*.hh)
--- a/auth/allow_all_authenticator.cc
+++ b/auth/allow_all_authenticator.cc
@@ -9,19 +9,9 @@
 #include "auth/allow_all_authenticator.hh"

 #include "service/migration_manager.hh"
-#include "utils/class_registrator.hh"

 namespace auth {

 constexpr std::string_view allow_all_authenticator_name("org.apache.cassandra.auth.AllowAllAuthenticator");

-// To ensure correct initialization order, we unfortunately need to use a string literal.
-static const class_registrator<
-        authenticator,
-        allow_all_authenticator,
-        cql3::query_processor&,
-        ::service::raft_group0_client&,
-        ::service::migration_manager&,
-        cache&> registration("org.apache.cassandra.auth.AllowAllAuthenticator");
-
 }
--- a/auth/allow_all_authorizer.cc
+++ b/auth/allow_all_authorizer.cc
@@ -9,18 +9,9 @@
 #include "auth/allow_all_authorizer.hh"

 #include "auth/common.hh"
-#include "utils/class_registrator.hh"

 namespace auth {

 constexpr std::string_view allow_all_authorizer_name("org.apache.cassandra.auth.AllowAllAuthorizer");

-// To ensure correct initialization order, we unfortunately need to use a string literal.
-static const class_registrator<
-    authorizer,
-    allow_all_authorizer,
-    cql3::query_processor&,
-    ::service::raft_group0_client&,
-    ::service::migration_manager&> registration("org.apache.cassandra.auth.AllowAllAuthorizer");
-
 }
--- a/auth/allow_all_authorizer.hh
+++ b/auth/allow_all_authorizer.hh
@@ -26,7 +26,7 @@ extern const std::string_view allow_all_authorizer_name;

 class allow_all_authorizer final  : public authorizer {
 public:
-    allow_all_authorizer(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&) {
+    allow_all_authorizer(cql3::query_processor&) {
    }

    virtual future<> start() override {
--- a/auth/cache.cc
+++ b/auth/cache.cc
@@ -47,7 +47,7 @@ void cache::set_permission_loader(permission_loader_func loader) {
    _permission_loader = std::move(loader);
 }

-lw_shared_ptr<const cache::role_record> cache::get(const role_name_t& role) const noexcept {
+lw_shared_ptr<const cache::role_record> cache::get(std::string_view role) const noexcept {
    auto it = _roles.find(role);
    if (it == _roles.end()) {
        return {};
@@ -55,6 +55,16 @@ lw_shared_ptr<const cache::role_record> cache::get(const role_name_t& role) cons
    return it->second;
 }

+void cache::for_each_role(const std::function<void(const role_name_t&, const role_record&)>& func) const {
+    for (const auto& entry : _roles) { // synchronous walk: func sees each role name together with its record
+        func(entry.first, *entry.second);
+    }
+}
+
+size_t cache::roles_count() const noexcept {
+    return _roles.size(); // number of entries currently held in the _roles map
+}
+
 future<permission_set> cache::get_permissions(const role_or_anonymous& role, const resource& r) {
    std::unordered_map<resource, permission_set>* perms_cache;
    lw_shared_ptr<role_record> role_ptr;
@@ -209,9 +219,6 @@ future<> cache::prune_all() noexcept {
 }

 future<> cache::load_all() {
-    if (legacy_mode(_qp)) {
-        co_return;
-    }
    SCYLLA_ASSERT(this_shard_id() == 0);
    auto units = co_await get_units(_loading_sem, 1, _as);

@@ -263,9 +270,6 @@ future<> cache::gather_inheriting_roles(std::unordered_set<role_name_t>& roles,
 }

 future<> cache::load_roles(std::unordered_set<role_name_t> roles) {
-    if (legacy_mode(_qp)) {
-        co_return;
-    }
    SCYLLA_ASSERT(this_shard_id() == 0);
    auto units = co_await get_units(_loading_sem, 1, _as);

--- a/auth/cache.hh
+++ b/auth/cache.hh
@@ -9,6 +9,7 @@
 #pragma once

 #include <seastar/core/abort_source.hh>
+#include <string_view>
 #include <unordered_set>
 #include <unordered_map>

@@ -19,7 +20,7 @@
 #include <seastar/core/semaphore.hh>
 #include <seastar/core/metrics_registration.hh>

-#include <absl/container/flat_hash_map.h>
+#include "absl-flat_hash_map.hh"

 #include "auth/permission.hh"
 #include "auth/common.hh"
@@ -42,8 +43,8 @@ public:
        std::unordered_set<role_name_t> member_of;
        std::unordered_set<role_name_t> members;
        sstring salted_hash;
-        std::unordered_map<sstring, sstring> attributes;
-        std::unordered_map<sstring, permission_set> permissions;
+        std::unordered_map<sstring, sstring, sstring_hash, sstring_eq> attributes;
+        std::unordered_map<sstring, permission_set, sstring_hash, sstring_eq> permissions;
    private:
        friend cache;
        // cached permissions include effects of role's inheritance
@@ -52,7 +53,7 @@ public:
    };

    explicit cache(cql3::query_processor& qp, abort_source& as) noexcept;
-    lw_shared_ptr<const role_record> get(const role_name_t& role) const noexcept;
+    lw_shared_ptr<const role_record> get(std::string_view role) const noexcept;
    void set_permission_loader(permission_loader_func loader);
    future<permission_set> get_permissions(const role_or_anonymous& role, const resource& r);
    future<> prune(const resource& r);
@@ -61,8 +62,15 @@ public:
    future<> load_roles(std::unordered_set<role_name_t> roles);
    static bool includes_table(const table_id&) noexcept;

+    // Returns the number of roles in the cache.
+    size_t roles_count() const noexcept;
+
+    // The callback doesn't suspend (no co_await) so it observes the state
+    // of the cache atomically.
+    void for_each_role(const std::function<void(const role_name_t&, const role_record&)>& func) const;
+
 private:
-    using roles_map = absl::flat_hash_map<role_name_t, lw_shared_ptr<role_record>>;
+    using roles_map = absl::flat_hash_map<role_name_t, lw_shared_ptr<role_record>, sstring_hash, sstring_eq>;
    roles_map _roles;
    // anonymous permissions map exists mainly due to compatibility with
    // higher layers which use role_or_anonymous to get permissions.
--- a/auth/certificate_authenticator.cc
+++ b/auth/certificate_authenticator.cc
@@ -13,14 +13,11 @@
 #include <boost/regex.hpp>
 #include <fmt/ranges.h>

-#include "utils/class_registrator.hh"
 #include "utils/to_string.hh"
 #include "data_dictionary/data_dictionary.hh"
 #include "cql3/query_processor.hh"
 #include "db/config.hh"

-static const auto CERT_AUTH_NAME = "com.scylladb.auth.CertificateAuthenticator";
-const std::string_view auth::certificate_authenticator_name(CERT_AUTH_NAME);

 static logging::logger clogger("certificate_authenticator");

@@ -30,13 +27,6 @@ static const std::string cfg_query_attr = "query";
 static const std::string cfg_source_subject = "SUBJECT";
 static const std::string cfg_source_altname = "ALTNAME";

-static const class_registrator<auth::authenticator
-    , auth::certificate_authenticator
-    , cql3::query_processor&
-    , ::service::raft_group0_client&
-    , ::service::migration_manager&
-    , auth::cache&> cert_auth_reg(CERT_AUTH_NAME);
-
 enum class auth::certificate_authenticator::query_source {
    subject, altname
 };
@@ -99,7 +89,7 @@ future<> auth::certificate_authenticator::stop() {
 }

 std::string_view auth::certificate_authenticator::qualified_java_name() const {
-    return certificate_authenticator_name;
+    return "com.scylladb.auth.CertificateAuthenticator"; // spelled inline now that the certificate_authenticator_name constant is removed
 }

 bool auth::certificate_authenticator::require_authentication() const {
--- a/auth/certificate_authenticator.hh
+++ b/auth/certificate_authenticator.hh
@@ -27,8 +27,6 @@ namespace auth {

 class cache;

-extern const std::string_view certificate_authenticator_name;
-
 class certificate_authenticator : public authenticator {
    enum class query_source;
    std::vector<std::pair<query_source, boost::regex>> _queries;
--- a/auth/common.cc
+++ b/auth/common.cc
@@ -14,18 +14,11 @@
 #include <seastar/core/sharded.hh>

 #include "mutation/canonical_mutation.hh"
-#include "schema/schema_fwd.hh"
 #include "mutation/timestamp.hh"
-#include "utils/assert.hh"
 #include "utils/exponential_backoff_retry.hh"
 #include "cql3/query_processor.hh"
-#include "cql3/statements/create_table_statement.hh"
-#include "schema/schema_builder.hh"
-#include "service/migration_manager.hh"
 #include "service/raft/group0_state_machine.hh"
 #include "timeout_config.hh"
-#include "utils/error_injection.hh"
-#include "db/system_keyspace.hh"

 namespace auth {

@@ -33,22 +26,14 @@ namespace meta {

 namespace legacy {
    constinit const std::string_view AUTH_KS("system_auth");
-    constinit const std::string_view USERS_CF("users");
 } // namespace legacy
 constinit const std::string_view AUTH_PACKAGE_NAME("org.apache.cassandra.auth.");
 } // namespace meta

 static logging::logger auth_log("auth");

-bool legacy_mode(cql3::query_processor& qp) {
-    return qp.auth_version < db::auth_version_t::v2;
-}
-
-std::string_view get_auth_ks_name(cql3::query_processor& qp) {
-    if (legacy_mode(qp)) {
-        return meta::legacy::AUTH_KS;
-    }
-    return db::system_keyspace::NAME;
+std::string default_superuser(cql3::query_processor& qp) { // returns the superuser name from the database configuration reachable through qp
+    return qp.db().get_config().auth_superuser_name(); // read from the auth_superuser_name config entry
 }

 // Func must support being invoked more than once.
@@ -65,47 +50,6 @@ future<> do_after_system_ready(seastar::abort_source& as, seastar::noncopyable_f
    }).discard_result();
 }

-static future<> create_legacy_metadata_table_if_missing_impl(
-        std::string_view table_name,
-        cql3::query_processor& qp,
-        std::string_view cql,
-        ::service::migration_manager& mm) {
-    SCYLLA_ASSERT(this_shard_id() == 0); // once_among_shards makes sure a function is executed on shard 0 only
-
-    auto db = qp.db();
-    auto parsed_statement = cql3::query_processor::parse_statement(cql, cql3::dialect{});
-    auto& parsed_cf_statement = static_cast<cql3::statements::raw::cf_statement&>(*parsed_statement);
-
-    parsed_cf_statement.prepare_keyspace(meta::legacy::AUTH_KS);
-
-    auto statement = static_pointer_cast<cql3::statements::create_table_statement>(
-            parsed_cf_statement.prepare(db, qp.get_cql_stats())->statement);
-
-    const auto schema = statement->get_cf_meta_data(qp.db());
-    const auto uuid = generate_legacy_id(schema->ks_name(), schema->cf_name());
-
-    schema_builder b(schema);
-    b.set_uuid(uuid);
-    schema_ptr table = b.build();
-
-    if (!db.has_schema(table->ks_name(), table->cf_name())) {
-        auto group0_guard = co_await mm.start_group0_operation();
-        auto ts = group0_guard.write_timestamp();
-        try {
-            co_return co_await mm.announce(co_await ::service::prepare_new_column_family_announcement(qp.proxy(), table, ts),
-                    std::move(group0_guard), format("auth: create {} metadata table", table->cf_name()));
-        } catch (const exceptions::already_exists_exception&) {}
-    }
-}
-
-future<> create_legacy_metadata_table_if_missing(
-        std::string_view table_name,
-        cql3::query_processor& qp,
-        std::string_view cql,
-        ::service::migration_manager& mm) noexcept {
-    return futurize_invoke(create_legacy_metadata_table_if_missing_impl, table_name, qp, cql, mm);
-}
-
 ::service::query_state& internal_distributed_query_state() noexcept {
 #ifdef DEBUG
    // Give the much slower debug tests more headroom for completing auth queries.
@@ -140,56 +84,6 @@ static future<> announce_mutations_with_guard(
    return group0_client.add_entry(std::move(group0_cmd), std::move(group0_guard), as, timeout);
 }

-future<> announce_mutations_with_batching(
-        ::service::raft_group0_client& group0_client,
-        start_operation_func_t start_operation_func,
-        std::function<::service::mutations_generator(api::timestamp_type t)> gen,
-        seastar::abort_source& as,
-        std::optional<::service::raft_timeout> timeout) {
-    // account for command's overhead, it's better to use smaller threshold than constantly bounce off the limit
-    size_t memory_threshold = group0_client.max_command_size() * 0.75;
-    utils::get_local_injector().inject("auth_announce_mutations_command_max_size",
-        [&memory_threshold] {
-        memory_threshold = 1000;
-    });
-
-    size_t memory_usage = 0;
-    utils::chunked_vector<canonical_mutation> muts;
-
-    // guard has to be taken before we execute code in gen as
-    // it can do read-before-write and we want announce_mutations
-    // operation to be linearizable with other such calls,
-    // for instance if we do select and then delete in gen
-    // we want both to operate on the same data or fail
-    // if someone else modified it in the middle
-    std::optional<::service::group0_guard> group0_guard;
-    group0_guard = co_await start_operation_func(as);
-    auto timestamp = group0_guard->write_timestamp();
-
-    auto g = gen(timestamp);
-    while (auto mut = co_await g()) {
-        muts.push_back(canonical_mutation{*mut});
-        memory_usage += muts.back().representation().size();
-        if (memory_usage >= memory_threshold) {
-            if (!group0_guard) {
-                group0_guard = co_await start_operation_func(as);
-                timestamp = group0_guard->write_timestamp();
-            }
-            co_await announce_mutations_with_guard(group0_client, std::move(muts), std::move(*group0_guard), as, timeout);
-            group0_guard = std::nullopt;
-            memory_usage = 0;
-            muts = {};
-        }
-    }
-    if (!muts.empty()) {
-        if (!group0_guard) {
-            group0_guard = co_await start_operation_func(as);
-            timestamp = group0_guard->write_timestamp();
-        }
-        co_await announce_mutations_with_guard(group0_client, std::move(muts), std::move(*group0_guard), as, timeout);
-    }
-}
-
 future<> announce_mutations(
        cql3::query_processor& qp,
        ::service::raft_group0_client& group0_client,
--- a/auth/common.hh
+++ b/auth/common.hh
@@ -21,12 +21,7 @@

 using namespace std::chrono_literals;

-namespace replica {
-class database;
-}
-
 namespace service {
-class migration_manager;
 class query_state;
 }

@@ -40,10 +35,8 @@ namespace meta {

 namespace legacy {
 extern constinit const std::string_view AUTH_KS;
-extern constinit const std::string_view USERS_CF;
 } // namespace legacy

-constexpr std::string_view DEFAULT_SUPERUSER_NAME("cassandra");
 extern constinit const std::string_view AUTH_PACKAGE_NAME;

 } // namespace meta
@@ -52,12 +45,7 @@ constexpr std::string_view PERMISSIONS_CF = "role_permissions";
 constexpr std::string_view ROLE_MEMBERS_CF = "role_members";
 constexpr std::string_view ROLE_ATTRIBUTES_CF = "role_attributes";

-// This is a helper to check whether auth-v2 is on.
-bool legacy_mode(cql3::query_processor& qp);
-
-// We have legacy implementation using different keyspace
-// and need to parametrize depending on runtime feature.
-std::string_view get_auth_ks_name(cql3::query_processor& qp);
+std::string default_superuser(cql3::query_processor& qp);

 template <class Task>
 future<> once_among_shards(Task&& f) {
@@ -71,12 +59,6 @@ future<> once_among_shards(Task&& f) {
 // Func must support being invoked more than once.
 future<> do_after_system_ready(seastar::abort_source& as, seastar::noncopyable_function<future<>()> func);

-future<> create_legacy_metadata_table_if_missing(
-        std::string_view table_name,
-        cql3::query_processor&,
-        std::string_view cql,
-        ::service::migration_manager&) noexcept;
-
 ///
 /// Time-outs for internal, non-local CQL queries.
 ///
@@ -84,20 +66,6 @@ future<> create_legacy_metadata_table_if_missing(

 ::service::raft_timeout get_raft_timeout() noexcept;

-// Execute update query via group0 mechanism, mutations will be applied on all nodes.
-// Use this function when need to perform read before write on a single guard or if
-// you have more than one mutation and potentially exceed single command size limit.
-using start_operation_func_t = std::function<future<::service::group0_guard>(abort_source&)>;
-future<> announce_mutations_with_batching(
-        ::service::raft_group0_client& group0_client,
-        // since we can operate also in topology coordinator context where we need stronger
-        // guarantees than start_operation from group0_client gives we allow to inject custom
-        // function here
-        start_operation_func_t start_operation_func,
-        std::function<::service::mutations_generator(api::timestamp_type t)> gen,
-        seastar::abort_source& as,
-        std::optional<::service::raft_timeout> timeout);
-
 // Execute update query via group0 mechanism, mutations will be applied on all nodes.
 future<> announce_mutations(
        cql3::query_processor& qp,
--- a/auth/default_authorizer.cc
+++ b/auth/default_authorizer.cc
@@ -26,7 +26,6 @@ extern "C" {
 #include "cql3/untyped_result_set.hh"
 #include "exceptions/exceptions.hh"
 #include "utils/log.hh"
-#include "utils/class_registrator.hh"

 namespace auth {

@@ -40,111 +39,14 @@ static constexpr std::string_view PERMISSIONS_NAME = "permissions";

 static logging::logger alogger("default_authorizer");

-// To ensure correct initialization order, we unfortunately need to use a string literal.
-static const class_registrator<
-        authorizer,
-        default_authorizer,
-        cql3::query_processor&,
-        ::service::raft_group0_client&,
-        ::service::migration_manager&> password_auth_reg("org.apache.cassandra.auth.CassandraAuthorizer");
-
-default_authorizer::default_authorizer(cql3::query_processor& qp, ::service::raft_group0_client& g0, ::service::migration_manager& mm)
-        : _qp(qp)
-        , _migration_manager(mm) {
+default_authorizer::default_authorizer(cql3::query_processor& qp)
+        : _qp(qp) {
 }

 default_authorizer::~default_authorizer() {
 }

-static const sstring legacy_table_name{"permissions"};
-
-bool default_authorizer::legacy_metadata_exists() const {
-    return _qp.db().has_schema(meta::legacy::AUTH_KS, legacy_table_name);
-}
-
-future<bool> default_authorizer::legacy_any_granted() const {
-    static const sstring query = seastar::format("SELECT * FROM {}.{} LIMIT 1", meta::legacy::AUTH_KS, PERMISSIONS_CF);
-
-    return _qp.execute_internal(
-            query,
-            db::consistency_level::LOCAL_ONE,
-            {},
-            cql3::query_processor::cache_internal::yes).then([](::shared_ptr<cql3::untyped_result_set> results) {
-        return !results->empty();
-    });
-}
-
-future<> default_authorizer::migrate_legacy_metadata() {
-    alogger.info("Starting migration of legacy permissions metadata.");
-    static const sstring query = seastar::format("SELECT * FROM {}.{}", meta::legacy::AUTH_KS, legacy_table_name);
-
-    return _qp.execute_internal(
-            query,
-            db::consistency_level::LOCAL_ONE,
-            cql3::query_processor::cache_internal::no).then([this](::shared_ptr<cql3::untyped_result_set> results) {
-        return do_for_each(*results, [this](const cql3::untyped_result_set_row& row) {
-            return do_with(
-                    row.get_as<sstring>("username"),
-                    parse_resource(row.get_as<sstring>(RESOURCE_NAME)),
-                    ::service::group0_batch::unused(),
-                    [this, &row](const auto& username, const auto& r, auto& mc) {
-                const permission_set perms = permissions::from_strings(row.get_set<sstring>(PERMISSIONS_NAME));
-                return grant(username, perms, r, mc);
-            });
-        }).finally([results] {});
-    }).then([] {
-        alogger.info("Finished migrating legacy permissions metadata.");
-    }).handle_exception([](std::exception_ptr ep) {
-        alogger.error("Encountered an error during migration!");
-        std::rethrow_exception(ep);
-    });
-}
-
-future<> default_authorizer::start_legacy() {
-    static const sstring create_table = fmt::format(
-            "CREATE TABLE {}.{} ("
-            "{} text,"
-            "{} text,"
-            "{} set<text>,"
-            "PRIMARY KEY({}, {})"
-            ") WITH gc_grace_seconds={}",
-            meta::legacy::AUTH_KS,
-            PERMISSIONS_CF,
-            ROLE_NAME,
-            RESOURCE_NAME,
-            PERMISSIONS_NAME,
-            ROLE_NAME,
-            RESOURCE_NAME,
-            90 * 24 * 60 * 60); // 3 months.
-
-    return once_among_shards([this] {
-        return create_legacy_metadata_table_if_missing(
-                PERMISSIONS_CF,
-                _qp,
-                create_table,
-                _migration_manager).then([this] {
-            _finished = do_after_system_ready(_as, [this] {
-                return async([this] {
-                    _migration_manager.wait_for_schema_agreement(_qp.db().real_database(), db::timeout_clock::time_point::max(), &_as).get();
-
-                    if (legacy_metadata_exists()) {
-                        if (!legacy_any_granted().get()) {
-                            migrate_legacy_metadata().get();
-                            return;
-                        }
-
-                        alogger.warn("Ignoring legacy permissions metadata since role permissions exist.");
-                    }
-                });
-            });
-        });
-    });
-}
-
 future<> default_authorizer::start() {
-    if (legacy_mode(_qp)) {
-        return start_legacy();
-    }
    return make_ready_future<>();
 }

@@ -161,7 +63,7 @@ default_authorizer::authorize(const role_or_anonymous& maybe_role, const resourc

    const sstring query = seastar::format("SELECT {} FROM {}.{} WHERE {} = ? AND {} = ?",
            PERMISSIONS_NAME,
-            get_auth_ks_name(_qp),
+            db::system_keyspace::NAME,
            PERMISSIONS_CF,
            ROLE_NAME,
            RESOURCE_NAME);
@@ -185,21 +87,13 @@ default_authorizer::modify(
        std::string_view op,
        ::service::group0_batch& mc) {
    const sstring query = seastar::format("UPDATE {}.{} SET {} = {} {} ? WHERE {} = ? AND {} = ?",
-            get_auth_ks_name(_qp),
+            db::system_keyspace::NAME,
            PERMISSIONS_CF,
            PERMISSIONS_NAME,
            PERMISSIONS_NAME,
            op,
            ROLE_NAME,
            RESOURCE_NAME);
-    if (legacy_mode(_qp)) {
-        co_return co_await _qp.execute_internal(
-                query,
-                db::consistency_level::ONE,
-                internal_distributed_query_state(),
-                {permissions::to_strings(set), sstring(role_name), resource.name()},
-                cql3::query_processor::cache_internal::no).discard_result();
-    }
    co_await collect_mutations(_qp, mc, query,
            {permissions::to_strings(set), sstring(role_name), resource.name()});
 }
@@ -218,7 +112,7 @@ future<std::vector<permission_details>> default_authorizer::list_all() const {
            ROLE_NAME,
            RESOURCE_NAME,
            PERMISSIONS_NAME,
-            get_auth_ks_name(_qp),
+            db::system_keyspace::NAME,
            PERMISSIONS_CF);

    const auto results = co_await _qp.execute_internal(
@@ -243,74 +137,16 @@ future<std::vector<permission_details>> default_authorizer::list_all() const {
 future<> default_authorizer::revoke_all(std::string_view role_name, ::service::group0_batch& mc) {
    try {
        const sstring query = seastar::format("DELETE FROM {}.{} WHERE {} = ?",
-                get_auth_ks_name(_qp),
+                db::system_keyspace::NAME,
                PERMISSIONS_CF,
                ROLE_NAME);
-        if (legacy_mode(_qp)) {
-            co_await _qp.execute_internal(
-                    query,
-                    db::consistency_level::ONE,
-                    internal_distributed_query_state(),
-                    {sstring(role_name)},
-                    cql3::query_processor::cache_internal::no).discard_result();
-        } else {
-            co_await collect_mutations(_qp, mc, query, {sstring(role_name)});
-        }
+        co_await collect_mutations(_qp, mc, query, {sstring(role_name)});
    } catch (const exceptions::request_execution_exception& e) {
        alogger.warn("CassandraAuthorizer failed to revoke all permissions of {}: {}", role_name, e);
    }
 }

-future<> default_authorizer::revoke_all_legacy(const resource& resource) {
-    static const sstring query = seastar::format("SELECT {} FROM {}.{} WHERE {} = ? ALLOW FILTERING",
-            ROLE_NAME,
-            get_auth_ks_name(_qp),
-            PERMISSIONS_CF,
-            RESOURCE_NAME);
-
-    return _qp.execute_internal(
-            query,
-            db::consistency_level::LOCAL_ONE,
-            {resource.name()},
-            cql3::query_processor::cache_internal::no).then_wrapped([this, resource](future<::shared_ptr<cql3::untyped_result_set>> f) {
-        try {
-            auto res = f.get();
-            return parallel_for_each(
-                    res->begin(),
-                    res->end(),
-                    [this, res, resource](const cql3::untyped_result_set::row& r) {
-                static const sstring query = seastar::format("DELETE FROM {}.{} WHERE {} = ? AND {} = ?",
-                        get_auth_ks_name(_qp),
-                        PERMISSIONS_CF,
-                        ROLE_NAME,
-                        RESOURCE_NAME);
-
-                return _qp.execute_internal(
-                        query,
-                        db::consistency_level::LOCAL_ONE,
-                        {r.get_as<sstring>(ROLE_NAME), resource.name()},
-                        cql3::query_processor::cache_internal::no).discard_result().handle_exception(
-                                [resource](auto ep) {
-                    try {
-                        std::rethrow_exception(ep);
-                    } catch (const exceptions::request_execution_exception& e) {
-                        alogger.warn("CassandraAuthorizer failed to revoke all permissions on {}: {}", resource, e);
-                    }
-
-                });
-            });
-        } catch (const exceptions::request_execution_exception& e) {
-            alogger.warn("CassandraAuthorizer failed to revoke all permissions on {}: {}", resource, e);
-            return make_ready_future();
-        }
-    });
-}
-
 future<> default_authorizer::revoke_all(const resource& resource, ::service::group0_batch& mc) {
-    if (legacy_mode(_qp)) {
-        co_return co_await revoke_all_legacy(resource);
-    }
-
    if (resource.kind() == resource_kind::data &&
            data_resource_view(resource).is_keyspace()) {
        revoke_all_keyspace_resources(resource, mc);
@@ -321,7 +157,7 @@ future<> default_authorizer::revoke_all(const resource& resource, ::service::gro
    auto gen = [this, name] (api::timestamp_type t) -> ::service::mutations_generator {
        const sstring query = seastar::format("SELECT {} FROM {}.{} WHERE {} = ? ALLOW FILTERING",
                ROLE_NAME,
-                get_auth_ks_name(_qp),
+                db::system_keyspace::NAME,
                PERMISSIONS_CF,
                RESOURCE_NAME);
        auto res = co_await _qp.execute_internal(
@@ -331,7 +167,7 @@ future<> default_authorizer::revoke_all(const resource& resource, ::service::gro
                cql3::query_processor::cache_internal::no);
        for (const auto& r : *res) {
            const sstring query = seastar::format("DELETE FROM {}.{} WHERE {} = ? AND {} = ?",
-                    get_auth_ks_name(_qp),
+                    db::system_keyspace::NAME,
                    PERMISSIONS_CF,
                    ROLE_NAME,
                    RESOURCE_NAME);
@@ -356,7 +192,7 @@ void default_authorizer::revoke_all_keyspace_resources(const resource& ks_resour
        const sstring query = seastar::format("SELECT {}, {} FROM {}.{}",
                ROLE_NAME,
                RESOURCE_NAME,
-                get_auth_ks_name(_qp),
+                db::system_keyspace::NAME,
                PERMISSIONS_CF);
        auto res = co_await _qp.execute_internal(
                query,
@@ -371,7 +207,7 @@ void default_authorizer::revoke_all_keyspace_resources(const resource& ks_resour
                continue;
            }
            const sstring query = seastar::format("DELETE FROM {}.{} WHERE {} = ? AND {} = ?",
-                    get_auth_ks_name(_qp),
+                    db::system_keyspace::NAME,
                    PERMISSIONS_CF,
                    ROLE_NAME,
                    RESOURCE_NAME);
--- a/auth/default_authorizer.hh
+++ b/auth/default_authorizer.hh
@@ -27,14 +27,12 @@ namespace auth {
 class default_authorizer : public authorizer {
    cql3::query_processor& _qp;

-    ::service::migration_manager& _migration_manager;
-
    abort_source _as{};

    future<> _finished{make_ready_future<>()};

 public:
-    default_authorizer(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&);
+    default_authorizer(cql3::query_processor&);

    ~default_authorizer();

@@ -59,16 +57,6 @@ public:
    virtual const resource_set& protected_resources() const override;

 private:
-    future<> start_legacy();
-
-    bool legacy_metadata_exists() const;
-
-    future<> revoke_all_legacy(const resource&);
-
-    future<bool> legacy_any_granted() const;
-
-    future<> migrate_legacy_metadata();
-
    future<> modify(std::string_view, permission_set, const resource&, std::string_view, ::service::group0_batch&);

    void revoke_all_keyspace_resources(const resource& ks_resource, ::service::group0_batch& mc);
--- a/auth/ldap_role_manager.cc
+++ b/auth/ldap_role_manager.cc
@@ -24,7 +24,6 @@
 #include "exceptions/exceptions.hh"
 #include "seastarx.hh"
 #include "service/raft/raft_group0_client.hh"
-#include "utils/class_registrator.hh"
 #include "db/config.hh"
 #include "utils/exponential_backoff_retry.hh"

@@ -33,7 +32,7 @@ namespace {
 logger mylog{"ldap_role_manager"}; // `log` is taken by math.

 struct url_desc_deleter {
-    void operator()(LDAPURLDesc *p) {
+    void operator()(LDAPURLDesc* p) {
        ldap_free_urldesc(p);
    }
 };
@@ -41,7 +40,7 @@ struct url_desc_deleter {
 using url_desc_ptr = std::unique_ptr<LDAPURLDesc, url_desc_deleter>;

 url_desc_ptr parse_url(std::string_view url) {
-    LDAPURLDesc *desc = nullptr;
+    LDAPURLDesc* desc = nullptr;
    if (ldap_url_parse(url.data(), &desc)) {
        mylog.error("error in ldap_url_parse({})", url);
    }
@@ -54,8 +53,12 @@ std::vector<sstring> get_attr_values(LDAP* ld, LDAPMessage* res, const char* att
    mylog.debug("Analyzing search results");
    for (auto e = ldap_first_entry(ld, res); e; e = ldap_next_entry(ld, e)) {
        struct deleter {
-            void operator()(berval** p) { ldap_value_free_len(p); }
-            void operator()(char* p) { ldap_memfree(p); }
+            void operator()(berval** p) {
+                ldap_value_free_len(p);
+            }
+            void operator()(char* p) {
+                ldap_memfree(p);
+            }
        };
        const std::unique_ptr<char, deleter> dname(ldap_get_dn(ld, e));
        mylog.debug("Analyzing entry {}", dname.get());
@@ -72,50 +75,37 @@ std::vector<sstring> get_attr_values(LDAP* ld, LDAPMessage* res, const char* att
    return values;
 }

-const char* ldap_role_manager_full_name = "com.scylladb.auth.LDAPRoleManager";
-
 } // anonymous namespace

 namespace auth {

-static const class_registrator<
-    role_manager,
-    ldap_role_manager,
-    cql3::query_processor&,
-    ::service::raft_group0_client&,
-    ::service::migration_manager&,
-    cache&> registration(ldap_role_manager_full_name);
-
-ldap_role_manager::ldap_role_manager(
-        std::string_view query_template, std::string_view target_attr, std::string_view bind_name, std::string_view bind_password,
-        uint32_t permissions_update_interval_in_ms,
-        utils::observer<uint32_t>  permissions_update_interval_in_ms_observer,
-        cql3::query_processor& qp, ::service::raft_group0_client& rg0c, ::service::migration_manager& mm, cache& cache)
-        : _std_mgr(qp, rg0c, mm, cache), _group0_client(rg0c), _query_template(query_template), _target_attr(target_attr), _bind_name(bind_name)
-        , _bind_password(bind_password)
-        , _permissions_update_interval_in_ms(permissions_update_interval_in_ms)
-        , _permissions_update_interval_in_ms_observer(std::move(permissions_update_interval_in_ms_observer))
-        , _connection_factory(bind(std::mem_fn(&ldap_role_manager::reconnect), std::ref(*this)))
-        , _cache(cache)
-        , _cache_pruner(make_ready_future<>()) {
+ldap_role_manager::ldap_role_manager(std::string_view query_template, std::string_view target_attr, std::string_view bind_name, std::string_view bind_password,
+        uint32_t permissions_update_interval_in_ms, utils::observer<uint32_t> permissions_update_interval_in_ms_observer, cql3::query_processor& qp,
+        ::service::raft_group0_client& rg0c, ::service::migration_manager& mm, cache& cache)
+    : _std_mgr(qp, rg0c, mm, cache)
+    , _group0_client(rg0c)
+    , _query_template(query_template)
+    , _target_attr(target_attr)
+    , _bind_name(bind_name)
+    , _bind_password(bind_password)
+    , _permissions_update_interval_in_ms(permissions_update_interval_in_ms)
+    , _permissions_update_interval_in_ms_observer(std::move(permissions_update_interval_in_ms_observer))
+    , _connection_factory(bind(std::mem_fn(&ldap_role_manager::reconnect), std::ref(*this)))
+    , _cache(cache)
+    , _cache_pruner(make_ready_future<>()) {
 }

 ldap_role_manager::ldap_role_manager(cql3::query_processor& qp, ::service::raft_group0_client& rg0c, ::service::migration_manager& mm, cache& cache)
-    : ldap_role_manager(
-            qp.db().get_config().ldap_url_template(),
-            qp.db().get_config().ldap_attr_role(),
-            qp.db().get_config().ldap_bind_dn(),
-            qp.db().get_config().ldap_bind_passwd(),
-            qp.db().get_config().permissions_update_interval_in_ms(),
-            qp.db().get_config().permissions_update_interval_in_ms.observe([this] (const uint32_t& v) { _permissions_update_interval_in_ms = v; }),
-            qp,
-            rg0c,
-            mm,
-            cache) {
+    : ldap_role_manager(qp.db().get_config().ldap_url_template(), qp.db().get_config().ldap_attr_role(), qp.db().get_config().ldap_bind_dn(),
+              qp.db().get_config().ldap_bind_passwd(), qp.db().get_config().permissions_update_interval_in_ms(),
+              qp.db().get_config().permissions_update_interval_in_ms.observe([this](const uint32_t& v) {
+                  _permissions_update_interval_in_ms = v;
+              }),
+              qp, rg0c, mm, cache) {
 }

 std::string_view ldap_role_manager::qualified_java_name() const noexcept {
-    return ldap_role_manager_full_name;
+    return "com.scylladb.auth.LDAPRoleManager";
 }

 const resource_set& ldap_role_manager::protected_resources() const {
@@ -124,17 +114,16 @@ const resource_set& ldap_role_manager::protected_resources() const {

 future<> ldap_role_manager::start() {
    if (!parse_url(get_url("dummy-user"))) { // Just need host and port -- any user should do.
-        return make_exception_future(
-                std::runtime_error(fmt::format("error getting LDAP server address from template {}", _query_template)));
+        return make_exception_future(std::runtime_error(fmt::format("error getting LDAP server address from template {}", _query_template)));
    }
-    _cache_pruner = futurize_invoke([this] () -> future<> {
+    _cache_pruner = futurize_invoke([this]() -> future<> {
        while (true) {
            try {
                co_await seastar::sleep_abortable(std::chrono::milliseconds(_permissions_update_interval_in_ms), _as);
            } catch (const seastar::sleep_aborted&) {
                co_return; // ignore
            }
-            co_await _cache.container().invoke_on_all([] (cache& c) -> future<> {
+            co_await _cache.container().invoke_on_all([](cache& c) -> future<> {
                try {
                    co_await c.reload_all_permissions();
                } catch (...) {
@@ -176,7 +165,7 @@ future<conn_ptr> ldap_role_manager::connect() {
 future<conn_ptr> ldap_role_manager::reconnect() {
    unsigned retries_left = 5;
    using namespace std::literals::chrono_literals;
-    conn_ptr conn = co_await exponential_backoff_retry::do_until_value(1s, 32s, _as, [this, &retries_left] () -> future<std::optional<conn_ptr>> {
+    conn_ptr conn = co_await exponential_backoff_retry::do_until_value(1s, 32s, _as, [this, &retries_left]() -> future<std::optional<conn_ptr>> {
        if (!retries_left) {
            co_return conn_ptr{};
        }
@@ -199,11 +188,13 @@ future<conn_ptr> ldap_role_manager::reconnect() {

 future<> ldap_role_manager::stop() {
    _as.request_abort();
-    return std::move(_cache_pruner).then([this] {
-        return _std_mgr.stop();
-    }).then([this] {
-        return _connection_factory.stop();
-    });
+    return std::move(_cache_pruner)
+            .then([this] {
+                return _std_mgr.stop();
+            })
+            .then([this] {
+                return _connection_factory.stop();
+            });
 }

 future<> ldap_role_manager::create(std::string_view name, const role_config& config, ::service::group0_batch& mc) {
@@ -232,43 +223,42 @@ future<role_set> ldap_role_manager::query_granted(std::string_view grantee_name,
    if (!desc) {
        return make_exception_future<role_set>(std::runtime_error(format("Error parsing URL {}", url)));
    }
-    return _connection_factory.with_connection([this, desc = std::move(desc), grantee_name_ = sstring(grantee_name)]
-                                               (ldap_connection& conn) -> future<role_set> {
-        sstring grantee_name = std::move(grantee_name_);
-        ldap_msg_ptr res = co_await conn.search(desc->lud_dn, desc->lud_scope, desc->lud_filter, desc->lud_attrs,
-                           /*attrsonly=*/0, /*serverctrls=*/nullptr, /*clientctrls=*/nullptr,
-                           /*timeout=*/nullptr, /*sizelimit=*/0);
-        mylog.trace("query_granted: got search results");
-        const auto mtype = ldap_msgtype(res.get());
-        if (mtype != LDAP_RES_SEARCH_ENTRY && mtype != LDAP_RES_SEARCH_RESULT && mtype != LDAP_RES_SEARCH_REFERENCE) {
-            mylog.error("ldap search yielded result {} of type {}", static_cast<const void*>(res.get()), mtype);
-            co_return coroutine::exception(std::make_exception_ptr(std::runtime_error("ldap_role_manager: search result has wrong type")));
-        }
-        std::vector<sstring> values = get_attr_values(conn.get_ldap(), res.get(), _target_attr.c_str());
-        auth::role_set valid_roles{grantee_name};
-
-        // Each value is a role to be granted.
-        co_await parallel_for_each(values, [this, &valid_roles] (const sstring& ldap_role) {
-            return _std_mgr.exists(ldap_role).then([&valid_roles, &ldap_role] (bool exists) {
-                if (exists) {
-                    valid_roles.insert(ldap_role);
-                } else {
-                    mylog.error("unrecognized role received from LDAP: {}", ldap_role);
+    return _connection_factory.with_connection(
+            [this, desc = std::move(desc), grantee_name_ = sstring(grantee_name)](ldap_connection& conn) -> future<role_set> {
+                sstring grantee_name = std::move(grantee_name_);
+                ldap_msg_ptr res = co_await conn.search(desc->lud_dn, desc->lud_scope, desc->lud_filter, desc->lud_attrs,
+                        /*attrsonly=*/0, /*serverctrls=*/nullptr, /*clientctrls=*/nullptr,
+                        /*timeout=*/nullptr, /*sizelimit=*/0);
+                mylog.trace("query_granted: got search results");
+                const auto mtype = ldap_msgtype(res.get());
+                if (mtype != LDAP_RES_SEARCH_ENTRY && mtype != LDAP_RES_SEARCH_RESULT && mtype != LDAP_RES_SEARCH_REFERENCE) {
+                    mylog.error("ldap search yielded result {} of type {}", static_cast<const void*>(res.get()), mtype);
+                    co_return coroutine::exception(std::make_exception_ptr(std::runtime_error("ldap_role_manager: search result has wrong type")));
                }
-            });
-        });
+                std::vector<sstring> values = get_attr_values(conn.get_ldap(), res.get(), _target_attr.c_str());
+                auth::role_set valid_roles{grantee_name};

-        co_return std::move(valid_roles);
-    });
+                // Each value is a role to be granted.
+                co_await parallel_for_each(values, [this, &valid_roles](const sstring& ldap_role) {
+                    return _std_mgr.exists(ldap_role).then([&valid_roles, &ldap_role](bool exists) {
+                        if (exists) {
+                            valid_roles.insert(ldap_role);
+                        } else {
+                            mylog.error("unrecognized role received from LDAP: {}", ldap_role);
+                        }
+                    });
+                });
+
+                co_return std::move(valid_roles);
+            });
 }

-future<role_to_directly_granted_map>
-ldap_role_manager::query_all_directly_granted(::service::query_state& qs) {
+future<role_to_directly_granted_map> ldap_role_manager::query_all_directly_granted(::service::query_state& qs) {
    role_to_directly_granted_map result;
    auto roles = co_await query_all(qs);
-    for (auto& role: roles) {
+    for (auto& role : roles) {
        auto granted_set = co_await query_granted(role, recursive_role_query::no);
-        for (auto& granted: granted_set) {
+        for (auto& granted : granted_set) {
            if (granted != role) {
                result.insert({role, granted});
            }
@@ -282,7 +272,7 @@ future<role_set> ldap_role_manager::query_all(::service::query_state& qs) {
 }

 future<> ldap_role_manager::create_role(std::string_view role_name) {
-    return smp::submit_to(0, [this, role_name] () -> future<> {
+    return smp::submit_to(0, [this, role_name]() -> future<> {
        int retries = 10;
        while (true) {
            auto guard = co_await _group0_client.start_operation(_as, ::service::raft_timeout{});
@@ -294,8 +284,8 @@ future<> ldap_role_manager::create_role(std::string_view role_name) {
            } catch (const role_already_exists&) {
                // ok
            } catch (const ::service::group0_concurrent_modification& ex) {
-                mylog.warn("Failed to auto-create role \"{}\" due to guard conflict.{}.",
-                        role_name, retries ? " Retrying" : " Number of retries exceeded, giving up");
+                mylog.warn("Failed to auto-create role \"{}\" due to guard conflict.{}.", role_name,
+                        retries ? " Retrying" : " Number of retries exceeded, giving up");
                if (retries--) {
                    continue;
                }
@@ -340,8 +330,7 @@ future<bool> ldap_role_manager::can_login(std::string_view role_name) {
    return _std_mgr.can_login(role_name);
 }

-future<std::optional<sstring>> ldap_role_manager::get_attribute(
-        std::string_view role_name, std::string_view attribute_name, ::service::query_state& qs) {
+future<std::optional<sstring>> ldap_role_manager::get_attribute(std::string_view role_name, std::string_view attribute_name, ::service::query_state& qs) {
    return _std_mgr.get_attribute(role_name, attribute_name, qs);
 }

--- a/auth/ldap_role_manager.hh
+++ b/auth/ldap_role_manager.hh
@@ -57,8 +57,7 @@ class ldap_role_manager : public role_manager {
            cache& cache ///< Passed to standard_role_manager.
    );

-    /// Retrieves LDAP configuration entries from qp and invokes the other constructor.  Required by
-    /// class_registrator<role_manager>.
+    /// Retrieves LDAP configuration entries from qp and invokes the other constructor.
    ldap_role_manager(cql3::query_processor& qp, ::service::raft_group0_client& rg0c, ::service::migration_manager& mm, cache& cache);

    /// Thrown when query-template parsing fails.
--- a/auth/maintenance_socket_authenticator.cc
+++ b/auth/maintenance_socket_authenticator.cc
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2026-present ScyllaDB
+ *
+ * Modified by ScyllaDB
+ */
+
+/*
+ * SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.0 and Apache-2.0)
+ */
+
+#include "auth/maintenance_socket_authenticator.hh"
+
+
+namespace auth {
+
+maintenance_socket_authenticator::~maintenance_socket_authenticator() {
+}
+
+future<> maintenance_socket_authenticator::start() {
+    return make_ready_future<>();
+}
+
+future<> maintenance_socket_authenticator::ensure_superuser_is_created() const {
+    return make_ready_future<>();
+}
+
+bool maintenance_socket_authenticator::require_authentication() const {
+    return false;
+}
+
+} // namespace auth
--- a/auth/maintenance_socket_authenticator.hh
+++ b/auth/maintenance_socket_authenticator.hh
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2026-present ScyllaDB
+ *
+ * Modified by ScyllaDB
+ */
+
+/*
+ * SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.0 and Apache-2.0)
+ */
+
+#pragma once
+
+#include <seastar/core/shared_future.hh>
+
+#include "password_authenticator.hh"
+
+namespace auth {
+
+// maintenance_socket_authenticator is used for clients connecting to the
+// maintenance socket. It does not require authentication,
+// while still allowing the managing of roles and their credentials.
+class maintenance_socket_authenticator : public password_authenticator {
+public:
+    using password_authenticator::password_authenticator;
+
+    virtual ~maintenance_socket_authenticator();
+
+    virtual future<> start() override;
+
+    virtual future<> ensure_superuser_is_created() const override;
+
+    bool require_authentication() const override;
+};
+
+} // namespace auth
+
--- a/auth/maintenance_socket_authorizer.hh
+++ b/auth/maintenance_socket_authorizer.hh
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2026-present ScyllaDB
+ *
+ * Modified by ScyllaDB
+ */
+
+/*
+ * SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.0 and Apache-2.0)
+ */
+
+#pragma once
+
+#include "auth/default_authorizer.hh"
+#include "auth/permission.hh"
+
+namespace auth {
+
+// maintenance_socket_authorizer is used for clients connecting to the
+// maintenance socket. It grants all permissions unconditionally (like
+// AllowAllAuthorizer) while still supporting grant/revoke operations
+// (delegated to the underlying CassandraAuthorizer / default_authorizer).
+class maintenance_socket_authorizer : public default_authorizer {
+public:
+    using default_authorizer::default_authorizer;
+
+    ~maintenance_socket_authorizer() override = default;
+
+    future<> start() override {
+        return make_ready_future<>();
+    }
+
+    future<permission_set> authorize(const role_or_anonymous&, const resource&) const override {
+        return make_ready_future<permission_set>(permissions::ALL);
+    }
+};
+
+} // namespace auth
--- a/auth/maintenance_socket_role_manager.cc
+++ b/auth/maintenance_socket_role_manager.cc
@@ -13,23 +13,48 @@
 #include <string_view>
 #include "auth/cache.hh"
 #include "cql3/description.hh"
-#include "utils/class_registrator.hh"
+#include "utils/log.hh"
+#include "utils/on_internal_error.hh"

 namespace auth {

-constexpr std::string_view maintenance_socket_role_manager_name = "com.scylladb.auth.MaintenanceSocketRoleManager";
+static logging::logger log("maintenance_socket_role_manager");

-static const class_registrator<
-        role_manager,
-        maintenance_socket_role_manager,
-        cql3::query_processor&,
-        ::service::raft_group0_client&,
-        ::service::migration_manager&,
-        cache&> registration(sstring{maintenance_socket_role_manager_name});
+future<> maintenance_socket_role_manager::ensure_role_operations_are_enabled() {
+    if (_is_maintenance_mode) { // maintenance mode and enabled role operations exclude each other
+        on_internal_error(log, "enabling role operations not allowed in maintenance mode");
+    }

+    if (_std_mgr) { // a delegate already exists, i.e. this is a repeated call
+        on_internal_error(log, "role operations are already enabled");
+    }
+    // Construct the delegate in place, then hand back the future of its start().
+    auto& delegate = _std_mgr.emplace(_qp, _group0_client, _migration_manager, _cache);
+    return delegate.start();
+}
+
+void maintenance_socket_role_manager::set_maintenance_mode() {
+    if (_std_mgr) { // the delegate was already created, so it is too late to switch modes
+        on_internal_error(log, "cannot enter maintenance mode after role operations have been enabled");
+    }
+    _is_maintenance_mode = true; // from now on validate_operation() rejects every role operation
+}
+
+maintenance_socket_role_manager::maintenance_socket_role_manager(
+        cql3::query_processor& query_proc,
+        ::service::raft_group0_client& group0,
+        ::service::migration_manager& mig_mgr,
+        cache& role_cache)
+    : _qp(query_proc)               // the four references are only stored here; they are
+    , _group0_client(group0)        // passed on to the delegate standard_role_manager when
+    , _migration_manager(mig_mgr)   // ensure_role_operations_are_enabled() constructs it
+    , _cache(role_cache)
+    , _std_mgr(std::nullopt)        // no delegate yet
+    , _is_maintenance_mode(false) { // normal mode until set_maintenance_mode() is called
+}

 std::string_view maintenance_socket_role_manager::qualified_java_name() const noexcept {
-    return maintenance_socket_role_manager_name;
+    return "com.scylladb.auth.MaintenanceSocketRoleManager"; // string literal has static storage, so the returned view never dangles
 }

 const resource_set& maintenance_socket_role_manager::protected_resources() const {
@@ -43,81 +68,161 @@ future<> maintenance_socket_role_manager::start() {
 }

 future<> maintenance_socket_role_manager::stop() {
-    return make_ready_future<>();
+    return _std_mgr ? _std_mgr->stop() : make_ready_future<>(); // no delegate was ever created => nothing to stop
 }

 future<> maintenance_socket_role_manager::ensure_superuser_is_created() {
-    return make_ready_future<>();
+    return _std_mgr ? _std_mgr->ensure_superuser_is_created() : make_ready_future<>(); // forwarded only once the delegate exists; otherwise completes immediately
 }

 template<typename T = void>
-future<T> operation_not_supported_exception(std::string_view operation) {
+future<T> operation_not_available_in_maintenance_mode_exception(std::string_view operation) { // returns a failed future<T>; T defaults to void
    return make_exception_future<T>(
-        std::runtime_error(fmt::format("role manager: {} operation not supported through maintenance socket", operation)));
+        std::runtime_error(fmt::format("role manager: {} operation not available through maintenance socket in maintenance mode", operation))); // `operation` fills the {} placeholder
 }

-future<> maintenance_socket_role_manager::create(std::string_view role_name, const role_config&, ::service::group0_batch&) {
-    return operation_not_supported_exception("CREATE");
+template<typename T = void> // T is the value type of the failed future that is returned
+future<T> manager_not_ready_exception(std::string_view operation) {
+    const auto msg = fmt::format("role manager: {} operation not available because manager not ready yet (role operations not enabled)", operation);
+    return make_exception_future<T>(std::runtime_error(msg));
+}
+
+future<> maintenance_socket_role_manager::validate_operation(std::string_view name) const {
+    // Maintenance mode is checked first, so it wins over the "not enabled yet" state.
+    if (_is_maintenance_mode) {
+        return operation_not_available_in_maintenance_mode_exception(name);
+    }
+    // Otherwise the operation is allowed only once the delegate manager exists.
+    const bool delegate_ready = _std_mgr.has_value();
+    return delegate_ready ? make_ready_future<>() : manager_not_ready_exception(name);
+}
+
+future<> maintenance_socket_role_manager::create(std::string_view role_name, const role_config& c, ::service::group0_batch& mc) {
+    // Return validate_operation()'s failure as-is; on success forward to the delegate manager.
+    if (auto check = validate_operation("CREATE"); check.failed()) {
+        return check;
+    }
+    return _std_mgr->create(role_name, c, mc);
 }

 future<> maintenance_socket_role_manager::drop(std::string_view role_name, ::service::group0_batch& mc) {
-    return operation_not_supported_exception("DROP");
+    // A failed validation future is returned unchanged; otherwise the delegate performs the drop.
+    if (auto check = validate_operation("DROP"); check.failed()) {
+        return check;
+    }
+    return _std_mgr->drop(role_name, mc);
 }

-future<> maintenance_socket_role_manager::alter(std::string_view role_name, const role_config_update&, ::service::group0_batch&) {
-    return operation_not_supported_exception("ALTER");
+future<> maintenance_socket_role_manager::alter(std::string_view role_name, const role_config_update& u, ::service::group0_batch& mc) {
+    // Bail out with the validation error, if any, before touching the delegate.
+    if (auto check = validate_operation("ALTER"); check.failed()) {
+        return check;
+    }
+    return _std_mgr->alter(role_name, u, mc);
 }

 future<> maintenance_socket_role_manager::grant(std::string_view grantee_name, std::string_view role_name, ::service::group0_batch& mc) {
-    return operation_not_supported_exception("GRANT");
+    // Validation failure short-circuits; otherwise the grant is forwarded to the delegate.
+    if (auto check = validate_operation("GRANT"); check.failed()) {
+        return check;
+    }
+    return _std_mgr->grant(grantee_name, role_name, mc);
 }

 future<> maintenance_socket_role_manager::revoke(std::string_view revokee_name, std::string_view role_name, ::service::group0_batch& mc) {
-    return operation_not_supported_exception("REVOKE");
+    // Validation failure short-circuits; otherwise the revoke is forwarded to the delegate.
+    if (auto check = validate_operation("REVOKE"); check.failed()) {
+        return check;
+    }
+    return _std_mgr->revoke(revokee_name, role_name, mc);
 }

-future<role_set> maintenance_socket_role_manager::query_granted(std::string_view grantee_name, recursive_role_query) {
-    return operation_not_supported_exception<role_set>("QUERY GRANTED");
+future<role_set> maintenance_socket_role_manager::query_granted(std::string_view grantee_name, recursive_role_query m) {
+    // The validation future is future<>, so its exception is re-wrapped as future<role_set>.
+    if (auto check = validate_operation("QUERY GRANTED"); check.failed()) {
+        return make_exception_future<role_set>(check.get_exception());
+    }
+    return _std_mgr->query_granted(grantee_name, m);
 }

-future<role_to_directly_granted_map> maintenance_socket_role_manager::query_all_directly_granted(::service::query_state&) {
-    return operation_not_supported_exception<role_to_directly_granted_map>("QUERY ALL DIRECTLY GRANTED");
+future<role_to_directly_granted_map> maintenance_socket_role_manager::query_all_directly_granted(::service::query_state& qs) {
+    // Re-wrap a validation error into this method's return type before giving up.
+    if (auto check = validate_operation("QUERY ALL DIRECTLY GRANTED"); check.failed()) {
+        return make_exception_future<role_to_directly_granted_map>(check.get_exception());
+    }
+    return _std_mgr->query_all_directly_granted(qs);
 }

-future<role_set> maintenance_socket_role_manager::query_all(::service::query_state&) {
-    return operation_not_supported_exception<role_set>("QUERY ALL");
+future<role_set> maintenance_socket_role_manager::query_all(::service::query_state& qs) {
+    // Re-wrap a validation error as future<role_set>; on success the delegate answers the query.
+    if (auto check = validate_operation("QUERY ALL"); check.failed()) {
+        return make_exception_future<role_set>(check.get_exception());
+    }
+    return _std_mgr->query_all(qs);
 }

 future<bool> maintenance_socket_role_manager::exists(std::string_view role_name) {
-    return operation_not_supported_exception<bool>("EXISTS");
+    // Re-wrap a validation error as future<bool>; on success the delegate is asked.
+    if (auto check = validate_operation("EXISTS"); check.failed()) {
+        return make_exception_future<bool>(check.get_exception());
+    }
+    return _std_mgr->exists(role_name);
 }

 future<bool> maintenance_socket_role_manager::is_superuser(std::string_view role_name) {
-    return make_ready_future<bool>(true);
+    // Fails when validation fails; otherwise the answer comes from the delegate manager.
+    if (auto check = validate_operation("IS SUPERUSER"); check.failed()) {
+        return make_exception_future<bool>(check.get_exception());
+    }
+    return _std_mgr->is_superuser(role_name);
 }

 future<bool> maintenance_socket_role_manager::can_login(std::string_view role_name) {
-    return make_ready_future<bool>(true);
+    // Fails when validation fails; otherwise the answer comes from the delegate manager.
+    if (auto check = validate_operation("CAN LOGIN"); check.failed()) {
+        return make_exception_future<bool>(check.get_exception());
+    }
+    return _std_mgr->can_login(role_name);
 }

-future<std::optional<sstring>> maintenance_socket_role_manager::get_attribute(std::string_view role_name, std::string_view attribute_name, ::service::query_state&) {
-    return operation_not_supported_exception<std::optional<sstring>>("GET ATTRIBUTE");
+future<std::optional<sstring>> maintenance_socket_role_manager::get_attribute(std::string_view role_name, std::string_view attribute_name, ::service::query_state& qs) {
+    // Re-wrap a validation error into this method's return type; otherwise forward the lookup.
+    if (auto check = validate_operation("GET ATTRIBUTE"); check.failed()) {
+        return make_exception_future<std::optional<sstring>>(check.get_exception());
+    }
+    return _std_mgr->get_attribute(role_name, attribute_name, qs);
 }

-future<role_manager::attribute_vals> maintenance_socket_role_manager::query_attribute_for_all(std::string_view attribute_name, ::service::query_state&) {
-    return operation_not_supported_exception<role_manager::attribute_vals>("QUERY ATTRIBUTE");
+future<role_manager::attribute_vals> maintenance_socket_role_manager::query_attribute_for_all(std::string_view attribute_name, ::service::query_state& qs) {
+    // Re-wrap a validation error into this method's return type; otherwise forward the query.
+    if (auto check = validate_operation("QUERY ATTRIBUTE FOR ALL"); check.failed()) {
+        return make_exception_future<role_manager::attribute_vals>(check.get_exception());
+    }
+    return _std_mgr->query_attribute_for_all(attribute_name, qs);
 }

 future<> maintenance_socket_role_manager::set_attribute(std::string_view role_name, std::string_view attribute_name, std::string_view attribute_value, ::service::group0_batch& mc) {
-    return operation_not_supported_exception("SET ATTRIBUTE");
+    // A failed validation future is returned unchanged; otherwise the delegate applies the change.
+    if (auto check = validate_operation("SET ATTRIBUTE"); check.failed()) {
+        return check;
+    }
+    return _std_mgr->set_attribute(role_name, attribute_name, attribute_value, mc);
 }

 future<> maintenance_socket_role_manager::remove_attribute(std::string_view role_name, std::string_view attribute_name, ::service::group0_batch& mc) {
-    return operation_not_supported_exception("REMOVE ATTRIBUTE");
+    // A failed validation future is returned unchanged; otherwise the delegate applies the removal.
+    if (auto check = validate_operation("REMOVE ATTRIBUTE"); check.failed()) {
+        return check;
+    }
+    return _std_mgr->remove_attribute(role_name, attribute_name, mc);
 }

 future<std::vector<cql3::description>> maintenance_socket_role_manager::describe_role_grants() {
-    return operation_not_supported_exception<std::vector<cql3::description>>("DESCRIBE SCHEMA WITH INTERNALS");
+    // Re-wrap a validation error into this method's return type; otherwise the delegate describes the grants.
+    if (auto check = validate_operation("DESCRIBE ROLE GRANTS"); check.failed()) {
+        return make_exception_future<std::vector<cql3::description>>(check.get_exception());
+    }
+    return _std_mgr->describe_role_grants();
 }

 } // namespace auth
--- a/auth/maintenance_socket_role_manager.hh
+++ b/auth/maintenance_socket_role_manager.hh
@@ -11,6 +11,7 @@
 #include "auth/cache.hh"
 #include "auth/resource.hh"
 #include "auth/role_manager.hh"
+#include "auth/standard_role_manager.hh"
 #include <seastar/core/future.hh>

 namespace cql3 {
@@ -24,13 +25,26 @@ class raft_group0_client;

 namespace auth {

-extern const std::string_view maintenance_socket_role_manager_name;
-
-// This role manager is used by the maintenance socket. It has disabled all role management operations to not depend on
-// system_auth keyspace, which may be not yet created when the maintenance socket starts listening.
+// This role manager is used by the maintenance socket. It has disabled all role management operations
+// in maintenance mode. In normal mode it delegates all operations to a standard_role_manager,
+// which is created on demand when the node joins the cluster.
 class maintenance_socket_role_manager final : public role_manager {
+    cql3::query_processor& _qp;
+    ::service::raft_group0_client& _group0_client;
+    ::service::migration_manager& _migration_manager;
+    cache& _cache;
+    std::optional<standard_role_manager> _std_mgr;
+    bool _is_maintenance_mode;
+
 public:
-    maintenance_socket_role_manager(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&) {}
+    void set_maintenance_mode() override;
+
+    // Ensures role management operations are enabled.
+    // It must be called once the node has joined the cluster.
+    // In the meantime all role management operations will fail.
+    future<> ensure_role_operations_are_enabled() override;
+
+    maintenance_socket_role_manager(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&);

    virtual std::string_view qualified_java_name() const noexcept override;

@@ -42,21 +56,21 @@ public:

    virtual future<> ensure_superuser_is_created() override;

-    virtual future<> create(std::string_view role_name, const role_config&, ::service::group0_batch&) override;
+    virtual future<> create(std::string_view role_name, const role_config& c, ::service::group0_batch& mc) override;

    virtual future<> drop(std::string_view role_name, ::service::group0_batch& mc) override;

-    virtual future<> alter(std::string_view role_name, const role_config_update&, ::service::group0_batch&) override;
+    virtual future<> alter(std::string_view role_name, const role_config_update& u, ::service::group0_batch& mc) override;

    virtual future<> grant(std::string_view grantee_name, std::string_view role_name, ::service::group0_batch& mc) override;

    virtual future<> revoke(std::string_view revokee_name, std::string_view role_name, ::service::group0_batch& mc) override;

-    virtual future<role_set> query_granted(std::string_view grantee_name, recursive_role_query) override;
+    virtual future<role_set> query_granted(std::string_view grantee_name, recursive_role_query m) override;

-    virtual future<role_to_directly_granted_map> query_all_directly_granted(::service::query_state&) override;
+    virtual future<role_to_directly_granted_map> query_all_directly_granted(::service::query_state& qs) override;

-    virtual future<role_set> query_all(::service::query_state&) override;
+    virtual future<role_set> query_all(::service::query_state& qs) override;

    virtual future<bool> exists(std::string_view role_name) override;

@@ -64,15 +78,19 @@ public:

    virtual future<bool> can_login(std::string_view role_name) override;

-    virtual future<std::optional<sstring>> get_attribute(std::string_view role_name, std::string_view attribute_name, ::service::query_state&) override;
+    virtual future<std::optional<sstring>> get_attribute(std::string_view role_name, std::string_view attribute_name, ::service::query_state& qs) override;

-    virtual future<role_manager::attribute_vals> query_attribute_for_all(std::string_view attribute_name, ::service::query_state&) override;
+    virtual future<role_manager::attribute_vals> query_attribute_for_all(std::string_view attribute_name, ::service::query_state& qs) override;

    virtual future<> set_attribute(std::string_view role_name, std::string_view attribute_name, std::string_view attribute_value, ::service::group0_batch& mc) override;

    virtual future<> remove_attribute(std::string_view role_name, std::string_view attribute_name, ::service::group0_batch& mc) override;

    virtual future<std::vector<cql3::description>> describe_role_grants() override;
+
+private:
+    future<> validate_operation(std::string_view name) const;
+
 };

 }
--- a/auth/password_authenticator.cc
+++ b/auth/password_authenticator.cc
@@ -26,10 +26,9 @@
 #include "cql3/untyped_result_set.hh"
 #include "utils/log.hh"
 #include "service/migration_manager.hh"
-#include "utils/class_registrator.hh"
-#include "replica/database.hh"
 #include "cql3/query_processor.hh"
 #include "db/config.hh"
+#include "db/system_keyspace.hh"

 namespace auth {

@@ -37,29 +36,10 @@ constexpr std::string_view password_authenticator_name("org.apache.cassandra.aut

 // name of the hash column.
 static constexpr std::string_view SALTED_HASH = "salted_hash";
-static constexpr std::string_view DEFAULT_USER_NAME = meta::DEFAULT_SUPERUSER_NAME;
-static const sstring DEFAULT_USER_PASSWORD = sstring(meta::DEFAULT_SUPERUSER_NAME);
-
 static logging::logger plogger("password_authenticator");

-// To ensure correct initialization order, we unfortunately need to use a string literal.
-static const class_registrator<
-        authenticator,
-        password_authenticator,
-        cql3::query_processor&,
-        ::service::raft_group0_client&,
-        ::service::migration_manager&,
-        cache&> password_auth_reg("org.apache.cassandra.auth.PasswordAuthenticator");
-
 static thread_local auto rng_for_salt = std::default_random_engine(std::random_device{}());

-static std::string_view get_config_value(std::string_view value, std::string_view def) {
-    return value.empty() ? def : value;
-}
-std::string password_authenticator::default_superuser(const db::config& cfg) {
-    return std::string(get_config_value(cfg.auth_superuser_name(), DEFAULT_USER_NAME));
-}
-
 password_authenticator::~password_authenticator() {
 }

@@ -69,7 +49,6 @@ password_authenticator::password_authenticator(cql3::query_processor& qp, ::serv
    , _migration_manager(mm)
    , _cache(cache)
    , _stopped(make_ready_future<>()) 
-    , _superuser(default_superuser(qp.db().get_config()))
 {}

 static bool has_salted_hash(const cql3::untyped_result_set_row& row) {
@@ -78,76 +57,18 @@ static bool has_salted_hash(const cql3::untyped_result_set_row& row) {

 sstring password_authenticator::update_row_query() const {
    return seastar::format("UPDATE {}.{} SET {} = ? WHERE {} = ?",
-            get_auth_ks_name(_qp),
+            db::system_keyspace::NAME, // keyspace part of the table name; a constant instead of a per-call lookup
            meta::roles_table::name,
            SALTED_HASH,
            meta::roles_table::role_col_name);
 }

-static const sstring legacy_table_name{"credentials"};
-
-bool password_authenticator::legacy_metadata_exists() const {
-    return _qp.db().has_schema(meta::legacy::AUTH_KS, legacy_table_name);
-}
-
-future<> password_authenticator::migrate_legacy_metadata() const {
-    plogger.info("Starting migration of legacy authentication metadata.");
-    static const sstring query = seastar::format("SELECT * FROM {}.{}", meta::legacy::AUTH_KS, legacy_table_name);
-
-    return _qp.execute_internal(
-            query,
-            db::consistency_level::QUORUM,
-            internal_distributed_query_state(),
-            cql3::query_processor::cache_internal::no).then([this](::shared_ptr<cql3::untyped_result_set> results) {
-        return do_for_each(*results, [this](const cql3::untyped_result_set_row& row) {
-            auto username = row.get_as<sstring>("username");
-            auto salted_hash = row.get_as<sstring>(SALTED_HASH);
-            static const auto query = seastar::format("UPDATE {}.{} SET {} = ? WHERE {} = ?",
-                    meta::legacy::AUTH_KS,
-                    meta::roles_table::name,
-                    SALTED_HASH,
-                    meta::roles_table::role_col_name);
-            return _qp.execute_internal(
-                    query,
-                    consistency_for_user(username),
-                    internal_distributed_query_state(),
-                    {std::move(salted_hash), username},
-                    cql3::query_processor::cache_internal::no).discard_result();
-        }).finally([results] {});
-    }).then([] {
-       plogger.info("Finished migrating legacy authentication metadata.");
-    }).handle_exception([](std::exception_ptr ep) {
-        plogger.error("Encountered an error during migration!");
-        std::rethrow_exception(ep);
-    });
-}
-
-future<> password_authenticator::legacy_create_default_if_missing() {
-    const auto exists = co_await legacy::default_role_row_satisfies(_qp, &has_salted_hash, _superuser);
-    if (exists) {
-        co_return;
-    }
-    std::string salted_pwd(get_config_value(_qp.db().get_config().auth_superuser_salted_password(), ""));
-    if (salted_pwd.empty()) {
-        salted_pwd = passwords::hash(DEFAULT_USER_PASSWORD, rng_for_salt, _scheme);
-    }
-    const auto query = seastar::format("UPDATE {}.{} SET {} = ? WHERE {} = ?",
-            meta::legacy::AUTH_KS,
-            meta::roles_table::name,
-            SALTED_HASH,
-            meta::roles_table::role_col_name);
-    co_await _qp.execute_internal(
-            query,
-            db::consistency_level::QUORUM,
-            internal_distributed_query_state(),
-            {salted_pwd, _superuser},
-            cql3::query_processor::cache_internal::no);
-    plogger.info("Created default superuser authentication record.");
-}
-
 future<> password_authenticator::maybe_create_default_password() {
    auto needs_password = [this] () -> future<bool> {
-        const sstring query = seastar::format("SELECT * FROM {}.{} WHERE is_superuser = true ALLOW FILTERING", get_auth_ks_name(_qp), meta::roles_table::name);
+        if (default_superuser(_qp).empty()) {
+            co_return false;
+        }
+        const sstring query = seastar::format("SELECT * FROM {}.{} WHERE is_superuser = true ALLOW FILTERING", db::system_keyspace::NAME, meta::roles_table::name);
        auto results = co_await _qp.execute_internal(query,
                db::consistency_level::LOCAL_ONE,
                internal_distributed_query_state(), cql3::query_processor::cache_internal::yes);
@@ -157,7 +78,7 @@ future<> password_authenticator::maybe_create_default_password() {
        bool has_default = false;
        bool has_superuser_with_password = false;
        for (auto& result : *results) {
-            if (result.get_as<sstring>(meta::roles_table::role_col_name) == _superuser) {
+            if (result.get_as<sstring>(meta::roles_table::role_col_name) == default_superuser(_qp)) {
                has_default = true;
            }
            if (has_salted_hash(result)) {
@@ -178,12 +99,12 @@ future<> password_authenticator::maybe_create_default_password() {
        co_return;
    }
    // Set default superuser's password.
-    std::string salted_pwd(get_config_value(_qp.db().get_config().auth_superuser_salted_password(), ""));
+    std::string salted_pwd(_qp.db().get_config().auth_superuser_salted_password());
    if (salted_pwd.empty()) {
-        salted_pwd = passwords::hash(DEFAULT_USER_PASSWORD, rng_for_salt, _scheme);
+        co_return;
    }
    const auto update_query = update_row_query();
-    co_await collect_mutations(_qp, batch, update_query, {salted_pwd, _superuser});
+    co_await collect_mutations(_qp, batch, update_query, {salted_pwd, default_superuser(_qp)});
    co_await std::move(batch).commit(_group0_client, _as, get_raft_timeout());
    plogger.info("Created default superuser authentication record.");
 }
@@ -216,58 +137,14 @@ future<> password_authenticator::start() {

        _stopped = do_after_system_ready(_as, [this] {
            return async([this] {
-                if (legacy_mode(_qp)) {
-                    if (!_superuser_created_promise.available()) {
-                        // Counterintuitively, we mark promise as ready before any startup work
-                        // because wait_for_schema_agreement() below will block indefinitely
-                        // without cluster majority. In that case, blocking node startup
-                        // would lead to a cluster deadlock.
-                        _superuser_created_promise.set_value();
-                    }
-                    _migration_manager.wait_for_schema_agreement(_qp.db().real_database(), db::timeout_clock::time_point::max(), &_as).get();
-
-                    if (legacy::any_nondefault_role_row_satisfies(_qp, &has_salted_hash, _superuser).get()) {
-                        if (legacy_metadata_exists()) {
-                            plogger.warn("Ignoring legacy authentication metadata since nondefault data already exist.");
-                        }
-
-                        return;
-                    }
-
-                    if (legacy_metadata_exists()) {
-                        migrate_legacy_metadata().get();
-                        return;
-                    }
-                    legacy_create_default_if_missing().get();
-                }
                utils::get_local_injector().inject("password_authenticator_start_pause", utils::wait_for_message(5min)).get();
-                if (!legacy_mode(_qp)) {
-                    maybe_create_default_password_with_retries().get();
-                    if (!_superuser_created_promise.available()) {
-                        _superuser_created_promise.set_value();
-                    }
+                maybe_create_default_password_with_retries().get();
+                if (!_superuser_created_promise.available()) {
+                    _superuser_created_promise.set_value();
                }
            });
        });

-        if (legacy_mode(_qp)) {
-            static const sstring create_roles_query = fmt::format(
-                    "CREATE TABLE {}.{} ("
-                    "  {} text PRIMARY KEY,"
-                    "  can_login boolean,"
-                    "  is_superuser boolean,"
-                    "  member_of set<text>,"
-                    "  salted_hash text"
-                    ")",
-                    meta::legacy::AUTH_KS,
-                    meta::roles_table::name,
-                    meta::roles_table::role_col_name);
-            return create_legacy_metadata_table_if_missing(
-                    meta::roles_table::name,
-                    _qp,
-                    create_roles_query,
-                    _migration_manager);
-        }
        return make_ready_future<>();
    });
 }
@@ -277,15 +154,6 @@ future<> password_authenticator::stop() {
    return _stopped.handle_exception_type([] (const sleep_aborted&) { }).handle_exception_type([](const abort_requested_exception&) {});
 }

-db::consistency_level password_authenticator::consistency_for_user(std::string_view role_name) {
-    // TODO: this is plain dung. Why treat hardcoded default special, but for example a user-created
-    // super user uses plain LOCAL_ONE?
-    if (role_name == DEFAULT_USER_NAME) {
-        return db::consistency_level::QUORUM;
-    }
-    return db::consistency_level::LOCAL_ONE;
-}
-
 std::string_view password_authenticator::qualified_java_name() const {
    return password_authenticator_name;
 }
@@ -315,20 +183,12 @@ future<authenticated_user> password_authenticator::authenticate(
    const sstring password = credentials.at(PASSWORD_KEY);

    try {
-        std::optional<sstring> salted_hash;
-        if (legacy_mode(_qp)) {
-            salted_hash = co_await get_password_hash(username);
-            if (!salted_hash) {
-                throw exceptions::authentication_exception("Username and/or password are incorrect");
-            }
-        } else {
-            auto role = _cache.get(username);
-            if (!role || role->salted_hash.empty()) {
-                throw exceptions::authentication_exception("Username and/or password are incorrect");
-            }
-            salted_hash = role->salted_hash;
+        auto role = _cache.get(username);
+        if (!role || role->salted_hash.empty()) {
+            throw exceptions::authentication_exception("Username and/or password are incorrect");
        }
-        const bool password_match = co_await passwords::check(password, *salted_hash);
+        const auto& salted_hash = role->salted_hash;
+        const bool password_match = co_await passwords::check(password, salted_hash);
        if (!password_match) {
            throw exceptions::authentication_exception("Username and/or password are incorrect");
        }
@@ -367,16 +227,7 @@ future<> password_authenticator::create(std::string_view role_name, const authen
    }

    const auto query = update_row_query();
-    if (legacy_mode(_qp)) {
-        co_await _qp.execute_internal(
-                query,
-                consistency_for_user(role_name),
-                internal_distributed_query_state(),
-                {std::move(*maybe_hash), sstring(role_name)},
-                cql3::query_processor::cache_internal::no).discard_result();
-    } else {
-        co_await collect_mutations(_qp, mc, query, {std::move(*maybe_hash), sstring(role_name)});
-    }
+    co_await collect_mutations(_qp, mc, query, {std::move(*maybe_hash), sstring(role_name)});
 }

 future<> password_authenticator::alter(std::string_view role_name, const authentication_options& options, ::service::group0_batch& mc) {
@@ -387,38 +238,21 @@ future<> password_authenticator::alter(std::string_view role_name, const authent
    const auto password = std::get<password_option>(*options.credentials).password;

    const sstring query = seastar::format("UPDATE {}.{} SET {} = ? WHERE {} = ?",
-            get_auth_ks_name(_qp),
+            db::system_keyspace::NAME,
            meta::roles_table::name,
            SALTED_HASH,
            meta::roles_table::role_col_name);
-    if (legacy_mode(_qp)) {
-        co_await _qp.execute_internal(
-                query,
-                consistency_for_user(role_name),
-                internal_distributed_query_state(),
-                {passwords::hash(password, rng_for_salt, _scheme), sstring(role_name)},
-                cql3::query_processor::cache_internal::no).discard_result();
-    } else {
-        co_await collect_mutations(_qp, mc, query,
-                {passwords::hash(password, rng_for_salt, _scheme), sstring(role_name)});
-    }
+    co_await collect_mutations(_qp, mc, query,
+            {passwords::hash(password, rng_for_salt, _scheme), sstring(role_name)});
 }

 future<> password_authenticator::drop(std::string_view name, ::service::group0_batch& mc) {
    const sstring query = seastar::format("DELETE {} FROM {}.{} WHERE {} = ?",
            SALTED_HASH,
-            get_auth_ks_name(_qp),
+            db::system_keyspace::NAME, // keyspace part of the table name
            meta::roles_table::name,
            meta::roles_table::role_col_name);
-    if (legacy_mode(_qp)) {
-        co_await _qp.execute_internal(
-                query, consistency_for_user(name),
-                internal_distributed_query_state(),
-                {sstring(name)},
-                cql3::query_processor::cache_internal::no).discard_result();
-    } else {
-        co_await collect_mutations(_qp, mc, query, {sstring(name)});
-    }
+    co_await collect_mutations(_qp, mc, query, {sstring(name)}); // NOTE(review): presumably adds the DELETE's mutations to `mc` rather than executing it here — confirm against collect_mutations
 }

 future<custom_options> password_authenticator::query_custom_options(std::string_view role_name) const {
@@ -437,13 +271,13 @@ future<std::optional<sstring>> password_authenticator::get_password_hash(std::st
    // that a map lookup string->statement is not gonna kill us much.
    const sstring query = seastar::format("SELECT {} FROM {}.{} WHERE {} = ?",
                SALTED_HASH,
-                get_auth_ks_name(_qp),
+                db::system_keyspace::NAME,
                meta::roles_table::name,
                meta::roles_table::role_col_name);

    const auto res = co_await _qp.execute_internal(
            query,
-            consistency_for_user(role_name),
+            db::consistency_level::LOCAL_ONE,
            internal_distributed_query_state(),
            {role_name},
            cql3::query_processor::cache_internal::yes);
--- a/auth/password_authenticator.hh
+++ b/auth/password_authenticator.hh
@@ -13,7 +13,6 @@
 #include <seastar/core/abort_source.hh>
 #include <seastar/core/shared_future.hh>

-#include "db/consistency_level_type.hh"
 #include "auth/authenticator.hh"
 #include "auth/passwords.hh"
 #include "auth/cache.hh"
@@ -44,15 +43,11 @@ class password_authenticator : public authenticator {
    cache& _cache;
    future<> _stopped;
    abort_source _as;
-    std::string _superuser; // default superuser name from the config (may or may not be present in roles table)
    shared_promise<> _superuser_created_promise;
    // We used to also support bcrypt, SHA-256, and MD5 (ref. scylladb#24524).
    constexpr static auth::passwords::scheme _scheme = passwords::scheme::sha_512;

 public:
-    static db::consistency_level consistency_for_user(std::string_view role_name);
-    static std::string default_superuser(const db::config&);
-
    password_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&);

    ~password_authenticator();
@@ -90,12 +85,6 @@ public:
    virtual future<> ensure_superuser_is_created() const override;

 private:
-    bool legacy_metadata_exists() const;
-
-    future<> migrate_legacy_metadata() const;
-
-    future<> legacy_create_default_if_missing();
-
    future<> maybe_create_default_password();
    future<> maybe_create_default_password_with_retries();

--- a/auth/role_manager.hh
+++ b/auth/role_manager.hh
@@ -112,6 +112,11 @@ public:

    virtual future<> stop() = 0;

+    ///
+    /// Notify the role manager that maintenance mode is starting. The default implementation does nothing.
+    ///
+    virtual void set_maintenance_mode() {}
+
    ///
    /// Ensure that superuser role exists.
    ///
@@ -119,6 +124,11 @@ public:
    ///
    virtual future<> ensure_superuser_is_created() = 0;

+    ///
+    /// Ensure role management operations are enabled. Some role managers may defer initialization; the default returns a ready future.
+    ///
+    virtual future<> ensure_role_operations_are_enabled() { return make_ready_future<>(); }
+
    ///
    /// \returns an exceptional future with \ref role_already_exists for a role that has previously been created.
    ///
--- a/auth/roles-metadata.cc
+++ b/auth/roles-metadata.cc
@@ -1,68 +0,0 @@
-/*
- * Copyright (C) 2018-present ScyllaDB
- */
-
-/*
- * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
- */
-
-#include "auth/roles-metadata.hh"
-
-#include <seastar/core/format.hh>
-#include <seastar/core/shared_ptr.hh>
-#include <seastar/core/sstring.hh>
-
-#include "auth/common.hh"
-#include "cql3/query_processor.hh"
-#include "cql3/untyped_result_set.hh"
-
-namespace auth {
-
-namespace legacy {
-
-future<bool> default_role_row_satisfies(
-        cql3::query_processor& qp,
-        std::function<bool(const cql3::untyped_result_set_row&)> p,
-        std::optional<std::string> rolename) {
-    const sstring query = seastar::format("SELECT * FROM {}.{} WHERE {} = ?",
-            auth::meta::legacy::AUTH_KS,
-            meta::roles_table::name,
-            meta::roles_table::role_col_name);
-
-    for (auto cl : { db::consistency_level::ONE, db::consistency_level::QUORUM }) {
-        auto results = co_await qp.execute_internal(query, cl
-            , internal_distributed_query_state()
-            , {rolename.value_or(std::string(auth::meta::DEFAULT_SUPERUSER_NAME))}
-            , cql3::query_processor::cache_internal::yes
-            );
-        if (!results->empty()) {
-            co_return p(results->one());
-        }
-    }
-    co_return false;
-}
-
-future<bool> any_nondefault_role_row_satisfies(
-        cql3::query_processor& qp,
-        std::function<bool(const cql3::untyped_result_set_row&)> p,
-        std::optional<std::string> rolename) {
-    const sstring query = seastar::format("SELECT * FROM {}.{}", auth::meta::legacy::AUTH_KS, meta::roles_table::name);
-
-    auto results = co_await qp.execute_internal(query, db::consistency_level::QUORUM
-        , internal_distributed_query_state(), cql3::query_processor::cache_internal::no
-        );
-    if (results->empty()) {
-        co_return false;
-    }
-    static const sstring col_name = sstring(meta::roles_table::role_col_name);
-
-    co_return std::ranges::any_of(*results, [&](const cql3::untyped_result_set_row& row) {
-        auto superuser = rolename ? std::string_view(*rolename) : meta::DEFAULT_SUPERUSER_NAME;
-        const bool is_nondefault = row.get_as<sstring>(col_name) != superuser;
-        return is_nondefault && p(row);
-    });
-}
-
-} // namespace legacy
-
-} // namespace auth
--- a/auth/roles-metadata.hh
+++ b/auth/roles-metadata.hh
@@ -8,18 +8,7 @@

 #pragma once

-#include <optional>
 #include <string_view>
-#include <functional>
-
-#include <seastar/core/future.hh>
-
-#include "seastarx.hh"
-
-namespace cql3 {
-class query_processor;
-class untyped_result_set_row;
-}

 namespace auth {

@@ -35,26 +24,4 @@ constexpr std::string_view role_col_name{"role", 4};

 } // namespace meta

-namespace legacy {
-
-///
-/// Check that the default role satisfies a predicate, or `false` if the default role does not exist.
-///
-future<bool> default_role_row_satisfies(
-        cql3::query_processor&,
-        std::function<bool(const cql3::untyped_result_set_row&)>,
-        std::optional<std::string> rolename = {}
-        );
-
-///
-/// Check that any nondefault role satisfies a predicate. `false` if no nondefault roles exist.
-///
-future<bool> any_nondefault_role_row_satisfies(
-        cql3::query_processor&,
-        std::function<bool(const cql3::untyped_result_set_row&)>,
-        std::optional<std::string> rolename = {}
-        );
-
-} // namespace legacy
-
 } // namespace auth
--- a/auth/saslauthd_authenticator.cc
+++ b/auth/saslauthd_authenticator.cc
@@ -22,21 +22,11 @@
 #include "db/config.hh"
 #include "utils/log.hh"
 #include "seastarx.hh"
-#include "utils/class_registrator.hh"

 namespace auth {

 static logging::logger mylog("saslauthd_authenticator");

-// To ensure correct initialization order, we unfortunately need to use a string literal.
-static const class_registrator<
-        authenticator,
-        saslauthd_authenticator,
-        cql3::query_processor&,
-        ::service::raft_group0_client&,
-        ::service::migration_manager&,
-        cache&> saslauthd_auth_reg("com.scylladb.auth.SaslauthdAuthenticator");
-
 saslauthd_authenticator::saslauthd_authenticator(cql3::query_processor& qp, ::service::raft_group0_client&, ::service::migration_manager&, cache&)
    : _socket_path(qp.db().get_config().saslauthd_socket_path())
 {}
--- a/auth/service.cc
+++ b/auth/service.cc
@@ -16,6 +16,8 @@
 #include <algorithm>
 #include <chrono>

+#include <boost/algorithm/string.hpp>
+
 #include <seastar/core/future-util.hh>
 #include <seastar/core/shard_id.hh>
 #include <seastar/core/sharded.hh>
@@ -23,8 +25,18 @@

 #include "auth/allow_all_authenticator.hh"
 #include "auth/allow_all_authorizer.hh"
+#include "auth/certificate_authenticator.hh"
 #include "auth/common.hh"
+#include "auth/default_authorizer.hh"
+#include "auth/ldap_role_manager.hh"
+#include "auth/maintenance_socket_authenticator.hh"
+#include "auth/maintenance_socket_authorizer.hh"
+#include "auth/maintenance_socket_role_manager.hh"
+#include "auth/password_authenticator.hh"
 #include "auth/role_or_anonymous.hh"
+#include "auth/saslauthd_authenticator.hh"
+#include "auth/standard_role_manager.hh"
+#include "auth/transitional.hh"
 #include "cql3/functions/functions.hh"
 #include "cql3/query_processor.hh"
 #include "cql3/description.hh"
@@ -43,7 +55,6 @@
 #include "service/raft/raft_group0_client.hh"
 #include "mutation/timestamp.hh"
 #include "utils/assert.hh"
-#include "utils/class_registrator.hh"
 #include "locator/abstract_replication_strategy.hh"
 #include "data_dictionary/keyspace_metadata.hh"
 #include "service/storage_service.hh"
@@ -63,88 +74,6 @@ static const sstring superuser_col_name("super");

 static logging::logger log("auth_service");

-class auth_migration_listener final : public ::service::migration_listener {
-    service& _service;
-    cql3::query_processor& _qp;
-
-public:
-    explicit auth_migration_listener(service& s, cql3::query_processor& qp) : _service(s),  _qp(qp) {
-    }
-
-private:
-    void on_create_keyspace(const sstring& ks_name) override {}
-    void on_create_column_family(const sstring& ks_name, const sstring& cf_name) override {}
-    void on_create_user_type(const sstring& ks_name, const sstring& type_name) override {}
-    void on_create_function(const sstring& ks_name, const sstring& function_name) override {}
-    void on_create_aggregate(const sstring& ks_name, const sstring& aggregate_name) override {}
-    void on_create_view(const sstring& ks_name, const sstring& view_name) override {}
-
-    void on_update_keyspace(const sstring& ks_name) override {}
-    void on_update_column_family(const sstring& ks_name, const sstring& cf_name, bool) override {}
-    void on_update_user_type(const sstring& ks_name, const sstring& type_name) override {}
-    void on_update_function(const sstring& ks_name, const sstring& function_name) override {}
-    void on_update_aggregate(const sstring& ks_name, const sstring& aggregate_name) override {}
-    void on_update_view(const sstring& ks_name, const sstring& view_name, bool columns_changed) override {}
-
-    void on_drop_keyspace(const sstring& ks_name) override {
-        if (!legacy_mode(_qp)) {
-            // in non legacy path revoke is part of schema change statement execution
-            return;
-        }
-        // Do it in the background.
-        (void)do_with(auth::make_data_resource(ks_name), ::service::group0_batch::unused(), [this] (auto& r, auto& mc) mutable {
-            return _service.revoke_all(r, mc);
-        }).handle_exception([] (std::exception_ptr e) {
-            log.error("Unexpected exception while revoking all permissions on dropped keyspace: {}", e);
-        });
-
-        (void)do_with(auth::make_functions_resource(ks_name), ::service::group0_batch::unused(), [this] (auto& r, auto& mc) mutable {
-            return _service.revoke_all(r, mc);
-        }).handle_exception([] (std::exception_ptr e) {
-            log.error("Unexpected exception while revoking all permissions on functions in dropped keyspace: {}", e);
-        });
-    }
-
-    void on_drop_column_family(const sstring& ks_name, const sstring& cf_name) override {
-        if (!legacy_mode(_qp)) {
-            // in non legacy path revoke is part of schema change statement execution
-            return;
-        }
-        // Do it in the background.
-        (void)do_with(auth::make_data_resource(ks_name, cf_name), ::service::group0_batch::unused(), [this] (auto& r, auto& mc) mutable {
-            return _service.revoke_all(r, mc);
-        }).handle_exception([] (std::exception_ptr e) {
-            log.error("Unexpected exception while revoking all permissions on dropped table: {}", e);
-        });
-    }
-
-    void on_drop_user_type(const sstring& ks_name, const sstring& type_name) override {}
-    void on_drop_function(const sstring& ks_name, const sstring& function_name) override {
-        if (!legacy_mode(_qp)) {
-            // in non legacy path revoke is part of schema change statement execution
-            return;
-        }
-        // Do it in the background.
-        (void)do_with(auth::make_functions_resource(ks_name, function_name), ::service::group0_batch::unused(), [this] (auto& r, auto& mc) mutable {
-            return _service.revoke_all(r, mc);
-        }).handle_exception([] (std::exception_ptr e) {
-            log.error("Unexpected exception while revoking all permissions on dropped function: {}", e);
-        });
-    }
-    void on_drop_aggregate(const sstring& ks_name, const sstring& aggregate_name) override {
-        if (!legacy_mode(_qp)) {
-            // in non legacy path revoke is part of schema change statement execution
-            return;
-        }
-        (void)do_with(auth::make_functions_resource(ks_name, aggregate_name), ::service::group0_batch::unused(), [this] (auto& r, auto& mc) mutable {
-            return _service.revoke_all(r, mc);
-        }).handle_exception([] (std::exception_ptr e) {
-            log.error("Unexpected exception while revoking all permissions on dropped aggregate: {}", e);
-        });
-    }
-    void on_drop_view(const sstring& ks_name, const sstring& view_name) override {}
-};
-
 static future<> validate_role_exists(const service& ser, std::string_view role_name) {
    return ser.underlying_role_manager().exists(role_name).then([role_name](bool exists) {
        if (!exists) {
@@ -157,7 +86,6 @@ service::service(
        cache& cache,
        cql3::query_processor& qp,
        ::service::raft_group0_client& g0,
-        ::service::migration_notifier& mn,
        std::unique_ptr<authorizer> z,
        std::unique_ptr<authenticator> a,
        std::unique_ptr<role_manager> r,
@@ -165,29 +93,26 @@ service::service(
            : _cache(cache)
            , _qp(qp)
            , _group0_client(g0)
-            , _mnotifier(mn)
            , _authorizer(std::move(z))
            , _authenticator(std::move(a))
            , _role_manager(std::move(r))
-            , _migration_listener(std::make_unique<auth_migration_listener>(*this, qp))
            , _used_by_maintenance_socket(used_by_maintenance_socket) {}

 service::service(
        cql3::query_processor& qp,
        ::service::raft_group0_client& g0,
-        ::service::migration_notifier& mn,
-        ::service::migration_manager& mm,
-        const service_config& sc,
+        authorizer_factory authorizer_factory,
+        authenticator_factory authenticator_factory,
+        role_manager_factory role_manager_factory,
        maintenance_socket_enabled used_by_maintenance_socket,
        cache& cache)
            : service(
                      cache,
                      qp,
                      g0,
-                      mn,
-                      create_object<authorizer>(sc.authorizer_java_name, qp, g0, mm),
-                      create_object<authenticator>(sc.authenticator_java_name, qp, g0, mm, cache),
-                      create_object<role_manager>(sc.role_manager_java_name, qp, g0, mm, cache),
+                      authorizer_factory(),
+                      authenticator_factory(),
+                      role_manager_factory(),
                      used_by_maintenance_socket) {
 }

@@ -220,9 +145,6 @@ future<> service::create_legacy_keyspace_if_missing(::service::migration_manager
 }

 future<> service::start(::service::migration_manager& mm, db::system_keyspace& sys_ks) {
-    auto auth_version = co_await sys_ks.get_auth_version();
-    // version is set in query processor to be easily available in various places we call auth::legacy_mode check.
-    _qp.auth_version = auth_version;
    if (this_shard_id() == 0) {
        co_await _cache.load_all();
    }
@@ -252,22 +174,12 @@ future<> service::start(::service::migration_manager& mm, db::system_keyspace& s
                &service::get_uncached_permissions,
                this, std::placeholders::_1, std::placeholders::_2));
    }
-    co_await once_among_shards([this] {
-        _mnotifier.register_listener(_migration_listener.get());
-        return make_ready_future<>();
-    });
 }

 future<> service::stop() {
    _as.request_abort();
-    // Only one of the shards has the listener registered, but let's try to
-    // unregister on each one just to make sure.
-    return _mnotifier.unregister_listener(_migration_listener.get()).then([this] {
-        _cache.set_permission_loader(nullptr);
-        return make_ready_future<>();
-    }).then([this] {
-        return when_all_succeed(_role_manager->stop(), _authorizer->stop(), _authenticator->stop()).discard_result();
-    });
+    _cache.set_permission_loader(nullptr);
+    return when_all_succeed(_role_manager->stop(), _authorizer->stop(), _authenticator->stop()).discard_result();
 }

 future<> service::ensure_superuser_is_created() {
@@ -301,12 +213,16 @@ service::get_uncached_permissions(const role_or_anonymous& maybe_role, const res
 }

 future<permission_set> service::get_permissions(const role_or_anonymous& maybe_role, const resource& r) const {
-    if (legacy_mode(_qp) || _used_by_maintenance_socket) {
+    if (_used_by_maintenance_socket) {
        return get_uncached_permissions(maybe_role, r);
    }
    return _cache.get_permissions(maybe_role, r);
 }

+void service::set_maintenance_mode() {
+    _role_manager->set_maintenance_mode(); // Pure relay: how to react to maintenance mode is left to the role manager.
+}
+
 future<bool> service::has_superuser(std::string_view role_name, const role_set& roles) const {
    for (const auto& role : roles) {
        if (co_await _role_manager->is_superuser(role)) {
@@ -342,6 +258,10 @@ static void validate_authentication_options_are_supported(
    }
 }

+future<> service::ensure_role_operations_are_enabled() {
+    return _role_manager->ensure_role_operations_are_enabled(); // The role manager's future is handed back unchanged.
+}
+
 future<> service::create_role(std::string_view name,
        const role_config& config,
        const authentication_options& options,
@@ -359,11 +279,6 @@ future<> service::create_role(std::string_view name,
        ep = std::current_exception();
    }
    if (ep) {
-        // Rollback only in legacy mode as normally mutations won't be
-        // applied in case exception is raised
-        if (legacy_mode(_qp)) {
-            co_await underlying_role_manager().drop(name, mc);
-        }
        std::rethrow_exception(std::move(ep));
    }
 }
@@ -442,11 +357,11 @@ future<std::vector<cql3::description>> service::describe_roles(bool with_hashed_

    const bool authenticator_uses_password_hashes = _authenticator->uses_password_hashes();

-    auto produce_create_statement = [with_hashed_passwords] (const sstring& formatted_role_name,
+    const auto default_su = cql3::util::maybe_quote(default_superuser(_qp));
+
+    auto produce_create_statement = [&default_su, with_hashed_passwords] (const sstring& formatted_role_name,
            const std::optional<sstring>& maybe_hashed_password, bool can_login, bool is_superuser) {
-        // Even after applying formatting to a role, `formatted_role_name` can only equal `meta::DEFAULT_SUPER_NAME`
-        // if the original identifier was equal to it.
-        const sstring role_part = formatted_role_name == meta::DEFAULT_SUPERUSER_NAME
+        const sstring role_part = formatted_role_name == default_su
                ? seastar::format("IF NOT EXISTS {}", formatted_role_name)
                : formatted_role_name;

@@ -659,6 +574,10 @@ future<std::vector<cql3::description>> service::describe_auth(bool with_hashed_p
 // Free functions.
 //

+void set_maintenance_mode(service& ser) {
+    ser.set_maintenance_mode(); // Free-function spelling of service::set_maintenance_mode().
+}
+
 future<bool> has_superuser(const service& ser, const authenticated_user& u) {
    if (is_anonymous(u)) {
        return make_ready_future<bool>(false);
@@ -667,6 +586,10 @@ future<bool> has_superuser(const service& ser, const authenticated_user& u) {
    return ser.has_superuser(*u.name);
 }

+future<> ensure_role_operations_are_enabled(service& ser) {
+    return ser.ensure_role_operations_are_enabled(); // Go through the service member, as set_maintenance_mode(service&) does.
+}
+
 future<role_set> get_roles(const service& ser, const authenticated_user& u) {
    if (is_anonymous(u)) {
        return make_ready_future<role_set>();
@@ -849,83 +772,115 @@ future<> commit_mutations(service& ser, ::service::group0_batch&& mc) {
    return ser.commit_mutations(std::move(mc));
 }

-future<> migrate_to_auth_v2(db::system_keyspace& sys_ks, ::service::raft_group0_client& g0, start_operation_func_t start_operation_func, abort_source& as) {
-    // FIXME: if this function fails it may leave partial data in the new tables
-    // that should be cleared
-    auto gen = [&sys_ks] (api::timestamp_type ts) -> ::service::mutations_generator {
-        auto& qp = sys_ks.query_processor();
-        for (const auto& cf_name : std::vector<sstring>{
-                "roles", "role_members", "role_attributes", "role_permissions"}) {
-            schema_ptr schema;
-            try {
-                schema = qp.db().find_schema(meta::legacy::AUTH_KS, cf_name);
-            } catch (const data_dictionary::no_such_column_family&) {
-                continue; // some tables might not have been created if they were not used
-            }
+namespace {

-            std::vector<sstring> col_names;
-            for (const auto& col : schema->all_columns()) {
-                col_names.push_back(col.name_as_cql_string());
-            }
-            sstring val_binders_str = "?";
-            for (size_t i = 1; i < col_names.size(); ++i) {
-                val_binders_str += ", ?";
-            }
+// Drops any dotted prefix: yields the text after the last '.' in `name`, or `name` itself when it has no '.'.
+std::string_view get_short_name(std::string_view name) {
+    // rfind reports npos for a dotless name, which is then returned untouched.
+    const auto last_dot = name.rfind('.');
+    const bool has_prefix = last_dot != std::string_view::npos;
+    return has_prefix ? name.substr(last_dot + 1) : name;
+}

-            std::vector<mutation> collected;
-            // use longer than usual timeout as we scan the whole table
-            // but not infinite or very long as we want to fail reasonably fast
-            const auto t = 5min;
-            const timeout_config tc{t, t, t, t, t, t, t};
-            ::service::client_state cs(::service::client_state::internal_tag{}, tc);
-            ::service::query_state qs(cs, empty_service_permit());
+} // anonymous namespace

-            co_await qp.query_internal(
-                seastar::format("SELECT * FROM {}.{}", meta::legacy::AUTH_KS, cf_name),
-                db::consistency_level::ALL,
-                {},
-                1000,
-                [&qp, &cf_name, &col_names, &val_binders_str, &schema, ts, &collected] (const cql3::untyped_result_set::row& row) -> future<stop_iteration> {
-                    std::vector<data_value_or_unset> values;
-                    for (const auto& col : schema->all_columns()) {
-                        if (row.has(col.name_as_text())) {
-                            values.push_back(
-                                    col.type->deserialize(row.get_blob_unfragmented(col.name_as_text())));
-                        } else {
-                            values.push_back(unset_value{});
-                        }
-                    }
-                    auto muts = co_await qp.get_mutations_internal(
-                            seastar::format("INSERT INTO {}.{} ({}) VALUES ({})",
-                                    db::system_keyspace::NAME,
-                                    cf_name,
-                                    fmt::join(col_names, ", "),
-                                    val_binders_str),
-                            internal_distributed_query_state(),
-                            ts,
-                            std::move(values));
-                    if (muts.size() != 1) {
-                        on_internal_error(log,
-                                format("expecting single insert mutation, got {}", muts.size()));
-                    }
+authorizer_factory make_authorizer_factory(
+        std::string_view name,
+        sharded<cql3::query_processor>& qp) {
+    std::string_view short_name = get_short_name(name);

-                    collected.push_back(std::move(muts[0]));
-                    co_return stop_iteration::no;
-                },
-                std::move(qs));
+    if (boost::iequals(short_name, "AllowAllAuthorizer")) {
+        return [&qp] {
+            return std::make_unique<allow_all_authorizer>(qp.local());
+        };
+    } else if (boost::iequals(short_name, "CassandraAuthorizer")) {
+        return [&qp] {
+            return std::make_unique<default_authorizer>(qp.local());
+        };
+    } else if (boost::iequals(short_name, "TransitionalAuthorizer")) {
+        return [&qp] {
+            return std::make_unique<transitional_authorizer>(qp.local());
+        };
+    }
+    throw std::invalid_argument(fmt::format("Unknown authorizer: {}", name));
+}

-            for (auto& m : collected) {
-                co_yield std::move(m);
-            }
-        }
-        co_yield co_await sys_ks.make_auth_version_mutation(ts,
-                db::system_keyspace::auth_version_t::v2);
+// Maps a configured authenticator class name to a factory that builds that authenticator.
+// Only the text after the last '.' in `name` is compared, ignoring case; an unknown name throws std::invalid_argument.
+authenticator_factory make_authenticator_factory(
+        std::string_view name,
+        sharded<cql3::query_processor>& qp, ::service::raft_group0_client& g0,
+        sharded<::service::migration_manager>& mm, sharded<cache>& auth_cache) {
+    const std::string_view kind = get_short_name(name);
+
+    // Every factory holds the sharded containers by reference and calls .local() only when it is invoked.
+    if (boost::iequals(kind, "AllowAllAuthenticator")) {
+        return [&qp, &g0, &mm, &auth_cache] { return std::make_unique<allow_all_authenticator>(qp.local(), g0, mm.local(), auth_cache.local()); };
+    }
+
+    if (boost::iequals(kind, "PasswordAuthenticator")) {
+        return [&qp, &g0, &mm, &auth_cache] { return std::make_unique<password_authenticator>(qp.local(), g0, mm.local(), auth_cache.local()); };
+    }
+
+    if (boost::iequals(kind, "CertificateAuthenticator")) {
+        return [&qp, &g0, &mm, &auth_cache] { return std::make_unique<certificate_authenticator>(qp.local(), g0, mm.local(), auth_cache.local()); };
+    }
+
+    if (boost::iequals(kind, "SaslauthdAuthenticator")) {
+        return [&qp, &g0, &mm, &auth_cache] { return std::make_unique<saslauthd_authenticator>(qp.local(), g0, mm.local(), auth_cache.local()); };
+    }
+
+    if (boost::iequals(kind, "TransitionalAuthenticator")) {
+        return [&qp, &g0, &mm, &auth_cache] { return std::make_unique<transitional_authenticator>(qp.local(), g0, mm.local(), auth_cache.local()); };
+    }
+
+    throw std::invalid_argument(fmt::format("Unknown authenticator: {}", name));
+}
+
+// Maps a configured role-manager class name to a factory that builds that role manager.
+// Only the text after the last '.' in `name` is compared, ignoring case; an unknown name throws std::invalid_argument.
+role_manager_factory make_role_manager_factory(
+        std::string_view name,
+        sharded<cql3::query_processor>& qp, ::service::raft_group0_client& g0,
+        sharded<::service::migration_manager>& mm, sharded<cache>& auth_cache) {
+    const std::string_view kind = get_short_name(name);
+
+    // Every factory holds the sharded containers by reference and calls .local() only when it is invoked.
+    if (boost::iequals(kind, "CassandraRoleManager")) {
+        return [&qp, &g0, &mm, &auth_cache] { return std::make_unique<standard_role_manager>(qp.local(), g0, mm.local(), auth_cache.local()); };
+    }
+
+    if (boost::iequals(kind, "LDAPRoleManager")) {
+        return [&qp, &g0, &mm, &auth_cache] { return std::make_unique<ldap_role_manager>(qp.local(), g0, mm.local(), auth_cache.local()); };
+    }
+
+    throw std::invalid_argument(fmt::format("Unknown role manager: {}", name));
+}
+
+// Factory for the maintenance-socket authenticator; the concrete type is fixed, so no name lookup is involved.
+authenticator_factory make_maintenance_socket_authenticator_factory(
+        sharded<cql3::query_processor>& qp, ::service::raft_group0_client& g0,
+        sharded<::service::migration_manager>& mm, sharded<cache>& auth_cache) {
+    auto build = [&qp, &g0, &mm, &auth_cache] {
+        return std::make_unique<maintenance_socket_authenticator>(qp.local(), g0, mm.local(), auth_cache.local());
+    };
+    return build;
+}
+
+// Factory for the maintenance-socket authorizer; qp.local() is resolved when the factory runs, not here.
+authorizer_factory make_maintenance_socket_authorizer_factory(sharded<cql3::query_processor>& qp) {
+    auto build = [&qp] { return std::make_unique<maintenance_socket_authorizer>(qp.local()); };
+    return build;
+}
+
+role_manager_factory make_maintenance_socket_role_manager_factory(
+        sharded<cql3::query_processor>& qp,
+        ::service::raft_group0_client& g0,
+        sharded<::service::migration_manager>& mm,
+        sharded<cache>& auth_cache) {
+    return [&qp, &g0, &mm, &auth_cache] {
+        return std::make_unique<maintenance_socket_role_manager>(qp.local(), g0, mm.local(), auth_cache.local());
    };
-    co_await announce_mutations_with_batching(g0,
-            start_operation_func,
-            std::move(gen),
-            as,
-            std::nullopt);
 }

 }
--- a/auth/service.hh
+++ b/auth/service.hh
@@ -12,6 +12,7 @@
 #include <memory>
 #include <optional>

+#include <seastar/core/coroutine.hh>
 #include <seastar/core/future.hh>
 #include <seastar/core/sstring.hh>
 #include <seastar/util/bool_class.hh>
@@ -36,19 +37,16 @@ class query_processor;

 namespace service {
 class migration_manager;
-class migration_notifier;
-class migration_listener;
 }

 namespace auth {

 class role_or_anonymous;

-struct service_config final {
-    sstring authorizer_java_name;
-    sstring authenticator_java_name;
-    sstring role_manager_java_name;
-};
+/// Factory function types for creating auth module instances on each shard.
+using authorizer_factory = std::function<std::unique_ptr<authorizer>()>;
+using authenticator_factory = std::function<std::unique_ptr<authenticator>()>;
+using role_manager_factory = std::function<std::unique_ptr<role_manager>()>;

 ///
 /// Due to poor (in this author's opinion) decisions of Apache Cassandra, certain choices of one role-manager,
@@ -80,17 +78,12 @@ class service final : public seastar::peering_sharded_service<service> {

    ::service::raft_group0_client& _group0_client;

-    ::service::migration_notifier& _mnotifier;
-
    authorizer::ptr_type _authorizer;

    authenticator::ptr_type _authenticator;

    role_manager::ptr_type _role_manager;

-    // Only one of these should be registered, so we end up with some unused instances. Not the end of the world.
-    std::unique_ptr<::service::migration_listener> _migration_listener;
-
    maintenance_socket_enabled _used_by_maintenance_socket;

    abort_source _as;
@@ -100,7 +93,6 @@ public:
            cache& cache,
            cql3::query_processor&,
            ::service::raft_group0_client&,
-            ::service::migration_notifier&,
            std::unique_ptr<authorizer>,
            std::unique_ptr<authenticator>,
            std::unique_ptr<role_manager>,
@@ -108,15 +100,15 @@ public:

    ///
    /// This constructor is intended to be used when the class is sharded via \ref seastar::sharded. In that case, the
-    /// arguments must be copyable, which is why we delay construction with instance-construction instructions instead
+    /// arguments must be copyable, which is why we delay construction with instance-construction factories instead
    /// of the instances themselves.
    ///
    service(
            cql3::query_processor&,
            ::service::raft_group0_client&,
-            ::service::migration_notifier&,
-            ::service::migration_manager&,
-            const service_config&,
+            authorizer_factory,
+            authenticator_factory,
+            role_manager_factory,
            maintenance_socket_enabled,
            cache&);

@@ -138,6 +130,11 @@ public:
    ///
    future<permission_set> get_uncached_permissions(const role_or_anonymous&, const resource&) const;

+    ///
+    /// Notify the service that the node is entering maintenance mode.
+    ///
+    void set_maintenance_mode();
+
    ///
    /// Query whether the named role has been granted a role that is a superuser.
    ///
@@ -147,6 +144,11 @@ public:
    ///
    future<bool> has_superuser(std::string_view role_name) const;

+    ///
+    /// Ensure that the role operations are enabled. Some role managers defer initialization.
+    ///
+    future<> ensure_role_operations_are_enabled();
+
    ///
    /// Create a role with optional authentication information.
    ///
@@ -192,12 +194,9 @@ public:
        return *_role_manager;
    }

-    cql3::query_processor& query_processor() const noexcept {
-        return _qp;
-    }
-
    future<> commit_mutations(::service::group0_batch&& mc) {
-        return std::move(mc).commit(_group0_client, _as, ::service::raft_timeout{});
+        co_await std::move(mc).commit(_group0_client, _as, ::service::raft_timeout{});
+        co_await _group0_client.send_group0_read_barrier_to_live_members();
    }

 private:
@@ -208,8 +207,12 @@ private:
    future<std::vector<cql3::description>> describe_permissions() const;
 };

+void set_maintenance_mode(service&);
+
 future<bool> has_superuser(const service&, const authenticated_user&);

+future<> ensure_role_operations_are_enabled(service&);
+
 future<role_set> get_roles(const service&, const authenticated_user&);

 future<permission_set> get_permissions(const service&, const authenticated_user&, const resource&);
@@ -393,7 +396,55 @@ future<std::vector<permission_details>> list_filtered_permissions(
 // Finalizes write operations performed in auth by committing mutations via raft group0.
 future<> commit_mutations(service& ser, ::service::group0_batch&& mc);

-// Migrates data from old keyspace to new one which supports linearizable writes via raft.
-future<> migrate_to_auth_v2(db::system_keyspace& sys_ks, ::service::raft_group0_client& g0, start_operation_func_t start_operation_func, abort_source& as);
+///
+/// Factory helper functions for creating auth module instances.
+/// These are intended for use with sharded<service>::start() where copyable arguments are required.
+/// The returned factories capture the sharded references and call .local() when invoked on each shard.
+///
+
+/// Creates an authorizer factory for config-selectable authorizer types.
+/// @param name The authorizer class name (e.g., "CassandraAuthorizer", "AllowAllAuthorizer")
+authorizer_factory make_authorizer_factory(
+        std::string_view name,
+        sharded<cql3::query_processor>& qp);
+
+/// Creates an authenticator factory for config-selectable authenticator types.
+/// @param name The authenticator class name (e.g., "PasswordAuthenticator", "AllowAllAuthenticator")
+authenticator_factory make_authenticator_factory(
+        std::string_view name,
+        sharded<cql3::query_processor>& qp,
+        ::service::raft_group0_client& g0,
+        sharded<::service::migration_manager>& mm,
+        sharded<cache>& cache);
+
+/// Creates a role_manager factory for config-selectable role manager types.
+/// @param name The role manager class name (e.g., "CassandraRoleManager")
+role_manager_factory make_role_manager_factory(
+        std::string_view name,
+        sharded<cql3::query_processor>& qp,
+        ::service::raft_group0_client& g0,
+        sharded<::service::migration_manager>& mm,
+        sharded<cache>& cache);
+
+/// Creates a factory for the maintenance socket authenticator.
+/// This authenticator is not config-selectable and is only used for the maintenance socket.
+authenticator_factory make_maintenance_socket_authenticator_factory(
+        sharded<cql3::query_processor>& qp,
+        ::service::raft_group0_client& g0,
+        sharded<::service::migration_manager>& mm,
+        sharded<cache>& cache);
+
+/// Creates a factory for the maintenance socket authorizer.
+/// This authorizer is not config-selectable and is only used for the maintenance socket.
+/// It grants all permissions unconditionally while delegating grant/revoke to the default authorizer.
+authorizer_factory make_maintenance_socket_authorizer_factory(sharded<cql3::query_processor>& qp);
+
+/// Creates a factory for the maintenance socket role manager.
+/// This role manager is not config-selectable and is only used for the maintenance socket.
+role_manager_factory make_maintenance_socket_role_manager_factory(
+        sharded<cql3::query_processor>& qp,
+        ::service::raft_group0_client& g0,
+        sharded<::service::migration_manager>& mm,
+        sharded<cache>& cache);

 }
--- a/auth/standard_role_manager.cc
+++ b/auth/standard_role_manager.cc
@@ -28,15 +28,14 @@
 #include "cql3/untyped_result_set.hh"
 #include "cql3/util.hh"
 #include "db/consistency_level_type.hh"
+#include "db/system_keyspace.hh"
 #include "exceptions/exceptions.hh"
 #include "utils/error_injection.hh"
 #include "utils/log.hh"
 #include <seastar/core/loop.hh>
 #include <seastar/coroutine/maybe_yield.hh>
 #include "service/raft/raft_group0_client.hh"
-#include "utils/class_registrator.hh"
 #include "service/migration_manager.hh"
-#include "password_authenticator.hh"
 #include "utils/managed_string.hh"

 namespace auth {
@@ -44,59 +43,13 @@ namespace auth {

 static logging::logger log("standard_role_manager");

-static const class_registrator<
-        role_manager,
-        standard_role_manager,
-        cql3::query_processor&,
-        ::service::raft_group0_client&,
-        ::service::migration_manager&,
-        cache&> registration("org.apache.cassandra.auth.CassandraRoleManager");
-
-static db::consistency_level consistency_for_role(std::string_view role_name) noexcept {
-    if (role_name == meta::DEFAULT_SUPERUSER_NAME) {
-        return db::consistency_level::QUORUM;
-    }
-
-    return db::consistency_level::LOCAL_ONE;
-}
-
-future<std::optional<standard_role_manager::record>> standard_role_manager::legacy_find_record(std::string_view role_name) {
-    const sstring query = seastar::format("SELECT * FROM {}.{} WHERE {} = ?",
-            get_auth_ks_name(_qp),
-            meta::roles_table::name,
-            meta::roles_table::role_col_name);
-
-    const auto results = co_await _qp.execute_internal(
-            query,
-            consistency_for_role(role_name),
-            internal_distributed_query_state(),
-            {sstring(role_name)},
-            cql3::query_processor::cache_internal::yes);
-    if (results->empty()) {
-        co_return std::optional<record>();
-    }
-
-    const cql3::untyped_result_set_row& row = results->one();
-    co_return std::make_optional(record{
-            row.get_as<sstring>(sstring(meta::roles_table::role_col_name)),
-            row.get_or<bool>("is_superuser", false),
-            row.get_or<bool>("can_login", false),
-            (row.has("member_of")
-                        ? row.get_set<sstring>("member_of")
-                        : role_set())});
-}
-
 future<std::optional<standard_role_manager::record>> standard_role_manager::find_record(std::string_view role_name) {
-    if (legacy_mode(_qp)) {
-        return legacy_find_record(role_name);
-    }
-    auto name = sstring(role_name);
-    auto role = _cache.get(name);
+    auto role = _cache.get(role_name);
    if (!role) {
        return make_ready_future<std::optional<record>>(std::nullopt);
    }
    return make_ready_future<std::optional<record>>(std::make_optional(record{
-        .name = std::move(name),
+        .name = sstring(role_name),
        .is_superuser = role->is_superuser,
        .can_login = role->can_login,
        .member_of = role->member_of
@@ -123,7 +76,6 @@ standard_role_manager::standard_role_manager(cql3::query_processor& qp, ::servic
    , _migration_manager(mm)
    , _cache(cache)
    , _stopped(make_ready_future<>())
-    , _superuser(password_authenticator::default_superuser(qp.db().get_config()))
 {}

 std::string_view standard_role_manager::qualified_java_name() const noexcept {
@@ -138,79 +90,12 @@ const resource_set& standard_role_manager::protected_resources() const {
    return resources;
 }

-future<> standard_role_manager::create_legacy_metadata_tables_if_missing() const {
-    static const sstring create_roles_query = fmt::format(
-            "CREATE TABLE {}.{} ("
-            "  {} text PRIMARY KEY,"
-            "  can_login boolean,"
-            "  is_superuser boolean,"
-            "  member_of set<text>,"
-            "  salted_hash text"
-            ")",
-            meta::legacy::AUTH_KS,
-            meta::roles_table::name,
-            meta::roles_table::role_col_name);
-    static const sstring create_role_members_query = fmt::format(
-            "CREATE TABLE {}.{} ("
-            "  role text,"
-            "  member text,"
-            "  PRIMARY KEY (role, member)"
-            ")",
-            meta::legacy::AUTH_KS,
-            ROLE_MEMBERS_CF);
-    static const sstring create_role_attributes_query = seastar::format(
-            "CREATE TABLE {}.{} ("
-            "  role text,"
-            "  name text,"
-            "  value text,"
-            "  PRIMARY KEY(role, name)"
-            ")",
-            meta::legacy::AUTH_KS,
-            ROLE_ATTRIBUTES_CF);
-    return when_all_succeed(
-            create_legacy_metadata_table_if_missing(
-                    meta::roles_table::name,
-                    _qp,
-                    create_roles_query,
-                    _migration_manager),
-            create_legacy_metadata_table_if_missing(
-                    ROLE_MEMBERS_CF,
-                    _qp,
-                    create_role_members_query,
-                    _migration_manager),
-            create_legacy_metadata_table_if_missing(
-                    ROLE_ATTRIBUTES_CF,
-                    _qp,
-                    create_role_attributes_query,
-                    _migration_manager)).discard_result();
-}
-
-future<> standard_role_manager::legacy_create_default_role_if_missing() {
-    try {
-        const auto exists = co_await legacy::default_role_row_satisfies(_qp, &has_can_login, _superuser);
-        if (exists) {
-            co_return;
-        }
-        const sstring query = seastar::format("INSERT INTO {}.{} ({}, is_superuser, can_login) VALUES (?, true, true)",
-                meta::legacy::AUTH_KS,
-                meta::roles_table::name,
-                meta::roles_table::role_col_name);
-        co_await _qp.execute_internal(
-                query,
-                db::consistency_level::QUORUM,
-                internal_distributed_query_state(),
-                {_superuser},
-                cql3::query_processor::cache_internal::no).discard_result();
-        log.info("Created default superuser role '{}'.", _superuser);
-    } catch (const exceptions::unavailable_exception& e) {
-        log.warn("Skipped default role setup: some nodes were not ready; will retry");
-        throw e;
-    }
-}
-
 future<> standard_role_manager::maybe_create_default_role() {
+    if (default_superuser(_qp).empty()) {
+        co_return;
+    }
    auto has_superuser = [this] () -> future<bool> {
-        const sstring query = seastar::format("SELECT * FROM {}.{} WHERE is_superuser = true ALLOW FILTERING", get_auth_ks_name(_qp), meta::roles_table::name);
+        const sstring query = seastar::format("SELECT * FROM {}.{} WHERE is_superuser = true ALLOW FILTERING", db::system_keyspace::NAME, meta::roles_table::name);
        auto results = co_await _qp.execute_internal(query, db::consistency_level::LOCAL_ONE,
                internal_distributed_query_state(), cql3::query_processor::cache_internal::yes);
        for (const auto& result : *results) {
@@ -234,12 +119,12 @@ future<> standard_role_manager::maybe_create_default_role() {
    // There is no superuser which has can_login field - create default role.
    // Note that we don't check if can_login is set to true.
    const sstring insert_query = seastar::format("INSERT INTO {}.{} ({}, is_superuser, can_login) VALUES (?, true, true)",
-            get_auth_ks_name(_qp),
+            db::system_keyspace::NAME,
            meta::roles_table::name,
            meta::roles_table::role_col_name);
-    co_await collect_mutations(_qp, batch, insert_query, {_superuser});
+    co_await collect_mutations(_qp, batch, insert_query, {default_superuser(_qp)});
    co_await std::move(batch).commit(_group0_client, _as, get_raft_timeout());
-    log.info("Created default superuser role '{}'.", _superuser);
+    log.info("Created default superuser role '{}'.", default_superuser(_qp));
 }

 future<> standard_role_manager::maybe_create_default_role_with_retries() {
@@ -262,78 +147,12 @@ future<> standard_role_manager::maybe_create_default_role_with_retries() {
    }
 }

-static const sstring legacy_table_name{"users"};
-
-bool standard_role_manager::legacy_metadata_exists() {
-    return _qp.db().has_schema(meta::legacy::AUTH_KS, legacy_table_name);
-}
-
-future<> standard_role_manager::migrate_legacy_metadata() {
-    log.info("Starting migration of legacy user metadata.");
-    static const sstring query = seastar::format("SELECT * FROM {}.{}", meta::legacy::AUTH_KS, legacy_table_name);
-
-    return _qp.execute_internal(
-            query,
-            db::consistency_level::QUORUM,
-            internal_distributed_query_state(),
-            cql3::query_processor::cache_internal::no).then([this](::shared_ptr<cql3::untyped_result_set> results) {
-        return do_for_each(*results, [this](const cql3::untyped_result_set_row& row) {
-            role_config config;
-            config.is_superuser = row.get_or<bool>("super", false);
-            config.can_login = true;
-
-            return do_with(
-                    row.get_as<sstring>("name"),
-                    std::move(config),
-                    ::service::group0_batch::unused(),
-                    [this](const auto& name, const auto& config, auto& mc) {
-                return create_or_replace(meta::legacy::AUTH_KS, name, config, mc);
-            });
-        }).finally([results] {});
-    }).then([] {
-        log.info("Finished migrating legacy user metadata.");
-    }).handle_exception([](std::exception_ptr ep) {
-        log.error("Encountered an error during migration!");
-        std::rethrow_exception(ep);
-    });
-}
-
 future<> standard_role_manager::start() {
    return once_among_shards([this] () -> future<> {
-        if (legacy_mode(_qp)) {
-            co_await create_legacy_metadata_tables_if_missing();
-        }
-
        auto handler = [this] () -> future<> {
-            const bool legacy = legacy_mode(_qp);
-            if (legacy) {
-                if (!_superuser_created_promise.available()) {
-                    // Counterintuitively, we mark promise as ready before any startup work
-                    // because wait_for_schema_agreement() below will block indefinitely
-                    // without cluster majority. In that case, blocking node startup
-                    // would lead to a cluster deadlock.
-                    _superuser_created_promise.set_value();
-                }
-                co_await _migration_manager.wait_for_schema_agreement(_qp.db().real_database(), db::timeout_clock::time_point::max(), &_as);
-
-                if (co_await legacy::any_nondefault_role_row_satisfies(_qp, &has_can_login)) {
-                    if (legacy_metadata_exists()) {
-                        log.warn("Ignoring legacy user metadata since nondefault roles already exist.");
-                    }
-                    co_return;
-                }
-
-                if (legacy_metadata_exists()) {
-                    co_await migrate_legacy_metadata();
-                    co_return;
-                }
-                co_await legacy_create_default_role_if_missing();
-            }
-            if (!legacy) {
-                co_await maybe_create_default_role_with_retries();
-                if (!_superuser_created_promise.available()) {
-                    _superuser_created_promise.set_value();
-                }
+            co_await maybe_create_default_role_with_retries();
+            if (!_superuser_created_promise.available()) {
+                _superuser_created_promise.set_value();
            }
        };

@@ -352,21 +171,12 @@ future<> standard_role_manager::ensure_superuser_is_created() {
    return _superuser_created_promise.get_shared_future();
 }

-future<> standard_role_manager::create_or_replace(std::string_view auth_ks_name, std::string_view role_name, const role_config& c, ::service::group0_batch& mc) {
+future<> standard_role_manager::create_or_replace(std::string_view role_name, const role_config& c, ::service::group0_batch& mc) {
    const sstring query = seastar::format("INSERT INTO {}.{} ({}, is_superuser, can_login) VALUES (?, ?, ?)",
-            auth_ks_name,
+            db::system_keyspace::NAME,
            meta::roles_table::name,
            meta::roles_table::role_col_name);
-    if (auth_ks_name == meta::legacy::AUTH_KS) {
-        co_await _qp.execute_internal(
-                query,
-                consistency_for_role(role_name),
-                internal_distributed_query_state(),
-                {sstring(role_name), c.is_superuser, c.can_login},
-                cql3::query_processor::cache_internal::yes).discard_result();
-    } else {
-        co_await collect_mutations(_qp, mc,  query, {sstring(role_name), c.is_superuser, c.can_login});
-    }
+    co_await collect_mutations(_qp, mc,  query, {sstring(role_name), c.is_superuser, c.can_login});
 }

 future<>
@@ -376,7 +186,7 @@ standard_role_manager::create(std::string_view role_name, const role_config& c,
            throw role_already_exists(role_name);
        }

-        return create_or_replace(get_auth_ks_name(_qp), role_name, c, mc);
+        return create_or_replace(role_name, c, mc);
    });
 }

@@ -401,20 +211,11 @@ standard_role_manager::alter(std::string_view role_name, const role_config_updat
            return make_ready_future<>();
        }
        const sstring query = seastar::format("UPDATE {}.{} SET {} WHERE {} = ?",
-            get_auth_ks_name(_qp),
+            db::system_keyspace::NAME,
            meta::roles_table::name,
            build_column_assignments(u),
            meta::roles_table::role_col_name);
-        if (legacy_mode(_qp)) {
-            return _qp.execute_internal(
-                    std::move(query),
-                    consistency_for_role(role_name),
-                    internal_distributed_query_state(),
-                    {sstring(role_name)},
-                    cql3::query_processor::cache_internal::no).discard_result();
-        } else {
-            return collect_mutations(_qp, mc, std::move(query), {sstring(role_name)});
-        }
+        return collect_mutations(_qp, mc, std::move(query), {sstring(role_name)});
    });
 }

@@ -425,11 +226,11 @@ future<> standard_role_manager::drop(std::string_view role_name, ::service::grou
    // First, revoke this role from all roles that are members of it.
    const auto revoke_from_members = [this, role_name, &mc] () -> future<> {
        const sstring query = seastar::format("SELECT member FROM {}.{} WHERE role = ?",
-                get_auth_ks_name(_qp),
+                db::system_keyspace::NAME,
                ROLE_MEMBERS_CF);
        const auto members = co_await _qp.execute_internal(
                query,
-                consistency_for_role(role_name),
+                db::consistency_level::LOCAL_ONE,
                internal_distributed_query_state(),
                {sstring(role_name)},
                cql3::query_processor::cache_internal::no);
@@ -457,102 +258,33 @@ future<> standard_role_manager::drop(std::string_view role_name, ::service::grou
    // Delete all attributes for that role
    const auto remove_attributes_of = [this, role_name, &mc] () -> future<> {
        const sstring query = seastar::format("DELETE FROM {}.{} WHERE role = ?",
-                get_auth_ks_name(_qp),
+                db::system_keyspace::NAME,
                ROLE_ATTRIBUTES_CF);
-        if (legacy_mode(_qp)) {
-            co_await _qp.execute_internal(query, {sstring(role_name)},
-                cql3::query_processor::cache_internal::yes).discard_result();
-        } else {
-            co_await collect_mutations(_qp, mc, query, {sstring(role_name)});
-        }
+        co_await collect_mutations(_qp, mc, query, {sstring(role_name)});
    };
    // Finally, delete the role itself.
    const auto delete_role = [this, role_name, &mc] () -> future<> {
        const sstring query = seastar::format("DELETE FROM {}.{} WHERE {} = ?",
-                get_auth_ks_name(_qp),
+                db::system_keyspace::NAME,
                meta::roles_table::name,
                meta::roles_table::role_col_name);

-        if (legacy_mode(_qp)) {
-            co_await _qp.execute_internal(
-                    query,
-                    consistency_for_role(role_name),
-                    internal_distributed_query_state(),
-                    {sstring(role_name)},
-                    cql3::query_processor::cache_internal::no).discard_result();
-        } else {
-            co_await collect_mutations(_qp, mc, query, {sstring(role_name)});
-        }
+        co_await collect_mutations(_qp, mc, query, {sstring(role_name)});
    };

    co_await when_all_succeed(revoke_from_members, revoke_members_of, remove_attributes_of);
    co_await delete_role();
 }

-future<>
-standard_role_manager::legacy_modify_membership(
-        std::string_view grantee_name,
-        std::string_view role_name,
-        membership_change ch) {
-    const auto modify_roles = [this, role_name, grantee_name, ch] () -> future<> {
-        const auto query = seastar::format(
-                "UPDATE {}.{} SET member_of = member_of {} ? WHERE {} = ?",
-                get_auth_ks_name(_qp),
-                meta::roles_table::name,
-                (ch == membership_change::add ? '+' : '-'),
-                meta::roles_table::role_col_name);
-        co_await _qp.execute_internal(
-                query,
-                consistency_for_role(grantee_name),
-                internal_distributed_query_state(),
-                {role_set{sstring(role_name)}, sstring(grantee_name)},
-                cql3::query_processor::cache_internal::no).discard_result();
-    };
-
-    const auto modify_role_members = [this, role_name, grantee_name, ch] () -> future<> {
-        switch (ch) {
-            case membership_change::add: {
-                const sstring insert_query = seastar::format("INSERT INTO {}.{} (role, member) VALUES (?, ?)",
-                        get_auth_ks_name(_qp),
-                        ROLE_MEMBERS_CF);
-                co_return co_await _qp.execute_internal(
-                        insert_query,
-                        consistency_for_role(role_name),
-                        internal_distributed_query_state(),
-                        {sstring(role_name), sstring(grantee_name)},
-                        cql3::query_processor::cache_internal::no).discard_result();
-            }
-
-            case membership_change::remove: {
-                const sstring delete_query = seastar::format("DELETE FROM {}.{} WHERE role = ? AND member = ?",
-                        get_auth_ks_name(_qp),
-                        ROLE_MEMBERS_CF);
-                co_return co_await _qp.execute_internal(
-                        delete_query,
-                        consistency_for_role(role_name),
-                        internal_distributed_query_state(),
-                        {sstring(role_name), sstring(grantee_name)},
-                        cql3::query_processor::cache_internal::no).discard_result();
-            }
-        }
-    };
-
-    co_await when_all_succeed(modify_roles, modify_role_members).discard_result();
-}
-
 future<>
 standard_role_manager::modify_membership(
        std::string_view grantee_name,
        std::string_view role_name,
        membership_change ch,
        ::service::group0_batch& mc) {
-    if (legacy_mode(_qp)) {
-        co_return co_await legacy_modify_membership(grantee_name, role_name, ch);
-    }
-
    const auto modify_roles = seastar::format(
            "UPDATE {}.{} SET member_of = member_of {} ? WHERE {} = ?",
-            get_auth_ks_name(_qp),
+            db::system_keyspace::NAME,
            meta::roles_table::name,
            (ch == membership_change::add ? '+' : '-'),
            meta::roles_table::role_col_name);
@@ -563,12 +295,12 @@ standard_role_manager::modify_membership(
    switch (ch) {
    case membership_change::add:
        modify_role_members = seastar::format("INSERT INTO {}.{} (role, member) VALUES (?, ?)",
-                get_auth_ks_name(_qp),
+                db::system_keyspace::NAME,
                ROLE_MEMBERS_CF);
        break;
    case membership_change::remove:
        modify_role_members = seastar::format("DELETE FROM {}.{} WHERE role = ? AND member = ?",
-                get_auth_ks_name(_qp),
+                db::system_keyspace::NAME,
                ROLE_MEMBERS_CF);
        break;
    default:
@@ -660,57 +392,21 @@ future<role_set> standard_role_manager::query_granted(std::string_view grantee_n
 }

 future<role_to_directly_granted_map> standard_role_manager::query_all_directly_granted(::service::query_state& qs) {
-    const sstring query = seastar::format("SELECT * FROM {}.{}",
-            get_auth_ks_name(_qp),
-            ROLE_MEMBERS_CF);
-
-    const auto results = co_await _qp.execute_internal(
-            query,
-            db::consistency_level::ONE,
-            qs,
-            cql3::query_processor::cache_internal::yes);
-
    role_to_directly_granted_map roles_map;
-    std::transform(
-            results->begin(),
-            results->end(),
-            std::inserter(roles_map, roles_map.begin()),
-            [] (const cql3::untyped_result_set_row& row) {
-                return std::make_pair(row.get_as<sstring>("member"), row.get_as<sstring>("role")); }
-    );
-
+    _cache.for_each_role([&roles_map] (const cache::role_name_t& name, const cache::role_record& record) {
+        for (const auto& granted_role : record.member_of) {
+            roles_map.emplace(name, granted_role);
+        }
+    });
    co_return roles_map;
 }

 future<role_set> standard_role_manager::query_all(::service::query_state& qs) {
-    const sstring query = seastar::format("SELECT {} FROM {}.{}",
-            meta::roles_table::role_col_name,
-            get_auth_ks_name(_qp),
-            meta::roles_table::name);
-
-    // To avoid many copies of a view.
-    static const auto role_col_name_string = sstring(meta::roles_table::role_col_name);
-
-    if (utils::get_local_injector().enter("standard_role_manager_fail_legacy_query")) {
-        if (legacy_mode(_qp)) {
-            throw std::runtime_error("standard_role_manager::query_all: failed due to error injection");
-        }
-    }
-
-    const auto results = co_await _qp.execute_internal(
-            query,
-            db::consistency_level::QUORUM,
-            qs,
-            cql3::query_processor::cache_internal::yes);
-
    role_set roles;
-    std::transform(
-            results->begin(),
-            results->end(),
-            std::inserter(roles, roles.begin()),
-            [] (const cql3::untyped_result_set_row& row) {
-                return row.get_as<sstring>(role_col_name_string);}
-    );
+    roles.reserve(_cache.roles_count());
+    _cache.for_each_role([&roles] (const cache::role_name_t& name, const cache::role_record&) {
+        roles.insert(name);
+    });
    co_return roles;
 }

@@ -733,31 +429,26 @@ future<bool> standard_role_manager::can_login(std::string_view role_name) {
 }

 future<std::optional<sstring>> standard_role_manager::get_attribute(std::string_view role_name, std::string_view attribute_name, ::service::query_state& qs) {
-    const sstring query = seastar::format("SELECT name, value FROM {}.{} WHERE role = ? AND name = ?",
-            get_auth_ks_name(_qp),
-            ROLE_ATTRIBUTES_CF);
-    const auto result_set = co_await _qp.execute_internal(query, db::consistency_level::ONE, qs, {sstring(role_name), sstring(attribute_name)}, cql3::query_processor::cache_internal::yes);
-    if (!result_set->empty()) {
-        const cql3::untyped_result_set_row &row = result_set->one();
-        co_return std::optional<sstring>(row.get_as<sstring>("value"));
+    auto role = _cache.get(role_name);
+    if (!role) {
+        co_return std::nullopt;
    }
-    co_return std::optional<sstring>{};
+    auto it = role->attributes.find(attribute_name);
+    if (it != role->attributes.end()) {
+        co_return it->second;
+    }
+    co_return std::nullopt;
 }

-future<role_manager::attribute_vals> standard_role_manager::query_attribute_for_all (std::string_view attribute_name, ::service::query_state& qs) {
-    return query_all(qs).then([this, attribute_name, &qs] (role_set roles) {
-        return do_with(attribute_vals{}, [this, attribute_name, roles = std::move(roles), &qs] (attribute_vals &role_to_att_val) {
-            return parallel_for_each(roles.begin(), roles.end(), [this, &role_to_att_val, attribute_name, &qs] (sstring role) {
-                return get_attribute(role, attribute_name, qs).then([&role_to_att_val, role] (std::optional<sstring> att_val) {
-                    if (att_val) {
-                        role_to_att_val.emplace(std::move(role), std::move(*att_val));
-                    }
-                });
-            }).then([&role_to_att_val] () {
-                return make_ready_future<attribute_vals>(std::move(role_to_att_val));
-            });
-        });
+future<role_manager::attribute_vals> standard_role_manager::query_attribute_for_all(std::string_view attribute_name, ::service::query_state& qs) {
+    attribute_vals result;
+    _cache.for_each_role([&result, attribute_name] (const cache::role_name_t& name, const cache::role_record& record) {
+        auto it = record.attributes.find(attribute_name);
+        if (it != record.attributes.end()) {
+            result.emplace(name, it->second);
+        }
    });
+    co_return result;
 }

 future<> standard_role_manager::set_attribute(std::string_view role_name, std::string_view attribute_name, std::string_view attribute_value, ::service::group0_batch& mc) {
@@ -765,14 +456,10 @@ future<> standard_role_manager::set_attribute(std::string_view role_name, std::s
        throw auth::nonexistant_role(role_name);
    }
    const sstring query = seastar::format("INSERT INTO {}.{} (role, name, value)  VALUES (?, ?, ?)",
-            get_auth_ks_name(_qp),
+            db::system_keyspace::NAME,
            ROLE_ATTRIBUTES_CF);
-    if (legacy_mode(_qp)) {
-        co_await _qp.execute_internal(query, {sstring(role_name), sstring(attribute_name), sstring(attribute_value)}, cql3::query_processor::cache_internal::yes).discard_result();
-    } else {
-        co_await collect_mutations(_qp, mc, query,
-                {sstring(role_name), sstring(attribute_name), sstring(attribute_value)});
-    }
+    co_await collect_mutations(_qp, mc, query,
+            {sstring(role_name), sstring(attribute_name), sstring(attribute_value)});
 }

 future<> standard_role_manager::remove_attribute(std::string_view role_name, std::string_view attribute_name, ::service::group0_batch& mc) {
@@ -780,14 +467,10 @@ future<> standard_role_manager::remove_attribute(std::string_view role_name, std
        throw auth::nonexistant_role(role_name);
    }
    const sstring query = seastar::format("DELETE FROM {}.{} WHERE role = ? AND name = ?",
-            get_auth_ks_name(_qp),
+            db::system_keyspace::NAME,
            ROLE_ATTRIBUTES_CF);
-    if (legacy_mode(_qp)) {
-        co_await _qp.execute_internal(query, {sstring(role_name), sstring(attribute_name)}, cql3::query_processor::cache_internal::yes).discard_result();
-    } else {
-        co_await collect_mutations(_qp, mc, query,
-                {sstring(role_name), sstring(attribute_name)});
-    }
+    co_await collect_mutations(_qp, mc, query,
+            {sstring(role_name), sstring(attribute_name)});
 }

 future<std::vector<cql3::description>> standard_role_manager::describe_role_grants() {
--- a/auth/standard_role_manager.hh
+++ b/auth/standard_role_manager.hh
@@ -40,7 +40,6 @@ class standard_role_manager final : public role_manager {
    cache& _cache;
    future<> _stopped;
    abort_source _as;
-    std::string _superuser;
    shared_promise<> _superuser_created_promise;

 public:
@@ -97,24 +96,13 @@ private:
        role_set member_of;
    };

-    future<> create_legacy_metadata_tables_if_missing() const;
-
-    bool legacy_metadata_exists();
-
-    future<> migrate_legacy_metadata();
-
-    future<> legacy_create_default_role_if_missing();
-
    future<> maybe_create_default_role();
    future<> maybe_create_default_role_with_retries();

-    future<> create_or_replace(std::string_view auth_ks_name, std::string_view role_name, const role_config&, ::service::group0_batch&);
-
-    future<> legacy_modify_membership(std::string_view role_name, std::string_view grantee_name, membership_change);
+    future<> create_or_replace(std::string_view role_name, const role_config&, ::service::group0_batch&);

    future<> modify_membership(std::string_view role_name, std::string_view grantee_name, membership_change, ::service::group0_batch& mc);

-    future<std::optional<record>> legacy_find_record(std::string_view role_name);
    future<std::optional<record>> find_record(std::string_view role_name);
    future<record> require_record(std::string_view role_name);
    future<> collect_roles(
--- a/auth/transitional.cc
+++ b/auth/transitional.cc
@@ -8,244 +8,200 @@
 * SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.0 and Apache-2.0)
 */

+#include "auth/transitional.hh"
 #include "auth/authenticated_user.hh"
-#include "auth/authenticator.hh"
-#include "auth/authorizer.hh"
 #include "auth/default_authorizer.hh"
 #include "auth/password_authenticator.hh"
-#include "auth/cache.hh"
 #include "auth/permission.hh"
 #include "service/raft/raft_group0_client.hh"
-#include "utils/class_registrator.hh"

 namespace auth {

-static const sstring PACKAGE_NAME("com.scylladb.auth.");
-
-static const sstring& transitional_authenticator_name() {
-    static const sstring name = PACKAGE_NAME + "TransitionalAuthenticator";
-    return name;
+transitional_authenticator::transitional_authenticator(cql3::query_processor& qp, ::service::raft_group0_client& g0, ::service::migration_manager& mm, cache& cache)
+        : transitional_authenticator(std::make_unique<password_authenticator>(qp, g0, mm, cache)) {
 }

-static const sstring& transitional_authorizer_name() {
-    static const sstring name = PACKAGE_NAME + "TransitionalAuthorizer";
-    return name;
+transitional_authenticator::transitional_authenticator(std::unique_ptr<authenticator> a)
+        : _authenticator(std::move(a)) {
 }

-class transitional_authenticator : public authenticator {
-    std::unique_ptr<authenticator> _authenticator;
+future<> transitional_authenticator::start() {
+    return _authenticator->start();
+}

-public:
-    static const sstring PASSWORD_AUTHENTICATOR_NAME;
+future<> transitional_authenticator::stop() {
+    return _authenticator->stop();
+}

-    transitional_authenticator(cql3::query_processor& qp, ::service::raft_group0_client& g0, ::service::migration_manager& mm, cache& cache)
-            : transitional_authenticator(std::make_unique<password_authenticator>(qp, g0, mm, cache)) {
+std::string_view transitional_authenticator::qualified_java_name() const {
+    return "com.scylladb.auth.TransitionalAuthenticator";
+}
+
+bool transitional_authenticator::require_authentication() const {
+    return true;
+}
+
+authentication_option_set transitional_authenticator::supported_options() const {
+    return _authenticator->supported_options();
+}
+
+authentication_option_set transitional_authenticator::alterable_options() const {
+    return _authenticator->alterable_options();
+}
+
+future<authenticated_user> transitional_authenticator::authenticate(const credentials_map& credentials) const {
+    auto i = credentials.find(authenticator::USERNAME_KEY);
+    if ((i == credentials.end() || i->second.empty())
+            && (!credentials.contains(PASSWORD_KEY) || credentials.at(PASSWORD_KEY).empty())) {
+        // No non-empty username or password was supplied: return the anonymous user.
+        return make_ready_future<authenticated_user>(anonymous_user());
    }
-    transitional_authenticator(std::unique_ptr<authenticator> a)
-            : _authenticator(std::move(a)) {
-    }
-
-    virtual future<> start() override {
-        return _authenticator->start();
-    }
-
-    virtual future<> stop() override {
-        return _authenticator->stop();
-    }
-
-    virtual std::string_view qualified_java_name() const override {
-        return transitional_authenticator_name();
-    }
-
-    virtual bool require_authentication() const override {
-        return true;
-    }
-
-    virtual authentication_option_set supported_options() const override {
-        return _authenticator->supported_options();
-    }
-
-    virtual authentication_option_set alterable_options() const override {
-        return _authenticator->alterable_options();
-    }
-
-    virtual future<authenticated_user> authenticate(const credentials_map& credentials) const override {
-        auto i = credentials.find(authenticator::USERNAME_KEY);
-        if ((i == credentials.end() || i->second.empty())
-                && (!credentials.contains(PASSWORD_KEY) || credentials.at(PASSWORD_KEY).empty())) {
+    return make_ready_future().then([this, &credentials] {
+        return _authenticator->authenticate(credentials);
+    }).handle_exception([](auto ep) {
+        try {
+            std::rethrow_exception(ep);
+        } catch (const exceptions::authentication_exception&) {
            // return anon user
            return make_ready_future<authenticated_user>(anonymous_user());
        }
-        return make_ready_future().then([this, &credentials] {
-            return _authenticator->authenticate(credentials);
-        }).handle_exception([](auto ep) {
-            try {
-                std::rethrow_exception(ep);
-            } catch (const exceptions::authentication_exception&) {
-                // return anon user
-                return make_ready_future<authenticated_user>(anonymous_user());
-            }
-        });
-    }
-
-    virtual future<> create(std::string_view role_name, const authentication_options& options, ::service::group0_batch& mc) override {
-        return _authenticator->create(role_name, options, mc);
-    }
-
-    virtual future<> alter(std::string_view role_name, const authentication_options& options, ::service::group0_batch& mc) override {
-        return _authenticator->alter(role_name, options, mc);
-    }
-
-    virtual future<> drop(std::string_view role_name, ::service::group0_batch& mc) override {
-        return _authenticator->drop(role_name, mc);
-    }
-
-    virtual future<custom_options> query_custom_options(std::string_view role_name) const override {
-        return _authenticator->query_custom_options(role_name);
-    }
-
-    virtual bool uses_password_hashes() const override {
-        return _authenticator->uses_password_hashes();
-    }
-
-    virtual future<std::optional<sstring>> get_password_hash(std::string_view role_name) const override {
-        return _authenticator->get_password_hash(role_name);
-    }
-
-    virtual const resource_set& protected_resources() const override {
-        return _authenticator->protected_resources();
-    }
-
-    virtual ::shared_ptr<sasl_challenge> new_sasl_challenge() const override {
-        class sasl_wrapper : public sasl_challenge {
-        public:
-            sasl_wrapper(::shared_ptr<sasl_challenge> sasl)
-                    : _sasl(std::move(sasl)) {
-            }
-
-            virtual bytes evaluate_response(bytes_view client_response) override {
-                try {
-                    return _sasl->evaluate_response(client_response);
-                } catch (const exceptions::authentication_exception&) {
-                    _complete = true;
-                    return {};
-                }
-            }
-
-            virtual bool is_complete() const override {
-                return _complete || _sasl->is_complete();
-            }
-
-            virtual future<authenticated_user> get_authenticated_user() const override {
-                return futurize_invoke([this] {
-                    return _sasl->get_authenticated_user().handle_exception([](auto ep) {
-                        try {
-                            std::rethrow_exception(ep);
-                        } catch (const exceptions::authentication_exception&) {
-                            // return anon user
-                            return make_ready_future<authenticated_user>(anonymous_user());
-                        }
-                    });
-                });
-	    }
-
-            const sstring& get_username() const override {
-                return _sasl->get_username();
-            }
-
-        private:
-            ::shared_ptr<sasl_challenge> _sasl;
-
-            bool _complete = false;
-        };
-        return ::make_shared<sasl_wrapper>(_authenticator->new_sasl_challenge());
-    }
-
-    virtual future<> ensure_superuser_is_created() const override {
-        return _authenticator->ensure_superuser_is_created();
-    }
-};
-
-class transitional_authorizer : public authorizer {
-    std::unique_ptr<authorizer> _authorizer;
-
-public:
-    transitional_authorizer(cql3::query_processor& qp, ::service::raft_group0_client& g0, ::service::migration_manager& mm)
-            : transitional_authorizer(std::make_unique<default_authorizer>(qp, g0, mm)) {
-    }
-    transitional_authorizer(std::unique_ptr<authorizer> a)
-            : _authorizer(std::move(a)) {
-    }
-
-    ~transitional_authorizer() {
-    }
-
-    virtual future<> start() override {
-        return _authorizer->start();
-    }
-
-    virtual future<> stop() override {
-        return _authorizer->stop();
-    }
-
-    virtual std::string_view qualified_java_name() const override {
-        return transitional_authorizer_name();
-    }
-
-    virtual future<permission_set> authorize(const role_or_anonymous&, const resource&) const override {
-        static const permission_set transitional_permissions =
-                permission_set::of<
-                        permission::CREATE,
-                        permission::ALTER,
-                        permission::DROP,
-                        permission::SELECT,
-                        permission::MODIFY>();
-
-        return make_ready_future<permission_set>(transitional_permissions);
-    }
-
-    virtual future<> grant(std::string_view s, permission_set ps, const resource& r, ::service::group0_batch& mc)  override {
-        return _authorizer->grant(s, std::move(ps), r, mc);
-    }
-
-    virtual future<> revoke(std::string_view s, permission_set ps, const resource& r, ::service::group0_batch& mc) override {
-        return _authorizer->revoke(s, std::move(ps), r, mc);
-    }
-
-    virtual future<std::vector<permission_details>> list_all() const override {
-        return _authorizer->list_all();
-    }
-
-    virtual future<> revoke_all(std::string_view s, ::service::group0_batch& mc) override {
-        return _authorizer->revoke_all(s, mc);
-    }
-
-    virtual future<> revoke_all(const resource& r, ::service::group0_batch& mc) override {
-        return _authorizer->revoke_all(r, mc);
-    }
-
-    virtual const resource_set& protected_resources() const override {
-        return _authorizer->protected_resources();
-    }
-};
-
+    });
 }

-//
-// To ensure correct initialization order, we unfortunately need to use string literals.
-//
+future<> transitional_authenticator::create(std::string_view role_name, const authentication_options& options, ::service::group0_batch& mc) {
+    return _authenticator->create(role_name, options, mc);
+}

-static const class_registrator<
-        auth::authenticator,
-        auth::transitional_authenticator,
-        cql3::query_processor&,
-        ::service::raft_group0_client&,
-        ::service::migration_manager&,
-        auth::cache&> transitional_authenticator_reg(auth::PACKAGE_NAME + "TransitionalAuthenticator");
+future<> transitional_authenticator::alter(std::string_view role_name, const authentication_options& options, ::service::group0_batch& mc) {
+    return _authenticator->alter(role_name, options, mc);
+}

-static const class_registrator<
-        auth::authorizer,
-        auth::transitional_authorizer,
-        cql3::query_processor&,
-        ::service::raft_group0_client&,
-        ::service::migration_manager&> transitional_authorizer_reg(auth::PACKAGE_NAME + "TransitionalAuthorizer");
+future<> transitional_authenticator::drop(std::string_view role_name, ::service::group0_batch& mc) {
+    return _authenticator->drop(role_name, mc);
+}
+
+future<custom_options> transitional_authenticator::query_custom_options(std::string_view role_name) const {
+    return _authenticator->query_custom_options(role_name);
+}
+
+bool transitional_authenticator::uses_password_hashes() const {
+    return _authenticator->uses_password_hashes();
+}
+
+future<std::optional<sstring>> transitional_authenticator::get_password_hash(std::string_view role_name) const {
+    return _authenticator->get_password_hash(role_name);
+}
+
+const resource_set& transitional_authenticator::protected_resources() const {
+    return _authenticator->protected_resources();
+}
+
+::shared_ptr<sasl_challenge> transitional_authenticator::new_sasl_challenge() const {
+    class sasl_wrapper : public sasl_challenge {
+    public:
+        sasl_wrapper(::shared_ptr<sasl_challenge> sasl)
+                : _sasl(std::move(sasl)) {
+        }
+
+        virtual bytes evaluate_response(bytes_view client_response) override {
+            try {
+                return _sasl->evaluate_response(client_response);
+            } catch (const exceptions::authentication_exception&) {
+                _complete = true;
+                return {};
+            }
+        }
+
+        virtual bool is_complete() const override {
+            return _complete || _sasl->is_complete();
+        }
+
+        virtual future<authenticated_user> get_authenticated_user() const override {
+            return futurize_invoke([this] {
+                return _sasl->get_authenticated_user().handle_exception([](auto ep) {
+                    try {
+                        std::rethrow_exception(ep);
+                    } catch (const exceptions::authentication_exception&) {
+                        // return anon user
+                        return make_ready_future<authenticated_user>(anonymous_user());
+                    }
+                });
+            });
+        }
+
+        const sstring& get_username() const override {
+            return _sasl->get_username();
+        }
+
+    private:
+        ::shared_ptr<sasl_challenge> _sasl;
+
+        bool _complete = false;
+    };
+    return ::make_shared<sasl_wrapper>(_authenticator->new_sasl_challenge());
+}
+
+future<> transitional_authenticator::ensure_superuser_is_created() const {
+    return _authenticator->ensure_superuser_is_created();
+}
+
+transitional_authorizer::transitional_authorizer(cql3::query_processor& qp)
+        : transitional_authorizer(std::make_unique<default_authorizer>(qp)) {
+}
+
+transitional_authorizer::transitional_authorizer(std::unique_ptr<authorizer> a)
+        : _authorizer(std::move(a)) {
+}
+
+transitional_authorizer::~transitional_authorizer() {
+}
+
+future<> transitional_authorizer::start() {
+    return _authorizer->start();
+}
+
+future<> transitional_authorizer::stop() {
+    return _authorizer->stop();
+}
+
+std::string_view transitional_authorizer::qualified_java_name() const {
+    return "com.scylladb.auth.TransitionalAuthorizer";
+}
+
+future<permission_set> transitional_authorizer::authorize(const role_or_anonymous&, const resource&) const {
+    static const permission_set transitional_permissions =
+            permission_set::of<
+                    permission::CREATE,
+                    permission::ALTER,
+                    permission::DROP,
+                    permission::SELECT,
+                    permission::MODIFY>();
+
+    return make_ready_future<permission_set>(transitional_permissions);
+}
+
+future<> transitional_authorizer::grant(std::string_view s, permission_set ps, const resource& r, ::service::group0_batch& mc) {
+    return _authorizer->grant(s, std::move(ps), r, mc);
+}
+
+future<> transitional_authorizer::revoke(std::string_view s, permission_set ps, const resource& r, ::service::group0_batch& mc) {
+    return _authorizer->revoke(s, std::move(ps), r, mc);
+}
+
+future<std::vector<permission_details>> transitional_authorizer::list_all() const {
+    return _authorizer->list_all();
+}
+
+future<> transitional_authorizer::revoke_all(std::string_view s, ::service::group0_batch& mc) {
+    return _authorizer->revoke_all(s, mc);
+}
+
+future<> transitional_authorizer::revoke_all(const resource& r, ::service::group0_batch& mc) {
+    return _authorizer->revoke_all(r, mc);
+}
+
+const resource_set& transitional_authorizer::protected_resources() const {
+    return _authorizer->protected_resources();
+}
+
+}
--- a/auth/transitional.hh
+++ b/auth/transitional.hh
@@ -0,0 +1,81 @@
+/*
+ * Copyright (C) 2026-present ScyllaDB
+ *
+ * Modified by ScyllaDB
+ */
+
+/*
+ * SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.0 and Apache-2.0)
+ */
+
+#pragma once
+
+#include "auth/authenticator.hh"
+#include "auth/authorizer.hh"
+#include "auth/cache.hh"
+
+namespace cql3 {
+class query_processor;
+}
+
+namespace service {
+class raft_group0_client;
+class migration_manager;
+}
+
+namespace auth {
+
+///
+/// Transitional authenticator: wraps another authenticator and falls back to the anonymous user
+/// when no credentials are supplied or the wrapped authenticator rejects them. Used for migration scenarios.
+///
+class transitional_authenticator : public authenticator {
+    std::unique_ptr<authenticator> _authenticator;
+
+public:
+    transitional_authenticator(cql3::query_processor& qp, ::service::raft_group0_client& g0, ::service::migration_manager& mm, cache& cache);
+    transitional_authenticator(std::unique_ptr<authenticator> a);
+
+    virtual future<> start() override;
+    virtual future<> stop() override;
+    virtual std::string_view qualified_java_name() const override;
+    virtual bool require_authentication() const override;
+    virtual authentication_option_set supported_options() const override;
+    virtual authentication_option_set alterable_options() const override;
+    virtual future<authenticated_user> authenticate(const credentials_map& credentials) const override;
+    virtual future<> create(std::string_view role_name, const authentication_options& options, ::service::group0_batch& mc) override;
+    virtual future<> alter(std::string_view role_name, const authentication_options& options, ::service::group0_batch& mc) override;
+    virtual future<> drop(std::string_view role_name, ::service::group0_batch& mc) override;
+    virtual future<custom_options> query_custom_options(std::string_view role_name) const override;
+    virtual bool uses_password_hashes() const override;
+    virtual future<std::optional<sstring>> get_password_hash(std::string_view role_name) const override;
+    virtual const resource_set& protected_resources() const override;
+    virtual ::shared_ptr<sasl_challenge> new_sasl_challenge() const override;
+    virtual future<> ensure_superuser_is_created() const override;
+};
+
+///
+/// Transitional authorizer: authorize() grants CREATE, ALTER, DROP, SELECT and MODIFY to every role
+/// on every resource; grant/revoke/list calls go to the wrapped authorizer. Used for migration scenarios.
+///
+class transitional_authorizer : public authorizer {
+    std::unique_ptr<authorizer> _authorizer;
+
+public:
+    transitional_authorizer(cql3::query_processor& qp);
+    transitional_authorizer(std::unique_ptr<authorizer> a);
+    ~transitional_authorizer();
+
+    virtual future<> start() override;
+    virtual future<> stop() override;
+    virtual std::string_view qualified_java_name() const override;
+    virtual future<permission_set> authorize(const role_or_anonymous&, const resource&) const override;
+    virtual future<> grant(std::string_view s, permission_set ps, const resource& r, ::service::group0_batch& mc) override;
+    virtual future<> revoke(std::string_view s, permission_set ps, const resource& r, ::service::group0_batch& mc) override;
+    virtual future<std::vector<permission_details>> list_all() const override;
+    virtual future<> revoke_all(std::string_view s, ::service::group0_batch& mc) override;
+    virtual future<> revoke_all(const resource& r, ::service::group0_batch& mc) override;
+    virtual const resource_set& protected_resources() const override;
+};
+
+} // namespace auth
--- a/cdc/generation.cc
+++ b/cdc/generation.cc
@@ -10,24 +10,15 @@
 #include <random>
 #include <unordered_set>
 #include <algorithm>
-#include <seastar/core/sleep.hh>
 #include <seastar/core/coroutine.hh>
 #include <seastar/coroutine/maybe_yield.hh>
-#include <seastar/util/later.hh>

-#include "gms/endpoint_state.hh"
-#include "gms/versioned_value.hh"
 #include "keys/keys.hh"
 #include "replica/database.hh"
 #include "db/system_keyspace.hh"
-#include "db/system_distributed_keyspace.hh"
 #include "dht/token-sharding.hh"
 #include "locator/token_metadata.hh"
 #include "types/set.hh"
-#include "gms/application_state.hh"
-#include "gms/inet_address.hh"
-#include "gms/gossiper.hh"
-#include "gms/feature_service.hh"
 #include "utils/assert.hh"
 #include "utils/error_injection.hh"
 #include "utils/UUID_gen.hh"
@@ -41,16 +32,6 @@

 extern logging::logger cdc_log;

-static int get_shard_count(const locator::host_id& endpoint, const gms::gossiper& g) {
-    auto ep_state = g.get_application_state_ptr(endpoint, gms::application_state::SHARD_COUNT);
-    return ep_state ? std::stoi(ep_state->value()) : -1;
-}
-
-static unsigned get_sharding_ignore_msb(const locator::host_id& endpoint, const gms::gossiper& g) {
-    auto ep_state = g.get_application_state_ptr(endpoint, gms::application_state::IGNORE_MSB_BITS);
-    return ep_state ? std::stoi(ep_state->value()) : 0;
-}
-
 namespace db {
    extern thread_local data_type cdc_streams_set_type;
 }
@@ -225,12 +206,6 @@ static std::vector<stream_id> create_stream_ids(
    return result;
 }

-bool should_propose_first_generation(const locator::host_id& my_host_id, const gms::gossiper& g) {
-    return g.for_each_endpoint_state_until([&] (const gms::endpoint_state& eps) {
-        return stop_iteration(my_host_id < eps.get_host_id());
-    }) == stop_iteration::no;
-}
-
 bool is_cdc_generation_optimal(const cdc::topology_description& gen, const locator::token_metadata& tm) {
    if (tm.sorted_tokens().size() != gen.entries().size()) {
        // We probably have garbage streams from old generations
@@ -330,38 +305,6 @@ future<utils::chunked_vector<mutation>> get_cdc_generation_mutations_v3(
    co_return co_await get_common_cdc_generation_mutations(s, pkey, std::move(get_ckey), desc, mutation_size_threshold, ts);
 }

-// non-static for testing
-size_t limit_of_streams_in_topology_description() {
-    // Each stream takes 16B and we don't want to exceed 4MB so we can have
-    // at most 262144 streams but not less than 1 per vnode.
-    return 4 * 1024 * 1024 / 16;
-}
-
-// non-static for testing
-topology_description limit_number_of_streams_if_needed(topology_description&& desc) {
-    uint64_t streams_count = 0;
-    for (auto& tr_desc : desc.entries()) {
-        streams_count += tr_desc.streams.size();
-    }
-
-    size_t limit = std::max(limit_of_streams_in_topology_description(), desc.entries().size());
-    if (limit >= streams_count) {
-        return std::move(desc);
-    }
-    size_t streams_per_vnode_limit = limit / desc.entries().size();
-    auto entries = std::move(desc).entries();
-    auto start = entries.back().token_range_end;
-    for (size_t idx = 0; idx < entries.size(); ++idx) {
-        auto end = entries[idx].token_range_end;
-        if (entries[idx].streams.size() > streams_per_vnode_limit) {
-            entries[idx].streams =
-                create_stream_ids(idx, start, end, streams_per_vnode_limit, entries[idx].sharding_ignore_msb);
-        }
-        start = end;
-    }
-    return topology_description(std::move(entries));
-}
-
 // Compute a set of tokens that split the token ring into vnodes.
 static auto get_tokens(const std::unordered_set<dht::token>& bootstrap_tokens, const locator::token_metadata_ptr tmptr) {
    auto tokens = tmptr->sorted_tokens();
@@ -419,364 +362,6 @@ db_clock::time_point new_generation_timestamp(bool add_delay, std::chrono::milli
    return ts;
 }

-future<cdc::generation_id> generation_service::legacy_make_new_generation(const std::unordered_set<dht::token>& bootstrap_tokens, bool add_delay) {
-    const locator::token_metadata_ptr tmptr = _token_metadata.get();
-
-    // Fetch sharding parameters for a node that owns vnode ending with this token
-    // using gossiped application states.
-    auto get_sharding_info = [&] (dht::token end) -> std::pair<size_t, uint8_t> {
-        if (bootstrap_tokens.contains(end)) {
-            return {smp::count, _cfg.ignore_msb_bits};
-        } else {
-            auto endpoint = tmptr->get_endpoint(end);
-            if (!endpoint) {
-                throw std::runtime_error(
-                        format("Can't find endpoint for token {}", end));
-            }
-            auto sc = get_shard_count(*endpoint, _gossiper);
-            return {sc > 0 ? sc : 1, get_sharding_ignore_msb(*endpoint, _gossiper)};
-        }
-    };
-
-    auto uuid = utils::make_random_uuid();
-    auto gen = make_new_generation_description(bootstrap_tokens, get_sharding_info, tmptr);
-
-    // Our caller should ensure that there are normal tokens in the token ring.
-    auto normal_token_owners = tmptr->count_normal_token_owners();
-    SCYLLA_ASSERT(normal_token_owners);
-
-    if (_feature_service.cdc_generations_v2) {
-        cdc_log.info("Inserting new generation data at UUID {}", uuid);
-        // This may take a while.
-        co_await _sys_dist_ks.local().insert_cdc_generation(uuid, gen, { normal_token_owners });
-
-        // Begin the race.
-        cdc::generation_id_v2 gen_id{new_generation_timestamp(add_delay, _cfg.ring_delay), uuid};
-
-        cdc_log.info("New CDC generation: {}", gen_id);
-        co_return gen_id;
-    }
-
-    // The CDC_GENERATIONS_V2 feature is not enabled: some nodes may still not understand the V2 format.
-    // We must create a generation in the old format.
-
-    // If the cluster is large we may end up with a generation that contains
-    // large number of streams. This is problematic because we store the
-    // generation in a single row (V1 format). For a generation with large number of rows
-    // this will lead to a row that can be as big as 32MB. This is much more
-    // than the limit imposed by commitlog_segment_size_in_mb. If the size of
-    // the row that describes a new generation grows above
-    // commitlog_segment_size_in_mb, the write will fail and the new node won't
-    // be able to join. To avoid such problem we make sure that such row is
-    // always smaller than 4MB. We do that by removing some CDC streams from
-    // each vnode if the total number of streams is too large.
-    gen = limit_number_of_streams_if_needed(std::move(gen));
-
-    cdc_log.warn(
-        "Creating a new CDC generation in the old storage format due to a partially upgraded cluster:"
-        " the CDC_GENERATIONS_V2 feature is known by this node, but not enabled in the cluster."
-        " The old storage format forces us to create a suboptimal generation."
-        " It is recommended to finish the upgrade and then create a new generation either by bootstrapping"
-        " a new node or running the checkAndRepairCdcStreams nodetool command.");
-
-    // Begin the race.
-    cdc::generation_id_v1 gen_id{new_generation_timestamp(add_delay, _cfg.ring_delay)};
-
-    co_await _sys_dist_ks.local().insert_cdc_topology_description(gen_id, std::move(gen), { normal_token_owners });
-
-    cdc_log.info("New CDC generation: {}", gen_id);
-    co_return gen_id;
-}
-
-/* Retrieves CDC streams generation timestamp from the given endpoint's application state (broadcasted through gossip).
- * We might be during a rolling upgrade, so the timestamp might not be there (if the other node didn't upgrade yet),
- * but if the cluster already supports CDC, then every newly joining node will propose a new CDC generation,
- * which means it will gossip the generation's timestamp.
- */
-static std::optional<cdc::generation_id> get_generation_id_for(const locator::host_id& endpoint, const gms::endpoint_state& eps) {
-    const auto* gen_id_ptr = eps.get_application_state_ptr(gms::application_state::CDC_GENERATION_ID);
-    if (!gen_id_ptr) {
-        return std::nullopt;
-    }
-    auto gen_id_string = gen_id_ptr->value();
-    cdc_log.trace("endpoint={}, gen_id_string={}", endpoint, gen_id_string);
-    return gms::versioned_value::cdc_generation_id_from_string(gen_id_string);
-}
-
-static future<std::optional<cdc::topology_description>> retrieve_generation_data_v2(
-        cdc::generation_id_v2 id,
-        db::system_keyspace& sys_ks,
-        db::system_distributed_keyspace& sys_dist_ks) {
-    auto cdc_gen = co_await sys_dist_ks.read_cdc_generation(id.id);
-
-    if (!cdc_gen && id.id.is_timestamp()) {
-        // If we entered legacy mode due to recovery, we (or some other node)
-        // might gossip about a generation that was previously propagated
-        // through raft. If that's the case, it will sit in
-        // the system.cdc_generations_v3 table.
-        //
-        // If the provided id is not a timeuuid, we don't want to query
-        // the system.cdc_generations_v3 table. This table stores generation
-        // ids as timeuuids. If the provided id is not a timeuuid, the
-        // generation cannot be in system.cdc_generations_v3. Also, the query
-        // would fail with a marshaling error.
-        cdc_gen = co_await sys_ks.read_cdc_generation_opt(id.id);
-    }
-
-    co_return cdc_gen;
-}
-
-static future<std::optional<cdc::topology_description>> retrieve_generation_data(
-        cdc::generation_id gen_id,
-        db::system_keyspace& sys_ks,
-        db::system_distributed_keyspace& sys_dist_ks,
-        db::system_distributed_keyspace::context ctx) {
-    return std::visit(make_visitor(
-    [&] (const cdc::generation_id_v1& id) {
-        return sys_dist_ks.read_cdc_topology_description(id, ctx);
-    },
-    [&] (const cdc::generation_id_v2& id) {
-        return retrieve_generation_data_v2(id, sys_ks, sys_dist_ks);
-    }
-    ), gen_id);
-}
-
-static future<> do_update_streams_description(
-        cdc::generation_id gen_id,
-        db::system_keyspace& sys_ks,
-        db::system_distributed_keyspace& sys_dist_ks,
-        db::system_distributed_keyspace::context ctx) {
-    if (co_await sys_dist_ks.cdc_desc_exists(get_ts(gen_id), ctx)) {
-        cdc_log.info("Generation {}: streams description table already updated.", gen_id);
-        co_return;
-    }
-
-    // We might race with another node also inserting the description, but that's ok. It's an idempotent operation.
-
-    auto topo = co_await retrieve_generation_data(gen_id, sys_ks, sys_dist_ks, ctx);
-    if (!topo) {
-        throw no_generation_data_exception(gen_id);
-    }
-
-    co_await sys_dist_ks.create_cdc_desc(get_ts(gen_id), *topo, ctx);
-    cdc_log.info("CDC description table successfully updated with generation {}.", gen_id);
-}
-
-/* Inform CDC users about a generation of streams (identified by the given timestamp)
- * by inserting it into the cdc_streams table.
- *
- * Assumes that the cdc_generation_descriptions table contains this generation.
- *
- * Returning from this function does not mean that the table update was successful: the function
- * might run an asynchronous task in the background.
- */
-static future<> update_streams_description(
-        cdc::generation_id gen_id,
-        db::system_keyspace& sys_ks,
-        shared_ptr<db::system_distributed_keyspace> sys_dist_ks,
-        noncopyable_function<unsigned()> get_num_token_owners,
-        abort_source& abort_src) {
-    try {
-        co_await do_update_streams_description(gen_id, sys_ks, *sys_dist_ks, { get_num_token_owners() });
-    } catch (...) {
-        cdc_log.warn(
-            "Could not update CDC description table with generation {}: {}. Will retry in the background.",
-            gen_id, std::current_exception());
-
-        // It is safe to discard this future: we keep system distributed keyspace alive.
-        (void)(([] (cdc::generation_id gen_id,
-                    db::system_keyspace& sys_ks,
-                    shared_ptr<db::system_distributed_keyspace> sys_dist_ks,
-                    noncopyable_function<unsigned()> get_num_token_owners,
-                    abort_source& abort_src) -> future<> {
-            while (true) {
-                try {
-                    co_await sleep_abortable(std::chrono::seconds(60), abort_src);
-                } catch (seastar::sleep_aborted&) {
-                    cdc_log.warn( "Aborted update CDC description table with generation {}", gen_id);
-                    co_return;
-                }
-                try {
-                    co_await do_update_streams_description(gen_id, sys_ks, *sys_dist_ks, { get_num_token_owners() });
-                    co_return;
-                } catch (...) {
-                    cdc_log.warn(
-                        "Could not update CDC description table with generation {}: {}. Will try again.",
-                        gen_id, std::current_exception());
-                }
-            }
-        })(gen_id, sys_ks, std::move(sys_dist_ks), std::move(get_num_token_owners), abort_src));
-    }
-}
-
-static db_clock::time_point as_timepoint(const utils::UUID& uuid) {
-    return db_clock::time_point(utils::UUID_gen::unix_timestamp(uuid));
-}
-
-static future<std::vector<db_clock::time_point>> get_cdc_desc_v1_timestamps(
-        db::system_distributed_keyspace& sys_dist_ks,
-        abort_source& abort_src,
-        const noncopyable_function<unsigned()>& get_num_token_owners) {
-    while (true) {
-        try {
-            co_return co_await sys_dist_ks.get_cdc_desc_v1_timestamps({ get_num_token_owners() });
-        } catch (...) {
-            cdc_log.warn(
-                    "Failed to retrieve generation timestamps for rewriting: {}. Retrying in 60s.",
-                    std::current_exception());
-        }
-        co_await sleep_abortable(std::chrono::seconds(60), abort_src);
-    }
-}
-
-// Contains a CDC log table's creation time (extracted from its schema's id)
-// and its CDC TTL setting.
-struct time_and_ttl {
-    db_clock::time_point creation_time;
-    int ttl;
-};
-
-/*
- * See `maybe_rewrite_streams_descriptions`.
- * This is the long-running-in-the-background part of that function.
- * It returns the timestamp of the last rewritten generation (if any).
- */
-static future<std::optional<cdc::generation_id_v1>> rewrite_streams_descriptions(
-        std::vector<time_and_ttl> times_and_ttls,
-        db::system_keyspace& sys_ks,
-        shared_ptr<db::system_distributed_keyspace> sys_dist_ks,
-        noncopyable_function<unsigned()> get_num_token_owners,
-        abort_source& abort_src) {
-    cdc_log.info("Retrieving generation timestamps for rewriting...");
-    auto tss = co_await get_cdc_desc_v1_timestamps(*sys_dist_ks, abort_src, get_num_token_owners);
-    cdc_log.info("Generation timestamps retrieved.");
-
-    // Find first generation timestamp such that some CDC log table may contain data before this timestamp.
-    // This predicate is monotonic w.r.t the timestamps.
-    auto now = db_clock::now();
-    std::sort(tss.begin(), tss.end());
-    auto first = std::partition_point(tss.begin(), tss.end(), [&] (db_clock::time_point ts) {
-        // partition_point finds first element that does *not* satisfy the predicate.
-        return std::none_of(times_and_ttls.begin(), times_and_ttls.end(),
-                [&] (const time_and_ttl& tat) {
-            // In this CDC log table there are no entries older than the table's creation time
-            // or (now - the table's ttl). We subtract 10s to account for some possible clock drift.
-            // If ttl is set to 0 then entries in this table never expire. In that case we look
-            // only at the table's creation time.
-            auto no_entries_older_than =
-                (tat.ttl == 0 ? tat.creation_time : std::max(tat.creation_time, now - std::chrono::seconds(tat.ttl)))
-                    - std::chrono::seconds(10);
-            return no_entries_older_than < ts;
-        });
-    });
-
-    // Find first generation timestamp such that some CDC log table may contain data in this generation.
-    // This and all later generations need to be written to the new streams table.
-    if (first != tss.begin()) {
-        --first;
-    }
-
-    if (first == tss.end()) {
-        cdc_log.info("No generations to rewrite.");
-        co_return std::nullopt;
-    }
-
-    cdc_log.info("First generation to rewrite: {}", *first);
-
-    bool each_success = true;
-    co_await max_concurrent_for_each(first, tss.end(), 10, [&] (db_clock::time_point ts) -> future<> {
-        while (true) {
-            try {
-                co_return co_await do_update_streams_description(cdc::generation_id_v1{ts}, sys_ks, *sys_dist_ks, { get_num_token_owners() });
-            } catch (const no_generation_data_exception& e) {
-                cdc_log.error("Failed to rewrite streams for generation {}: {}. Giving up.", ts, e);
-                each_success = false;
-                co_return;
-            } catch (...) {
-                cdc_log.warn("Failed to rewrite streams for generation {}: {}. Retrying in 60s.", ts, std::current_exception());
-            }
-            co_await sleep_abortable(std::chrono::seconds(60), abort_src);
-        }
-    });
-
-    if (each_success) {
-        cdc_log.info("Rewriting stream tables finished successfully.");
-    } else {
-        cdc_log.info("Rewriting stream tables finished, but some generations could not be rewritten (check the logs).");
-    }
-
-    if (first != tss.end()) {
-        co_return cdc::generation_id_v1{*std::prev(tss.end())};
-    }
-
-    co_return std::nullopt;
-}
-
-future<> generation_service::maybe_rewrite_streams_descriptions() {
-    if (!_db.has_schema(_sys_dist_ks.local().NAME, _sys_dist_ks.local().CDC_DESC_V1)) {
-        // This cluster never went through a Scylla version which used this table
-        // or the user deleted the table. Nothing to do.
-        co_return;
-    }
-
-    if (co_await _sys_ks.local().cdc_is_rewritten()) {
-        co_return;
-    }
-
-    if (_cfg.dont_rewrite_streams) {
-        cdc_log.warn("Stream rewriting disabled. Manual administrator intervention may be required...");
-        co_return;
-    }
-
-    // For each CDC log table get the TTL setting (from CDC options) and the table's creation time
-    std::vector<time_and_ttl> times_and_ttls;
-    _db.get_tables_metadata().for_each_table([&] (table_id, lw_shared_ptr<replica::table> t) {
-        auto& s = *t->schema();
-        auto base = cdc::get_base_table(_db, s.ks_name(), s.cf_name());
-        if (!base) {
-            // Not a CDC log table.
-            return;
-        }
-        auto& cdc_opts = base->cdc_options();
-        if (!cdc_opts.enabled()) {
-            // This table is named like a CDC log table but it's not one.
-            return;
-        }
-
-        times_and_ttls.push_back(time_and_ttl{as_timepoint(s.id().uuid()), cdc_opts.ttl()});
-    });
-
-    if (times_and_ttls.empty()) {
-        // There's no point in rewriting old generations' streams (they don't contain any data).
-        cdc_log.info("No CDC log tables present, not rewriting stream tables.");
-        co_return co_await _sys_ks.local().cdc_set_rewritten(std::nullopt);
-    }
-
-    auto get_num_token_owners = [tm = _token_metadata.get()] { return tm->count_normal_token_owners(); };
-
-    // This code is racing with node startup. At this point, we're most likely still waiting for gossip to settle
-    // and some nodes that are UP may still be marked as DOWN by us.
-    // Let's sleep a bit to increase the chance that the first attempt at rewriting succeeds (it's still ok if
-    // it doesn't - we'll retry - but it's nice if we succeed without any warnings).
-    co_await sleep_abortable(std::chrono::seconds(10), _abort_src);
-
-    cdc_log.info("Rewriting stream tables in the background...");
-    auto last_rewritten = co_await rewrite_streams_descriptions(
-            std::move(times_and_ttls),
-            _sys_ks.local(),
-            _sys_dist_ks.local_shared(),
-            std::move(get_num_token_owners),
-            _abort_src);
-
-    co_await _sys_ks.local().cdc_set_rewritten(last_rewritten);
-}
-
-static void assert_shard_zero(const sstring& where) {
-    if (this_shard_id() != 0) {
-        on_internal_error(cdc_log, format("`{}`: must be run on shard 0", where));
-    }
-}
-
 class and_reducer {
 private:
    bool _result = true;
@@ -803,195 +388,26 @@ public:
    }
 };

-class generation_handling_nonfatal_exception : public std::runtime_error {
-    using std::runtime_error::runtime_error;
-};
-
-constexpr char could_not_retrieve_msg_template[]
-        = "Could not retrieve CDC streams with timestamp {} upon gossip event. Reason: \"{}\". Action: {}.";
-
 generation_service::generation_service(
-            config cfg, gms::gossiper& g, sharded<db::system_distributed_keyspace>& sys_dist_ks,
+            config cfg,
            sharded<db::system_keyspace>& sys_ks,
-            abort_source& abort_src, const locator::shared_token_metadata& stm, gms::feature_service& f,
            replica::database& db)
        : _cfg(std::move(cfg))
-        , _gossiper(g)
-        , _sys_dist_ks(sys_dist_ks)
        , _sys_ks(sys_ks)
-        , _abort_src(abort_src)
-        , _token_metadata(stm)
-        , _feature_service(f)
        , _db(db)
 {
 }

 future<> generation_service::stop() {
-    try {
-        co_await std::move(_cdc_streams_rewrite_complete);
-    } catch (...) {
-        cdc_log.error("CDC stream rewrite failed: ", std::current_exception());
-    }
-
-    if (_joined && (this_shard_id() == 0)) {
-        co_await leave_ring();
-    }
-
    _stopped = true;
+    return make_ready_future<>();
 }

 generation_service::~generation_service() {
    SCYLLA_ASSERT(_stopped);
 }

-future<> generation_service::after_join(std::optional<cdc::generation_id>&& startup_gen_id) {
-    assert_shard_zero(__PRETTY_FUNCTION__);
-
-    _gen_id = std::move(startup_gen_id);
-    _gossiper.register_(shared_from_this());
-
-    _joined = true;
-
-    // Retrieve the latest CDC generation seen in gossip (if any).
-    co_await legacy_scan_cdc_generations();
-
-    // Ensure that the new CDC stream description table has all required streams.
-    // See the function's comment for details.
-    //
-    // Since this depends on the entire cluster (and therefore we cannot guarantee
-    // timely completion), run it in the background and wait for it in stop().
-    _cdc_streams_rewrite_complete = maybe_rewrite_streams_descriptions();
-}
-
-future<> generation_service::leave_ring() {
-    assert_shard_zero(__PRETTY_FUNCTION__);
-    _joined = false;
-    co_await _gossiper.unregister_(shared_from_this());
-}
-
-future<> generation_service::on_join(gms::inet_address ep, locator::host_id id, gms::endpoint_state_ptr ep_state, gms::permit_id pid) {
-    return on_change(ep, id, ep_state->get_application_state_map(), pid);
-}
-
-future<> generation_service::on_change(gms::inet_address ep, locator::host_id id, const gms::application_state_map& states, gms::permit_id pid) {
-    assert_shard_zero(__PRETTY_FUNCTION__);
-
-    return make_ready_future<>();
-}
-
-future<> generation_service::check_and_repair_cdc_streams() {
-    // FIXME: support Raft group 0-based topology changes
-    if (!_joined) {
-        throw std::runtime_error("check_and_repair_cdc_streams: node not initialized yet");
-    }
-
-    std::optional<cdc::generation_id> latest = _gen_id;
-    _gossiper.for_each_endpoint_state([&] (const gms::endpoint_state& state) {
-        auto addr = state.get_host_id();
-        if (_gossiper.is_left(addr)) {
-            cdc_log.info("check_and_repair_cdc_streams ignored node {} because it is in LEFT state", addr);
-            return;
-        }
-        if (!_gossiper.is_normal(addr)) {
-            throw std::runtime_error(fmt::format("All nodes must be in NORMAL or LEFT state while performing check_and_repair_cdc_streams"
-                    " ({} is in state {})", addr, _gossiper.get_gossip_status(state)));
-        }
-
-        const auto gen_id = get_generation_id_for(addr, state);
-        if (!latest || (gen_id && get_ts(*gen_id) > get_ts(*latest))) {
-            latest = gen_id;
-        }
-    });
-
-    auto tmptr = _token_metadata.get();
-    auto sys_dist_ks = get_sys_dist_ks();
-
-    bool should_regenerate = false;
-
-    if (!latest) {
-        cdc_log.warn("check_and_repair_cdc_streams: no generation observed in gossip");
-        should_regenerate = true;
-    } else if (std::holds_alternative<cdc::generation_id_v1>(*latest)
-            && _feature_service.cdc_generations_v2) {
-        cdc_log.info(
-            "Cluster still using CDC generation storage format V1 (id: {}), even though it already understands the V2 format."
-            " Creating a new generation using V2.", *latest);
-        should_regenerate = true;
-    } else {
-        cdc_log.info("check_and_repair_cdc_streams: last generation observed in gossip: {}", *latest);
-
-        static const auto timeout_msg = "Timeout while fetching CDC topology description";
-        static const auto topology_read_error_note = "Note: this is likely caused by"
-                " node(s) being down or unreachable. It is recommended to check the network and"
-                " restart/remove the failed node(s), then retry checkAndRepairCdcStreams command";
-        static const auto exception_translating_msg = "Translating the exception to `request_execution_exception`";
-
-        std::optional<topology_description> gen;
-        try {
-            gen = co_await retrieve_generation_data(*latest, _sys_ks.local(), *sys_dist_ks, { tmptr->count_normal_token_owners() });
-        } catch (exceptions::request_timeout_exception& e) {
-            cdc_log.error("{}: \"{}\". {}.", timeout_msg, e.what(), exception_translating_msg);
-            throw exceptions::request_execution_exception(exceptions::exception_code::READ_TIMEOUT,
-                    format("{}. {}.", timeout_msg, topology_read_error_note));
-        } catch (exceptions::unavailable_exception& e) {
-            static const auto unavailable_msg = "Node(s) unavailable while fetching CDC topology description";
-            cdc_log.error("{}: \"{}\". {}.", unavailable_msg, e.what(), exception_translating_msg);
-            throw exceptions::request_execution_exception(exceptions::exception_code::UNAVAILABLE,
-                    format("{}. {}.", unavailable_msg, topology_read_error_note));
-        } catch (...) {
-            const auto ep = std::current_exception();
-            if (is_timeout_exception(ep)) {
-                cdc_log.error("{}: \"{}\". {}.", timeout_msg, ep, exception_translating_msg);
-                throw exceptions::request_execution_exception(exceptions::exception_code::READ_TIMEOUT,
-                        format("{}. {}.", timeout_msg, topology_read_error_note));
-            }
-            // On exotic errors proceed with regeneration
-            cdc_log.error("Exception while reading CDC topology description: \"{}\". Regenerating streams anyway.", ep);
-            should_regenerate = true;
-        }
-
-        if (!gen) {
-            cdc_log.error(
-                "Could not find CDC generation with timestamp {} in distributed system tables (current time: {}),"
-                " even though some node gossiped about it.",
-                latest, db_clock::now());
-            should_regenerate = true;
-        } else if (!is_cdc_generation_optimal(*gen, *tmptr)) {
-            should_regenerate = true;
-            cdc_log.info("CDC generation {} needs repair, regenerating", latest);
-        }
-    }
-
-    if (!should_regenerate) {
-        if (latest != _gen_id) {
-            co_await legacy_do_handle_cdc_generation(*latest);
-        }
-        cdc_log.info("CDC generation {} does not need repair", latest);
-        co_return;
-    }
-
-    const auto new_gen_id = co_await legacy_make_new_generation({}, true);
-
-    // Need to artificially update our STATUS so other nodes handle the generation ID change
-    // FIXME: after 0e0282cd nodes do not require a STATUS update to react to CDC generation changes.
-    // The artificial STATUS update here should eventually be removed (in a few releases).
-    auto status = _gossiper.get_this_endpoint_state_ptr()->get_application_state_ptr(gms::application_state::STATUS);
-    if (!status) {
-        cdc_log.error("Our STATUS is missing");
-        cdc_log.error("Aborting CDC generation repair due to missing STATUS");
-        co_return;
-    }
-    // Update _gen_id first, so that legacy_do_handle_cdc_generation (which will get called due to the status update)
-    // won't try to update the gossiper, which would result in a deadlock inside add_local_application_state
-    _gen_id = new_gen_id;
-    co_await _gossiper.add_local_application_state(
-            std::pair(gms::application_state::CDC_GENERATION_ID, gms::versioned_value::cdc_generation_id(new_gen_id)),
-            std::pair(gms::application_state::STATUS, *status)
-    );
-    co_await _sys_ks.local().update_cdc_generation_id(new_gen_id);
-}
-
-future<> generation_service::handle_cdc_generation(cdc::generation_id_v2 gen_id) {
+future<> generation_service::handle_cdc_generation(cdc::generation_id gen_id) {
    auto ts = get_ts(gen_id);
    if (co_await container().map_reduce(and_reducer(), [ts] (generation_service& svc) {
        return !svc._cdc_metadata.prepare(ts);
@@ -1013,171 +429,8 @@ future<> generation_service::handle_cdc_generation(cdc::generation_id_v2 gen_id)
    }
 }

-future<> generation_service::legacy_handle_cdc_generation(std::optional<cdc::generation_id> gen_id) {
-    assert_shard_zero(__PRETTY_FUNCTION__);
-
-    if (!gen_id) {
-        co_return;
-    }
-
-    if (!_sys_dist_ks.local_is_initialized() || !_sys_dist_ks.local().started()) {
-        on_internal_error(cdc_log, "Legacy handle CDC generation with sys.dist.ks. down");
-    }
-
-    // The service should not be listening for generation changes until after the node
-    // is bootstrapped and since the node leaves the ring on decommission
-
-    if (co_await container().map_reduce(and_reducer(), [ts = get_ts(*gen_id)] (generation_service& svc) {
-        return !svc._cdc_metadata.prepare(ts);
-    })) {
-        co_return;
-    }
-
-    bool using_this_gen = false;
-    try {
-        using_this_gen = co_await legacy_do_handle_cdc_generation_intercept_nonfatal_errors(*gen_id);
-    } catch (generation_handling_nonfatal_exception& e) {
-        cdc_log.warn(could_not_retrieve_msg_template, gen_id, e.what(), "retrying in the background");
-        legacy_async_handle_cdc_generation(*gen_id);
-        co_return;
-    } catch (...) {
-        cdc_log.error(could_not_retrieve_msg_template, gen_id, std::current_exception(), "not retrying");
-        co_return; // Exotic ("fatal") exception => do not retry
-    }
-
-    if (using_this_gen) {
-        cdc_log.info("Starting to use generation {}", *gen_id);
-        co_await update_streams_description(*gen_id, _sys_ks.local(), get_sys_dist_ks(),
-                [&tm = _token_metadata] { return tm.get()->count_normal_token_owners(); },
-                _abort_src);
-    }
-}
-
-void generation_service::legacy_async_handle_cdc_generation(cdc::generation_id gen_id) {
-    assert_shard_zero(__PRETTY_FUNCTION__);
-
-    (void)(([] (cdc::generation_id gen_id, shared_ptr<generation_service> svc) -> future<> {
-        while (true) {
-            co_await sleep_abortable(std::chrono::seconds(5), svc->_abort_src);
-
-            try {
-                bool using_this_gen = co_await svc->legacy_do_handle_cdc_generation_intercept_nonfatal_errors(gen_id);
-                if (using_this_gen) {
-                    cdc_log.info("Starting to use generation {}", gen_id);
-                    co_await update_streams_description(gen_id, svc->_sys_ks.local(), svc->get_sys_dist_ks(),
-                            [&tm = svc->_token_metadata] { return tm.get()->count_normal_token_owners(); },
-                            svc->_abort_src);
-                }
-                co_return;
-            } catch (generation_handling_nonfatal_exception& e) {
-                cdc_log.warn(could_not_retrieve_msg_template, gen_id, e.what(), "continuing to retry in the background");
-            } catch (...) {
-                cdc_log.error(could_not_retrieve_msg_template, gen_id, std::current_exception(), "not retrying anymore");
-                co_return; // Exotic ("fatal") exception => do not retry
-            }
-
-            if (co_await svc->container().map_reduce(and_reducer(), [ts = get_ts(gen_id)] (generation_service& svc) {
-                return svc._cdc_metadata.known_or_obsolete(ts);
-            })) {
-                co_return;
-            }
-        }
-    })(gen_id, shared_from_this()));
-}
-
-future<> generation_service::legacy_scan_cdc_generations() {
-    assert_shard_zero(__PRETTY_FUNCTION__);
-
-    std::optional<cdc::generation_id> latest;
-    _gossiper.for_each_endpoint_state([&] (const gms::endpoint_state& eps) {
-        auto gen_id = get_generation_id_for(eps.get_host_id(), eps);
-        if (!latest || (gen_id && get_ts(*gen_id) > get_ts(*latest))) {
-            latest = gen_id;
-        }
-    });
-
-    if (latest) {
-        cdc_log.info("Latest generation seen during startup: {}", *latest);
-        co_await legacy_handle_cdc_generation(latest);
-    } else {
-        cdc_log.info("No generation seen during startup.");
-    }
-}
-
-future<bool> generation_service::legacy_do_handle_cdc_generation_intercept_nonfatal_errors(cdc::generation_id gen_id) {
-    assert_shard_zero(__PRETTY_FUNCTION__);
-
-    // Use futurize_invoke to catch all exceptions from legacy_do_handle_cdc_generation.
-    return futurize_invoke([this, gen_id] {
-        return legacy_do_handle_cdc_generation(gen_id);
-    }).handle_exception([] (std::exception_ptr ep) -> future<bool> {
-        try {
-            std::rethrow_exception(ep);
-        } catch (exceptions::request_timeout_exception& e) {
-            throw generation_handling_nonfatal_exception(e.what());
-        } catch (exceptions::unavailable_exception& e) {
-            throw generation_handling_nonfatal_exception(e.what());
-        } catch (exceptions::read_failure_exception& e) {
-            throw generation_handling_nonfatal_exception(e.what());
-        } catch (...) {
-            const auto ep = std::current_exception();
-            if (is_timeout_exception(ep)) {
-                throw generation_handling_nonfatal_exception(format("{}", ep));
-            }
-            throw;
-        }
-    });
-}
-
-future<bool> generation_service::legacy_do_handle_cdc_generation(cdc::generation_id gen_id) {
-    assert_shard_zero(__PRETTY_FUNCTION__);
-
-    auto sys_dist_ks = get_sys_dist_ks();
-    auto gen = co_await retrieve_generation_data(gen_id, _sys_ks.local(), *sys_dist_ks, { _token_metadata.get()->count_normal_token_owners() });
-    if (!gen) {
-        // This may happen during raft upgrade when a node gossips about a generation that
-        // was propagated through raft and we didn't apply it yet.
-        throw generation_handling_nonfatal_exception(fmt::format(
-            "Could not find CDC generation {} in distributed system tables (current time: {}),"
-            " even though some node gossiped about it.",
-            gen_id, db_clock::now()));
-    }
-
-    // We always gossip about the generation with the greatest timestamp. Specific nodes may remember older generations,
-    // but eventually they forget when their clocks move past the latest generation's timestamp.
-    // The cluster as a whole is only interested in the last generation so restarting nodes may learn what it is.
-    // We assume that generation changes don't happen ``too often'' so every node can learn about a generation
-    // before it is superseded by a newer one which causes nodes to start gossiping the about the newer one.
-    // The assumption follows from the requirement of bootstrapping nodes sequentially.
-    if (!_gen_id || get_ts(*_gen_id) < get_ts(gen_id)) {
-        _gen_id = gen_id;
-        co_await _sys_ks.local().update_cdc_generation_id(gen_id);
-        co_await _gossiper.add_local_application_state(
-                gms::application_state::CDC_GENERATION_ID, gms::versioned_value::cdc_generation_id(gen_id));
-    }
-
-    // Return `true` iff the generation was inserted on any of our shards.
-    co_return co_await container().map_reduce(or_reducer(),
-            [ts = get_ts(gen_id), &gen] (generation_service& svc) -> future<bool> {
-        // We need to copy it here before awaiting anything to avoid destruction of the captures.
-        const auto timestamp = ts;
-        topology_description gen_copy = co_await gen->clone_async();
-        co_return svc._cdc_metadata.insert(timestamp, std::move(gen_copy));
-    });
-}
-
-shared_ptr<db::system_distributed_keyspace> generation_service::get_sys_dist_ks() {
-    assert_shard_zero(__PRETTY_FUNCTION__);
-
-    if (!_sys_dist_ks.local_is_initialized()) {
-        throw std::runtime_error("system distributed keyspace not initialized");
-    }
-
-    return _sys_dist_ks.local_shared();
-}
-
 db_clock::time_point get_ts(const generation_id& gen_id) {
-    return std::visit([] (auto& id) { return id.ts; }, gen_id);
+    return gen_id.ts;
 }

 future<mutation> create_table_streams_mutation(table_id table, db_clock::time_point stream_ts, const locator::tablet_map& map, api::timestamp_type ts) {
--- a/cdc/generation.hh
+++ b/cdc/generation.hh
@@ -34,16 +34,6 @@ namespace seastar {
    class abort_source;
 } // namespace seastar

-namespace db {
-    class config;
-    class system_distributed_keyspace;
-} // namespace db
-
-namespace gms {
-    class inet_address;
-    class gossiper;
-} // namespace gms
-
 namespace locator {
    class tablet_map;
 } // namespace locator
@@ -153,23 +143,6 @@ struct cdc_stream_diff {

 using table_streams = std::map<api::timestamp_type, committed_stream_set>;

-class no_generation_data_exception : public std::runtime_error {
-public:
-    no_generation_data_exception(cdc::generation_id generation_ts)
-        : std::runtime_error(fmt::format("could not find generation data for timestamp {}", generation_ts))
-    {}
-};
-
-/* Should be called when we're restarting and we noticed that we didn't save any streams timestamp in our local tables,
- * which means that we're probably upgrading from a non-CDC/old CDC version (another reason could be
- * that there's a bug, or the user messed with our local tables).
- *
- * It checks whether we should be the node to propose the first generation of CDC streams.
- * The chosen condition is arbitrary, it only tries to make sure that no two nodes propose a generation of streams
- * when upgrading, and nothing bad happens if they for some reason do (it's mostly an optimization).
- */
-bool should_propose_first_generation(const locator::host_id& me, const gms::gossiper&);
-
 /*
 * Checks if the CDC generation is optimal, which is true if its `topology_description` is consistent
 * with `token_metadata`.
--- a/cdc/generation_id.hh
+++ b/cdc/generation_id.hh
@@ -15,48 +15,22 @@

 namespace cdc {

-struct generation_id_v1 {
-    db_clock::time_point ts;
-    bool operator==(const generation_id_v1&) const = default;
-};

-struct generation_id_v2 {
+struct generation_id {
    db_clock::time_point ts;
    utils::UUID id;
-    bool operator==(const generation_id_v2&) const = default;
+    bool operator==(const generation_id&) const = default;
 };

-using generation_id = std::variant<generation_id_v1, generation_id_v2>;
-
 db_clock::time_point get_ts(const generation_id&);

 } // namespace cdc

-template <>
-struct fmt::formatter<cdc::generation_id_v1> {
-    constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
-    template <typename FormatContext>
-    auto format(const cdc::generation_id_v1& gen_id, FormatContext& ctx) const {
-        return fmt::format_to(ctx.out(), "{}", gen_id.ts);
-    }
-};
-
-template <>
-struct fmt::formatter<cdc::generation_id_v2> {
-    constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
-    template <typename FormatContext>
-    auto format(const cdc::generation_id_v2& gen_id, FormatContext& ctx) const {
-        return fmt::format_to(ctx.out(), "({}, {})", gen_id.ts, gen_id.id);
-    }
-};
-
 template <>
 struct fmt::formatter<cdc::generation_id> {
    constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
    template <typename FormatContext>
    auto format(const cdc::generation_id& gen_id, FormatContext& ctx) const {
-        return std::visit([&ctx] (auto& id) {
-            return fmt::format_to(ctx.out(), "{}", id);
-        }, gen_id);
+        return fmt::format_to(ctx.out(), "({}, {})", gen_id.ts, gen_id.id);
    }
 };
--- a/cdc/generation_service.hh
+++ b/cdc/generation_service.hh
@@ -11,135 +11,51 @@
 #include <seastar/core/sharded.hh>
 #include "cdc/metadata.hh"
 #include "cdc/generation_id.hh"
-#include "gms/i_endpoint_state_change_subscriber.hh"

 namespace db {
-class system_distributed_keyspace;
 class system_keyspace;
 }

-namespace gms {
-class gossiper;
-class feature_service;
-}
-
-namespace seastar {
-class abort_source;
-}
-
 namespace locator {
-class shared_token_metadata;
 class tablet_map;
 }

 namespace cdc {

 class generation_service : public peering_sharded_service<generation_service>
-                         , public async_sharded_service<generation_service>
-                         , public gms::i_endpoint_state_change_subscriber {
+                         , public async_sharded_service<generation_service> {
 public:
    struct config {
-        unsigned ignore_msb_bits;
        std::chrono::milliseconds ring_delay;
-        bool dont_rewrite_streams = false;
    };

 private:
    bool _stopped = false;

-    // The node has joined the token ring. Set to `true` on `after_join` call.
-    bool _joined = false;
-
    config _cfg;
-    gms::gossiper& _gossiper;
-    sharded<db::system_distributed_keyspace>& _sys_dist_ks;
    sharded<db::system_keyspace>& _sys_ks;
-    abort_source& _abort_src;
-    const locator::shared_token_metadata& _token_metadata;
-    gms::feature_service& _feature_service;
    replica::database& _db;

-    /* Maintains the set of known CDC generations used to pick streams for log writes (i.e., the partition keys of these log writes).
-     * Updated in response to certain gossip events (see the handle_cdc_generation function).
-     */
+    /* Maintains the set of known CDC generations used to pick streams for log writes (i.e., the partition keys of these log writes). */
    cdc::metadata _cdc_metadata;

-    /* The latest known generation timestamp and the timestamp that we're currently gossiping
-     * (as CDC_GENERATION_ID application state).
-     *
-     * Only shard 0 manages this, hence it will be std::nullopt on all shards other than 0.
-     * This timestamp is also persisted in the system.cdc_local table.
-     *
-     * On shard 0 this may be nullopt only in one special case: rolling upgrade, when we upgrade
-     * from an old version of Scylla that didn't support CDC. In that case one node in the cluster
-     * will create the first generation and start gossiping it; it may be us, or it may be some
-     * different node. In any case, eventually - after one of the nodes gossips the first timestamp
-     * - we'll catch on and this variable will be updated with that generation.
-     */
-    std::optional<cdc::generation_id> _gen_id;
-    future<> _cdc_streams_rewrite_complete = make_ready_future<>();
-
 public:
-    generation_service(config cfg, gms::gossiper&,
-            sharded<db::system_distributed_keyspace>&,
+    generation_service(config cfg,
            sharded<db::system_keyspace>& sys_ks,
-            abort_source&, const locator::shared_token_metadata&,
-            gms::feature_service&, replica::database& db);
+            replica::database& db);

    future<> stop();
    ~generation_service();

-    /* After the node bootstraps and creates a new CDC generation, or restarts and loads the last
-     * known generation timestamp from persistent storage, this function should be called with
-     * that generation timestamp moved in as the `startup_gen_id` parameter.
-     * This passes the responsibility of managing generations from the node startup code to this service;
-     * until then, the service remains dormant.
-     * The startup code is in `storage_service::join_topology`, hence
-     * `after_join` should be called at the end of that function.
-     * Precondition: the node has completed bootstrapping and system_distributed_keyspace is initialized.
-     * Must be called on shard 0 - that's where the generation management happens.
-     */
-    future<> after_join(std::optional<cdc::generation_id>&& startup_gen_id);
-    future<> leave_ring();
-
    cdc::metadata& get_cdc_metadata() {
        return _cdc_metadata;
    }

-    virtual future<> on_join(gms::inet_address, locator::host_id id, gms::endpoint_state_ptr, gms::permit_id) override;
-    virtual future<> on_change(gms::inet_address, locator::host_id id, const gms::application_state_map&, gms::permit_id) override;
-
-    future<> check_and_repair_cdc_streams();
-
-    /* Generate a new set of CDC streams and insert it into the internal distributed CDC generations table.
-     * Returns the ID of this new generation.
-     *
-     * Should be called when starting the node for the first time (i.e., joining the ring).
-     *
-     * Assumes that the system_distributed_keyspace service is initialized.
-     * `cluster_supports_generations_v2` must be `true` if and only if the `CDC_GENERATIONS_V2` feature is enabled.
-     *
-     * If `CDC_GENERATIONS_V2` is enabled, the new generation will be inserted into
-     * `system_distributed_everywhere.cdc_generation_descriptions_v2` and the returned ID will be in the v2 format.
-     * Otherwise the new generation will be limited in size, causing suboptimal stream distribution, it will be inserted
-     * into `system_distributed.cdc_generation_descriptions` and the returned ID will be in the v1 format.
-     * The second case should happen only when we create new generations in a mixed cluster.
-     *
-     * The caller of this function is expected to insert the ID into the gossiper as fast as possible,
-     * so that other nodes learn about the generation before their clocks cross the generation's timestamp
-     * (not guaranteed in the current implementation, but expected to be the common case;
-     *  we assume that `ring_delay` is enough for other nodes to learn about the new generation).
-     *
-     * Legacy: used for gossiper-based topology changes.
-     */
-    future<cdc::generation_id> legacy_make_new_generation(
-        const std::unordered_set<dht::token>& bootstrap_tokens, bool add_delay);
-
    /* Retrieve the CDC generation with the given ID from local tables
     * and start using it for CDC log writes if it's not obsolete.
     * Precondition: the generation was committed using group 0 and locally applied.
     */
-    future<> handle_cdc_generation(cdc::generation_id_v2);
+    future<> handle_cdc_generation(cdc::generation_id);

    future<> load_cdc_tablet_streams(std::optional<std::unordered_set<table_id>> changed_tables);

@@ -151,56 +67,6 @@ public:
    future<utils::chunked_vector<mutation>> garbage_collect_cdc_streams_for_table(table_id table, std::optional<std::chrono::seconds> ttl, api::timestamp_type ts);
    future<> garbage_collect_cdc_streams(utils::chunked_vector<canonical_mutation>& muts, api::timestamp_type ts);

-private:
-    /* Retrieve the CDC generation which starts at the given timestamp (from a distributed table created for this purpose)
-     * and start using it for CDC log writes if it's not obsolete.
-     *
-     * Legacy: used for gossiper-based topology changes.
-     */
-    future<> legacy_handle_cdc_generation(std::optional<cdc::generation_id>);
-
-    /* If `legacy_handle_cdc_generation` fails, it schedules an asynchronous retry in the background
-     * using `legacy_async_handle_cdc_generation`.
-     *
-     * Legacy: used for gossiper-based topology changes.
-     */
-    void legacy_async_handle_cdc_generation(cdc::generation_id);
-
-    /* Wrapper around `legacy_do_handle_cdc_generation` which intercepts timeout/unavailability exceptions.
-     * Returns: legacy_do_handle_cdc_generation(ts).
-     *
-     * Legacy: used for gossiper-based topology changes.
-     */
-    future<bool> legacy_do_handle_cdc_generation_intercept_nonfatal_errors(cdc::generation_id);
-
-    /* Returns `true` iff we started using the generation (it was not obsolete or already known),
-     * which means that this node might write some CDC log entries using streams from this generation.
-     *
-     * Legacy: used for gossiper-based topology changes.
-     */
-    future<bool> legacy_do_handle_cdc_generation(cdc::generation_id);
-
-    /* Scan CDC generation timestamps gossiped by other nodes and retrieve the latest one.
-     * This function should be called once at the end of the node startup procedure
-     * (after the node is started and running normally, it will retrieve generations on gossip events instead).
-     *
-     * Legacy: used for gossiper-based topology changes.
-     */
-    future<> legacy_scan_cdc_generations();
-
-    /* generation_service code might be racing with system_distributed_keyspace deinitialization
-     * (the deinitialization order is broken).
-     * Therefore, whenever we want to access sys_dist_ks in a background task,
-     * we need to check if the instance is still there. Storing the shared pointer will keep it alive.
-     */
-    shared_ptr<db::system_distributed_keyspace> get_sys_dist_ks();
-
-    /* Part of the upgrade procedure. Useful in case where the version of Scylla that we're upgrading from
-     * used the "cdc_streams_descriptions" table. This procedure ensures that the new "cdc_streams_descriptions_v2"
-     * table contains streams of all generations that were present in the old table and may still contain data
-     * (i.e. there exist CDC log tables that may contain rows with partition keys being the stream IDs from
-     * these generations). */
-    future<> maybe_rewrite_streams_descriptions();
 };

 } // namespace cdc
--- a/cdc/log.cc
+++ b/cdc/log.cc
@@ -618,7 +618,7 @@ static void set_default_properties_log_table(schema_builder& b, const schema& s,
    b.set_caching_options(caching_options::get_disabled_caching_options());

    auto rs = generate_replication_strategy(ksm, db.get_token_metadata().get_topology());
-    auto tombstone_gc_ext = seastar::make_shared<tombstone_gc_extension>(get_default_tombstone_gc_mode(*rs, db.get_token_metadata(), false));
+    auto tombstone_gc_ext = seastar::make_shared<tombstone_gc_extension>(get_default_tombstone_gc_mode(*rs, false));
    b.add_extension(tombstone_gc_extension::NAME, std::move(tombstone_gc_ext));
 }

--- a/cdc/split.cc
+++ b/cdc/split.cc
@@ -76,14 +76,14 @@ struct partition_deletion {

 using clustered_column_set = std::map<clustering_key, cdc::one_kind_column_set, clustering_key::less_compare>;

-template<typename Container>
+template <typename Container>
 concept EntryContainer = requires(Container& container) {
    // Parenthesized due to https://bugs.llvm.org/show_bug.cgi?id=45088
    { (container.atomic_entries) } -> std::same_as<std::vector<atomic_column_update>&>;
    { (container.nonatomic_entries) } -> std::same_as<std::vector<nonatomic_column_update>&>;
 };

-template<EntryContainer Container>
+template <EntryContainer Container>
 static void add_columns_affected_by_entries(cdc::one_kind_column_set& cset, const Container& cont) {
    for (const auto& entry : cont.atomic_entries) {
        cset.set(entry.id);
@@ -134,7 +134,7 @@ struct batch {
            ret.emplace(clustering_key::make_empty(), all_columns);
        }

-        auto process_change_type = [&] (const auto& changes) {
+        auto process_change_type = [&](const auto& changes) {
            for (const auto& change : changes) {
                auto& cset = ret[change.key];
                cset.resize(s.regular_columns_count());
@@ -211,7 +211,9 @@ private:

 public:
    extract_collection_visitor(column_id id, std::map<change_key_t, row_update>& updates)
-        : _id(id), _updates(updates) {}
+        : _id(id)
+        , _updates(updates) {
+    }

    void collection_tombstone(const tombstone& t) {
        auto& entry = get_or_append_entry(t.timestamp + 1, gc_clock::duration(0));
@@ -226,7 +228,9 @@ public:
        cell(key, c);
    }

-    constexpr bool finished() const { return false; }
+    constexpr bool finished() const {
+        return false;
+    }
 };

 /* Visits all cells and tombstones in a row, putting the encountered changes into buckets
@@ -249,41 +253,46 @@ struct extract_row_visitor {

    void collection_column(const column_definition& cdef, auto&& visit_collection) {
        visit(*cdef.type, make_visitor(
-        [&] (const collection_type_impl& ctype) {
-            struct collection_visitor : public extract_collection_visitor<collection_visitor> {
-                data_type _value_type;
+                                  [&](const collection_type_impl& ctype) {
+                                      struct collection_visitor : public extract_collection_visitor<collection_visitor> {
+                                          data_type _value_type;

-                collection_visitor(column_id id, std::map<change_key_t, row_update>& updates, const collection_type_impl& ctype)
-                    : extract_collection_visitor<collection_visitor>(id, updates), _value_type(ctype.value_comparator()) {}
+                                          collection_visitor(column_id id, std::map<change_key_t, row_update>& updates, const collection_type_impl& ctype)
+                                              : extract_collection_visitor<collection_visitor>(id, updates)
+                                              , _value_type(ctype.value_comparator()) {
+                                          }

-                data_type get_value_type(bytes_view) {
-                    return _value_type;
-                }
-            } v(cdef.id, _updates, ctype);
+                                          data_type get_value_type(bytes_view) {
+                                              return _value_type;
+                                          }
+                                      } v(cdef.id, _updates, ctype);

-            visit_collection(v);
-        },
-        [&] (const user_type_impl& utype) {
-            struct udt_visitor : public extract_collection_visitor<udt_visitor> {
-                const user_type_impl& _utype;
+                                      visit_collection(v);
+                                  },
+                                  [&](const user_type_impl& utype) {
+                                      struct udt_visitor : public extract_collection_visitor<udt_visitor> {
+                                          const user_type_impl& _utype;

-                udt_visitor(column_id id, std::map<change_key_t, row_update>& updates, const user_type_impl& utype)
-                    : extract_collection_visitor<udt_visitor>(id, updates), _utype(utype) {}
+                                          udt_visitor(column_id id, std::map<change_key_t, row_update>& updates, const user_type_impl& utype)
+                                              : extract_collection_visitor<udt_visitor>(id, updates)
+                                              , _utype(utype) {
+                                          }

-                data_type get_value_type(bytes_view key) {
-                    return _utype.type(deserialize_field_index(key));
-                }
-            } v(cdef.id, _updates, utype);
+                                          data_type get_value_type(bytes_view key) {
+                                              return _utype.type(deserialize_field_index(key));
+                                          }
+                                      } v(cdef.id, _updates, utype);

-            visit_collection(v);
-        },
-        [&] (const abstract_type& o) {
-            throw std::runtime_error(format("extract_changes: unknown collection type:", o.name()));
-        }
-        ));
+                                      visit_collection(v);
+                                  },
+                                  [&](const abstract_type& o) { // catch-all overload: any type not handled by the two lambdas above
+                                      throw std::runtime_error(format("extract_changes: unknown collection type: {}", o.name()));
+                                  }));
    }

-    constexpr bool finished() const { return false; }
+    constexpr bool finished() const {
+        return false;
+    }
 };

 struct extract_changes_visitor {
@@ -293,12 +302,8 @@ struct extract_changes_visitor {
        extract_row_visitor v;
        visit_row_cells(v);

-        for (auto& [ts_ttl, row_update]: v._updates) {
-            _result[ts_ttl.first].static_updates.push_back({
-                ts_ttl.second,
-                std::move(row_update.atomic_entries),
-                std::move(row_update.nonatomic_entries)
-            });
+        for (auto& [ts_ttl, row_update] : v._updates) {
+            _result[ts_ttl.first].static_updates.push_back({ts_ttl.second, std::move(row_update.atomic_entries), std::move(row_update.nonatomic_entries)});
        }
    }

@@ -319,24 +324,18 @@ struct extract_changes_visitor {
        } v;
        visit_row_cells(v);

-        for (auto& [ts_ttl, row_update]: v._updates) {
+        for (auto& [ts_ttl, row_update] : v._updates) {
            // It is important that changes in the resulting `set_of_changes` are listed
            // in increasing TTL order. The reason is explained in a comment in cdc/log.cc,
            // search for "#6070".
            auto [ts, ttl] = ts_ttl;

            if (v._marker && ts == v._marker_ts && ttl == v._marker_ttl) {
-                _result[ts].clustered_inserts.push_back({
-                        ttl,
-                        ckey,
-                        *v._marker,
-                        std::move(row_update.atomic_entries),
-                        {}
-                    });
+                _result[ts].clustered_inserts.push_back({ttl, ckey, *v._marker, std::move(row_update.atomic_entries), {}});

                auto& cr_insert = _result[ts].clustered_inserts.back();
                bool clustered_update_exists = false;
-                for (auto& nonatomic_up: row_update.nonatomic_entries) {
+                for (auto& nonatomic_up : row_update.nonatomic_entries) {
                    // Updating a collection column with an INSERT statement implies inserting a tombstone.
                    //
                    // For example, suppose that we have:
@@ -362,12 +361,7 @@ struct extract_changes_visitor {
                        cr_insert.nonatomic_entries.push_back(std::move(nonatomic_up));
                    } else {
                        if (!clustered_update_exists) {
-                            _result[ts].clustered_updates.push_back({
-                                ttl,
-                                ckey,
-                                {},
-                                {}
-                            });
+                            _result[ts].clustered_updates.push_back({ttl, ckey, {}, {}});

                            // Multiple iterations of this `for` loop (for different collection columns)
                            // might want to put their `nonatomic_up`s into an UPDATE change;
@@ -390,12 +384,7 @@ struct extract_changes_visitor {
                    }
                }
            } else {
-                _result[ts].clustered_updates.push_back({
-                        ttl,
-                        ckey,
-                        std::move(row_update.atomic_entries),
-                        std::move(row_update.nonatomic_entries)
-                    });
+                _result[ts].clustered_updates.push_back({ttl, ckey, std::move(row_update.atomic_entries), std::move(row_update.nonatomic_entries)});
            }
        }
    }
@@ -412,7 +401,9 @@ struct extract_changes_visitor {
        _result[t.timestamp].partition_deletions = partition_deletion{t};
    }

-    constexpr bool finished() const { return false; }
+    constexpr bool finished() const {
+        return false;
+    }
 };

 set_of_changes extract_changes(const mutation& m) {
@@ -426,13 +417,23 @@ namespace cdc {
 struct find_timestamp_visitor {
    api::timestamp_type _ts = api::missing_timestamp;

-    bool finished() const { return _ts != api::missing_timestamp; }
+    bool finished() const {
+        return _ts != api::missing_timestamp;
+    }

-    void visit(api::timestamp_type ts) { _ts = ts; }
-    void visit(const atomic_cell_view& cell) { visit(cell.timestamp()); }
+    void visit(api::timestamp_type ts) {
+        _ts = ts;
+    }
+    void visit(const atomic_cell_view& cell) {
+        visit(cell.timestamp());
+    }

-    void live_atomic_cell(const column_definition&, const atomic_cell_view& cell) { visit(cell); }
-    void dead_atomic_cell(const column_definition&, const atomic_cell_view& cell) { visit(cell); }
+    void live_atomic_cell(const column_definition&, const atomic_cell_view& cell) {
+        visit(cell);
+    }
+    void dead_atomic_cell(const column_definition&, const atomic_cell_view& cell) {
+        visit(cell);
+    }
    void collection_tombstone(const tombstone& t) {
        // A collection tombstone with timestamp T can be created with:
        // UPDATE ks.t USING TIMESTAMP T + 1 SET X = null WHERE ...
@@ -441,15 +442,33 @@ struct find_timestamp_visitor {
        // with cdc$time using timestamp T + 1 instead of T.
        visit(t.timestamp + 1);
    }
-    void live_collection_cell(bytes_view, const atomic_cell_view& cell) { visit(cell); }
-    void dead_collection_cell(bytes_view, const atomic_cell_view& cell) { visit(cell); }
-    void collection_column(const column_definition&, auto&& visit_collection) { visit_collection(*this); }
-    void marker(const row_marker& rm) { visit(rm.timestamp()); }
-    void static_row_cells(auto&& visit_row_cells) { visit_row_cells(*this); }
-    void clustered_row_cells(const clustering_key&, auto&& visit_row_cells) { visit_row_cells(*this); }
-    void clustered_row_delete(const clustering_key&, const tombstone& t) { visit(t.timestamp); }
-    void range_delete(const range_tombstone& t) { visit(t.tomb.timestamp); }
-    void partition_delete(const tombstone& t) { visit(t.timestamp); }
+    void live_collection_cell(bytes_view, const atomic_cell_view& cell) {
+        visit(cell);
+    }
+    void dead_collection_cell(bytes_view, const atomic_cell_view& cell) {
+        visit(cell);
+    }
+    void collection_column(const column_definition&, auto&& visit_collection) {
+        visit_collection(*this);
+    }
+    void marker(const row_marker& rm) {
+        visit(rm.timestamp());
+    }
+    void static_row_cells(auto&& visit_row_cells) {
+        visit_row_cells(*this);
+    }
+    void clustered_row_cells(const clustering_key&, auto&& visit_row_cells) {
+        visit_row_cells(*this);
+    }
+    void clustered_row_delete(const clustering_key&, const tombstone& t) {
+        visit(t.timestamp);
+    }
+    void range_delete(const range_tombstone& t) {
+        visit(t.tomb.timestamp);
+    }
+    void partition_delete(const tombstone& t) {
+        visit(t.timestamp);
+    }
 };

 /* Find some timestamp inside the given mutation.
@@ -505,8 +524,12 @@ struct should_split_visitor {

    virtual ~should_split_visitor() = default;

-    inline bool finished() const { return _result; }
-    inline void stop() { _result = true; }
+    inline bool finished() const {
+        return _result;
+    }
+    inline void stop() {
+        _result = true;
+    }

    void visit(api::timestamp_type ts, gc_clock::duration ttl = gc_clock::duration(0)) {
        if (_ts != api::missing_timestamp && _ts != ts) {
@@ -517,15 +540,23 @@ struct should_split_visitor {
        if (_ttl && *_ttl != ttl) {
            return stop();
        }
-        _ttl = { ttl };
+        _ttl = {ttl};
    }

-    void visit(const atomic_cell_view& cell) { visit(cell.timestamp(), get_ttl(cell)); }
+    void visit(const atomic_cell_view& cell) {
+        visit(cell.timestamp(), get_ttl(cell));
+    }

-    void live_atomic_cell(const column_definition&, const atomic_cell_view& cell) { visit(cell); }
-    void dead_atomic_cell(const column_definition&, const atomic_cell_view& cell) { visit(cell); }
+    void live_atomic_cell(const column_definition&, const atomic_cell_view& cell) {
+        visit(cell);
+    }
+    void dead_atomic_cell(const column_definition&, const atomic_cell_view& cell) {
+        visit(cell);
+    }

-    void collection_tombstone(const tombstone& t) { visit(t.timestamp + 1); }
+    void collection_tombstone(const tombstone& t) {
+        visit(t.timestamp + 1);
+    }

    virtual void live_collection_cell(bytes_view, const atomic_cell_view& cell) {
        if (_had_row_marker) {
@@ -534,8 +565,12 @@ struct should_split_visitor {
        }
        visit(cell);
    }
-    void dead_collection_cell(bytes_view, const atomic_cell_view& cell) { visit(cell); }
-    void collection_column(const column_definition&, auto&& visit_collection) { visit_collection(*this); }
+    void dead_collection_cell(bytes_view, const atomic_cell_view& cell) {
+        visit(cell);
+    }
+    void collection_column(const column_definition&, auto&& visit_collection) {
+        visit_collection(*this);
+    }

    virtual void marker(const row_marker& rm) {
        _had_row_marker = true;
@@ -606,8 +641,8 @@ bool should_split(const mutation& m, const per_request_options& options) {
    cdc::inspect_mutation(m, v);

    return v._result
-    // A mutation with no timestamp will be split into 0 mutations:
-        || v._ts == api::missing_timestamp;
+           // A mutation with no timestamp will be split into 0 mutations:
+           || v._ts == api::missing_timestamp;
 }

 // Returns true if the row state and the atomic and nonatomic entries represent
@@ -642,7 +677,7 @@ static bool entries_match_row_state(const schema_ptr& base_schema, const cell_ma
        if (current_values.size() != update.cells.size()) {
            return false;
        }
-        
+
        std::unordered_map<sstring_view, bytes> current_values_map;
        for (const auto& entry : current_values) {
            const auto attr_name = std::string_view(value_cast<sstring>(entry.first));
@@ -711,8 +746,8 @@ bool should_skip(batch& changes, const mutation& base_mutation, change_processor
    return true;
 }

-void process_changes_with_splitting(const mutation& base_mutation, change_processor& processor,
-        bool enable_preimage, bool enable_postimage, bool alternator_strict_compatibility) {
+void process_changes_with_splitting(
+        const mutation& base_mutation, change_processor& processor, bool enable_preimage, bool enable_postimage, bool alternator_strict_compatibility) {
    const auto base_schema = base_mutation.schema();
    auto changes = extract_changes(base_mutation);
    auto pk = base_mutation.key();
@@ -824,8 +859,8 @@ void process_changes_with_splitting(const mutation& base_mutation, change_proces
    }
 }

-void process_changes_without_splitting(const mutation& base_mutation, change_processor& processor,
-        bool enable_preimage, bool enable_postimage, bool alternator_strict_compatibility) {
+void process_changes_without_splitting(
+        const mutation& base_mutation, change_processor& processor, bool enable_preimage, bool enable_postimage, bool alternator_strict_compatibility) {
    if (alternator_strict_compatibility) {
        auto changes = extract_changes(base_mutation);
        if (should_skip(changes.begin()->second, base_mutation, processor)) {
@@ -842,7 +877,7 @@ void process_changes_without_splitting(const mutation& base_mutation, change_pro

        one_kind_column_set columns{base_schema->static_columns_count()};
        if (!p.static_row().empty()) {
-            p.static_row().get().for_each_cell([&] (column_id id, const atomic_cell_or_collection& cell) {
+            p.static_row().get().for_each_cell([&](column_id id, const atomic_cell_or_collection& cell) {
                columns.set(id);
            });
            processor.produce_preimage(nullptr, columns);
@@ -855,7 +890,7 @@ void process_changes_without_splitting(const mutation& base_mutation, change_pro
                // Row deleted - include all columns in preimage
                columns.set(0, base_schema->regular_columns_count(), true);
            } else {
-                cr.row().cells().for_each_cell([&] (column_id id, const atomic_cell_or_collection& cell) {
+                cr.row().cells().for_each_cell([&](column_id id, const atomic_cell_or_collection& cell) {
                    columns.set(id);
                });
            }
--- a/compaction/compaction.cc
+++ b/compaction/compaction.cc
@@ -48,6 +48,7 @@
 #include "mutation/mutation_fragment_stream_validator.hh"
 #include "utils/assert.hh"
 #include "utils/error_injection.hh"
+#include "utils/chunked_vector.hh"
 #include "utils/pretty_printers.hh"
 #include "readers/multi_range.hh"
 #include "readers/compacting.hh"
@@ -161,6 +162,7 @@ std::string_view to_string(compaction_type type) {
    case compaction_type::Reshape: return "Reshape";
    case compaction_type::Split: return "Split";
    case compaction_type::Major: return "Major";
+    case compaction_type::RewriteComponent: return "RewriteComponent";
    }
    on_internal_error_noexcept(clogger, format("Invalid compaction type {}", int(type)));
    return "(invalid)";
@@ -598,8 +600,7 @@ protected:
    // Garbage collected sstables that were added to SSTable set and should be eventually removed from it.
    std::vector<sstables::shared_sstable> _used_garbage_collected_sstables;
    utils::observable<> _stop_request_observable;
-    // optional tombstone_gc_state that is used when gc has to check only the compacting sstables to collect tombstones.
-    std::optional<tombstone_gc_state> _tombstone_gc_state_with_commitlog_check_disabled;
+    tombstone_gc_state _tombstone_gc_state;
    int64_t _output_repaired_at = 0;
 private:
    // Keeps track of monitors for input sstable.
@@ -611,23 +612,23 @@ private:
    }

    // Called in a seastar thread
-    dht::partition_range_vector
+    utils::chunked_vector<dht::partition_range>
    get_ranges_for_invalidation(const std::vector<sstables::shared_sstable>& sstables) {
        // If owned ranges is disengaged, it means no cleanup work was done and
        // so nothing needs to be invalidated.
        if (!_owned_ranges) {
-            return dht::partition_range_vector{};
+            return {};
        }
-        auto owned_ranges = dht::to_partition_ranges(*_owned_ranges, utils::can_yield::yes);
+        auto owned_ranges = dht::to_partition_ranges_chunked(*_owned_ranges).get();

        auto non_owned_ranges = sstables
                | std::views::transform([] (const sstables::shared_sstable& sst) {
            seastar::thread::maybe_yield();
            return dht::partition_range::make({sst->get_first_decorated_key(), true},
                                              {sst->get_last_decorated_key(), true});
-        })      | std::ranges::to<dht::partition_range_vector>();
+        })      | std::ranges::to<utils::chunked_vector<dht::partition_range>>();

-        return dht::subtract_ranges(*_schema, non_owned_ranges, std::move(owned_ranges)).get();
+        return dht::subtract_ranges(*_schema, std::move(non_owned_ranges), std::move(owned_ranges)).get();
    }
 protected:
    compaction(compaction_group_view& table_s, compaction_descriptor descriptor, compaction_data& cdata, compaction_progress_monitor& progress_monitor, use_backlog_tracker use_backlog_tracker)
@@ -649,9 +650,12 @@ protected:
        , _owned_ranges(std::move(descriptor.owned_ranges))
        , _sharder(descriptor.sharder)
        , _owned_ranges_checker(_owned_ranges ? std::optional<dht::incremental_owned_ranges_checker>(*_owned_ranges) : std::nullopt)
-        , _tombstone_gc_state_with_commitlog_check_disabled(descriptor.gc_check_only_compacting_sstables ? std::make_optional(_table_s.get_tombstone_gc_state().with_commitlog_check_disabled()) : std::nullopt)
+        , _tombstone_gc_state(_table_s.get_tombstone_gc_state())
        , _progress_monitor(progress_monitor)
    {
+        if (descriptor.gc_check_only_compacting_sstables) {
+            _tombstone_gc_state = _tombstone_gc_state.with_commitlog_check_disabled();
+        }
        std::unordered_set<sstables::run_id> ssts_run_ids;
        _contains_multi_fragment_runs = std::any_of(_sstables.begin(), _sstables.end(), [&ssts_run_ids] (sstables::shared_sstable& sst) {
            return !ssts_run_ids.insert(sst->run_identifier()).second;
@@ -718,8 +722,8 @@ protected:

    compaction_completion_desc
    get_compaction_completion_desc(std::vector<sstables::shared_sstable> input_sstables, std::vector<sstables::shared_sstable> output_sstables) {
-        auto ranges_for_for_invalidation = get_ranges_for_invalidation(input_sstables);
-        return compaction_completion_desc{std::move(input_sstables), std::move(output_sstables), std::move(ranges_for_for_invalidation)};
+        auto ranges = get_ranges_for_invalidation(input_sstables);
+        return compaction_completion_desc{std::move(input_sstables), std::move(output_sstables), std::move(ranges)};
    }

    // Tombstone expiration is enabled based on the presence of sstable set.
@@ -849,8 +853,8 @@ private:
        return _table_s.get_compaction_strategy().make_sstable_set(_table_s);
    }

-    const tombstone_gc_state& get_tombstone_gc_state() const {
-        return _tombstone_gc_state_with_commitlog_check_disabled ? _tombstone_gc_state_with_commitlog_check_disabled.value() : _table_s.get_tombstone_gc_state();
+    tombstone_gc_state get_tombstone_gc_state() const {
+        return _tombstone_gc_state;
    }

    future<> setup() {
@@ -1050,7 +1054,7 @@ private:
            return can_never_purge;
        }
        return [this] (const dht::decorated_key& dk, is_shadowable is_shadowable) {
-            return get_max_purgeable_timestamp(_table_s, *_selector, _compacting_for_max_purgeable_func, dk, _bloom_filter_checks, _compacting_max_timestamp, _tombstone_gc_state_with_commitlog_check_disabled.has_value(), is_shadowable);
+            return get_max_purgeable_timestamp(_table_s, *_selector, _compacting_for_max_purgeable_func, dk, _bloom_filter_checks, _compacting_max_timestamp, !_tombstone_gc_state.is_commitlog_check_enabled(), is_shadowable);
        };
    }

@@ -2048,6 +2052,7 @@ compaction_type compaction_type_options::type() const {
        compaction_type::Reshape,
        compaction_type::Split,
        compaction_type::Major,
+        compaction_type::RewriteComponent,
    };
    static_assert(std::variant_size_v<compaction_type_options::options_variant> == std::size(index_to_type));
    return index_to_type[_options.index()];
@@ -2084,6 +2089,9 @@ static std::unique_ptr<compaction> make_compaction(compaction_group_view& table_
        std::unique_ptr<compaction> operator()(compaction_type_options::split split_options) {
            return std::make_unique<split_compaction>(table_s, std::move(descriptor), cdata, std::move(split_options), progress_monitor);
        }
+        std::unique_ptr<compaction> operator()(compaction_type_options::component_rewrite) {
+            throw std::runtime_error("component_rewrite compaction should be handled separately");
+        }
    } visitor_factory{table_s, std::move(descriptor), cdata, progress_monitor};

    return descriptor.options.visit(visitor_factory);
@@ -2101,7 +2109,7 @@ static future<compaction_result> scrub_sstables_validate_mode(compaction_descrip

        validation_errors += co_await sst->validate(permit, cdata.abort, [&schema] (sstring what) {
            scrub_compaction::report_validation_error(compaction_type::Scrub, *schema, what);
-        }, monitor_generator(sst));
+        }, monitor_generator(sst), true);
        // Did validation actually finish because aborted?
        if (cdata.is_stop_requested()) {
            // Compaction manager will catch this exception and re-schedule the compaction.
@@ -2138,6 +2146,34 @@ future<compaction_result> scrub_sstables_validate_mode(compaction_descriptor des
    co_return res;
 }

+future<compaction_result> rewrite_sstables_component(compaction_descriptor descriptor, compaction_group_view& table_s) { // calls link_with_rewritten_component on every input sstable, then passes the old and new sets to descriptor.replacer
+    return seastar::async([descriptor = std::move(descriptor), &table_s] () mutable { // seastar thread so .get() below can wait; table_s is captured by reference and must outlive the returned future
+        compaction_result result {
+            .stats = {
+                .started_at = db_clock::now(),
+            },
+        };
+
+        const auto& options = descriptor.options.as<compaction_type_options::component_rewrite>();
+        bool update_id = static_cast<bool>(options.update_id); // bool_class flag -> plain bool for the call in the loop
+        // When rewriting a component, we cannot use the standard descriptor creator
+        // because we must preserve the sstable version.
+        auto creator = [&table_s] (sstables::shared_sstable sst) {
+            return table_s.make_sstable(sst->state(), sst->get_version());
+        };
+        result.new_sstables.reserve(descriptor.sstables.size());
+        for (auto& sst : descriptor.sstables) {
+            auto rewritten = sst->link_with_rewritten_component(creator, options.component_to_rewrite, options.modifier, update_id).get();
+            result.new_sstables.push_back(rewritten);
+        }
+
+        descriptor.replacer({std::move(descriptor.sstables), result.new_sstables}); // inputs are moved out; the new list is copied because result is still returned below
+
+        result.stats.ended_at = db_clock::now();
+        return result;
+    });
+}
+
 future<compaction_result>
 compact_sstables(compaction_descriptor descriptor, compaction_data& cdata, compaction_group_view& table_s, compaction_progress_monitor& progress_monitor) {
    if (descriptor.sstables.empty()) {
@@ -2149,6 +2185,9 @@ compact_sstables(compaction_descriptor descriptor, compaction_data& cdata, compa
        // Bypass the usual compaction machinery for dry-mode scrub
        return scrub_sstables_validate_mode(std::move(descriptor), cdata, table_s, progress_monitor);
    }
+    if (descriptor.options.type() == compaction_type::RewriteComponent) {
+        return rewrite_sstables_component(std::move(descriptor), table_s);
+    }
    return compaction::run(make_compaction(table_s, std::move(descriptor), cdata, progress_monitor));
 }

--- a/compaction/compaction_descriptor.hh
+++ b/compaction/compaction_descriptor.hh
@@ -12,10 +12,12 @@
 #include <functional>
 #include <optional>
 #include <variant>
+#include "sstables/component_type.hh"
 #include "sstables/types_fwd.hh"
 #include "sstables/sstable_set.hh"
 #include "compaction_fwd.hh"
 #include "mutation_writer/token_group_based_splitting_writer.hh"
+#include "utils/chunked_vector.hh"

 namespace compaction {

@@ -30,6 +32,7 @@ enum class compaction_type {
    Reshape = 7,
    Split = 8,
    Major = 9,
+    RewriteComponent = 10,
 };

 struct compaction_completion_desc {
@@ -38,7 +41,7 @@ struct compaction_completion_desc {
    // New, fresh SSTables that should be added to SSTable set, replacing the old ones.
    std::vector<sstables::shared_sstable> new_sstables;
    // Set of compacted partition ranges that should be invalidated in the cache.
-    dht::partition_range_vector ranges_for_cache_invalidation;
+    utils::chunked_vector<dht::partition_range> ranges_for_cache_invalidation;
 };

 // creates a new SSTable for a given shard
@@ -90,8 +93,15 @@ public:
    struct split {
        mutation_writer::classify_by_token_group classifier;
    };
+    struct component_rewrite { // option set carried by compaction_type_options for a component-rewrite job
+        sstables::component_type component_to_rewrite; // identifies which sstable component the job targets
+        std::function<void(sstables::sstable&)> modifier; // caller-supplied callback that receives an sstable by reference
+
+        using update_sstable_id = bool_class<class update_sstable_id_tag>; // strongly-typed yes/no flag
+        update_sstable_id update_id = update_sstable_id::yes; // defaults to yes
+    };
 private:
-    using options_variant = std::variant<regular, cleanup, upgrade, scrub, reshard, reshape, split, major>;
+    using options_variant = std::variant<regular, cleanup, upgrade, scrub, reshard, reshape, split, major, component_rewrite>;

 private:
    options_variant _options;
@@ -129,6 +139,10 @@ public:
        return compaction_type_options(scrub{.operation_mode = mode, .quarantine_sstables = quarantine_sstables, .drop_unfixable = drop_unfixable_sstables});
    }

+    static compaction_type_options make_component_rewrite(component_type component, std::function<void(sstables::sstable&)> modifier, component_rewrite::update_sstable_id update_id = component_rewrite::update_sstable_id::yes) { // factory: wraps a component_rewrite option set
+        return compaction_type_options(component_rewrite{.component_to_rewrite = component, .modifier = std::move(modifier), .update_id = update_id}); // modifier is moved into the options; update_id defaults to yes
+    }
+
    static compaction_type_options make_split(mutation_writer::classify_by_token_group classifier) {
        return compaction_type_options(split{std::move(classifier)});
    }
--- a/compaction/compaction_group_view.hh
+++ b/compaction/compaction_group_view.hh
@@ -46,6 +46,7 @@ public:
    virtual reader_permit make_compaction_reader_permit() const = 0;
    virtual sstables::sstables_manager& get_sstables_manager() noexcept = 0;
    virtual sstables::shared_sstable make_sstable(sstables::sstable_state) const = 0;
+    virtual sstables::shared_sstable make_sstable(sstables::sstable_state, sstables::sstable_version_types) const = 0;
    virtual sstables::sstable_writer_config configure_writer(sstring origin) const = 0;
    virtual api::timestamp_type min_memtable_timestamp() const = 0;
    virtual api::timestamp_type min_memtable_live_timestamp() const = 0;
@@ -54,7 +55,7 @@ public:
    virtual future<> on_compaction_completion(compaction_completion_desc desc, sstables::offstrategy offstrategy) = 0;
    virtual bool is_auto_compaction_disabled_by_user() const noexcept = 0;
    virtual bool tombstone_gc_enabled() const noexcept = 0;
-    virtual const tombstone_gc_state& get_tombstone_gc_state() const noexcept = 0;
+    virtual tombstone_gc_state get_tombstone_gc_state() const noexcept = 0;
    virtual compaction_backlog_tracker& get_backlog_tracker() = 0;
    virtual const std::string get_group_id() const noexcept = 0;
    virtual seastar::condition_variable& get_staging_done_condition() noexcept = 0;
--- a/compaction/compaction_manager.cc
+++ b/compaction/compaction_manager.cc
@@ -778,6 +778,7 @@ compaction_manager::get_incremental_repair_read_lock(compaction::compaction_grou
        cmlog.debug("Get get_incremental_repair_read_lock for {} started", reason);
    }
    compaction::compaction_state& cs = get_compaction_state(&t);
+    auto gh = cs.gate.hold();
    auto ret = co_await cs.incremental_repair_lock.hold_read_lock();
    if (!reason.empty()) {
        cmlog.debug("Get get_incremental_repair_read_lock for {} done", reason);
@@ -791,6 +792,7 @@ compaction_manager::get_incremental_repair_write_lock(compaction::compaction_gro
        cmlog.debug("Get get_incremental_repair_write_lock for {} started", reason);
    }
    compaction::compaction_state& cs = get_compaction_state(&t);
+    auto gh = cs.gate.hold();
    auto ret = co_await cs.incremental_repair_lock.hold_write_lock();
    if (!reason.empty()) {
        cmlog.debug("Get get_incremental_repair_write_lock for {} done", reason);
@@ -944,7 +946,7 @@ sstables::shared_sstable sstables_task_executor::consume_sstable() {
    auto sst = _sstables.back();
    _sstables.pop_back();
    --_cm._stats.pending_tasks; // from this point on, switch_state(pending|active) works the same way as any other task
-    cmlog.debug("{}", format("consumed {}", sst->get_filename()));
+    cmlog.debug("consumed {}", sst->get_filename());
    return sst;
 }

@@ -1040,7 +1042,7 @@ compaction_manager::compaction_manager(config cfg, abort_source& as, tasks::task
        _compaction_controller.set_max_shares(max_shares);
    }))
    , _strategy_control(std::make_unique<strategy_control>(*this))
-    , _tombstone_gc_state(_shared_tombstone_gc_state) {
+{
    tm.register_module(_task_manager_module->get_name(), _task_manager_module);
    register_metrics();
    // Bandwidth throttling is node-wide, updater is needed on single shard
@@ -1064,7 +1066,7 @@ compaction_manager::compaction_manager(tasks::task_manager& tm)
    , _compaction_static_shares_observer(_cfg.static_shares.observe(_update_compaction_static_shares_action.make_observer()))
    , _compaction_max_shares_observer(_cfg.max_shares.observe([] (const float& max_shares) {}))
    , _strategy_control(std::make_unique<strategy_control>(*this))
-    , _tombstone_gc_state(_shared_tombstone_gc_state) {
+{
    tm.register_module(_task_manager_module->get_name(), _task_manager_module);
    // No metric registration because this constructor is supposed to be used only by the testing
    // infrastructure.
@@ -1206,7 +1208,6 @@ future<> compaction_manager::await_tasks(std::vector<shared_ptr<compaction_task_

 std::vector<shared_ptr<compaction_task_executor>>
 compaction_manager::do_stop_ongoing_compactions(sstring reason, std::function<bool(const compaction_group_view*)> filter, std::optional<compaction_type> type_opt) noexcept {
-    auto ongoing_compactions = get_compactions(filter).size();
    auto tasks = _tasks
            | std::views::filter([&filter, type_opt] (const auto& task) {
                return filter(task.compacting_table()) && (!type_opt || task.compaction_type() == *type_opt);
@@ -1215,6 +1216,7 @@ compaction_manager::do_stop_ongoing_compactions(sstring reason, std::function<bo
            | std::ranges::to<std::vector<shared_ptr<compaction_task_executor>>>();
    logging::log_level level = tasks.empty() ? log_level::debug : log_level::info;
    if (cmlog.is_enabled(level)) {
+        auto ongoing_compactions = get_compactions(filter).size();
        std::string scope = "";
        if (!tasks.empty()) {
            const compaction_group_view* t = tasks.front()->compacting_table();
@@ -1266,9 +1268,15 @@ future<> compaction_manager::start(const db::config& cfg, utils::disk_space_moni
    if (dsm && (this_shard_id() == 0)) {
        _out_of_space_subscription = dsm->subscribe(cfg.critical_disk_utilization_level, [this] (auto threshold_reached) {
            if (threshold_reached) {
-                return container().invoke_on_all([] (compaction_manager& cm) { return cm.drain(); });
+                return container().invoke_on_all([] (compaction_manager& cm) {
+                    cm._in_critical_disk_utilization_mode = true;
+                    return cm.drain();
+                });
            }
-            return container().invoke_on_all([] (compaction_manager& cm) { cm.enable(); });
+            return container().invoke_on_all([] (compaction_manager& cm) {
+                cm._in_critical_disk_utilization_mode = false;
+                cm.enable();
+            });
        });
    }

@@ -1418,11 +1426,17 @@ protected:
            compaction_strategy cs = t.get_compaction_strategy();
            compaction_descriptor descriptor = co_await cs.get_sstables_for_compaction(t, _cm.get_strategy_control());
            int weight = calculate_weight(descriptor);
-            cmlog.debug("Started minor compaction sstables={} sstables_reapired_at={} range={} uuid={} compaction_uuid={}",
-                    descriptor.sstables, compacting_table()->get_sstables_repaired_at(),
-                    compacting_table()->token_range(), uuid, _compaction_data.compaction_uuid);
+            bool debug_enabled = cmlog.is_enabled(log_level::debug);
+            if (debug_enabled) {
+                cmlog.debug("Started minor compaction sstables={} sstables_reapired_at={} range={} uuid={} compaction_uuid={}",
+                        descriptor.sstables, compacting_table()->get_sstables_repaired_at(),
+                        compacting_table()->token_range(), uuid, _compaction_data.compaction_uuid);
+            }

-            auto old_sstables = ::format("{}", descriptor.sstables);
+            sstring old_sstables;
+            if (debug_enabled) {
+                old_sstables = ::format("{}", descriptor.sstables);
+            }

            if (descriptor.sstables.empty() || !can_proceed() || t.is_auto_compaction_disabled_by_user()) {
                cmlog.debug("{}: sstables={} can_proceed={} auto_compaction={}", *this, descriptor.sstables.size(), can_proceed(), t.is_auto_compaction_disabled_by_user());
@@ -1452,8 +1466,10 @@ protected:
            try {
                bool should_update_history = this->should_update_history(descriptor.options.type());
                compaction_result res = co_await compact_sstables(std::move(descriptor), _compaction_data, on_replace);
-                cmlog.debug("Finished minor compaction old_sstables={} new_sstables={} sstables_reapired_at={} range={} uuid={} compaction_uuid={}",
-                        old_sstables, res.new_sstables, compacting_table()->get_sstables_repaired_at(), compacting_table()->token_range(), uuid, _compaction_data.compaction_uuid);
+                if (debug_enabled) {
+                    cmlog.debug("Finished minor compaction old_sstables={} new_sstables={} sstables_reapired_at={} range={} uuid={} compaction_uuid={}",
+                            old_sstables, res.new_sstables, compacting_table()->get_sstables_repaired_at(), compacting_table()->token_range(), uuid, _compaction_data.compaction_uuid);
+                }
                finish_compaction();
                if (should_update_history) {
                    // update_history can take a long time compared to
@@ -1786,6 +1802,41 @@ protected:
    }
 };

+class rewrite_sstables_component_compaction_task_executor final : public rewrite_sstables_compaction_task_executor { // rewrites its input sstables one at a time and records each input -> output pair
+    std::unordered_map<sstables::shared_sstable, sstables::shared_sstable>& _rewritten_sstables; // caller-owned result map, held by reference: key = input sstable, value = its rewritten counterpart
+public:
+    rewrite_sstables_component_compaction_task_executor(compaction_manager& mgr,
+                                       throw_if_stopping do_throw_if_stopping,
+                                       compaction_group_view* t,
+                                       tasks::task_id parent_id,
+                                       compaction_type_options options,
+                                       std::vector<sstables::shared_sstable> sstables,
+                                       compacting_sstable_registration compacting,
+                                       std::unordered_map<sstables::shared_sstable, sstables::shared_sstable>& rewritten_sstables) // must outlive this task; only a reference is stored
+            : rewrite_sstables_compaction_task_executor(mgr, do_throw_if_stopping, t, parent_id, options, {},
+                std::move(sstables), std::move(compacting), compaction_manager::can_purge_tombstones::no, "component_rewrite"), // tombstone purging is disabled for this task
+            _rewritten_sstables(rewritten_sstables)
+    {}
+protected:
+    virtual future<compaction_manager::compaction_stats_opt> do_run() override { // returns the stats accumulated over all rewritten sstables
+        compaction_stats stats{};
+
+        switch_state(state::pending);
+        auto maintenance_permit = co_await acquire_semaphore(_cm._maintenance_ops_sem); // local stays alive until do_run() returns; presumably this holds the semaphore units for that long -- confirm
+
+        while (!_sstables.empty()) {
+            auto sst = consume_sstable();
+            auto it = _rewritten_sstables.emplace(sst, sstables::shared_sstable{}).first; // entry is created with an empty value first; NOTE(review): 'it' is used after the co_await below, which relies on the map not rehashing meanwhile (perform_component_rewrite() reserves capacity) -- confirm
+            auto res = co_await rewrite_sstable(std::move(sst));
+            _cm._validation_errors += res.stats.validation_errors;
+            stats += res.stats;
+            it->second = std::move(res.new_sstables.front()); // NOTE(review): front() assumes rewrite_sstable() yielded at least one new sstable -- confirm
+        }
+
+        co_return stats;
+    }
+};
+
 class split_compaction_task_executor final : public rewrite_sstables_compaction_task_executor {
    compaction_type_options::split _opt;
 public:
@@ -1899,6 +1950,28 @@ compaction_manager::rewrite_sstables(compaction_group_view& t, compaction_type_o
    return perform_task_on_all_files<rewrite_sstables_compaction_task_executor>("rewrite", info, t, std::move(options), std::move(owned_ranges_ptr), std::move(get_func), throw_if_stopping::no, can_purge, std::move(options_desc));
 }

+future<compaction_manager::compaction_stats_opt> // std::nullopt when the task was not started (see the two early returns below)
+compaction_manager::rewrite_sstables_component(compaction_group_view& t,
+                                     std::vector<sstables::shared_sstable>& sstables, // NOTE: taken by lvalue reference but moved from below, so the caller's vector is left in a moved-from state
+                                     compaction_type_options options,
+                                     std::unordered_map<sstables::shared_sstable, sstables::shared_sstable>& rewritten_sstables, // output map, filled in by the task executor
+                                     tasks::task_info info) {
+    auto gh = start_compaction(t);
+    if (!gh) { // presumably means compaction could not be started for this view -- confirm against start_compaction()
+        co_return std::nullopt;
+    }
+
+    if (sstables.empty()) { // nothing to rewrite
+        co_return std::nullopt;
+    }
+
+    compacting_sstable_registration compacting(*this, get_compaction_state(&t));
+    compacting.register_compacting(sstables); // the registration is then moved into the task below
+
+    co_return co_await perform_compaction<rewrite_sstables_component_compaction_task_executor>(throw_if_stopping::no, info, &t, info.id,
+        std::move(options), std::move(sstables), std::move(compacting), rewritten_sstables); // rewritten_sstables is forwarded as an lvalue reference, not moved
+}
+
 class validate_sstables_compaction_task_executor : public sstables_task_executor {
    compaction_manager::quarantine_invalid_sstables _quarantine_sstables;
 public:
@@ -2289,6 +2362,16 @@ future<compaction_manager::compaction_stats_opt> compaction_manager::perform_spl
    return perform_task_on_all_files<split_compaction_task_executor>("split", info, t, std::move(options), std::move(owned_ranges_ptr), std::move(get_sstables), throw_if_stopping::no);
 }

+std::exception_ptr compaction_manager::make_disabled_exception(compaction::compaction_group_view& cg) { // builds (does not throw) the exception describing why compaction is disabled
+    std::exception_ptr ex;
+    if (_in_critical_disk_utilization_mode) { // flag is toggled by the disk-space monitor subscription set up in start()
+        ex = std::make_exception_ptr(std::runtime_error("critical disk utilization"));
+    } else { // any other reason is reported as a compaction_stopped_exception naming the keyspace and table
+        ex = std::make_exception_ptr(compaction_stopped_exception(cg.schema()->ks_name(), cg.schema()->cf_name(), "compaction disabled"));
+    }
+    return ex;
+}
+
 future<std::vector<sstables::shared_sstable>>
 compaction_manager::maybe_split_new_sstable(sstables::shared_sstable sst, compaction_group_view& t, compaction_type_options::split opt) {
    if (!split_compaction_task_executor::sstable_needs_split(sst, opt)) {
@@ -2298,8 +2381,7 @@ compaction_manager::maybe_split_new_sstable(sstables::shared_sstable sst, compac
    // We don't want to prevent split because compaction is temporarily disabled on a view only for synchronization,
    // which is unneeded against new sstables that aren't part of any set yet, so never use can_proceed(&t) here.
    if (is_disabled()) {
-        co_return coroutine::exception(std::make_exception_ptr(std::runtime_error(format("Cannot split {} because manager has compaction disabled, " \
-                                                                                         "reason might be out of space prevention", sst->get_filename()))));
+        co_return coroutine::exception(make_disabled_exception(t));
    }
    std::vector<sstables::shared_sstable> ret;

@@ -2323,6 +2405,18 @@ compaction_manager::maybe_split_new_sstable(sstables::shared_sstable sst, compac
    co_return ret;
 }

+future<std::unordered_map<sstables::shared_sstable, sstables::shared_sstable>> compaction_manager::perform_component_rewrite(compaction::compaction_group_view& t,
+            tasks::task_info info,
+            std::vector<sstables::shared_sstable> sstables,
+            sstables::component_type component,
+            std::function<void(sstables::sstable&)> modifier,
+            compaction_type_options::component_rewrite::update_sstable_id update_id) { // public entry point: runs a component-rewrite task and returns the input -> rewritten sstable map
+    std::unordered_map<sstables::shared_sstable, sstables::shared_sstable> rewritten_sstables;
+    rewritten_sstables.reserve(sstables.size()); // pre-size for one entry per input sstable
+    co_await rewrite_sstables_component(t, sstables, compaction_type_options::make_component_rewrite(component, std::move(modifier), update_id), rewritten_sstables, info); // returned stats are discarded; the map stays empty if the callee returns early
+    co_return rewritten_sstables;
+}
+
 // Submit a table to be scrubbed and wait for its termination.
 future<compaction_manager::compaction_stats_opt> compaction_manager::perform_sstable_scrub(compaction_group_view& t, compaction_type_options::scrub opts, tasks::task_info info) {
    auto scrub_mode = opts.operation_mode;
@@ -2387,6 +2481,8 @@ future<> compaction_manager::remove(compaction_group_view& t, sstring reason) no
    if (!c_state.gate.is_closed()) {
        auto close_gate = c_state.gate.close();
        co_await stop_ongoing_compactions(reason, &t);
+        // Wait for users of incremental repair lock (can be either repair itself or maintenance compactions).
+        co_await c_state.incremental_repair_lock.write_lock();
        co_await std::move(close_gate);
    }

--- a/compaction/compaction_manager.hh
+++ b/compaction/compaction_manager.hh
@@ -55,6 +55,7 @@ class custom_compaction_task_executor;
 class regular_compaction_task_executor;
 class offstrategy_compaction_task_executor;
 class rewrite_sstables_compaction_task_executor;
+class rewrite_sstables_component_compaction_task_executor;
 class split_compaction_task_executor;
 class cleanup_sstables_compaction_task_executor;
 class validate_sstables_compaction_task_executor;
@@ -114,6 +115,8 @@ private:
    uint32_t _disabled_state_count = 0;

    bool is_disabled() const { return _state != state::running || _disabled_state_count > 0; }
+    // precondition: is_disabled() is true.
+    std::exception_ptr make_disabled_exception(compaction::compaction_group_view& cg);

    std::optional<future<>> _stop_future;

@@ -167,12 +170,9 @@ private:
    std::unique_ptr<strategy_control> _strategy_control;

    shared_tombstone_gc_state _shared_tombstone_gc_state;
-    // TODO: tombstone_gc_state should now have value semantics, but the code
-    // still uses it with reference semantics (inconsistently though).
-    // Drop this member, once the code is converted into using value semantics.
-    tombstone_gc_state _tombstone_gc_state;

    utils::disk_space_monitor::subscription _out_of_space_subscription;
+    bool _in_critical_disk_utilization_mode = false;
 private:
    // Requires task->_compaction_state.gate to be held and task to be registered in _tasks.
    future<compaction_stats_opt> perform_task(shared_ptr<compaction::compaction_task_executor> task, throw_if_stopping do_throw_if_stopping);
@@ -256,6 +256,12 @@ private:
    future<compaction_stats_opt> rewrite_sstables(compaction::compaction_group_view& t, compaction_type_options options, owned_ranges_ptr, get_candidates_func, tasks::task_info info,
                                                  can_purge_tombstones can_purge = can_purge_tombstones::yes, sstring options_desc = "");

+    future<compaction_stats_opt> rewrite_sstables_component(compaction_group_view& t,
+                                                            std::vector<sstables::shared_sstable>& sstables,
+                                                            compaction_type_options options,
+                                                            std::unordered_map<sstables::shared_sstable, sstables::shared_sstable>& rewritten_sstables,
+                                                            tasks::task_info info);
+
    // Stop all fibers, without waiting. Safe to be called multiple times.
    void do_stop() noexcept;
    future<> really_do_stop() noexcept;
@@ -364,6 +370,13 @@ public:
    // Submit a table to be scrubbed and wait for its termination.
    future<compaction_stats_opt> perform_sstable_scrub(compaction::compaction_group_view& t, compaction_type_options::scrub opts, tasks::task_info info);

+    future<std::unordered_map<sstables::shared_sstable, sstables::shared_sstable>> perform_component_rewrite(compaction::compaction_group_view& t,
+            tasks::task_info info,
+            std::vector<sstables::shared_sstable> sstables,
+            sstables::component_type component,
+            std::function<void(sstables::sstable&)> modifier,
+            compaction_type_options::component_rewrite::update_sstable_id update_id = compaction_type_options::component_rewrite::update_sstable_id::yes);
+
    // Submit a table for major compaction.
    future<> perform_major_compaction(compaction::compaction_group_view& t, tasks::task_info info, bool consider_only_existing_data = false);

@@ -456,10 +469,6 @@ public:

    compaction::strategy_control& get_strategy_control() const noexcept;

-    const tombstone_gc_state& get_tombstone_gc_state() const noexcept {
-        return _tombstone_gc_state;
-    };
-
    shared_tombstone_gc_state& get_shared_tombstone_gc_state() noexcept {
        return _shared_tombstone_gc_state;
    };
@@ -489,6 +498,7 @@ public:
    friend class compaction::regular_compaction_task_executor;
    friend class compaction::offstrategy_compaction_task_executor;
    friend class compaction::rewrite_sstables_compaction_task_executor;
+    friend class compaction::rewrite_sstables_component_compaction_task_executor;
    friend class compaction::cleanup_sstables_compaction_task_executor;
    friend class compaction::validate_sstables_compaction_task_executor;
    friend compaction_reenabler;
--- a/compaction/leveled_compaction_strategy.cc
+++ b/compaction/leveled_compaction_strategy.cc
@@ -33,8 +33,10 @@ future<compaction_descriptor> leveled_compaction_strategy::get_sstables_for_comp
    auto candidate = manifest.get_compaction_candidates(*state->last_compacted_keys, state->compaction_counter);

    if (!candidate.sstables.empty()) {
-        auto main_set = co_await table_s.main_sstable_set();
-        leveled_manifest::logger.debug("leveled: Compacting {} out of {} sstables", candidate.sstables.size(), main_set->size());
+        if (leveled_manifest::logger.is_enabled(logging::log_level::debug)) {
+            auto main_set = co_await table_s.main_sstable_set();
+            leveled_manifest::logger.debug("leveled: Compacting {} out of {} sstables", candidate.sstables.size(), main_set->size());
+        }
        co_return candidate;
    }

--- a/compaction/time_window_compaction_strategy.cc
+++ b/compaction/time_window_compaction_strategy.cc
@@ -15,6 +15,7 @@
 #include "compaction_strategy_state.hh"
 #include "utils/error_injection.hh"

+#include <seastar/util/lazy.hh>
 #include <ranges>

 namespace compaction {
@@ -28,12 +29,12 @@ time_window_compaction_strategy_state_ptr time_window_compaction_strategy::get_s
 }

 const std::unordered_map<sstring, std::chrono::seconds> time_window_compaction_strategy_options::valid_window_units = {
-    { "MINUTES", 60s }, { "HOURS", 3600s }, { "DAYS", 86400s }
-};
+        {"MINUTES", 60s}, {"HOURS", 3600s}, {"DAYS", 86400s}};

-const std::unordered_map<sstring, time_window_compaction_strategy_options::timestamp_resolutions> time_window_compaction_strategy_options::valid_timestamp_resolutions = {
-    { "MICROSECONDS", timestamp_resolutions::microsecond },
-    { "MILLISECONDS", timestamp_resolutions::millisecond },
+const std::unordered_map<sstring, time_window_compaction_strategy_options::timestamp_resolutions>
+        time_window_compaction_strategy_options::valid_timestamp_resolutions = {
+                {"MICROSECONDS", timestamp_resolutions::microsecond},
+                {"MILLISECONDS", timestamp_resolutions::millisecond},
 };

 static std::chrono::seconds validate_compaction_window_unit(const std::map<sstring, sstring>& options) {
@@ -43,7 +44,8 @@ static std::chrono::seconds validate_compaction_window_unit(const std::map<sstri
    if (tmp_value) {
        auto valid_window_units_it = time_window_compaction_strategy_options::valid_window_units.find(tmp_value.value());
        if (valid_window_units_it == time_window_compaction_strategy_options::valid_window_units.end()) {
-            throw exceptions::configuration_exception(fmt::format("Invalid window unit {} for {}", tmp_value.value(), time_window_compaction_strategy_options::COMPACTION_WINDOW_UNIT_KEY));
+            throw exceptions::configuration_exception(
+                    fmt::format("Invalid window unit {} for {}", tmp_value.value(), time_window_compaction_strategy_options::COMPACTION_WINDOW_UNIT_KEY));
        }
        window_unit = valid_window_units_it->second;
    }
@@ -59,10 +61,12 @@ static std::chrono::seconds validate_compaction_window_unit(const std::map<sstri

 static int validate_compaction_window_size(const std::map<sstring, sstring>& options) {
    auto tmp_value = compaction_strategy_impl::get_value(options, time_window_compaction_strategy_options::COMPACTION_WINDOW_SIZE_KEY);
-    int window_size = cql3::statements::property_definitions::to_long(time_window_compaction_strategy_options::COMPACTION_WINDOW_SIZE_KEY, tmp_value, time_window_compaction_strategy_options::DEFAULT_COMPACTION_WINDOW_SIZE);
+    int window_size = cql3::statements::property_definitions::to_long(time_window_compaction_strategy_options::COMPACTION_WINDOW_SIZE_KEY, tmp_value,
+            time_window_compaction_strategy_options::DEFAULT_COMPACTION_WINDOW_SIZE);

    if (window_size <= 0) {
-        throw exceptions::configuration_exception(fmt::format("{} value ({}) must be greater than 1", time_window_compaction_strategy_options::COMPACTION_WINDOW_SIZE_KEY, window_size));
+        throw exceptions::configuration_exception(
+                fmt::format("{} value ({}) must be greater than 1", time_window_compaction_strategy_options::COMPACTION_WINDOW_SIZE_KEY, window_size));
    }

    return window_size;
@@ -82,26 +86,30 @@ static db_clock::duration validate_expired_sstable_check_frequency_seconds(const
        try {
            expired_sstable_check_frequency = std::chrono::seconds(std::stol(tmp_value.value()));
        } catch (const std::exception& e) {
-            throw exceptions::syntax_exception(fmt::format("Invalid long value {} for {}", tmp_value.value(), time_window_compaction_strategy_options::EXPIRED_SSTABLE_CHECK_FREQUENCY_SECONDS_KEY));
+            throw exceptions::syntax_exception(fmt::format(
+                    "Invalid long value {} for {}", tmp_value.value(), time_window_compaction_strategy_options::EXPIRED_SSTABLE_CHECK_FREQUENCY_SECONDS_KEY));
        }
    }

    return expired_sstable_check_frequency;
 }

-static db_clock::duration validate_expired_sstable_check_frequency_seconds(const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options) {
+static db_clock::duration validate_expired_sstable_check_frequency_seconds(
+        const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options) {
    db_clock::duration expired_sstable_check_frequency = validate_expired_sstable_check_frequency_seconds(options);
    unchecked_options.erase(time_window_compaction_strategy_options::EXPIRED_SSTABLE_CHECK_FREQUENCY_SECONDS_KEY);
    return expired_sstable_check_frequency;
 }

 static time_window_compaction_strategy_options::timestamp_resolutions validate_timestamp_resolution(const std::map<sstring, sstring>& options) {
-    time_window_compaction_strategy_options::timestamp_resolutions timestamp_resolution = time_window_compaction_strategy_options::timestamp_resolutions::microsecond;
+    time_window_compaction_strategy_options::timestamp_resolutions timestamp_resolution =
+            time_window_compaction_strategy_options::timestamp_resolutions::microsecond;

    auto tmp_value = compaction_strategy_impl::get_value(options, time_window_compaction_strategy_options::TIMESTAMP_RESOLUTION_KEY);
    if (tmp_value) {
        if (!time_window_compaction_strategy_options::valid_timestamp_resolutions.contains(tmp_value.value())) {
-            throw exceptions::configuration_exception(fmt::format("Invalid timestamp resolution {} for {}", tmp_value.value(), time_window_compaction_strategy_options::TIMESTAMP_RESOLUTION_KEY));
+            throw exceptions::configuration_exception(fmt::format(
+                    "Invalid timestamp resolution {} for {}", tmp_value.value(), time_window_compaction_strategy_options::TIMESTAMP_RESOLUTION_KEY));
        } else {
            timestamp_resolution = time_window_compaction_strategy_options::valid_timestamp_resolutions.at(tmp_value.value());
        }
@@ -110,7 +118,8 @@ static time_window_compaction_strategy_options::timestamp_resolutions validate_t
    return timestamp_resolution;
 }

-static time_window_compaction_strategy_options::timestamp_resolutions validate_timestamp_resolution(const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options) {
+static time_window_compaction_strategy_options::timestamp_resolutions validate_timestamp_resolution(
+        const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options) {
    time_window_compaction_strategy_options::timestamp_resolutions timestamp_resolution = validate_timestamp_resolution(options);
    unchecked_options.erase(time_window_compaction_strategy_options::TIMESTAMP_RESOLUTION_KEY);
    return timestamp_resolution;
@@ -145,7 +154,7 @@ void time_window_compaction_strategy_options::validate(const std::map<sstring, s
    compaction_strategy_impl::validate_min_max_threshold(options, unchecked_options);

    auto it = options.find("enable_optimized_twcs_queries");
-    if (it != options.end() && it->second != "true"  && it->second != "false") {
+    if (it != options.end() && it->second != "true" && it->second != "false") {
        throw exceptions::configuration_exception(fmt::format("enable_optimized_twcs_queries value ({}) must be \"true\" or \"false\"", it->second));
    }
    unchecked_options.erase("enable_optimized_twcs_queries");
@@ -162,7 +171,9 @@ class classify_by_timestamp {
    std::vector<int64_t> _known_windows;

 public:
-    explicit classify_by_timestamp(time_window_compaction_strategy_options options) : _options(std::move(options)) { }
+    explicit classify_by_timestamp(time_window_compaction_strategy_options options)
+        : _options(std::move(options)) {
+    }
    int64_t operator()(api::timestamp_type ts) {
        const auto window = time_window_compaction_strategy::get_window_for(_options, ts);
        if (const auto it = std::ranges::find(_known_windows, window); it != _known_windows.end()) {
@@ -190,7 +201,7 @@ uint64_t time_window_compaction_strategy::adjust_partition_estimate(const mutati
    auto estimated_window_count = max_data_segregation_window_count;
    auto default_ttl = std::chrono::duration_cast<std::chrono::microseconds>(s->default_time_to_live());
    bool min_and_max_ts_available = ms_meta.min_timestamp && ms_meta.max_timestamp;
-    auto estimate_window_count = [this] (timestamp_type min_window, timestamp_type max_window) {
+    auto estimate_window_count = [this](timestamp_type min_window, timestamp_type max_window) {
        const auto window_size = get_window_size(_options);
        return (max_window + (window_size - 1) - min_window) / window_size;
    };
@@ -210,21 +221,19 @@ uint64_t time_window_compaction_strategy::adjust_partition_estimate(const mutati
    return partition_estimate / std::max(1UL, uint64_t(estimated_window_count));
 }

-mutation_reader_consumer time_window_compaction_strategy::make_interposer_consumer(const mutation_source_metadata& ms_meta, mutation_reader_consumer end_consumer) const {
-    if (ms_meta.min_timestamp && ms_meta.max_timestamp
-            && get_window_for(_options, *ms_meta.min_timestamp) == get_window_for(_options, *ms_meta.max_timestamp)) {
+mutation_reader_consumer time_window_compaction_strategy::make_interposer_consumer(
+        const mutation_source_metadata& ms_meta, mutation_reader_consumer end_consumer) const {
+    if (ms_meta.min_timestamp && ms_meta.max_timestamp &&
+            get_window_for(_options, *ms_meta.min_timestamp) == get_window_for(_options, *ms_meta.max_timestamp)) {
        return end_consumer;
    }
-    return [options = _options, end_consumer = std::move(end_consumer)] (mutation_reader rd) mutable -> future<> {
-        return mutation_writer::segregate_by_timestamp(
-                std::move(rd),
-                classify_by_timestamp(std::move(options)),
-                end_consumer);
+    return [options = _options, end_consumer = std::move(end_consumer)](mutation_reader rd) mutable -> future<> {
+        return mutation_writer::segregate_by_timestamp(std::move(rd), classify_by_timestamp(std::move(options)), end_consumer);
    };
 }

-compaction_descriptor
-time_window_compaction_strategy::get_reshaping_job(std::vector<sstables::shared_sstable> input, schema_ptr schema, reshape_config cfg) const {
+compaction_descriptor time_window_compaction_strategy::get_reshaping_job(
+        std::vector<sstables::shared_sstable> input, schema_ptr schema, reshape_config cfg) const {
    auto mode = cfg.mode;
    std::vector<sstables::shared_sstable> single_window;
    std::vector<sstables::shared_sstable> multi_window;
@@ -239,7 +248,7 @@ time_window_compaction_strategy::get_reshaping_job(std::vector<sstables::shared_

    // Sort input sstables by first_key order
    // to allow efficient reshaping of disjoint sstables.
-    std::sort(input.begin(), input.end(), [&schema] (const sstables::shared_sstable& a, const sstables::shared_sstable& b) {
+    std::sort(input.begin(), input.end(), [&schema](const sstables::shared_sstable& a, const sstables::shared_sstable& b) {
        return dht::ring_position(a->get_first_decorated_key()).less_compare(*schema, dht::ring_position(b->get_first_decorated_key()));
    });

@@ -253,31 +262,34 @@ time_window_compaction_strategy::get_reshaping_job(std::vector<sstables::shared_
        }
    }

-    auto is_disjoint = [&schema, mode, max_sstables] (const std::vector<sstables::shared_sstable>& ssts) {
+    auto is_disjoint = [&schema, mode, max_sstables](const std::vector<sstables::shared_sstable>& ssts) {
        size_t tolerance = (mode == reshape_mode::relaxed) ? max_sstables : 0;
        return sstable_set_overlapping_count(schema, ssts) <= tolerance;
    };

-    clogger.debug("time_window_compaction_strategy::get_reshaping_job: offstrategy_threshold={} max_sstables={} multi_window={} disjoint={} single_window={} disjoint={}",
-            offstrategy_threshold, max_sstables,
-            multi_window.size(), !multi_window.empty() && sstable_set_overlapping_count(schema, multi_window) == 0,
-            single_window.size(), !single_window.empty() && sstable_set_overlapping_count(schema, single_window) == 0);
+    clogger.debug("time_window_compaction_strategy::get_reshaping_job: offstrategy_threshold={} max_sstables={} multi_window={} disjoint={} "
+                  "single_window={} disjoint={}",
+            offstrategy_threshold, max_sstables, multi_window.size(), seastar::value_of([&] {
+                return !multi_window.empty() && sstable_set_overlapping_count(schema, multi_window) == 0;
+            }),
+            single_window.size(), seastar::value_of([&] {
+                return !single_window.empty() && sstable_set_overlapping_count(schema, single_window) == 0;
+            }));

-    auto get_job_size = [] (const std::vector<sstables::shared_sstable>& ssts) {
+    auto get_job_size = [](const std::vector<sstables::shared_sstable>& ssts) {
        return std::ranges::fold_left(ssts | std::views::transform(std::mem_fn(&sstables::sstable::bytes_on_disk)), uint64_t(0), std::plus{});
    };

    // Targets a space overhead of 10%. All disjoint sstables can be compacted together as long as they won't
    // cause an overhead above target. Otherwise, the job targets a maximum of #max_threshold sstables.
-    auto need_trimming = [&] (const std::vector<sstables::shared_sstable>& ssts, const uint64_t job_size, bool is_disjoint) {
+    auto need_trimming = [&](const std::vector<sstables::shared_sstable>& ssts, const uint64_t job_size, bool is_disjoint) {
        const size_t min_sstables = 2;
        auto is_above_target_size = job_size > target_job_size;

-        return (ssts.size() > max_sstables && !is_disjoint) ||
-               (ssts.size() > min_sstables && is_above_target_size);
+        return (ssts.size() > max_sstables && !is_disjoint) || (ssts.size() > min_sstables && is_above_target_size);
    };

-    auto maybe_trim_job = [&need_trimming] (std::vector<sstables::shared_sstable>& ssts, uint64_t job_size, bool is_disjoint) {
+    auto maybe_trim_job = [&need_trimming](std::vector<sstables::shared_sstable>& ssts, uint64_t job_size, bool is_disjoint) {
        while (need_trimming(ssts, job_size, is_disjoint)) {
            auto sst = ssts.back();
            ssts.pop_back();
@@ -294,7 +306,7 @@ time_window_compaction_strategy::get_reshaping_job(std::vector<sstables::shared_
            // For example, if there are N sstables spanning window W, where N <= 32, then we can produce all data for W
            // in a single compaction round, removing the need to later compact W to reduce its number of files.
            auto sort_size = std::min(max_sstables, multi_window.size());
-            std::ranges::partial_sort(multi_window, multi_window.begin() + sort_size, std::ranges::less(), [] (const sstables::shared_sstable &a) {
+            std::ranges::partial_sort(multi_window, multi_window.begin() + sort_size, std::ranges::less(), [](const sstables::shared_sstable& a) {
                return a->get_stats_metadata().max_timestamp;
            });
            maybe_trim_job(multi_window, job_size, disjoint);
@@ -334,8 +346,7 @@ time_window_compaction_strategy::get_reshaping_job(std::vector<sstables::shared_
    return compaction_descriptor();
 }

-future<compaction_descriptor>
-time_window_compaction_strategy::get_sstables_for_compaction(compaction_group_view& table_s, strategy_control& control) {
+future<compaction_descriptor> time_window_compaction_strategy::get_sstables_for_compaction(compaction_group_view& table_s, strategy_control& control) {
    auto state = get_state(table_s);
    auto compaction_time = gc_clock::now();
    auto candidates = co_await control.candidates(table_s);
@@ -369,10 +380,8 @@ time_window_compaction_strategy::get_sstables_for_compaction(compaction_group_vi
    co_return compaction_descriptor(std::move(compaction_candidates));
 }

-time_window_compaction_strategy::bucket_compaction_mode
-time_window_compaction_strategy::compaction_mode(const time_window_compaction_strategy_state& state,
-        const bucket_t& bucket, timestamp_type bucket_key,
-        timestamp_type now, size_t min_threshold) const {
+time_window_compaction_strategy::bucket_compaction_mode time_window_compaction_strategy::compaction_mode(
+        const time_window_compaction_strategy_state& state, const bucket_t& bucket, timestamp_type bucket_key, timestamp_type now, size_t min_threshold) const {
    // STCS will also be performed on older window buckets, to avoid a bad write and
    // space amplification when something like read repair cause small updates to
    // those past windows.
@@ -385,8 +394,7 @@ time_window_compaction_strategy::compaction_mode(const time_window_compaction_st
    return bucket_compaction_mode::none;
 }

-std::vector<sstables::shared_sstable>
-time_window_compaction_strategy::get_next_non_expired_sstables(compaction_group_view& table_s, strategy_control& control,
+std::vector<sstables::shared_sstable> time_window_compaction_strategy::get_next_non_expired_sstables(compaction_group_view& table_s, strategy_control& control,
        std::vector<sstables::shared_sstable> non_expiring_sstables, gc_clock::time_point compaction_time, time_window_compaction_strategy_state& state) {
    auto most_interesting = get_compaction_candidates(table_s, control, non_expiring_sstables, state);

@@ -400,31 +408,29 @@ time_window_compaction_strategy::get_next_non_expired_sstables(compaction_group_

    // if there is no sstable to compact in standard way, try compacting single sstable whose droppable tombstone
    // ratio is greater than threshold.
-    std::erase_if(non_expiring_sstables, [this, compaction_time, &table_s] (const sstables::shared_sstable& sst) -> bool {
+    std::erase_if(non_expiring_sstables, [this, compaction_time, &table_s](const sstables::shared_sstable& sst) -> bool {
        return !worth_dropping_tombstones(sst, compaction_time, table_s);
    });
    if (non_expiring_sstables.empty()) {
        return {};
    }
-    auto it = std::ranges::min_element(non_expiring_sstables, [] (auto& i, auto& j) {
+    auto it = std::ranges::min_element(non_expiring_sstables, [](auto& i, auto& j) {
        return i->get_stats_metadata().min_timestamp < j->get_stats_metadata().min_timestamp;
    });
-    return { *it };
+    return {*it};
 }

-std::vector<sstables::shared_sstable>
-time_window_compaction_strategy::get_compaction_candidates(compaction_group_view& table_s, strategy_control& control,
-    std::vector<sstables::shared_sstable> candidate_sstables, time_window_compaction_strategy_state& state) {
+std::vector<sstables::shared_sstable> time_window_compaction_strategy::get_compaction_candidates(compaction_group_view& table_s, strategy_control& control,
+        std::vector<sstables::shared_sstable> candidate_sstables, time_window_compaction_strategy_state& state) {
    auto [buckets, max_timestamp] = get_buckets(std::move(candidate_sstables), _options);
    // Update the highest window seen, if necessary
    state.highest_window_seen = std::max(state.highest_window_seen, max_timestamp);

    return newest_bucket(table_s, control, std::move(buckets), table_s.min_compaction_threshold(), table_s.schema()->max_compaction_threshold(),
-        state.highest_window_seen, state);
+            state.highest_window_seen, state);
 }

-timestamp_type
-time_window_compaction_strategy::get_window_lower_bound(std::chrono::seconds sstable_window_size, timestamp_type timestamp) {
+timestamp_type time_window_compaction_strategy::get_window_lower_bound(std::chrono::seconds sstable_window_size, timestamp_type timestamp) {
    using namespace std::chrono;
    // mask out window size from timestamp to get lower bound of its window
    auto num_windows = microseconds(timestamp) / sstable_window_size;
@@ -432,8 +438,8 @@ time_window_compaction_strategy::get_window_lower_bound(std::chrono::seconds sst
    return duration_cast<microseconds>(num_windows * sstable_window_size).count();
 }

-std::pair<std::map<timestamp_type, std::vector<sstables::shared_sstable>>, timestamp_type>
-time_window_compaction_strategy::get_buckets(std::vector<sstables::shared_sstable> files, const time_window_compaction_strategy_options& options) {
+std::pair<std::map<timestamp_type, std::vector<sstables::shared_sstable>>, timestamp_type> time_window_compaction_strategy::get_buckets(
+        std::vector<sstables::shared_sstable> files, const time_window_compaction_strategy_options& options) {
    std::map<timestamp_type, std::vector<sstables::shared_sstable>> buckets;

    timestamp_type max_timestamp = 0;
@@ -450,11 +456,13 @@ time_window_compaction_strategy::get_buckets(std::vector<sstables::shared_sstabl
    return std::make_pair(std::move(buckets), max_timestamp);
 }

-}
+} // namespace compaction

 template <>
 struct fmt::formatter<std::map<compaction::timestamp_type, std::vector<sstables::shared_sstable>>> {
-    constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
+    constexpr auto parse(format_parse_context& ctx) {
+        return ctx.begin();
+    }
    auto format(const std::map<compaction::timestamp_type, std::vector<sstables::shared_sstable>>& buckets, fmt::format_context& ctx) const {
        auto out = fmt::format_to(ctx.out(), "  buckets = {{\n");
        for (auto& [timestamp, sstables] : buckets | std::views::reverse) {
@@ -466,9 +474,9 @@ struct fmt::formatter<std::map<compaction::timestamp_type, std::vector<sstables:

 namespace compaction {

-std::vector<sstables::shared_sstable>
-time_window_compaction_strategy::newest_bucket(compaction_group_view& table_s, strategy_control& control, std::map<timestamp_type, std::vector<sstables::shared_sstable>> buckets,
-        int min_threshold, int max_threshold, timestamp_type now, time_window_compaction_strategy_state& state) {
+std::vector<sstables::shared_sstable> time_window_compaction_strategy::newest_bucket(compaction_group_view& table_s, strategy_control& control,
+        std::map<timestamp_type, std::vector<sstables::shared_sstable>> buckets, int min_threshold, int max_threshold, timestamp_type now,
+        time_window_compaction_strategy_state& state) {
    clogger.debug("time_window_compaction_strategy::newest_bucket:\n  now {}\n{}", now, buckets);

    for (auto&& [key, bucket] : buckets | std::views::reverse) {
@@ -509,8 +517,7 @@ time_window_compaction_strategy::newest_bucket(compaction_group_view& table_s, s
    return {};
 }

-std::vector<sstables::shared_sstable>
-time_window_compaction_strategy::trim_to_threshold(std::vector<sstables::shared_sstable> bucket, int max_threshold) {
+std::vector<sstables::shared_sstable> time_window_compaction_strategy::trim_to_threshold(std::vector<sstables::shared_sstable> bucket, int max_threshold) {
    auto n = std::min(bucket.size(), size_t(max_threshold));
    // Trim the largest sstables off the end to meet the maxThreshold
    std::ranges::partial_sort(bucket, bucket.begin() + n, std::ranges::less(), std::mem_fn(&sstables::sstable::ondisk_data_size));
@@ -542,8 +549,8 @@ future<int64_t> time_window_compaction_strategy::estimated_pending_compactions(c
    co_return n;
 }

-std::vector<compaction_descriptor>
-time_window_compaction_strategy::get_cleanup_compaction_jobs(compaction_group_view& table_s, std::vector<sstables::shared_sstable> candidates) const {
+std::vector<compaction_descriptor> time_window_compaction_strategy::get_cleanup_compaction_jobs(
+        compaction_group_view& table_s, std::vector<sstables::shared_sstable> candidates) const {
    std::vector<compaction_descriptor> ret;
    for (auto&& [_, sstables] : get_buckets(std::move(candidates), _options).first) {
        auto per_window_jobs = size_tiered_compaction_strategy(_stcs_options).get_cleanup_compaction_jobs(table_s, std::move(sstables));
@@ -556,4 +563,4 @@ std::unique_ptr<sstables::sstable_set_impl> time_window_compaction_strategy::mak
    return std::make_unique<sstables::time_series_sstable_set>(ts.schema(), _options.enable_optimized_twcs_queries);
 }

-}
+} // namespace compaction
--- a/conf/scylla.yaml
+++ b/conf/scylla.yaml
@@ -397,6 +397,17 @@ commitlog_total_space_in_mb: -1
 #      you can cache more hot rows
 # column_index_size_in_kb: 64

+# sstable format version for newly written sstables.
+# Currently allowed values are `me` and `ms`.
+# If not specified in the config, this defaults to `me`.
+#
+# The difference between `me` and `ms` lies in the data structures used
+# in the primary index.
+# In short, `ms` needs more CPU during sstable writes,
+# but should behave better during reads,
+# although it might behave worse for very long clustering keys.
+sstable_format: ms
+
 # Auto-scaling of the promoted index prevents running out of memory
 # when the promoted index grows too large (due to partitions with many rows
 # vs. too small column_index_size_in_kb).  When the serialized representation
@@ -639,7 +650,7 @@ strict_is_not_null_in_views: true
 # * workdir: the node will open the maintenance socket on the path <scylla's workdir>/cql.m,
 #            where <scylla's workdir> is a path defined by the workdir configuration option,
 # * <socket path>: the node will open the maintenance socket on the path <socket path>.
-maintenance_socket: ignore
+maintenance_socket: workdir

 # If set to true, configuration parameters defined with LiveUpdate option can be updated in runtime with CQL
 # by updating system.config virtual table. If we don't want any configuration parameter to be changed in runtime
@@ -648,10 +659,9 @@ maintenance_socket: ignore
 # e.g. for cloud users, for whom scylla's configuration should be changed only by support engineers.
 # live_updatable_config_params_changeable_via_cql: true

-# ****************
-# *  GUARDRAILS  *
-# ****************
-
+#
+# Guardrails options
+#
 # Guardrails to warn or fail when Replication Factor is smaller/greater than the threshold.
 # Please note that the value of 0 is always allowed,
 # which means that having no replication at all, i.e. RF = 0, is always valid.
@@ -661,6 +671,27 @@ maintenance_socket: ignore
 # minimum_replication_factor_warn_threshold:  3
 # maximum_replication_factor_warn_threshold: -1
 # maximum_replication_factor_fail_threshold: -1
+#
+# Guardrails to warn about or disallow creating a keyspace with specific replication strategy.
+# Each of these 2 settings is a list storing replication strategies considered harmful.
+# The replication strategies to choose from are:
+# 1) SimpleStrategy,
+# 2) NetworkTopologyStrategy,
+# 3) LocalStrategy,
+# 4) EverywhereStrategy
+#
+# replication_strategy_warn_list:
+#  - SimpleStrategy
+# replication_strategy_fail_list:
+#
+# Guardrail to enable the deprecated feature of CREATE TABLE WITH COMPACT STORAGE.
+# enable_create_table_with_compact_storage: false
+#
+# Guardrails to limit usage of selected consistency levels for writes.
+# Adding a warning to a CQL query response can significantly increase network
+# traffic and decrease overall throughput.
+# write_consistency_levels_warned: []
+# write_consistency_levels_disallowed: []

 #
 # System information encryption settings
@@ -838,21 +869,6 @@ maintenance_socket: ignore
 #   key_namespace: <kmip key namespace> (optional)
 #

-# Guardrails to warn about or disallow creating a keyspace with specific replication strategy.
-# Each of these 2 settings is a list storing replication strategies considered harmful.
-# The replication strategies to choose from are:
-# 1) SimpleStrategy,
-# 2) NetworkTopologyStrategy,
-# 3) LocalStrategy,
-# 4) EverywhereStrategy
-#
-# replication_strategy_warn_list:
-#  - SimpleStrategy
-# replication_strategy_fail_list:
-
-# Guardrail to enable the deprecated feature of CREATE TABLE WITH COMPACT STORAGE.
-# enable_create_table_with_compact_storage: false
-
 # Control tablets for new keyspaces.
 # Can be set to: disabled|enabled|enforced
 #
--- a/configure.py
+++ b/configure.py
@@ -544,7 +544,6 @@ scylla_tests = set([
    'test/boost/caching_options_test',
    'test/boost/canonical_mutation_test',
    'test/boost/cartesian_product_test',
-    'test/boost/cdc_generation_test',
    'test/boost/cell_locker_test',
    'test/boost/checksum_utils_test',
    'test/boost/chunked_managed_vector_test',
@@ -619,6 +618,7 @@ scylla_tests = set([
    'test/boost/reservoir_sampling_test',
    'test/boost/result_utils_test',
    'test/boost/rest_client_test',
+    'test/boost/rolling_max_tracker_test',
    'test/boost/reusable_buffer_test',
    'test/boost/rust_test',
    'test/boost/s3_test',
@@ -896,6 +896,9 @@ scylla_core = (['message/messaging_service.cc',
                'replica/multishard_query.cc',
                'replica/mutation_dump.cc',
                'replica/querier.cc',
+                'replica/logstor/segment_manager.cc',
+                'replica/logstor/logstor.cc',
+                'replica/logstor/write_buffer.cc',
                'mutation/atomic_cell.cc',
                'mutation/canonical_mutation.cc',
                'mutation/frozen_mutation.cc',
@@ -1204,6 +1207,7 @@ scylla_core = (['message/messaging_service.cc',
                'gms/application_state.cc',
                'gms/inet_address.cc',
                'dht/i_partitioner.cc',
+                'dht/fixed_shard.cc',
                'dht/token.cc',
                'dht/murmur3_partitioner.cc',
                'dht/boot_strapper.cc',
@@ -1239,7 +1243,6 @@ scylla_core = (['message/messaging_service.cc',
                'service/pager/query_pagers.cc',
                'service/qos/qos_common.cc',
                'service/qos/service_level_controller.cc',
-                'service/qos/standard_service_level_distributed_data_accessor.cc',
                'service/qos/raft_service_level_distributed_data_accessor.cc',
                'streaming/stream_task.cc',
                'streaming/stream_session.cc',
@@ -1273,8 +1276,8 @@ scylla_core = (['message/messaging_service.cc',
                'auth/common.cc',
                'auth/default_authorizer.cc',
                'auth/resource.cc',
-                'auth/roles-metadata.cc',
                'auth/passwords.cc',
+                'auth/maintenance_socket_authenticator.cc',
                'auth/password_authenticator.cc',
                'auth/permission.cc',
                'auth/service.cc',
@@ -1340,6 +1343,7 @@ scylla_core = (['message/messaging_service.cc',
                'service/strong_consistency/groups_manager.cc',
                'service/strong_consistency/coordinator.cc',
                'service/strong_consistency/state_machine.cc',
+                'service/strong_consistency/raft_groups_storage.cc',
                'service/raft/group0_state_id_handler.cc',
                'service/raft/group0_state_machine.cc',
                'service/raft/group0_state_machine_merger.cc',
@@ -1466,6 +1470,7 @@ idls = ['idl/gossip_digest.idl.hh',
        'idl/query.idl.hh',
        'idl/idl_test.idl.hh',
        'idl/commitlog.idl.hh',
+        'idl/logstor.idl.hh',
        'idl/tracing.idl.hh',
        'idl/consistency_level.idl.hh',
        'idl/cache_temperature.idl.hh',
@@ -1473,6 +1478,7 @@ idls = ['idl/gossip_digest.idl.hh',
        'idl/messaging_service.idl.hh',
        'idl/paxos.idl.hh',
        'idl/raft.idl.hh',
+        'idl/raft_util.idl.hh',
        'idl/raft_storage.idl.hh',
        'idl/group0.idl.hh',
        'idl/hinted_handoff.idl.hh',
@@ -1492,7 +1498,9 @@ idls = ['idl/gossip_digest.idl.hh',
        'idl/gossip.idl.hh',
        'idl/migration_manager.idl.hh',
        "idl/node_ops.idl.hh",
-        "idl/tasks.idl.hh"
+        "idl/tasks.idl.hh",
+        "idl/client_state.idl.hh",
+        "idl/forward_cql.idl.hh",
        ]

 scylla_tests_generic_dependencies = [
@@ -1585,6 +1593,7 @@ pure_boost_tests = set([
    'test/boost/wrapping_interval_test',
    'test/boost/range_tombstone_list_test',
    'test/boost/reservoir_sampling_test',
+    'test/boost/rolling_max_tracker_test',
    'test/boost/serialization_test',
    'test/boost/small_vector_test',
    'test/boost/top_k_test',
@@ -1733,6 +1742,7 @@ deps['test/boost/url_parse_test'] = ['utils/http.cc', 'test/boost/url_parse_test
 deps['test/boost/murmur_hash_test'] = ['bytes.cc', 'utils/murmur_hash.cc', 'test/boost/murmur_hash_test.cc']
 deps['test/boost/allocation_strategy_test'] = ['test/boost/allocation_strategy_test.cc', 'utils/logalloc.cc', 'utils/dynamic_bitset.cc', 'utils/labels.cc']
 deps['test/boost/log_heap_test'] = ['test/boost/log_heap_test.cc']
+deps['test/boost/rolling_max_tracker_test'] = ['test/boost/rolling_max_tracker_test.cc']
 deps['test/boost/estimated_histogram_test'] = ['test/boost/estimated_histogram_test.cc']
 deps['test/boost/summary_test'] = ['test/boost/summary_test.cc']
 deps['test/boost/anchorless_list_test'] = ['test/boost/anchorless_list_test.cc']
--- a/cql3/column_specification.cc
+++ b/cql3/column_specification.cc
@@ -23,7 +23,7 @@ column_specification::column_specification(std::string_view ks_name_, std::strin

 bool column_specification::all_in_same_table(const std::vector<lw_shared_ptr<column_specification>>& names)
 {
-    SCYLLA_ASSERT(!names.empty());
+    throwing_assert(!names.empty());

    auto first = names.front();
    return std::all_of(std::next(names.begin()), names.end(), [first] (auto&& spec) {
--- a/cql3/cql3_type.cc
+++ b/cql3/cql3_type.cc
@@ -49,9 +49,9 @@ static cql3_type::kind get_cql3_kind(const abstract_type& t) {
        cql3_type::kind operator()(const uuid_type_impl&) { return cql3_type::kind::UUID; }
        cql3_type::kind operator()(const varint_type_impl&) { return cql3_type::kind::VARINT; }
        cql3_type::kind operator()(const reversed_type_impl& r) { return get_cql3_kind(*r.underlying_type()); }
-        cql3_type::kind operator()(const tuple_type_impl&) { SCYLLA_ASSERT(0 && "no kind for this type"); }
-        cql3_type::kind operator()(const vector_type_impl&) { SCYLLA_ASSERT(0 && "no kind for this type"); }
-        cql3_type::kind operator()(const collection_type_impl&) { SCYLLA_ASSERT(0 && "no kind for this type"); }
+        cql3_type::kind operator()(const tuple_type_impl&) { throwing_assert(0 && "no kind for this type"); }
+        cql3_type::kind operator()(const vector_type_impl&) { throwing_assert(0 && "no kind for this type"); }
+        cql3_type::kind operator()(const collection_type_impl&) { throwing_assert(0 && "no kind for this type"); }
    };
    return visit(t, visitor{});
 }
@@ -124,7 +124,7 @@ class cql3_type::raw_collection : public raw {
        } else if (_kind == abstract_type::kind::map) {
            return format("{}map<{}, {}>{}", start, _keys, _values, end);
        }
-        abort();
+        throwing_assert(0 && "invalid raw_collection kind");
    }
 public:
    raw_collection(const abstract_type::kind kind, shared_ptr<raw> keys, shared_ptr<raw> values)
@@ -150,7 +150,7 @@ public:
    }

    virtual cql3_type prepare_internal(const sstring& keyspace, const data_dictionary::user_types_metadata& user_types) override {
-        SCYLLA_ASSERT(_values); // "Got null values type for a collection";
+        throwing_assert(_values); // "Got null values type for a collection";

        if (_values->is_counter()) {
            throw exceptions::invalid_request_exception(format("Counters are not allowed inside collections: {}", *this));
@@ -190,7 +190,7 @@ private:
            }
            return cql3_type(set_type_impl::get_instance(_values->prepare_internal(keyspace, user_types).get_type(), !is_frozen()));
        } else if (_kind == abstract_type::kind::map) {
-            SCYLLA_ASSERT(_keys); // "Got null keys type for a collection";
+            throwing_assert(_keys); // "Got null keys type for a collection";
            if (_keys->is_duration()) {
                throw exceptions::invalid_request_exception(format("Durations are not allowed as map keys: {}", *this));
            }
@@ -198,7 +198,7 @@ private:
                                                         _values->prepare_internal(keyspace, user_types).get_type(),
                                                         !is_frozen()));
        }
-        abort();
+        throwing_assert(0 && "do_prepare invalid kind");
    }
 };

--- a/cql3/expr/expression.cc
+++ b/cql3/expr/expression.cc
@@ -1603,7 +1603,7 @@ static cql3::raw_value do_evaluate(const collection_constructor& collection, con
        case collection_constructor::style_type::vector:
            return evaluate_vector(collection, inputs);
    }
-    std::abort();
+    throwing_assert(0 && "do_evaluate invalid style");
 }

 static cql3::raw_value do_evaluate(const usertype_constructor& user_val, const evaluation_inputs& inputs) {
--- a/cql3/expr/prepare_expr.cc
+++ b/cql3/expr/prepare_expr.cc
@@ -876,7 +876,7 @@ cast_test_assignment(const cast& c, data_dictionary::database db, const sstring&
            return assignment_testable::test_result::NOT_ASSIGNABLE;
        }
    } catch (exceptions::invalid_request_exception& e) {
-        abort();
+        throwing_assert(0 && "cast_test_assignment exception");
    }
 }

--- a/cql3/functions/functions.cc
+++ b/cql3/functions/functions.cc
@@ -544,7 +544,7 @@ functions::get_user_aggregates(const sstring& keyspace) const {

 std::ranges::subrange<functions::declared_t::const_iterator>
 functions::find(const function_name& name) const {
-    SCYLLA_ASSERT(name.has_keyspace()); // : "function name not fully qualified";
+    throwing_assert(name.has_keyspace()); // : "function name not fully qualified";
    auto pair = _declared.equal_range(name);
    return std::ranges::subrange(pair.first, pair.second);
 }
--- a/cql3/functions/vector_similarity_fcts.cc
+++ b/cql3/functions/vector_similarity_fcts.cc
@@ -10,8 +10,9 @@
 #include "types/types.hh"
 #include "types/vector.hh"
 #include "exceptions/exceptions.hh"
-#include <span>
 #include <bit>
+#include <span>
+#include <seastar/core/byteorder.hh>

 namespace cql3 {
 namespace functions {
@@ -30,14 +31,10 @@ std::vector<float> extract_float_vector(const bytes_opt& param, vector_dimension
                       expected_size, dimension, param->size()));
    }

-    std::vector<float> result;
-    result.reserve(dimension);
-
-    bytes_view view(*param);
+    std::vector<float> result(dimension);
+    const char* p = reinterpret_cast<const char*>(param->data());
    for (size_t i = 0; i < dimension; ++i) {
-        // read_simple handles network byte order (big-endian) conversion
-        uint32_t raw = read_simple<uint32_t>(view);
-        result.push_back(std::bit_cast<float>(raw));
+        result[i] = std::bit_cast<float>(consume_be<uint32_t>(p));
    }

    return result;
@@ -55,13 +52,14 @@ namespace {
 // You should only use this function if you need to preserve the original vectors and cannot normalize
 // them in advance.
 float compute_cosine_similarity(std::span<const float> v1, std::span<const float> v2) {
-    double dot_product = 0.0;
-    double squared_norm_a = 0.0;
-    double squared_norm_b = 0.0;
+    #pragma clang fp contract(fast) reassociate(on) // Allow the compiler to optimize the loop.
+    float dot_product = 0.0;
+    float squared_norm_a = 0.0;
+    float squared_norm_b = 0.0;

    for (size_t i = 0; i < v1.size(); ++i) {
-        double a = v1[i];
-        double b = v2[i];
+        float a = v1[i];
+        float b = v2[i];

        dot_product += a * b;
        squared_norm_a += a * a;
@@ -79,13 +77,14 @@ float compute_cosine_similarity(std::span<const float> v1, std::span<const float
 }

 float compute_euclidean_similarity(std::span<const float> v1, std::span<const float> v2) {
-    double sum = 0.0;
+    #pragma clang fp contract(fast) reassociate(on) // Allow the compiler to optimize the loop.
+    float sum = 0.0;

    for (size_t i = 0; i < v1.size(); ++i) {
-        double a = v1[i];
-        double b = v2[i];
+        float a = v1[i];
+        float b = v2[i];

-        double diff = a - b;
+        float diff = a - b;
        sum += diff * diff;
    }

@@ -98,11 +97,12 @@ float compute_euclidean_similarity(std::span<const float> v1, std::span<const fl
 // Assumes that both vectors are L2-normalized.
 // This similarity is intended as an optimized way to perform cosine similarity calculation.
 float compute_dot_product_similarity(std::span<const float> v1, std::span<const float> v2) {
-    double dot_product = 0.0;
+    #pragma clang fp contract(fast) reassociate(on) // Allow the compiler to optimize the loop.
+    float dot_product = 0.0;

    for (size_t i = 0; i < v1.size(); ++i) {
-        double a = v1[i];
-        double b = v2[i];
+        float a = v1[i];
+        float b = v2[i];
        dot_product += a * b;
    }

--- a/cql3/keyspace_element_name.cc
+++ b/cql3/keyspace_element_name.cc
@@ -25,7 +25,7 @@ bool keyspace_element_name::has_keyspace() const

 const sstring& keyspace_element_name::get_keyspace() const
 {
-    SCYLLA_ASSERT(_ks_name);
+    throwing_assert(_ks_name);
    return *_ks_name;
 }

--- a/cql3/lists.cc
+++ b/cql3/lists.cc
@@ -62,7 +62,7 @@ lists::setter_by_index::fill_prepare_context(prepare_context& ctx) {
 void
 lists::setter_by_index::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) {
    // we should not get here for frozen lists
-    SCYLLA_ASSERT(column.type->is_multi_cell()); // "Attempted to set an individual element on a frozen list";
+    throwing_assert(column.type->is_multi_cell()); // "Attempted to set an individual element on a frozen list";

    auto index = expr::evaluate(_idx, params._options);
    if (index.is_null()) {
@@ -105,7 +105,7 @@ lists::setter_by_uuid::requires_read() const {
 void
 lists::setter_by_uuid::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) {
    // we should not get here for frozen lists
-    SCYLLA_ASSERT(column.type->is_multi_cell()); // "Attempted to set an individual element on a frozen list";
+    throwing_assert(column.type->is_multi_cell()); // "Attempted to set an individual element on a frozen list";

    auto index = expr::evaluate(_idx, params._options);
    auto value = expr::evaluate(*_e, params._options);
@@ -133,7 +133,7 @@ lists::setter_by_uuid::execute(mutation& m, const clustering_key_prefix& prefix,
 void
 lists::appender::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) {
    const cql3::raw_value value = expr::evaluate(*_e, params._options);
-    SCYLLA_ASSERT(column.type->is_multi_cell()); // "Attempted to append to a frozen list";
+    throwing_assert(column.type->is_multi_cell()); // "Attempted to append to a frozen list";
    do_append(value, m, prefix, column, params);
 }

@@ -189,7 +189,7 @@ lists::do_append(const cql3::raw_value& list_value,

 void
 lists::prepender::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) {
-    SCYLLA_ASSERT(column.type->is_multi_cell()); // "Attempted to prepend to a frozen list";
+    throwing_assert(column.type->is_multi_cell()); // "Attempted to prepend to a frozen list";
    cql3::raw_value lvalue = expr::evaluate(*_e, params._options);
    if (lvalue.is_null()) {
        return;
@@ -244,7 +244,7 @@ lists::discarder::requires_read() const {

 void
 lists::discarder::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) {
-    SCYLLA_ASSERT(column.type->is_multi_cell()); // "Attempted to delete from a frozen list";
+    throwing_assert(column.type->is_multi_cell()); // "Attempted to delete from a frozen list";

    auto&& existing_list = params.get_prefetched_list(m.key(), prefix, column);
    // We want to call bind before possibly returning to reject queries where the value provided is not a list.
@@ -300,7 +300,7 @@ lists::discarder_by_index::requires_read() const {

 void
 lists::discarder_by_index::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) {
-    SCYLLA_ASSERT(column.type->is_multi_cell()); // "Attempted to delete an item by index from a frozen list";
+    throwing_assert(column.type->is_multi_cell()); // "Attempted to delete an item by index from a frozen list";
    cql3::raw_value index = expr::evaluate(*_e, params._options);
    if (index.is_null()) {
        throw exceptions::invalid_request_exception("Invalid null value for list index");
--- a/cql3/maps.cc
+++ b/cql3/maps.cc
@@ -45,7 +45,7 @@ maps::setter_by_key::fill_prepare_context(prepare_context& ctx) {
 void
 maps::setter_by_key::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) {
    using exceptions::invalid_request_exception;
-    SCYLLA_ASSERT(column.type->is_multi_cell()); // "Attempted to set a value for a single key on a frozen map"m
+    throwing_assert(column.type->is_multi_cell()); // "Attempted to set a value for a single key on a frozen map";
    auto key = expr::evaluate(_k, params._options);
    auto value = expr::evaluate(*_e, params._options);
    if (key.is_null()) {
@@ -63,7 +63,7 @@ maps::setter_by_key::execute(mutation& m, const clustering_key_prefix& prefix, c

 void
 maps::putter::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) {
-    SCYLLA_ASSERT(column.type->is_multi_cell()); // "Attempted to add items to a frozen map";
+    throwing_assert(column.type->is_multi_cell()); // "Attempted to add items to a frozen map";
    cql3::raw_value value = expr::evaluate(*_e, params._options);
    do_put(m, prefix, params, value, column);
 }
@@ -96,7 +96,7 @@ maps::do_put(mutation& m, const clustering_key_prefix& prefix, const update_para

 void
 maps::discarder_by_key::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) {
-    SCYLLA_ASSERT(column.type->is_multi_cell()); // "Attempted to delete a single key in a frozen map";
+    throwing_assert(column.type->is_multi_cell()); // "Attempted to delete a single key in a frozen map";
    cql3::raw_value key = expr::evaluate(*_e, params._options);
    if (key.is_null()) {
        throw exceptions::invalid_request_exception("Invalid null map key");
--- a/cql3/operation.cc
+++ b/cql3/operation.cc
@@ -67,7 +67,7 @@ operation::set_element::prepare(data_dictionary::database db, const sstring& key
        verify_no_aggregate_functions(mval, "SET clause");
        return make_shared<maps::setter_by_key>(receiver, std::move(key), std::move(mval));
    }
-    abort();
+    throwing_assert(0 && "prepare set_element collection type");
 }

 bool
@@ -166,7 +166,7 @@ operation::addition::prepare(data_dictionary::database db, const sstring& keyspa
    } else if (ctype->get_kind() == abstract_type::kind::map) {
        return make_shared<maps::putter>(receiver, std::move(v));
    } else {
-        abort();
+        throwing_assert(0 && "prepare addition collection type");
    }
 }

@@ -216,7 +216,7 @@ operation::subtraction::prepare(data_dictionary::database db, const sstring& key
        verify_no_aggregate_functions(v, "SET clause");
        return ::make_shared<sets::discarder>(receiver, std::move(v));
    }
-    abort();
+    throwing_assert(0 && "prepare subtraction collection type");
 }

 bool
@@ -267,7 +267,7 @@ operation::set_value::prepare(data_dictionary::database db, const sstring& keysp
        } else if (k == abstract_type::kind::map) {
            return make_shared<maps::setter>(receiver, std::move(v));
        } else {
-            abort();
+            throwing_assert(0 && "prepare set_value collection type");
        }
    }

@@ -385,7 +385,7 @@ operation::element_deletion::prepare(data_dictionary::database db, const sstring
        verify_no_aggregate_functions(key, "SET clause");
        return make_shared<maps::discarder_by_key>(receiver, std::move(key));
    }
-    abort();
+    throwing_assert(0 && "prepare element_deletion collection type");
 }

 expr::expression
--- a/cql3/prepared_statements_cache.hh
+++ b/cql3/prepared_statements_cache.hh
@@ -105,6 +105,7 @@ public:
    static const std::chrono::minutes entry_expiry;

    using key_type = prepared_cache_key_type;
+    using pinned_value_type = cache_value_ptr;
    using value_type = checked_weak_ptr;
    using statement_is_too_big = typename cache_type::entry_is_too_big;

@@ -116,9 +117,14 @@ public:
        : _cache(size, entry_expiry, logger)
    {}

+    template <typename LoadFunc>
+    future<pinned_value_type> get_pinned(const key_type& key, LoadFunc&& load) { // Same lookup as get(), but returns the cache_value_ptr from _cache.get_ptr() as-is.
+        return _cache.get_ptr(key.key(), [load = std::forward<LoadFunc>(load)] (const cache_key_type&) { return load(); });
+    }
+
    template <typename LoadFunc>
    future<value_type> get(const key_type& key, LoadFunc&& load) {
-        return _cache.get_ptr(key.key(), [load = std::forward<LoadFunc>(load)] (const cache_key_type&) { return load(); }).then([] (cache_value_ptr v_ptr) {
+        return get_pinned(key, std::forward<LoadFunc>(load)).then([] (cache_value_ptr v_ptr) {
            return make_ready_future<value_type>((*v_ptr)->checked_weak_from_this());
        });
    }
--- a/cql3/query_processor.cc
+++ b/cql3/query_processor.cc
@@ -11,6 +11,7 @@
 #include "cql3/query_processor.hh"

 #include <seastar/core/metrics.hh>
+#include <seastar/core/memory.hh>
 #include <seastar/core/shared_ptr.hh>
 #include <seastar/coroutine/parallel_for_each.hh>
 #include <seastar/coroutine/as_future.hh>
@@ -47,13 +48,15 @@ const sstring query_processor::CQL_VERSION = "3.3.1";
 const std::chrono::minutes prepared_statements_cache::entry_expiry = std::chrono::minutes(60);

 struct query_processor::remote {
-    remote(service::migration_manager& mm, service::mapreduce_service& fwd,
-           service::storage_service& ss, service::raft_group0_client& group0_client,
-           service::strong_consistency::coordinator& _sc_coordinator)
-            : mm(mm), mapreducer(fwd), ss(ss), group0_client(group0_client)
-            , sc_coordinator(_sc_coordinator)
-            , gate("query_processor::remote")
-    {}
+    remote(service::migration_manager& mm, service::mapreduce_service& fwd, service::storage_service& ss, service::raft_group0_client& group0_client,
+            service::strong_consistency::coordinator& _sc_coordinator)
+        : mm(mm)
+        , mapreducer(fwd)
+        , ss(ss)
+        , group0_client(group0_client)
+        , sc_coordinator(_sc_coordinator)
+        , gate("query_processor::remote") {
+    }

    service::migration_manager& mm;
    service::mapreduce_service& mapreducer;
@@ -76,28 +79,42 @@ static service::query_state query_state_for_internal_call() {
    return {service::client_state::for_internal_calls(), empty_service_permit()};
 }

-query_processor::query_processor(service::storage_proxy& proxy, data_dictionary::database db, service::migration_notifier& mn, vector_search::vector_store_client& vsc, query_processor::memory_config mcfg, cql_config& cql_cfg, utils::loading_cache_config auth_prep_cache_cfg, lang::manager& langm)
-        : _migration_subscriber{std::make_unique<migration_subscriber>(this)}
-        , _proxy(proxy)
-        , _db(db)
-        , _mnotifier(mn)
-        , _vector_store_client(vsc)
-        , _mcfg(mcfg)
-        , _cql_config(cql_cfg)
-        , _prepared_cache(prep_cache_log, _mcfg.prepared_statment_cache_size)
-        , _authorized_prepared_cache(std::move(auth_prep_cache_cfg), authorized_prepared_statements_cache_log)
-        , _auth_prepared_cache_cfg_cb([this] (uint32_t) { (void) _authorized_prepared_cache_config_action.trigger_later(); })
-        , _authorized_prepared_cache_config_action([this] { update_authorized_prepared_cache_config(); return make_ready_future<>(); })
-        , _authorized_prepared_cache_update_interval_in_ms_observer(_db.get_config().permissions_update_interval_in_ms.observe(_auth_prepared_cache_cfg_cb))
-        , _authorized_prepared_cache_validity_in_ms_observer(_db.get_config().permissions_validity_in_ms.observe(_auth_prepared_cache_cfg_cb))
-        , _lang_manager(langm)
-        {
+query_processor::query_processor(service::storage_proxy& proxy, data_dictionary::database db, service::migration_notifier& mn,
+        vector_search::vector_store_client& vsc, query_processor::memory_config mcfg, cql_config& cql_cfg, utils::loading_cache_config auth_prep_cache_cfg,
+        lang::manager& langm)
+    : _migration_subscriber{std::make_unique<migration_subscriber>(this)}
+    , _proxy(proxy)
+    , _db(db)
+    , _mnotifier(mn)
+    , _vector_store_client(vsc)
+    , _mcfg(mcfg)
+    , _cql_config(cql_cfg)
+    , _prepared_cache(prep_cache_log, _mcfg.prepared_statment_cache_size)
+    , _authorized_prepared_cache(std::move(auth_prep_cache_cfg), authorized_prepared_statements_cache_log)
+    , _auth_prepared_cache_cfg_cb([this](uint32_t) {
+        (void)_authorized_prepared_cache_config_action.trigger_later();
+    })
+    , _authorized_prepared_cache_config_action([this] {
+        update_authorized_prepared_cache_config();
+        return make_ready_future<>();
+    })
+    , _authorized_prepared_cache_update_interval_in_ms_observer(_db.get_config().permissions_update_interval_in_ms.observe(_auth_prepared_cache_cfg_cb))
+    , _authorized_prepared_cache_validity_in_ms_observer(_db.get_config().permissions_validity_in_ms.observe(_auth_prepared_cache_cfg_cb))
+    , _lang_manager(langm)
+    , _write_consistency_levels_warned_observer(_db.get_config().write_consistency_levels_warned.observe([this](const auto& v) {
+        _write_consistency_levels_warned = to_consistency_level_set(v);
+    }))
+    , _write_consistency_levels_disallowed_observer(_db.get_config().write_consistency_levels_disallowed.observe([this](const auto& v) {
+        _write_consistency_levels_disallowed = to_consistency_level_set(v);
+    })) {
+    _write_consistency_levels_warned = to_consistency_level_set(_db.get_config().write_consistency_levels_warned());
+    _write_consistency_levels_disallowed = to_consistency_level_set(_db.get_config().write_consistency_levels_disallowed());
    namespace sm = seastar::metrics;
    namespace stm = statements;
    using clevel = db::consistency_level;
    sm::label cl_label("consistency_level");

-    sm::label who_label("who");  // Who queried system tables
+    sm::label who_label("who"); // Who queried system tables
    const auto user_who_label_instance = who_label("user");
    const auto internal_who_label_instance = who_label("internal");

@@ -105,17 +122,11 @@ query_processor::query_processor(service::storage_proxy& proxy, data_dictionary:
    const auto system_ks_label_instance = ks_label("system");

    std::vector<sm::metric_definition> qp_group;
-    qp_group.push_back(sm::make_counter(
-        "statements_prepared",
-        _stats.prepare_invocations,
-        sm::description("Counts the total number of parsed CQL requests.")));
+    qp_group.push_back(sm::make_counter("statements_prepared", _stats.prepare_invocations, sm::description("Counts the total number of parsed CQL requests.")));
    for (auto cl = size_t(clevel::MIN_VALUE); cl <= size_t(clevel::MAX_VALUE); ++cl) {
-        qp_group.push_back(
-            sm::make_counter(
-                "queries",
-                _stats.queries_by_cl[cl],
-                sm::description("Counts queries by consistency level."),
-                {cl_label(clevel(cl)), basic_level}).set_skip_when_empty());
+        qp_group.push_back(sm::make_counter(
+                "queries", _stats.queries_by_cl[cl], sm::description("Counts queries by consistency level."), {cl_label(clevel(cl)), basic_level})
+                        .set_skip_when_empty());
    }
    _metrics.add_group("query_processor", qp_group);

@@ -506,8 +517,34 @@ query_processor::query_processor(service::storage_proxy& proxy, data_dictionary:
                            _cql_stats.replication_strategy_fail_list_violations,
                            sm::description("Counts the number of replication_strategy_fail_list guardrail violations, "
                                            "i.e. attempts to set a forbidden replication strategy in a keyspace via CREATE/ALTER KEYSPACE.")).set_skip_when_empty(),
+
+                    sm::make_counter(
+                            "forwarded_requests",
+                            _cql_stats.forwarded_requests,
+                            sm::description("Counts the total number of attempts to forward CQL requests to other nodes. One request may be forwarded multiple times, "
+                                            "particularly when a write is handled by a non-replica node.")).set_skip_when_empty(),
            });

+    std::vector<sm::metric_definition> cql_cl_group;
+    for (auto cl = size_t(clevel::MIN_VALUE); cl <= size_t(clevel::MAX_VALUE); ++cl) {
+        cql_cl_group.push_back(sm::make_counter("writes_per_consistency_level", _cql_stats.writes_per_consistency_level[cl],
+                sm::description("Counts the number of writes for each consistency level."), {cl_label(clevel(cl)), basic_level})
+                        .set_skip_when_empty());
+    }
+    _metrics.add_group("cql", cql_cl_group);
+
+    _metrics.add_group(
+            "cql", {
+                           sm::make_counter("write_consistency_levels_disallowed_violations", _cql_stats.write_consistency_levels_disallowed_violations,
+                                   sm::description("Counts the number of write_consistency_levels_disallowed guardrail violations, "
+                                                   "i.e. attempts to write with a forbidden consistency level."),
+                                   {basic_level}),
+                           sm::make_counter("write_consistency_levels_warned_violations", _cql_stats.write_consistency_levels_warned_violations,
+                                   sm::description("Counts the number of write_consistency_levels_warned guardrail violations, "
+                                                   "i.e. attempts to write with a discouraged consistency level."),
+                                   {basic_level}),
+                   });
+
    _mnotifier.register_listener(_migration_subscriber.get());
 }

@@ -517,15 +554,13 @@ query_processor::~query_processor() {
    }
 }

-std::pair<std::reference_wrapper<service::strong_consistency::coordinator>, gate::holder>
-query_processor::acquire_strongly_consistent_coordinator() {
+std::pair<std::reference_wrapper<service::strong_consistency::coordinator>, gate::holder> query_processor::acquire_strongly_consistent_coordinator() {
    auto [remote_, holder] = remote();
    return {remote_.get().sc_coordinator, std::move(holder)};
 }

-void query_processor::start_remote(service::migration_manager& mm, service::mapreduce_service& mapreducer,
-                                   service::storage_service& ss, service::raft_group0_client& group0_client,
-                                   service::strong_consistency::coordinator& sc_coordinator) {
+void query_processor::start_remote(service::migration_manager& mm, service::mapreduce_service& mapreducer, service::storage_service& ss,
+        service::raft_group0_client& group0_client, service::strong_consistency::coordinator& sc_coordinator) {
    _remote = std::make_unique<struct remote>(mm, mapreducer, ss, group0_client, sc_coordinator);
 }

@@ -545,23 +580,24 @@ future<> query_processor::stop() {
 }

 future<::shared_ptr<cql_transport::messages::result_message>> query_processor::execute_with_guard(
-        std::function<future<::shared_ptr<cql_transport::messages::result_message>>(service::query_state&, ::shared_ptr<cql_statement>, const query_options&, std::optional<service::group0_guard>)> fn,
+        std::function<future<::shared_ptr<cql_transport::messages::result_message>>(
+                service::query_state&, ::shared_ptr<cql_statement>, const query_options&, std::optional<service::group0_guard>)>
+                fn,
        ::shared_ptr<cql_statement> statement, service::query_state& query_state, const query_options& options) {
    // execute all statements that need group0 guard on shard0
    if (this_shard_id() != 0) {
-        co_return ::make_shared<cql_transport::messages::result_message::bounce_to_shard>(0,
-                    std::move(const_cast<cql3::query_options&>(options).take_cached_pk_function_calls()));
+        co_return bounce_to_shard(0, std::move(const_cast<cql3::query_options&>(options).take_cached_pk_function_calls()), false);
    }

    auto [remote_, holder] = remote();
    size_t retries = remote_.get().mm.get_concurrent_ddl_retries();
-    while (true)  {
+    while (true) {
        try {
            auto guard = co_await remote_.get().mm.start_group0_operation();
            co_return co_await fn(query_state, statement, options, std::move(guard));
        } catch (const service::group0_concurrent_modification& ex) {
-            log.warn("Failed to execute statement \"{}\" due to guard conflict.{}.",
-                    statement->raw_cql_statement, retries ? " Retrying" : " Number of retries exceeded, giving up");
+            log.warn("Failed to execute statement \"{}\" due to guard conflict.{}.", statement->raw_cql_statement,
+                    retries ? " Retrying" : " Number of retries exceeded, giving up");
            if (retries--) {
                continue;
            }
@@ -570,29 +606,30 @@ future<::shared_ptr<cql_transport::messages::result_message>> query_processor::e
    }
 }

-template<typename... Args>
-future<::shared_ptr<result_message>>
-query_processor::execute_maybe_with_guard(service::query_state& query_state, ::shared_ptr<cql_statement> statement, const query_options& options,
-    future<::shared_ptr<result_message>>(query_processor::*fn)(service::query_state&, ::shared_ptr<cql_statement>, const query_options&, std::optional<service::group0_guard>, Args...), Args... args) {
+template <typename... Args>
+future<::shared_ptr<result_message>> query_processor::execute_maybe_with_guard(service::query_state& query_state, ::shared_ptr<cql_statement> statement,
+        const query_options& options,
+        future<::shared_ptr<result_message>> (query_processor::*fn)(
+                service::query_state&, ::shared_ptr<cql_statement>, const query_options&, std::optional<service::group0_guard>, Args...),
+        Args... args) { // Calls fn directly with no guard, or through execute_with_guard() when the statement needs one.
    if (!statement->needs_guard(*this, query_state)) {
        return (this->*fn)(query_state, std::move(statement), options, std::nullopt, std::forward<Args>(args)...);
    }
-    static auto exec = [fn] (query_processor& qp, Args... args, service::query_state& query_state, ::shared_ptr<cql_statement> statement, const query_options& options, std::optional<service::group0_guard> guard) {
+    auto exec = [fn](query_processor& qp, Args... args, service::query_state& query_state, ::shared_ptr<cql_statement> statement,
+                        const query_options& options, std::optional<service::group0_guard> guard) { // Per-call closure: a static one would keep the first fn seen for this Args pack.
        return (qp.*fn)(query_state, std::move(statement), options, std::move(guard), std::forward<Args>(args)...);
    };
    return execute_with_guard(std::bind_front(exec, std::ref(*this), std::forward<Args>(args)...), std::move(statement), query_state, options);
 }

-future<::shared_ptr<result_message>>
-query_processor::execute_direct_without_checking_exception_message(const std::string_view& query_string, service::query_state& query_state, dialect d, query_options& options) {
+future<::shared_ptr<result_message>> query_processor::execute_direct_without_checking_exception_message(
+        const std::string_view& query_string, service::query_state& query_state, dialect d, query_options& options) {
    log.trace("execute_direct: \"{}\"", query_string);
    tracing::trace(query_state.get_trace_state(), "Parsing a statement");
    auto p = get_statement(query_string, query_state.get_client_state(), d);
    auto statement = p->statement;
    if (statement->get_bound_terms() != options.get_values_count()) {
-        const auto msg = format("Invalid amount of bind variables: expected {:d} received {:d}",
-                statement->get_bound_terms(),
-                options.get_values_count());
+        const auto msg = format("Invalid amount of bind variables: expected {:d} received {:d}", statement->get_bound_terms(), options.get_values_count());
        throw exceptions::invalid_request_exception(msg);
    }
    options.prepare(p->bound_names);
@@ -603,17 +640,13 @@ query_processor::execute_direct_without_checking_exception_message(const std::st
            metrics.regularStatementsExecuted.inc();
 #endif
    auto user = query_state.get_client_state().user();
-    tracing::trace(query_state.get_trace_state(), "Processing a statement for authenticated user: {}", user ? (user->name ? *user->name : "anonymous") : "no user authenticated");
+    tracing::trace(query_state.get_trace_state(), "Processing a statement for authenticated user: {}",
+            user ? (user->name ? *user->name : "anonymous") : "no user authenticated");
    return execute_maybe_with_guard(query_state, std::move(statement), options, &query_processor::do_execute_direct, std::move(p->warnings));
 }

-future<::shared_ptr<result_message>>
-query_processor::do_execute_direct(
-         service::query_state& query_state,
-        shared_ptr<cql_statement> statement,
-        const query_options& options,
-        std::optional<service::group0_guard> guard,
-        cql3::cql_warnings_vec warnings) {
+future<::shared_ptr<result_message>> query_processor::do_execute_direct(service::query_state& query_state, shared_ptr<cql_statement> statement,
+        const query_options& options, std::optional<service::group0_guard> guard, cql3::cql_warnings_vec warnings) {
    auto access_future = co_await coroutine::as_future(statement->check_access(*this, query_state.get_client_state()));
    if (access_future.failed()) {
        co_await audit::inspect(statement, query_state, options, true);
@@ -638,26 +671,16 @@ query_processor::do_execute_direct(
    co_return std::move(m);
 }

-future<::shared_ptr<result_message>>
-query_processor::execute_prepared_without_checking_exception_message(
-        service::query_state& query_state,
-        shared_ptr<cql_statement> statement,
-        const query_options& options,
-        statements::prepared_statement::checked_weak_ptr prepared,
-        cql3::prepared_cache_key_type cache_key,
-        bool needs_authorization) {
-    return execute_maybe_with_guard(query_state, std::move(statement), options, &query_processor::do_execute_prepared, std::move(prepared), std::move(cache_key), needs_authorization);
+future<::shared_ptr<result_message>> query_processor::execute_prepared_without_checking_exception_message(service::query_state& query_state,
+        shared_ptr<cql_statement> statement, const query_options& options, statements::prepared_statement::checked_weak_ptr prepared,
+        cql3::prepared_cache_key_type cache_key, bool needs_authorization) {
+    return execute_maybe_with_guard(
+            query_state, std::move(statement), options, &query_processor::do_execute_prepared, std::move(prepared), std::move(cache_key), needs_authorization);
 }

-future<::shared_ptr<result_message>>
-query_processor::do_execute_prepared(
-        service::query_state& query_state,
-        shared_ptr<cql_statement> statement,
-        const query_options& options,
-        std::optional<service::group0_guard> guard,
-        statements::prepared_statement::checked_weak_ptr prepared,
-        cql3::prepared_cache_key_type cache_key,
-        bool needs_authorization) {
+future<::shared_ptr<result_message>> query_processor::do_execute_prepared(service::query_state& query_state, shared_ptr<cql_statement> statement,
+        const query_options& options, std::optional<service::group0_guard> guard, statements::prepared_statement::checked_weak_ptr prepared,
+        cql3::prepared_cache_key_type cache_key, bool needs_authorization) {
    if (needs_authorization) {
        co_await statement->check_access(*this, query_state.get_client_state());
        try {
@@ -671,8 +694,8 @@ query_processor::do_execute_prepared(
    co_return co_await process_authorized_statement(std::move(statement), query_state, options, std::move(guard));
 }

-future<::shared_ptr<result_message>>
-query_processor::process_authorized_statement(const ::shared_ptr<cql_statement> statement, service::query_state& query_state, const query_options& options, std::optional<service::group0_guard> guard) {
+future<::shared_ptr<result_message>> query_processor::process_authorized_statement(const ::shared_ptr<cql_statement> statement,
+        service::query_state& query_state, const query_options& options, std::optional<service::group0_guard> guard) {
    auto& client_state = query_state.get_client_state();

    ++_stats.queries_by_cl[size_t(options.get_consistency())];
@@ -682,43 +705,39 @@ query_processor::process_authorized_statement(const ::shared_ptr<cql_statement>
    auto msg = co_await statement->execute_without_checking_exception_message(*this, query_state, options, std::move(guard));

    if (msg) {
-       co_return std::move(msg);
+        co_return std::move(msg);
    }
    co_return ::make_shared<result_message::void_message>();
 }

-future<::shared_ptr<cql_transport::messages::result_message::prepared>>
-query_processor::prepare(sstring query_string, service::query_state& query_state, cql3::dialect d) {
+future<::shared_ptr<cql_transport::messages::result_message::prepared>> query_processor::prepare(
+        sstring query_string, service::query_state& query_state, cql3::dialect d) {
    auto& client_state = query_state.get_client_state();
    return prepare(std::move(query_string), client_state, d);
 }

-future<::shared_ptr<cql_transport::messages::result_message::prepared>>
-query_processor::prepare(sstring query_string, const service::client_state& client_state, cql3::dialect d) {
+future<::shared_ptr<cql_transport::messages::result_message::prepared>> query_processor::prepare(
+        sstring query_string, const service::client_state& client_state, cql3::dialect d) {
    try {
        auto key = compute_id(query_string, client_state.get_raw_keyspace(), d);
-        auto prep_ptr = co_await _prepared_cache.get(key, [this, &query_string, &client_state, d] {
-                auto prepared = get_statement(query_string, client_state, d);
-                prepared->calculate_metadata_id();
-                auto bound_terms = prepared->statement->get_bound_terms();
-                if (bound_terms > std::numeric_limits<uint16_t>::max()) {
-                    throw exceptions::invalid_request_exception(
-                            format("Too many markers(?). {:d} markers exceed the allowed maximum of {:d}",
-                                bound_terms,
-                                std::numeric_limits<uint16_t>::max()));
-                }
-                SCYLLA_ASSERT(bound_terms == prepared->bound_names.size());
-                return make_ready_future<std::unique_ptr<statements::prepared_statement>>(std::move(prepared));
-            });
+        auto prep_entry = co_await _prepared_cache.get_pinned(key, [this, &query_string, &client_state, d] {
+            auto prepared = get_statement(query_string, client_state, d);
+            prepared->calculate_metadata_id();
+            auto bound_terms = prepared->statement->get_bound_terms();
+            if (bound_terms > std::numeric_limits<uint16_t>::max()) {
+                throw exceptions::invalid_request_exception(
+                        format("Too many markers(?). {:d} markers exceed the allowed maximum of {:d}", bound_terms, std::numeric_limits<uint16_t>::max()));
+            }
+            throwing_assert(bound_terms == prepared->bound_names.size());
+            return make_ready_future<std::unique_ptr<statements::prepared_statement>>(std::move(prepared));
+        });

-        const auto& warnings = prep_ptr->warnings;
-        const auto msg = ::make_shared<result_message::prepared::cql>(prepared_cache_key_type::cql_id(key), std::move(prep_ptr),
-                    client_state.is_protocol_extension_set(cql_transport::cql_protocol_extension::LWT_ADD_METADATA_MARK));
-        for (const auto& w : warnings) {
-            msg->add_warning(w);
-        }
-        co_return ::shared_ptr<cql_transport::messages::result_message::prepared>(std::move(msg));
-    } catch(typename prepared_statements_cache::statement_is_too_big&) {
+        co_await utils::get_local_injector().inject("query_processor_prepare_wait_after_cache_get", utils::wait_for_message(std::chrono::seconds(60)));
+
+        auto msg = ::make_shared<result_message::prepared::cql>(prepared_cache_key_type::cql_id(key), std::move(prep_entry),
+                client_state.is_protocol_extension_set(cql_transport::cql_protocol_extension::LWT_ADD_METADATA_MARK));
+        co_return std::move(msg);
+    } catch (typename prepared_statements_cache::statement_is_too_big&) {
        throw prepared_statement_is_too_big(query_string);
    }
 }
@@ -729,15 +748,15 @@ static std::string hash_target(std::string_view query_string, std::string_view k
    return ret;
 }

-prepared_cache_key_type query_processor::compute_id(
-        std::string_view query_string,
-        std::string_view keyspace,
-        dialect d) {
+prepared_cache_key_type query_processor::compute_id(std::string_view query_string, std::string_view keyspace, dialect d) {
    return prepared_cache_key_type(md5_hasher::calculate(hash_target(query_string, keyspace)), d);
 }

-std::unique_ptr<prepared_statement>
-query_processor::get_statement(const std::string_view& query, const service::client_state& client_state, dialect d) {
+std::unique_ptr<prepared_statement> query_processor::get_statement(const std::string_view& query, const service::client_state& client_state, dialect d) {
+    // Measuring allocation cost requires that no yield points exist
+    // between bytes_before and bytes_after. It needs fixing if this
+    // function is ever futurized.
+    auto bytes_before = seastar::memory::stats().total_bytes_allocated();
    std::unique_ptr<raw::parsed_statement> statement = parse_statement(query, d);

    // Set keyspace for statement that require login
@@ -753,11 +772,12 @@ query_processor::get_statement(const std::string_view& query, const service::cli
        audit_info->set_query_string(query);
        p->statement->sanitize_audit_info();
    }
+    auto bytes_after = seastar::memory::stats().total_bytes_allocated();
+    _parsing_cost_tracker.add_sample(bytes_after - bytes_before);
    return p;
 }

-std::unique_ptr<raw::parsed_statement>
-query_processor::parse_statement(const std::string_view& query, dialect d) {
+std::unique_ptr<raw::parsed_statement> query_processor::parse_statement(const std::string_view& query, dialect d) {
    try {
        {
            const char* error_injection_key = "query_processor-parse_statement-test_failure";
@@ -782,8 +802,7 @@ query_processor::parse_statement(const std::string_view& query, dialect d) {
    }
 }

-std::vector<std::unique_ptr<raw::parsed_statement>>
-query_processor::parse_statements(std::string_view queries, dialect d) {
+std::vector<std::unique_ptr<raw::parsed_statement>> query_processor::parse_statements(std::string_view queries, dialect d) {
    try {
        auto statements = util::do_with_parser(queries, d, std::mem_fn(&cql3_parser::CqlParser::queries));
        if (statements.empty()) {
@@ -812,15 +831,10 @@ std::pair<std::reference_wrapper<struct query_processor::remote>, gate::holder>
    on_internal_error(log, "attempted to perform distributed query when `query_processor::remote` is unavailable");
 }

-query_options query_processor::make_internal_options(
-        const statements::prepared_statement::checked_weak_ptr& p,
-        const std::vector<data_value_or_unset>& values,
-        db::consistency_level cl,
-        int32_t page_size,
-        service::node_local_only node_local_only) const {
+query_options query_processor::make_internal_options(const statements::prepared_statement::checked_weak_ptr& p, const std::vector<data_value_or_unset>& values,
+        db::consistency_level cl, int32_t page_size, service::node_local_only node_local_only) const {
    if (p->bound_names.size() != values.size()) {
-        throw std::invalid_argument(
-                format("Invalid number of values. Expecting {:d} but got {:d}", p->bound_names.size(), values.size()));
+        throw std::invalid_argument(format("Invalid number of values. Expecting {:d} but got {:d}", p->bound_names.size(), values.size()));
    }
    auto ni = p->bound_names.begin();
    raw_value_vector_with_unset bound_values;
@@ -828,32 +842,28 @@ query_options query_processor::make_internal_options(
    bound_values.unset.resize(values.size());
    for (auto& var : values) {
        auto& n = *ni;
-        std::visit(overloaded_functor {
-            [&] (const data_value& v) {
-                if (v.type() == bytes_type) {
-                    bound_values.values.emplace_back(cql3::raw_value::make_value(value_cast<bytes>(v)));
-                } else if (v.is_null()) {
-                    bound_values.values.emplace_back(cql3::raw_value::make_null());
-                } else {
-                    bound_values.values.emplace_back(cql3::raw_value::make_value(n->type->decompose(v)));
-                }
-            }, [&] (const unset_value&) {
-                bound_values.values.emplace_back(cql3::raw_value::make_null());
-                bound_values.unset[std::distance(p->bound_names.begin(), ni)] = true;
-            }
-        }, var);
+        std::visit(overloaded_functor{[&](const data_value& v) {
+                                          if (v.type() == bytes_type) {
+                                              bound_values.values.emplace_back(cql3::raw_value::make_value(value_cast<bytes>(v)));
+                                          } else if (v.is_null()) {
+                                              bound_values.values.emplace_back(cql3::raw_value::make_null());
+                                          } else {
+                                              bound_values.values.emplace_back(cql3::raw_value::make_value(n->type->decompose(v)));
+                                          }
+                                      },
+                           [&](const unset_value&) {
+                               bound_values.values.emplace_back(cql3::raw_value::make_null());
+                               bound_values.unset[std::distance(p->bound_names.begin(), ni)] = true;
+                           }},
+                var);
        ++ni;
    }
-    return query_options(
-            cl,
-            std::move(bound_values),
-            cql3::query_options::specific_options {
-                .page_size = page_size,
-                .state = {},
-                .serial_consistency = db::consistency_level::SERIAL,
-                .timestamp = api::missing_timestamp,
-                .node_local_only = node_local_only
-            });
+    return query_options(cl, std::move(bound_values),
+            cql3::query_options::specific_options{.page_size = page_size,
+                    .state = {},
+                    .serial_consistency = db::consistency_level::SERIAL,
+                    .timestamp = api::missing_timestamp,
+                    .node_local_only = node_local_only});
 }

 statements::prepared_statement::checked_weak_ptr query_processor::prepare_internal(const sstring& query_string) {
@@ -875,11 +885,7 @@ struct internal_query_state {
 };

 internal_query_state query_processor::create_paged_state(
-        const sstring& query_string,
-        db::consistency_level cl,
-        const data_value_list& values,
-        int32_t page_size,
-        std::optional<service::query_state> qs) {
+        const sstring& query_string, db::consistency_level cl, const data_value_list& values, int32_t page_size, std::optional<service::query_state> qs) {
    auto p = prepare_internal(query_string);
    auto opts = make_internal_options(p, values, cl, page_size);
    if (!qs) {
@@ -893,8 +899,7 @@ bool query_processor::has_more_results(cql3::internal_query_state& state) const
 }

 future<> query_processor::for_each_cql_result(
-        cql3::internal_query_state& state,
-        noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set::row&)> f) {
+        cql3::internal_query_state& state, noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set::row&)> f) {
    do {
        auto msg = co_await execute_paged_internal(state);
        for (auto& row : *msg) {
@@ -905,17 +910,18 @@ future<> query_processor::for_each_cql_result(
    } while (has_more_results(state));
 }

-future<::shared_ptr<untyped_result_set>>
-query_processor::execute_paged_internal(internal_query_state& state) {
+future<::shared_ptr<untyped_result_set>> query_processor::execute_paged_internal(internal_query_state& state) {
    state.p->statement->validate(*this, service::client_state::for_internal_calls());
-    ::shared_ptr<cql_transport::messages::result_message> msg =
-      co_await state.p->statement->execute(*this, *state.qs, *state.opts, std::nullopt);
+    ::shared_ptr<cql_transport::messages::result_message> msg = co_await state.p->statement->execute(*this, *state.qs, *state.opts, std::nullopt);

    class visitor : public result_message::visitor_base {
        internal_query_state& _state;
        query_processor& _qp;
+
    public:
-        visitor(internal_query_state& state, query_processor& qp) : _state(state), _qp(qp) {
+        visitor(internal_query_state& state, query_processor& qp)
+            : _state(state)
+            , _qp(qp) {
        }
        virtual ~visitor() = default;
        void visit(const result_message::rows& rmrs) override {
@@ -944,23 +950,14 @@ query_processor::execute_paged_internal(internal_query_state& state) {
    co_return ::make_shared<untyped_result_set>(msg);
 }

-future<::shared_ptr<untyped_result_set>>
-query_processor::execute_internal(
-        const sstring& query_string,
-        db::consistency_level cl,
-        const data_value_list& values,
-        cache_internal cache) {
+future<::shared_ptr<untyped_result_set>> query_processor::execute_internal(
+        const sstring& query_string, db::consistency_level cl, const data_value_list& values, cache_internal cache) {
    auto qs = query_state_for_internal_call();
    co_return co_await execute_internal(query_string, cl, qs, values, cache);
 }

-future<::shared_ptr<untyped_result_set>>
-query_processor::execute_internal(
-        const sstring& query_string,
-        db::consistency_level cl,
-        service::query_state& query_state,
-        const data_value_list& values,
-        cache_internal cache) {
+future<::shared_ptr<untyped_result_set>> query_processor::execute_internal(
+        const sstring& query_string, db::consistency_level cl, service::query_state& query_state, const data_value_list& values, cache_internal cache) {

    if (log.is_enabled(logging::log_level::trace)) {
        log.trace("execute_internal: {}\"{}\" ({})", cache ? "(cached) " : "", query_string, fmt::join(values, ", "));
@@ -978,10 +975,7 @@ query_processor::execute_internal(
 }

 future<utils::chunked_vector<mutation>> query_processor::get_mutations_internal(
-        const sstring query_string,
-        service::query_state& query_state,
-        api::timestamp_type timestamp,
-        std::vector<data_value_or_unset> values) {
+        const sstring query_string, service::query_state& query_state, api::timestamp_type timestamp, std::vector<data_value_or_unset> values) {
    log.debug("get_mutations_internal: \"{}\" ({})", query_string, fmt::join(values, ", "));
    auto stmt = prepare_internal(query_string);
    auto mod_stmt = dynamic_pointer_cast<cql3::statements::modification_statement>(stmt->statement);
@@ -999,12 +993,8 @@ future<utils::chunked_vector<mutation>> query_processor::get_mutations_internal(
    co_return co_await mod_stmt->get_mutations(*this, opts, timeout, true, timestamp, query_state, json_cache, std::move(keys));
 }

-future<::shared_ptr<untyped_result_set>>
-query_processor::execute_with_params(
-        statements::prepared_statement::checked_weak_ptr p,
-        db::consistency_level cl,
-        service::query_state& query_state,
-        const data_value_list& values) {
+future<::shared_ptr<untyped_result_set>> query_processor::execute_with_params(
+        statements::prepared_statement::checked_weak_ptr p, db::consistency_level cl, service::query_state& query_state, const data_value_list& values) {
    auto opts = make_internal_options(p, values, cl);
    auto statement = p->statement;

@@ -1012,30 +1002,24 @@ query_processor::execute_with_params(
    co_return ::make_shared<untyped_result_set>(msg);
 }

-future<::shared_ptr<result_message>>
-query_processor::do_execute_with_params(
-        service::query_state& query_state,
-        shared_ptr<cql_statement> statement,
-        const query_options& options, std::optional<service::group0_guard> guard) {
+future<::shared_ptr<result_message>> query_processor::do_execute_with_params(
+        service::query_state& query_state, shared_ptr<cql_statement> statement, const query_options& options, std::optional<service::group0_guard> guard) {
    statement->validate(*this, service::client_state::for_internal_calls());
    co_return co_await coroutine::try_future(statement->execute(*this, query_state, options, std::move(guard)));
 }


-future<::shared_ptr<cql_transport::messages::result_message>>
-query_processor::execute_batch_without_checking_exception_message(
-        ::shared_ptr<statements::batch_statement> batch,
-        service::query_state& query_state,
-        query_options& options,
+future<::shared_ptr<cql_transport::messages::result_message>> query_processor::execute_batch_without_checking_exception_message(
+        ::shared_ptr<statements::batch_statement> batch, service::query_state& query_state, query_options& options,
        std::unordered_map<prepared_cache_key_type, authorized_prepared_statements_cache::value_type> pending_authorization_entries) {
    auto access_future = co_await coroutine::as_future(batch->check_access(*this, query_state.get_client_state()));
-    co_await coroutine::parallel_for_each(pending_authorization_entries, [this, &query_state] (auto& e) -> future<> {
-            try {
-                co_await _authorized_prepared_cache.insert(*query_state.get_client_state().user(), e.first, std::move(e.second));
-            } catch (...) {
-                log.error("failed to cache the entry: {}", std::current_exception());
-            }
-        });
+    co_await coroutine::parallel_for_each(pending_authorization_entries, [this, &query_state](auto& e) -> future<> {
+        try {
+            co_await _authorized_prepared_cache.insert(*query_state.get_client_state().user(), e.first, std::move(e.second));
+        } catch (...) {
+            log.error("failed to cache the entry: {}", std::current_exception());
+        }
+    });
    bool failed = access_future.failed();
    co_await audit::inspect(batch, query_state, options, failed);
    if (access_future.failed()) {
@@ -1044,30 +1028,28 @@ query_processor::execute_batch_without_checking_exception_message(
    batch->validate();
    batch->validate(*this, query_state.get_client_state());
    _stats.queries_by_cl[size_t(options.get_consistency())] += batch->get_statements().size();
-   if (log.is_enabled(logging::log_level::trace)) {
+    if (log.is_enabled(logging::log_level::trace)) {
        std::ostringstream oss;
-        for (const auto& s: batch->get_statements()) {
-            oss << std::endl <<  s.statement->raw_cql_statement;
+        for (const auto& s : batch->get_statements()) {
+            oss << std::endl << s.statement->raw_cql_statement;
        }
        log.trace("execute_batch({}): {}", batch->get_statements().size(), oss.str());
    }
    co_return co_await batch->execute(*this, query_state, options, std::nullopt);
 }

-future<service::broadcast_tables::query_result>
-query_processor::execute_broadcast_table_query(const service::broadcast_tables::query& query) {
+future<service::broadcast_tables::query_result> query_processor::execute_broadcast_table_query(const service::broadcast_tables::query& query) {
    auto [remote_, holder] = remote();
    co_return co_await service::broadcast_tables::execute(remote_.get().group0_client, query);
 }

-future<query::mapreduce_result>
-query_processor::mapreduce(query::mapreduce_request req, tracing::trace_state_ptr tr_state) {
+future<query::mapreduce_result> query_processor::mapreduce(query::mapreduce_request req, tracing::trace_state_ptr tr_state) {
    auto [remote_, holder] = remote();
    co_return co_await remote_.get().mapreducer.dispatch(std::move(req), std::move(tr_state));
 }

-future<::shared_ptr<messages::result_message>>
-query_processor::execute_schema_statement(const statements::schema_altering_statement& stmt, service::query_state& state, const query_options& options, service::group0_batch& mc) {
+future<::shared_ptr<messages::result_message>> query_processor::execute_schema_statement(
+        const statements::schema_altering_statement& stmt, service::query_state& state, const query_options& options, service::group0_batch& mc) {
    if (this_shard_id() != 0) {
        on_internal_error(log, "DDL must be executed on shard 0");
    }
@@ -1121,7 +1103,8 @@ future<> query_processor::announce_schema_statement(const statements::schema_alt
    co_await remote_.get().mm.announce(std::move(m), std::move(guard), description);
 }

-query_processor::migration_subscriber::migration_subscriber(query_processor* qp) : _qp{qp} {
+query_processor::migration_subscriber::migration_subscriber(query_processor* qp)
+    : _qp{qp} {
 }

 void query_processor::migration_subscriber::on_create_keyspace(const sstring& ks_name) {
@@ -1147,10 +1130,7 @@ void query_processor::migration_subscriber::on_create_view(const sstring& ks_nam
 void query_processor::migration_subscriber::on_update_keyspace(const sstring& ks_name) {
 }

-void query_processor::migration_subscriber::on_update_column_family(
-        const sstring& ks_name,
-        const sstring& cf_name,
-        bool columns_changed) {
+void query_processor::migration_subscriber::on_update_column_family(const sstring& ks_name, const sstring& cf_name, bool columns_changed) {
    // #1255: Ignoring columns_changed deliberately.
    log.info("Column definitions for {}.{} changed, invalidating related prepared statements", ks_name, cf_name);
    remove_invalid_prepared_statements(ks_name, cf_name);
@@ -1165,9 +1145,7 @@ void query_processor::migration_subscriber::on_update_function(const sstring& ks
 void query_processor::migration_subscriber::on_update_aggregate(const sstring& ks_name, const sstring& aggregate_name) {
 }

-void query_processor::migration_subscriber::on_update_view(
-        const sstring& ks_name,
-        const sstring& view_name, bool columns_changed) {
+void query_processor::migration_subscriber::on_update_view(const sstring& ks_name, const sstring& view_name, bool columns_changed) {
    // scylladb/scylladb#16392 - Materialized views are also tables so we need at least handle
    // them as such when changed.
    on_update_column_family(ks_name, view_name, columns_changed);
@@ -1196,48 +1174,54 @@ void query_processor::migration_subscriber::on_drop_view(const sstring& ks_name,
    remove_invalid_prepared_statements(ks_name, view_name);
 }

-void query_processor::migration_subscriber::remove_invalid_prepared_statements(
-        sstring ks_name,
-        std::optional<sstring> cf_name) {
-    _qp->_prepared_cache.remove_if([&] (::shared_ptr<cql_statement> stmt) {
+void query_processor::migration_subscriber::remove_invalid_prepared_statements(sstring ks_name, std::optional<sstring> cf_name) {
+    _qp->_prepared_cache.remove_if([&](::shared_ptr<cql_statement> stmt) {
        return this->should_invalidate(ks_name, cf_name, stmt);
    });
 }

-bool query_processor::migration_subscriber::should_invalidate(
-        sstring ks_name,
-        std::optional<sstring> cf_name,
-        ::shared_ptr<cql_statement> statement) {
+bool query_processor::migration_subscriber::should_invalidate(sstring ks_name, std::optional<sstring> cf_name, ::shared_ptr<cql_statement> statement) {
    return statement->depends_on(ks_name, cf_name);
 }

-future<> query_processor::query_internal(
-        const sstring& query_string,
-        db::consistency_level cl,
-        const data_value_list& values,
-        int32_t page_size,
-        noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)> f,
-        std::optional<service::query_state> qs) {
+future<> query_processor::query_internal(const sstring& query_string, db::consistency_level cl, const data_value_list& values, int32_t page_size,
+        noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)> f, std::optional<service::query_state> qs) {
    auto query_state = create_paged_state(query_string, cl, values, page_size, std::move(qs));
    co_return co_await for_each_cql_result(query_state, std::move(f));
 }

-future<> query_processor::query_internal(
-        const sstring& query_string,
-        noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)> f) {
+future<> query_processor::query_internal(const sstring& query_string, noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)> f) {
    return query_internal(query_string, db::consistency_level::ONE, {}, 1000, std::move(f));
 }

-shared_ptr<cql_transport::messages::result_message> query_processor::bounce_to_shard(unsigned shard, cql3::computed_function_values cached_fn_calls) {
-    _proxy.get_stats().replica_cross_shard_ops++;
-    return ::make_shared<cql_transport::messages::result_message::bounce_to_shard>(shard, std::move(cached_fn_calls));
+shared_ptr<cql_transport::messages::result_message> query_processor::bounce_to_shard(
+        unsigned shard, cql3::computed_function_values cached_fn_calls, bool track) {
+    if (track) {
+        _proxy.get_stats().replica_cross_shard_ops++;
+    }
+    const auto my_host_id = _proxy.get_token_metadata_ptr()->get_topology().my_host_id();
+    return ::make_shared<cql_transport::messages::result_message::bounce>(my_host_id, shard, std::move(cached_fn_calls));
+}
+
+shared_ptr<cql_transport::messages::result_message> query_processor::bounce_to_node(
+        locator::tablet_replica replica, cql3::computed_function_values cached_fn_calls, seastar::lowres_clock::time_point timeout, bool is_write) {
+    get_cql_stats().forwarded_requests++;
+    return ::make_shared<cql_transport::messages::result_message::bounce>(replica.host, replica.shard, std::move(cached_fn_calls), timeout, is_write);
+}
+
+query_processor::consistency_level_set query_processor::to_consistency_level_set(const query_processor::cl_option_list& levels) {
+    query_processor::consistency_level_set result;
+    for (const auto& opt : levels) {
+        result.set(static_cast<db::consistency_level>(opt));
+    }
+    return result;
 }

 void query_processor::update_authorized_prepared_cache_config() {
    utils::loading_cache_config cfg;
    cfg.max_size = _mcfg.authorized_prepared_cache_size;
    cfg.expiry = std::min(std::chrono::milliseconds(_db.get_config().permissions_validity_in_ms()),
-                          std::chrono::duration_cast<std::chrono::milliseconds>(prepared_statements_cache::entry_expiry));
+            std::chrono::duration_cast<std::chrono::milliseconds>(prepared_statements_cache::entry_expiry));
    cfg.refresh = std::chrono::milliseconds(_db.get_config().permissions_update_interval_in_ms());

    if (!_authorized_prepared_cache.update_config(std::move(cfg))) {
@@ -1249,4 +1233,4 @@ void query_processor::reset_cache() {
    _authorized_prepared_cache.reset();
 }

-}
+} // namespace cql3
--- a/cql3/query_processor.hh
+++ b/cql3/query_processor.hh
@@ -31,9 +31,12 @@
 #include "vector_search/vector_store_client.hh"
 #include "utils/assert.hh"
 #include "utils/observable.hh"
+#include "utils/rolling_max_tracker.hh"
 #include "service/raft/raft_group0_client.hh"
 #include "types/types.hh"
-#include "db/auth_version.hh"
+#include "db/consistency_level_type.hh"
+#include "db/config.hh"
+#include "utils/enum_option.hh"
 #include "service/storage_proxy_fwd.hh"


@@ -132,6 +135,9 @@ private:
    prepared_statements_cache _prepared_cache;
    authorized_prepared_statements_cache _authorized_prepared_cache;

+    // Tracks the rolling maximum of gross bytes allocated during CQL parsing
+    utils::rolling_max_tracker _parsing_cost_tracker{1000};
+
    std::function<void(uint32_t)> _auth_prepared_cache_cfg_cb;
    serialized_action _authorized_prepared_cache_config_action;
    utils::observer<uint32_t> _authorized_prepared_cache_update_interval_in_ms_observer;
@@ -142,6 +148,30 @@ private:
    std::unordered_map<sstring, std::unique_ptr<statements::prepared_statement>> _internal_statements;

    lang::manager& _lang_manager;
+
+    using cl_option_list = std::vector<enum_option<db::consistency_level_restriction_t>>;
+
+    /// Efficient bitmask-based set of consistency levels.
+    using consistency_level_set = enum_set<super_enum<db::consistency_level,
+        db::consistency_level::ANY,
+        db::consistency_level::ONE,
+        db::consistency_level::TWO,
+        db::consistency_level::THREE,
+        db::consistency_level::QUORUM,
+        db::consistency_level::ALL,
+        db::consistency_level::LOCAL_QUORUM,
+        db::consistency_level::EACH_QUORUM,
+        db::consistency_level::SERIAL,
+        db::consistency_level::LOCAL_SERIAL,
+        db::consistency_level::LOCAL_ONE>>;
+
+
+    consistency_level_set _write_consistency_levels_warned;
+    consistency_level_set _write_consistency_levels_disallowed;
+    utils::observer<cl_option_list> _write_consistency_levels_warned_observer;
+    utils::observer<cl_option_list> _write_consistency_levels_disallowed_observer;
+
+    static consistency_level_set to_consistency_level_set(const cl_option_list& levels);
 public:
    static const sstring CQL_VERSION;

@@ -186,6 +216,11 @@ public:
        return _cql_stats;
    }

+    /// Returns the estimated peak memory cost of CQL parsing.
+    size_t parsing_cost_estimate() const noexcept {
+        return _parsing_cost_tracker.current_max();
+    }
+
    lang::manager& lang() { return _lang_manager; }

    const vector_search::vector_store_client& vector_store_client() const noexcept {
@@ -196,8 +231,6 @@ public:
        return _vector_store_client;
    }

-    db::auth_version_t auth_version;
-
    statements::prepared_statement::checked_weak_ptr get_prepared(const std::optional<auth::authenticated_user>& user, const prepared_cache_key_type& key) {
        if (user) {
            auto vp = _authorized_prepared_cache.find(*user, key);
@@ -477,7 +510,12 @@ public:

    friend class migration_subscriber;

-    shared_ptr<cql_transport::messages::result_message> bounce_to_shard(unsigned shard, cql3::computed_function_values cached_fn_calls);
+    shared_ptr<cql_transport::messages::result_message> bounce_to_shard(unsigned shard, cql3::computed_function_values cached_fn_calls, bool track = true);
+    shared_ptr<cql_transport::messages::result_message> bounce_to_node(
+            locator::tablet_replica replica,
+            cql3::computed_function_values cached_fn_calls,
+            seastar::lowres_clock::time_point timeout,
+            bool is_write);

    void update_authorized_prepared_cache_config();

@@ -493,6 +531,21 @@ public:
            int32_t page_size = -1,
            service::node_local_only node_local_only = service::node_local_only::no) const;

+    enum class write_consistency_guardrail_state { NONE, WARN, FAIL };
+    inline write_consistency_guardrail_state check_write_consistency_levels_guardrail(db::consistency_level cl) {
+        _cql_stats.writes_per_consistency_level[size_t(cl)]++;
+
+        if (_write_consistency_levels_disallowed.contains(cl)) [[unlikely]] {
+            _cql_stats.write_consistency_levels_disallowed_violations++;
+            return write_consistency_guardrail_state::FAIL;
+        }
+        if (_write_consistency_levels_warned.contains(cl)) [[unlikely]] {
+            _cql_stats.write_consistency_levels_warned_violations++;
+            return write_consistency_guardrail_state::WARN;
+        }
+        return write_consistency_guardrail_state::NONE;
+    }
+
 private:
    // Keep the holder until you stop using the `remote` services.
    std::pair<std::reference_wrapper<remote>, gate::holder> remote();
--- a/cql3/restrictions/bounds_slice.hh
+++ b/cql3/restrictions/bounds_slice.hh
@@ -89,10 +89,10 @@ public:
     */
    void merge(const bounds_slice& other) {
        if (has_bound(statements::bound::START)) {
-            SCYLLA_ASSERT(!other.has_bound(statements::bound::START));
+            throwing_assert(!other.has_bound(statements::bound::START));
            _bounds[get_idx(statements::bound::END)] = other._bounds[get_idx(statements::bound::END)];
        } else {
-            SCYLLA_ASSERT(!other.has_bound(statements::bound::END));
+            throwing_assert(!other.has_bound(statements::bound::END));
            _bounds[get_idx(statements::bound::START)] = other._bounds[get_idx(statements::bound::START)];
        }
    }
--- a/cql3/result_set.cc
+++ b/cql3/result_set.cc
@@ -61,7 +61,7 @@ void metadata::set_paging_state(lw_shared_ptr<const service::pager::paging_state
 }

 void metadata::maybe_set_paging_state(lw_shared_ptr<const service::pager::paging_state> paging_state) {
-    SCYLLA_ASSERT(paging_state);
+    throwing_assert(paging_state);
    if (paging_state->get_remaining() > 0) {
        set_paging_state(std::move(paging_state));
    } else {
@@ -138,7 +138,7 @@ bool result_set::empty() const {
 }

 void result_set::add_row(std::vector<managed_bytes_opt> row) {
-    SCYLLA_ASSERT(row.size() == _metadata->value_count());
+    throwing_assert(row.size() == _metadata->value_count());
    _rows.emplace_back(std::move(row));
 }

--- a/cql3/selection/selection.cc
+++ b/cql3/selection/selection.cc
@@ -212,11 +212,20 @@ public:
    }

    virtual uint32_t add_column_for_post_processing(const column_definition& c) override {
-        uint32_t index = selection::add_column_for_post_processing(c);
+        auto it = std::find_if(_selectors.begin(), _selectors.end(), [&c](const expr::expression& e) {
+            auto col = expr::as_if<expr::column_value>(&e);
+            return col && col->col == &c;
+        });
+        if (it != _selectors.end()) {
+            return std::distance(_selectors.begin(), it);
+        }
+
+        add_column(c);
+        get_result_metadata()->add_non_serialized_column(c.column_specification);
        _selectors.push_back(expr::column_value(&c));
        if (_inner_loop.empty()) {
            // Simple case: no aggregation
-            return index;
+            return _selectors.size() - 1;
        } else {
            // Complex case: aggregation, must pass through temporary
            auto first_func = cql3::functions::aggregate_fcts::make_first_function(c.type);
@@ -470,10 +479,21 @@ std::vector<const column_definition*> selection::wildcard_columns(schema_ptr sch
    return simple_selection::make(schema, std::move(columns), false);
 }

-uint32_t selection::add_column_for_post_processing(const column_definition& c) {
+selection::add_column_result selection::add_column(const column_definition& c) {
+    auto index = index_of(c);
+    if (index != -1) {
+        return {index, false};
+    }
    _columns.push_back(&c);
-    _metadata->add_non_serialized_column(c.column_specification);
-    return _columns.size() - 1;
+    return {_columns.size() - 1, true};
+}
+
+uint32_t selection::add_column_for_post_processing(const column_definition& c) {
+    auto col = add_column(c);
+    if (col.added) {
+        _metadata->add_non_serialized_column(c.column_specification);
+    }
+    return col.index;
 }

 ::shared_ptr<selection> selection::from_selectors(data_dictionary::database db, schema_ptr schema, const sstring& ks, const std::vector<prepared_selector>& prepared_selectors) {
--- a/cql3/selection/selection.hh
+++ b/cql3/selection/selection.hh
@@ -130,6 +130,14 @@ public:
    virtual std::vector<shared_ptr<functions::function>> used_functions() const { return {}; }

    query::partition_slice::option_set get_query_options();
+protected:
+    // Result of add_column: index in _columns and whether it was added now (or existed already).
+    struct add_column_result {
+        uint32_t index;
+        bool added;
+    };
+    // Adds a column to the _columns if not already present, returns add_column_result.
+    add_column_result add_column(const column_definition& c);
 private:
    static bool processes_selection(const std::vector<prepared_selector>& prepared_selectors);

@@ -348,7 +356,7 @@ public:
                    add_value(*def, static_row_iterator);
                    break;
                default:
-                    SCYLLA_ASSERT(0);
+                    throwing_assert(0);
                }
            }
            _builder.complete_row();
--- a/cql3/sets.cc
+++ b/cql3/sets.cc
@@ -34,7 +34,7 @@ sets::setter::execute(mutation& m, const clustering_key_prefix& row_key, const u
 void
 sets::adder::execute(mutation& m, const clustering_key_prefix& row_key, const update_parameters& params) {
    const cql3::raw_value value = expr::evaluate(*_e, params._options);
-    SCYLLA_ASSERT(column.type->is_multi_cell()); // "Attempted to add items to a frozen set";
+    throwing_assert(column.type->is_multi_cell()); // "Attempted to add items to a frozen set";
    do_add(m, row_key, params, value, column);
 }

@@ -77,7 +77,7 @@ sets::adder::do_add(mutation& m, const clustering_key_prefix& row_key, const upd

 void
 sets::discarder::execute(mutation& m, const clustering_key_prefix& row_key, const update_parameters& params) {
-    SCYLLA_ASSERT(column.type->is_multi_cell()); // "Attempted to remove items from a frozen set";
+    throwing_assert(column.type->is_multi_cell()); // "Attempted to remove items from a frozen set";

    cql3::raw_value svalue = expr::evaluate(*_e, params._options);
    if (svalue.is_null()) {
@@ -98,7 +98,7 @@ sets::discarder::execute(mutation& m, const clustering_key_prefix& row_key, cons

 void sets::element_discarder::execute(mutation& m, const clustering_key_prefix& row_key, const update_parameters& params)
 {
-    SCYLLA_ASSERT(column.type->is_multi_cell() && "Attempted to remove items from a frozen set");
+    throwing_assert(column.type->is_multi_cell() && "Attempted to remove items from a frozen set");
    cql3::raw_value elt = expr::evaluate(*_e, params._options);
    if (elt.is_null()) {
        throw exceptions::invalid_request_exception("Invalid null set element");
--- a/cql3/statements/alter_table_statement.cc
+++ b/cql3/statements/alter_table_statement.cc
@@ -296,7 +296,7 @@ void alter_table_statement::drop_column(const query_options& options, const sche
 std::pair<schema_ptr, std::vector<view_ptr>> alter_table_statement::prepare_schema_update(data_dictionary::database db, const query_options& options) const {
    auto s = validation::validate_column_family(db, keyspace(), column_family());
    if (s->is_view()) {
-        throw exceptions::invalid_request_exception("Cannot use ALTER TABLE on Materialized View");
+        throw exceptions::invalid_request_exception("Cannot use ALTER TABLE on Materialized View. (Did you mean ALTER MATERIALIZED VIEW)?");
    }

    const bool is_cdc_log_table = cdc::is_log_for_some_table(db.real_database(), s->ks_name(), s->cf_name());
@@ -368,7 +368,7 @@ std::pair<schema_ptr, std::vector<view_ptr>> alter_table_statement::prepare_sche

    switch (_type) {
    case alter_table_statement::type::add:
-        SCYLLA_ASSERT(_column_changes.size());
+        throwing_assert(_column_changes.size());
        if (s->is_dense()) {
            throw exceptions::invalid_request_exception("Cannot add new column to a COMPACT STORAGE table");
        }
@@ -376,12 +376,12 @@ std::pair<schema_ptr, std::vector<view_ptr>> alter_table_statement::prepare_sche
        break;

    case alter_table_statement::type::alter:
-        SCYLLA_ASSERT(_column_changes.size() == 1);
+        throwing_assert(_column_changes.size() == 1);
        invoke_column_change_fn(std::mem_fn(&alter_table_statement::alter_column));
        break;

    case alter_table_statement::type::drop:
-        SCYLLA_ASSERT(_column_changes.size());
+        throwing_assert(_column_changes.size());
        if (!s->is_cql3_table()) {
            throw exceptions::invalid_request_exception("Cannot drop columns from a non-CQL3 table");
        }
--- a/cql3/statements/alter_view_statement.cc
+++ b/cql3/statements/alter_view_statement.cc
@@ -46,7 +46,7 @@ future<> alter_view_statement::check_access(query_processor& qp, const service::
 view_ptr alter_view_statement::prepare_view(data_dictionary::database db) const {
    schema_ptr schema = validation::validate_column_family(db, keyspace(), column_family());
    if (!schema->is_view()) {
-        throw exceptions::invalid_request_exception("Cannot use ALTER MATERIALIZED VIEW on Table");
+        throw exceptions::invalid_request_exception("Cannot use ALTER MATERIALIZED VIEW on Table. (Did you mean ALTER TABLE)?");
    }

    if (!_properties) {
--- a/cql3/statements/attach_service_level_statement.cc
+++ b/cql3/statements/attach_service_level_statement.cc
@@ -25,7 +25,7 @@ attach_service_level_statement::attach_service_level_statement(sstring service_l
 }

 bool attach_service_level_statement::needs_guard(query_processor& qp, service::query_state& state) const {
-    return !auth::legacy_mode(qp) || state.get_service_level_controller().is_v2();
+    return true;
 }

 std::unique_ptr<cql3::statements::prepared_statement>
--- a/cql3/statements/authentication_statement.cc
+++ b/cql3/statements/authentication_statement.cc
@@ -11,7 +11,6 @@
 #include "authentication_statement.hh"
 #include "transport/messages/result_message.hh"
 #include "cql3/query_processor.hh"
-#include "auth/common.hh"

 uint32_t cql3::statements::authentication_statement::get_bound_terms() const {
    return 0;
@@ -26,7 +25,7 @@ future<> cql3::statements::authentication_statement::check_access(query_processo
 }

 bool cql3::statements::authentication_altering_statement::needs_guard(query_processor& qp, service::query_state&) const {
-    return !auth::legacy_mode(qp);
+    return true;
 }

 audit::statement_category cql3::statements::authentication_statement::category() const {
--- a/cql3/statements/authorization_statement.cc
+++ b/cql3/statements/authorization_statement.cc
@@ -14,7 +14,6 @@
 #include "cql3/query_processor.hh"
 #include "exceptions/exceptions.hh"
 #include "db/cql_type_parser.hh"
-#include "auth/common.hh"

 uint32_t cql3::statements::authorization_statement::get_bound_terms() const {
    return 0;
@@ -74,7 +73,7 @@ void cql3::statements::authorization_statement::maybe_correct_resource(auth::res

 bool cql3::statements::authorization_altering_statement::needs_guard(
                query_processor& qp, service::query_state&) const {
-    return !auth::legacy_mode(qp);
+    return true;
 };

 audit::statement_category cql3::statements::authorization_statement::category() const {
--- a/cql3/statements/batch_statement.cc
+++ b/cql3/statements/batch_statement.cc
@@ -259,6 +259,18 @@ future<shared_ptr<cql_transport::messages::result_message>> batch_statement::do_
    if (options.getSerialConsistency() == null)
        throw new InvalidRequestException("Invalid empty serial consistency level");
 #endif
+
+    const auto cl = options.get_consistency();
+    const query_processor::write_consistency_guardrail_state guardrail_state = qp.check_write_consistency_levels_guardrail(cl);
+    if (guardrail_state == query_processor::write_consistency_guardrail_state::FAIL) {
+        return make_exception_future<shared_ptr<cql_transport::messages::result_message>>(
+                exceptions::invalid_request_exception(
+                        format("Write consistency level {} is forbidden by the current configuration "
+                               "setting of write_consistency_levels_disallowed. Please use a different "
+                               "consistency level, or remove {} from write_consistency_levels_disallowed "
+                               "set in the configuration.", cl, cl)));
+    }
+
    for (size_t i = 0; i < _statements.size(); ++i) {
        _statements[i].statement->restrictions().validate_primary_key(options.for_statement(i));
    }
@@ -266,23 +278,33 @@ future<shared_ptr<cql_transport::messages::result_message>> batch_statement::do_
    if (_has_conditions) {
        ++_stats.cas_batches;
        _stats.statements_in_cas_batches += _statements.size();
-        return execute_with_conditions(qp, options, query_state);
+        return execute_with_conditions(qp, options, query_state).then([guardrail_state, cl] (auto result) {
+            if (guardrail_state == query_processor::write_consistency_guardrail_state::WARN) {
+                result->add_warning(format("Using write consistency level {} listed on the "
+                                           "write_consistency_levels_warned is not recommended.", cl));
+            }
+            return result;
+        });
    }

    ++_stats.batches;
    _stats.statements_in_batches += _statements.size();

    auto timeout = db::timeout_clock::now() + get_timeout(query_state.get_client_state(), options);
-    return get_mutations(qp, options, timeout, local, now, query_state).then([this, &qp, &options, timeout, tr_state = query_state.get_trace_state(),
+    return get_mutations(qp, options, timeout, local, now, query_state).then([this, &qp, cl, timeout, tr_state = query_state.get_trace_state(),
                                                                                                                               permit = query_state.get_permit()] (utils::chunked_vector<mutation> ms) mutable {
-        return execute_without_conditions(qp, std::move(ms), options.get_consistency(), timeout, std::move(tr_state), std::move(permit));
-    }).then([] (coordinator_result<> res) {
+        return execute_without_conditions(qp, std::move(ms), cl, timeout, std::move(tr_state), std::move(permit));
+    }).then([guardrail_state, cl] (coordinator_result<> res) {
        if (!res) {
            return make_ready_future<shared_ptr<cql_transport::messages::result_message>>(
                    seastar::make_shared<cql_transport::messages::result_message::exception>(std::move(res).assume_error()));
        }
-        return make_ready_future<shared_ptr<cql_transport::messages::result_message>>(
-                make_shared<cql_transport::messages::result_message::void_message>());
+        auto result = make_shared<cql_transport::messages::result_message::void_message>();
+        if (guardrail_state == query_processor::write_consistency_guardrail_state::WARN) {
+            result->add_warning(format("Using write consistency level {} listed on the "
+                                       "write_consistency_levels_warned is not recommended.", cl));
+        }
+        return make_ready_future<shared_ptr<cql_transport::messages::result_message>>(std::move(result));
    });
 }

--- a/cql3/statements/broadcast_modification_statement.cc
+++ b/cql3/statements/broadcast_modification_statement.cc
@@ -20,6 +20,7 @@
 #include "cql3/attributes.hh"
 #include "cql3/expr/expression.hh"
 #include "cql3/expr/evaluate.hh"
+#include "cql3/query_options.hh"
 #include "cql3/query_processor.hh"
 #include "cql3/values.hh"
 #include "timeout_config.hh"
@@ -65,7 +66,7 @@ evaluate_prepared(
 future<::shared_ptr<cql_transport::messages::result_message>>
 broadcast_modification_statement::execute_without_checking_exception_message(query_processor& qp, service::query_state& qs, const query_options& options, std::optional<service::group0_guard> guard) const {
    if (this_shard_id() != 0) {
-        co_return ::make_shared<cql_transport::messages::result_message::bounce_to_shard>(0, cql3::computed_function_values{});
+        co_return qp.bounce_to_shard(0, cql3::computed_function_values{}, false);
    }

    auto result = co_await qp.execute_broadcast_table_query(
--- a/cql3/statements/broadcast_select_statement.cc
+++ b/cql3/statements/broadcast_select_statement.cc
@@ -96,7 +96,7 @@ evaluate_prepared(
 future<::shared_ptr<cql_transport::messages::result_message>>
 broadcast_select_statement::execute_without_checking_exception_message(query_processor& qp, service::query_state& qs, const query_options& options, std::optional<service::group0_guard> guard) const {
    if (this_shard_id() != 0) {
-        co_return ::make_shared<cql_transport::messages::result_message::bounce_to_shard>(0, cql3::computed_function_values{});
+        co_return qp.bounce_to_shard(0, cql3::computed_function_values{}, false);
    }

    auto result = co_await qp.execute_broadcast_table_query(
--- a/cql3/statements/cas_request.hh
+++ b/cql3/statements/cas_request.hh
@@ -51,7 +51,7 @@ public:
          , _key(std::move(key_arg))
          , _rows(schema_arg)
    {
-        SCYLLA_ASSERT(_key.size() == 1 && query::is_single_partition(_key.front()));
+        throwing_assert(_key.size() == 1 && query::is_single_partition(_key.front()));
    }

    dht::partition_range_vector key() const {
--- a/cql3/statements/cf_prop_defs.cc
+++ b/cql3/statements/cf_prop_defs.cc
@@ -59,6 +59,8 @@ const sstring cf_prop_defs::COMPACTION_ENABLED_KEY = "enabled";

 const sstring cf_prop_defs::KW_TABLETS = "tablets";

+const sstring cf_prop_defs::KW_STORAGE_ENGINE = "storage_engine";
+
 schema::extensions_map cf_prop_defs::make_schema_extensions(const db::extensions& exts) const {
    schema::extensions_map er;
    for (auto& p : exts.schema_extensions()) {
@@ -106,6 +108,7 @@ void cf_prop_defs::validate(const data_dictionary::database db, sstring ks_name,
        KW_BF_FP_CHANCE, KW_MEMTABLE_FLUSH_PERIOD, KW_COMPACTION,
        KW_COMPRESSION, KW_CRC_CHECK_CHANCE,  KW_ID, KW_PAXOSGRACESECONDS,
        KW_SYNCHRONOUS_UPDATES, KW_TABLETS,
+        KW_STORAGE_ENGINE,
    });
    static std::set<sstring> obsolete_keywords({
        sstring("index_interval"),
@@ -196,6 +199,20 @@ void cf_prop_defs::validate(const data_dictionary::database db, sstring ks_name,
        }
        db::tablet_options::validate(*tablet_options_map);
    }
+
+    if (has_property(KW_STORAGE_ENGINE)) { // "logstor" is the only accepted value, and only when enabled
+        auto storage_engine = get_string(KW_STORAGE_ENGINE, "");
+        if (storage_engine == "logstor") {
+            if (!db.features().logstor) {
+                throw exceptions::configuration_exception("The experimental feature 'logstor' must be enabled in order to use the 'logstor' storage engine.");
+            }
+            if (!db.get_config().enable_logstor()) {
+                throw exceptions::configuration_exception("The configuration option 'enable_logstor' must be set to true in the configuration in order to use the 'logstor' storage engine.");
+            }
+        } else {
+            throw exceptions::configuration_exception(format("Illegal value for '{}'", KW_STORAGE_ENGINE));
+        }
+    }
 }

 std::map<sstring, sstring> cf_prop_defs::get_compaction_type_options() const {
@@ -396,6 +413,13 @@ void cf_prop_defs::apply_to_builder(schema_builder& builder, schema::extensions_
    if (auto tablet_options_opt = get_map(KW_TABLETS)) {
        builder.set_tablet_options(std::move(*tablet_options_opt));
    }
+
+    if (has_property(KW_STORAGE_ENGINE)) {
+        auto storage_engine = get_string(KW_STORAGE_ENGINE, "");
+        if (storage_engine == "logstor") {
+            builder.set_logstor();
+        }
+    }
 }

 void cf_prop_defs::validate_minimum_int(const sstring& field, int32_t minimum_value, int32_t default_value) const
--- a/cql3/statements/cf_prop_defs.hh
+++ b/cql3/statements/cf_prop_defs.hh
@@ -64,6 +64,8 @@ public:

    static const sstring KW_TABLETS;

+    static const sstring KW_STORAGE_ENGINE;
+
    // FIXME: In origin the following consts are in CFMetaData.
    static constexpr int32_t DEFAULT_DEFAULT_TIME_TO_LIVE = 0;
    static constexpr int32_t DEFAULT_MIN_INDEX_INTERVAL = 128;
--- a/Show More
+++ b/Show More