Add a cluster test that upgrades to raft topology with an empty legacy
`system_distributed.service_levels` table and verifies that the
migration still marks `service_level_version` as `2`.
During raft-topology upgrade in 2026.1, service_level_controller::migrate_to_v2()
returns early when system_distributed.service_levels is empty.
This skips the service_level_version = 2 write, so the cluster is never marked
as upgraded to service levels v2 even though there is no data to migrate.
Subsequent upgrades may then fail the startup check which requires
service_level_version == 2.
Remove the early return and let the migration commit the version marker even
when there are no legacy service levels rows to copy.
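A minimal sketch of the intended flow, using the names from this message; `read_legacy_service_levels()`, `copy_service_level_to_v2()` and `set_service_level_version()` are hypothetical helpers, not the real implementation:
```
future<> service_level_controller::migrate_to_v2() {
    auto legacy = co_await read_legacy_service_levels(); // system_distributed.service_levels
    // The bug: an early `co_return` here when `legacy` was empty skipped the
    // version write below, so the cluster was never marked as v2.
    for (auto& sl : legacy) {
        co_await copy_service_level_to_v2(sl);
    }
    // Always commit the marker, even when there was nothing to copy.
    co_await set_service_level_version(2);
}
```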
Fixes: https://scylladb.atlassian.net/browse/SCYLLADB-1198
backport: only needed in 2026.1 because it's the critical upgrade before 2026.2/3/4
Consider this:
- repair takes the lock holder
- tablet merge fiber destroys the compaction group and the compaction state
- repair fails
- repair destroys the lock holder
This is observed in the test:
```
repair - repair[5d73d094-72ee-4570-a3cc-1cd479b2a036] Repair 1 out of 1 tablets: table=sec_index.users range=(432345564227567615,504403158265495551] replicas=[0e9d51a5-9c99-4d6e-b9db-ad36a148b0ea:15, 498e354c-1254-4d8d-a565-2f5c6523845a:9, 5208598c-84f0-4526-bb7f-573728592172:28]
...
repair - repair[5d73d094-72ee-4570-a3cc-1cd479b2a036]: Started to repair 1 out of 1 tables in keyspace=sec_index, table=users, table_id=ea2072d0-ccd9-11f0-8dba-c5ab01bffb77, repair_reason=repair
repair - Enable incremental repair for table=sec_index.users range=(432345564227567615,504403158265495551]
table - Disabled compaction for range=(432345564227567615,504403158265495551] session_id=a13a72cc-cd2d-11f0-8e9b-76d54580ab09 for incremental repair
table - Got unrepaired compaction and repair lock for range=(432345564227567615,504403158265495551] session_id=a13a72cc-cd2d-11f0-8e9b-76d54580ab09 for incremental repair
table - Disabled compaction for range=(432345564227567615,504403158265495551] session_id=a13a72cc-cd2d-11f0-8e9b-76d54580ab09 for incremental repair
table - Got unrepaired compaction and repair lock for range=(432345564227567615,504403158265495551] session_id=a13a72cc-cd2d-11f0-8e9b-76d54580ab09 for incremental repair
repair - repair[5d73d094-72ee-4570-a3cc-1cd479b2a036]: get_sync_boundary: got error from node=0e9d51a5-9c99-4d6e-b9db-ad36a148b0ea, keyspace=sec_index, table=users, range=(432345564227567615,504403158265495551], error=seastar::rpc::remote_verb_error (Compaction state for table [0x60f008fa34c0] not found)
compaction_manager - Stopping 1 tasks for 1 ongoing compactions for table sec_index.users compaction_group=238 due to tablet merge
compaction_manager - Stopping 1 tasks for 1 ongoing compactions for table sec_index.users compaction_group=238 due to tablet merge
....
scylla[10793] Segmentation fault on shard 28, in scheduling group streaming
```
The rwlock in compaction_state could be destroyed before the lock holder
of the rwlock is destroyed. This causes a use-after-free when the lock
holder is destroyed.
To fix it, users of the repair lock are now waited for when a compaction
group is being stopped.
That way, the compaction group, which controls the lifetime of the rwlock,
cannot be destroyed while the lock is held.
Additionally, the merge completion fiber, which might remove groups,
is properly serialized with incremental repair.
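A hedged sketch of the lifetime rule this introduces; the type and members are illustrative, not the actual repair/compaction code:
```
#include <seastar/core/rwlock.hh>
#include <seastar/core/coroutine.hh>

// Illustrative only: the repair lock's lifetime is owned by the group, and
// stop() now waits for all holders before the group can be destroyed.
struct compaction_group_sketch {
    seastar::rwlock repair_lock;

    seastar::future<> stop() {
        // Taking the write side waits until no repair fiber holds the lock,
        // so the rwlock is never destroyed out from under a lock holder.
        auto holder = co_await repair_lock.hold_write_lock();
    }
};
```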
The issue can be reproduced consistently using a sanitizer build and cannot
be reproduced after the fix.
Fixes #27365
Closes scylladb/scylladb#28823
* github.com:scylladb/scylladb:
repair: Fix rwlock in compaction_state and lock holder lifecycle
repair: Prevent repair lock holder leakage after table drop
(cherry picked from commit 509f2af8db)
Closes scylladb/scylladb#28934
Some assertions in the Raft-based topology are likely to cause crashes of
multiple nodes due to the consistent nature of the Raft-based code. If the
failing assertion is executed in the code run by each follower (e.g., the code
reloading the in-memory topology state machine), then all nodes can crash. If
the failing assertion is executed only by the leader (e.g., the topology
coordinator fiber), then multiple consecutive group0 leaders will chain-crash
until there is no group0 majority.
Crashing multiple nodes is much more severe than necessary. It's enough to
prevent the topology state machine from making more progress. This will
naturally happen after throwing a runtime error. The problematic fiber will be
killed or will keep failing in a loop. Note that it should be safe to block
the topology state machine, but not the whole group0, as the topology state
machine is mostly isolated from the rest of group0.
We replace some occurrences of `on_fatal_internal_error` and `SCYLLA_ASSERT`
with `on_internal_error`. These are not all occurrences, as some fatal
assertions make sense, for example, in the bootstrap procedure.
We also raise an internal error to prevent a segmentation fault in a few places.
Fixes #27987
Backporting this PR is not required, but we can consider it at least for 2026.1
because:
- it is LTS,
- the changes are low-risk,
- there shouldn't be many conflicts.
- (cherry picked from commit e21ecf69de)
- (cherry picked from commit 8e9c7397c5)
Parent PR: #28558
Manually cherry-picked 2a3476094e.
Closes scylladb/scylladb#28735
* github.com:scylladb/scylladb:
storage_service: raft_topology_cmd_handler: fix use-after-free
raft topology: prevent accessing nullptr returned by topology::find
raft topology: make some assertions non-crashing
The `isclose` function checks whether the returned similarity floats are close enough to the expected value, but it doesn't `assert` by itself.
Several tests missed that `assert`, so they effectively always passed.
With this patch, similarity value checks are wrapped in a helper function `assert_similarity` with a predefined tolerance.
Fixes https://scylladb.atlassian.net/browse/SCYLLADB-877
Closes scylladb/scylladb#28748
(cherry picked from commit 4c4673e8f9)
Closes scylladb/scylladb#28907
This commit updates the documentation for the unified installer.
- The Open Source example is replaced with version 2025.1 (Source Available, currently supported, LTS).
- The info about CentOS 7 is removed (no longer supported).
- Java 8 is removed.
- The example for cassandra-stress is removed (as it was already removed on other installation pages).
Fixes https://github.com/scylladb/scylladb/issues/28150
Closes scylladb/scylladb#28152
(cherry picked from commit 855c503c63)
Closes scylladb/scylladb#28910
Two calls in test_client_routes_upgrade were missing `await`,
so they were never actually executed. This caused Python
to emit RuntimeWarning about unawaited coroutines, and more
importantly, the test skipped important verification steps, which
could mask real bugs or cause flakiness.
Additionally, increase 10s timeouts to 60s to avoid flakiness in slow
environments. Although these tests haven't failed so far, similar
issues have already been observed in other tests with too-short
timeouts.
Fixes: [SCYLLADB-909](https://scylladb.atlassian.net/browse/SCYLLADB-909)
Backport to 2026.1, as the test is also there.
[SCYLLADB-909]: https://scylladb.atlassian.net/browse/SCYLLADB-909?atlOrigin=eyJpIjoiNWRkNTljNzYxNjVmNDY3MDlhMDU5Y2ZhYzA5YTRkZjUiLCJwIjoiZ2l0aHViLWNvbS1KU1cifQ
Closes scylladb/scylladb#28877
* github.com:scylladb/scylladb:
test: increase timeouts in test_client_routes.py
test: add missing awaits in test_client_routes_upgrade
(cherry picked from commit 9697b6013f)
Closes scylladb/scylladb#28896
8e9c7397c5 made `rs` a reference, which can
lead to use-after-free. The `normal_nodes` map containing the referenced
value can be destroyed before the last use of `rs` when the topology state
is reloaded after a context switch on some `co_await`. The following move
assignment in `storage_service::topology_state_load` causes this:
```
_topology_state_machine._topology = co_await _sys_ks.local().load_topology_state(tablet_hosts);
```
This issue has been discovered in next-2026.1 CI after queueing the
backport of #28558. `test_truncate_during_topology_change` failed after
ASan reported a heap-use-after-free in
```
co_await _repair.local().bootstrap_with_repair(get_token_metadata_ptr(), rs.ring.value().tokens, session);
```
This test enables `delay_bootstrap_120s`, which makes the bug much more
likely to reproduce, but it could happen elsewhere.
No backport needed, as the only backport of #28558 hasn't been merged yet.
The backport PR will cherry-pick this commit.
Closes scylladb/scylladb#28772
(cherry picked from commit 2a3476094e)
For 2025.3 and 2025.4 this test runs an order of magnitude
slower in debug mode, potentially due to passwords::check
running in an alien thread and overwhelming the CPU (this is
fixed in newer versions).
Decreasing the number of connections in the test makes it fast
again, without breaking reproducibility.
As an additional measure, we double the timeout.
The fix is now cherry-picked to master as the test sometimes
fails there too.
(cherry picked from commit 1f1fc2c2ac)
Fixes https://scylladb.atlassian.net/browse/SCYLLADB-795
backport: 2026.1, already on other stable branches
Closes scylladb/scylladb#28848
* github.com:scylladb/scylladb:
test: add more logs to test_startup_no_auth_response
test: decrease strain in test_startup_response
(cherry picked from commit f156bcddab)
Closes scylladb/scylladb#28878
The default 100ms timeout for client readiness in tests is too
aggressive. In some test environments, this is not enough time for
client creation, which involves address resolution and TLS certificate
reading, leading to flaky tests.
This commit increases the default client creation timeout to 10 seconds.
This makes the tests more robust, especially in slower execution
environments, and prevents similar flakiness in other test cases.
Fixes: VECTOR-547, SCYLLADB-802, SCYLLADB-825, SCYLLADB-826
Backport to 2025.4 and 2026.1, as the same problem occurs on these branches and can potentially make the CI flaky there as well.
Closes scylladb/scylladb#28846
* github.com:scylladb/scylladb:
vector_search: test: include ANN error in assertion
vector_search: test: fix HTTPS client test flakiness
(cherry picked from commit 2fb981413a)
Closes scylladb/scylladb#28879
Recently we suffered a regression in how Alternator TTL behaves when a node goes down while tablets are used.
Usually, expiration of data in a particular tablet is handled by this tablet's "primary replica". However, if that node is down, we want another node to perform these expirations until the primary replica goes back online. We created a function `tablet_map::get_secondary_replica()` to select that "other node". We don't care too much what the "secondary replica" means, but we do care that it's different from the primary replica - if it's the same, the expiration of that tablet will never be done.
It turns out that recently, in commits 817fdad and d88036d, the implementation of get_primary_replica() changed without a corresponding change to get_secondary_replica(). After those changes, the two functions are mismatched and sometimes return the same node for both primary and secondary replica.
Unfortunately, although we had a dtest for the handling of a dead node in Alternator TTL, it failed to reproduce this bug, so this regression was missed - nothing else besides Alternator TTL ever used the get_secondary_replica() function.
So in this series, in addition to fixing the bug, we add two tests that reproduce this bug (they fail before the fix and pass with it):
1. A unit test that checks that get_secondary_replica() always returns a different node from get_primary_replica()
2. A cluster test based on the original dtest, which does reproduce this bug in Alternator TTL where some of the data was never expired (but only failed in release build, for an unknown reason).
Fixes SCYLLADB-777.
- (cherry picked from commit 9ab3d5b946)
- (cherry picked from commit 0c7f499750)
- (cherry picked from commit e463d528fe)
Parent PR: #28771
Closes scylladb/scylladb#28803
* github.com:scylladb/scylladb:
test: add unit test for tablet_map::get_secondary_replica()
test, alternator: add test for TTL expiration with a node down
locator: fix get_secondary_replica() to match get_primary_replica()
3f7ee3ce5d introduced system.batchlog_v2, with a schema designed to speed up batchlog replays and make post-replay cleanups much more effective.
It did not introduce a cluster feature for the new table, because it is a node-local table, so the cluster can switch to the new table gradually, one node at a time.
However, https://github.com/scylladb/scylladb/issues/27886 showed that the switching causes timeouts during upgrades, in mixed clusters. Furthermore, switching to the new table unconditionally on upgraded nodes means that on rollback, the batches saved into the v2 table are lost.
This PR re-introduces v1 (`system.batchlog`) support and guards the use of the v2 table with a cluster feature, so mixed clusters keep using v1 and thus remain rollback-compatible.
The re-introduced v1 support doesn't support post-replay cleanups for simplicity. The cleanup in v1 was never particularly effective anyway and we ended up disabling it for heavy batchlog users, so I don't think the lack of support for cleanup is a problem.
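A minimal sketch of the gating described above; only the `batchlog_v2` feature name comes from this series, the surrounding calls are assumptions:
```
// Illustrative write-path gate in the batchlog manager:
if (_feature_service.batchlog_v2) {
    co_await write_to_batchlog_v2(mutations);   // whole cluster has upgraded
} else {
    co_await write_to_batchlog_v1(mutations);   // mixed cluster: keep system.batchlog,
                                                // so rollback does not lose batches
}
```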
Fixes: https://github.com/scylladb/scylladb/issues/27886
Needs backport to 2026.1, to fix upgrades for clusters using batches
Closes scylladb/scylladb#28736
* github.com:scylladb/scylladb:
test/boost/batchlog_manager_test: add tests for v1 batchlog
test/boost/batchlog_manager_test: make prepare_batches() work with both v1 and v2
test/boost/batchlog_manager_test: fix indentation
test/boost/batchlog_manager_test: extract prepare_batches() method
test/lib/cql_assertions: is_rows(): add dump parameter
tools/scylla-sstable: extract query result printers
tools/scylla-sstable: add std::ostream& arg to query result printers
repair/row_level: repair_flush_hints_batchlog_handler(): add all_replayed to finish log
db/batchlog_manager: re-add v1 support
db/batchlog_manager: return all_replayed from process_batch()
db/batchlog_manager: process_bath() fix indentation
db/batchlog_manager: make batch() a standalone function
db/batchlog_manager: make structs stats public
db/batchlog_manager: allocate limiter on the stack
db/batchlog_manager: add feature_service dependency
gms/feature_service: add batchlog_v2 feature
(cherry picked from commit a83ee6cf66)
Closes scylladb/scylladb#28853
This commit adds the upgrade guide for version 2026.1.
According to the new upgrade policy, the user can now upgrade to the major version (2026.1)
from any previous minor version.
So instead of adding a separate guide from 2025.4 to 2026.1, we need a guide from 2025.x to 2026.1.
In addition, this commit:
- Updates the upgrade policy to reflect the above change.
- Removes the upgrade guides for the previous version.
Fixes https://github.com/scylladb/scylladb/issues/28533
Fixes https://github.com/scylladb/scylladb/issues/28532
Closes scylladb/scylladb#28789
(cherry picked from commit dfd46ad3fb)
Closes scylladb/scylladb#28835
The scylladb/scylla container image doesn't include the systemctl binary, even though it
is used by the perftune.py script shipped within the same image.
Scylla Operator runs this script to tune Scylla nodes/containers,
expecting all of its dependencies to be available in the container's PATH.
Without systemctl, the script fails on systems that run irqbalance
(e.g., on EKS nodes) as the script tries to reconfigure irqbalance and
restart it via systemctl afterwards.
Fixes: scylladb/scylla-operator#3080
Closes scylladb/scylladb#28567
(cherry picked from commit b4f0eb666f)
Closes scylladb/scylladb#28845
The test is currently flaky with `reuse_ip = True`. The issue is that the
test retries replace before the first replace is rolled back and the
first replacing node is removed from gossip. The second replacing node
can see the entry of the first replacing node in gossip. This entry has
a newer generation than the entry of the node being replaced, and both
replacing nodes have the same IP as the node being replaced. Therefore,
the second replacing node incorrectly considers this entry as the entry
of the node being replaced. This entry is missing rack and DC, so the
second replace fails with
```
ERROR 2026-02-24 21:19:03,420 [shard 0:main] init - Startup failed:
std::runtime_error (Cannot replace node
8762a9d2-3b30-4e66-83a1-98d16c5dd007/127.61.127.1 with a node on
a different data center or rack.
Current location=UNKNOWN_DC/UNKNOWN_RACK, new location=dc1/rack2)
```
Fixes SCYLLADB-805
Closes scylladb/scylladb#28829
(cherry picked from commit ba7f314cdc)
Closes scylladb/scylladb#28850
In nonroot installations, the install.sh script was hardcoding the
api_ui_dir and api_doc_dir paths to /opt/scylladb/ in scylla.yaml,
even though the actual files were installed to a different location
(typically ~/scylladb). This caused REST API endpoints like
/api-doc/failure_detector/ to fail with "transfer closed with
outstanding read data remaining" error because Scylla couldn't find
the API documentation files at the configured paths.
Fix this by using the $prefix variable instead of hardcoded
/opt/scylladb/ paths. This ensures that:
- In regular installations: $prefix = /opt/scylladb (no change)
- In nonroot installations: $prefix = ~/scylladb (paths now correct)
Fixes: SCYLLADB-721
Backport: The hardcoded paths in install.sh have been present since
the nonroot installation feature was introduced, making REST API
endpoints non-functional in all nonroot installations across all
live versions of Scylla.
Closes scylladb/scylladb#28805
(cherry picked from commit 822c1597c9)
Closes scylladb/scylladb#28836
Remove bootstrap and decommission from allowed_repair_based_node_ops.
Using RBNO over streaming for these operations has no benefits, as they
are not exposed to the out-of-date replica problem that replace,
removenode and rebuild are.
On top of that, RBNO is known to have problems with empty user tables.
Using streaming for bootstrap and decommission is safe and faster
than RBNO in all conditions, especially when the table is small.
One test needs adjustment as it relies on RBNO being used for all node
ops.
Fixes: SCYLLADB-105
Closes scylladb/scylladb#28080
(cherry picked from commit b637e17b19)
Closes scylladb/scylladb#28725
The futurization refactoring in 9d3755f276 ("replica: Futurize
retrieval of sstable sets in compaction_group_view") changed
maybe_wait_for_sstable_count_reduction() from a single predicated
wait:
```
co_await cstate.compaction_done.wait([..] {
return num_runs_for_compaction() <= threshold
|| !can_perform_regular_compaction(t);
});
```
to a while loop with a predicated wait:
```
while (can_perform_regular_compaction(t)
&& co_await num_runs_for_compaction() > threshold) {
co_await cstate.compaction_done.wait([this, &t] {
return !can_perform_regular_compaction(t);
});
}
```
This was necessary because num_runs_for_compaction() became a
coroutine (returns future<size_t>) and can no longer be called
inside a condition_variable predicate (which must be synchronous).
However, the inner wait's predicate — !can_perform_regular_compaction(t)
— only returns true when compaction is disabled or the table is being
removed. During normal operation, every signal() from compaction_done
wakes the waiter, the predicate returns false, and the waiter
immediately goes back to sleep without ever re-checking the outer
while loop's num_runs_for_compaction() condition.
This causes memtable flushes to hang forever in
maybe_wait_for_sstable_count_reduction() whenever the sstable run
count exceeds the threshold, because completed compactions signal
compaction_done but the signal is swallowed by the predicate.
Fix by replacing the predicated wait with a bare wait(), so that
any signal (including from completed compactions) causes the outer
while loop to re-evaluate num_runs_for_compaction().
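Under that description, the fixed loop looks roughly like this (same names as the snippets above; the exact code may differ):
```
while (can_perform_regular_compaction(t)
        && co_await num_runs_for_compaction() > threshold) {
    // Bare wait: any signal on compaction_done (including one from a
    // completed compaction) wakes the waiter, and the outer loop then
    // re-evaluates num_runs_for_compaction() and the stop condition.
    co_await cstate.compaction_done.wait();
}
```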
Fixes: https://scylladb.atlassian.net/browse/SCYLLADB-610
Closes scylladb/scylladb#28801
(cherry picked from commit bb57b0f3b7)
This workflow calls the reusable backport-with-jira workflow from
scylladb/github-automation to enable automatic backport PR creation with
Jira sub-issue integration.
The workflow triggers on:
- Push to master/next-*/branch-* branches (for promotion events)
- PR labeled with backport/X.X pattern (for manual backport requests)
- PR closed/merged on version branches (for chain backport processing)
Features enabled by calling the shared workflow:
- Creates Jira sub-issues under the main issue for each backport version
- Sorts versions descending (highest first: 2025.4 -> 2025.3 -> 2025.2)
- Cherry-picks from previous version branch to avoid repeated conflicts
- On Jira API failure: adds comment to main issue, applies 'jira-sub-issue-creation-failed' label, continues with PR
Closes scylladb/scylladb#28804
(cherry picked from commit b211590bc0)
Closes scylladb/scylladb#28812
This commit removes the information that Alternator doesn't support tablets.
The limitation is no longer valid.
Fixes SCYLLADB-778
Closes scylladb/scylladb#28781
(cherry picked from commit e2333a57ad)
Closes scylladb/scylladb#28795
`test_autoretrain_dict` sporadically fails because the default
compression algorithm was changed after the test was written.
`9ffa62a986815709d0a09c705d2d0caf64776249` was an attempt to fix it by
changing the compression configuration during node startup. However,
the configuration change had an incorrect YAML format and was
ignored by ScyllaDB. This commit fixes it.
Fixes: scylladb/scylladb#28204
Closes scylladb/scylladb#28746
(cherry picked from commit cd4caed3d3)
Closes scylladb/scylladb#28794
The ANN vector queries with all-zero vectors are allowed even on vector indexes with similarity function set to cosine.
When enabling the rescoring option, those queries would fail as the rescoring calls the `similarity_cosine` function underneath, causing an `InvalidRequest` exception, since all-zero vectors were not allowed, matching Cassandra's behaviour.
To eliminate the discrepancy we want the all-zero vector `similarity_cosine` calls to pass, but return the NaN as the cosine similarity for zero vectors is mathematically incorrect. We decided not to use arbitrary values contrary to USearch, for which the distance (not to be confused with similarity) is defined as cos(0, 0) = 0, cos(0, x) = 1 while supporting the range of values [0, 2].
If we wanted to convert that to similarity, that would mean sim_cos(0, x) = 0.5, which has no mathematical justification for being considered more similar than, for example, vectors at obtuse angles.
It's safe to assume that all-zero vectors for cosine similarity shouldn't have any impact, therefore we return NaN and eliminate them from the best results.
Adjusted the tests accordingly to check both Cassandra's and Scylla's behaviour.
Fixes: SCYLLADB-456
Backport to 2026.1 needed, as it fixes the bug for ANN vector queries using rescoring introduced there.
- (cherry picked from commit af0889d194)
- (cherry picked from commit 4e32502bb3)
Parent PR: #28609
Closes scylladb/scylladb#28775
* github.com:scylladb/scylladb:
test/vector_search: add reproducer for rescoring with zero vectors
vector_search: return NaN for similarity_cosine with all-zero vectors
This patch adds a unit test for tablet_map::get_secondary_replica().
It was never officially defined how the "primary" and "secondary"
replicas were chosen, and their implementation changed over time,
but the one invariant that this test verifies is that the secondary
replica and the primary replica must be a different node.
This test reproduces issue SCYLLADB-777, where we discovered that
get_primary_replica() changed without a corresponding change to
get_secondary_replica(). So before the previous patch, this test failed,
and after the previous patch - it passes.
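The invariant being checked can be sketched like this; the accessors and iteration helper are illustrative, not the exact test code:
```
// For every tablet, the secondary replica must be on a different node than
// the primary replica.
for (auto tid : tablet_ids(tmap)) { // hypothetical iteration helper
    BOOST_REQUIRE(tmap.get_primary_replica(tid).host !=
                  tmap.get_secondary_replica(tid).host);
}
```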
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
(cherry picked from commit e463d528fe)
We have many single-node functional tests for Alternator TTL in
test/alternator/test_ttl.py. This patch adds a multi-node test in
test/cluster/test_alternator.py. The new test verifies that:
1. Even though Alternator TTL splits the work of scanning and expiring
items between nodes, all the items get correctly expired.
2. When one node is down, all the items still expire because the
"secondary" owner of each token range takes over expiring the
items in this range while the "primary" owner is down.
This new test is actually a port of a test we already had in dtest
(alternator_ttl_tests.py::test_multinode_expiration). This port is
faster and smaller than the original (fewer nodes, fewer rows), but it
still found a regression (SCYLLADB-777) that dtest missed - the new test
failed when running with tablets and in release build mode.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
(cherry picked from commit 0c7f499750)
The function tablet_map::get_secondary_replica() is used by Alternator
TTL to choose a node different from get_primary_replica(). Unfortunately,
recently (commits 817fdad and d88037d) the implementation of the latter
function changed, without changing the former. So this patch changes
the former to match.
The next two patches will have two tests that fail before this patch,
and pass with it:
1. A unit test that checks that get_secondary_replica() returns a
different node than get_primary_replica().
2. An Alternator TTL test that checks that when a node is down,
expirations still happen because the secondary replica takes over
the primary replica's work.
Fixes SCYLLADB-777
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
(cherry picked from commit 9ab3d5b946)
- Correct `calc_part_size` function since it could return more than 10k parts
- Add tests
- Add more checks in `calc_part_size` to comply with S3 limits
Fixes: https://scylladb.atlassian.net/browse/SCYLLADB-640
Must be ported back to 2025.3/4 and 2026.1 since we may encounter this bug in production clusters
- (cherry picked from commit 289e910cec)
- (cherry picked from commit 6280cb91ca)
- (cherry picked from commit 960adbb439)
Parent PR: #28592
Closes scylladb/scylladb#28697
* github.com:scylladb/scylladb:
s3_client: add more constrains to the calc_part_size
s3_client: add tests for calc_part_size
s3_client: correct multipart part-size logic to respect 10k limit
Add a reproducer for the SCYLLADB-456 issue: an exception
on ANN vector queries with rescoring and cosine similarity.
(cherry picked from commit 4e32502bb3)
The ANN vector queries with all-zero vectors are allowed even on vector
indexes with similarity function set to cosine.
When enabling the rescoring option, those queries would fail as the rescoring
calls the `similarity_cosine` function underneath, causing an `InvalidRequest` exception,
since all-zero vectors were not allowed, matching Cassandra's behaviour.
To eliminate the discrepancy we want the all-zero vector `similarity_cosine` calls to pass,
but return the NaN as the cosine similarity for zero vectors is mathematically incorrect.
We decided not to use arbitrary values contrary to USearch, for which the distance
(not to be confused with similarity) is defined as cos(0, 0) = 0, cos(0, x) = 1 while
supporting the range of values [0, 2].
If we wanted to convert that to similarity, that would mean sim_cos(0, x) = 0.5,
which has no mathematical justification for being considered more similar than,
for example, vectors at obtuse angles.
It's safe to assume that all-zero vectors for cosine similarity shouldn't have any impact,
therefore we return NaN and eliminate them from the best results.
Adjusted the tests accordingly to check both Cassandra's and Scylla's behaviour.
Fixes: SCYLLADB-456
(cherry picked from commit af0889d194)
In storage_service::load_stats_for_tablet_based_tables(), we are passing
a reference to sum_tablet_sizes to the lambda which increments this value
on each shard via map_reduce0(). This means we could have a race
condition because this is executed on separate threads/CPUs.
This patch fixes the problem by collecting the per-shard sums into a
vector, then summing those up.
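A hedged sketch of the race-free pattern described; `local_tablet_size_sum()` is a hypothetical per-shard helper and the real fix may be structured differently:
```
// One slot per shard: each shard writes only its own entry, so there is no
// shared mutable accumulator to race on.
std::vector<uint64_t> per_shard(seastar::smp::count, 0);
co_await seastar::smp::invoke_on_all([&per_shard] {
    per_shard[seastar::this_shard_id()] = local_tablet_size_sum(); // hypothetical
    return seastar::make_ready_future<>();
});
uint64_t sum_tablet_sizes = std::accumulate(per_shard.begin(), per_shard.end(), uint64_t(0));
```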
Refs: SCYLLADB-678
Closes scylladb/scylladb#28703
(cherry picked from commit f1bc17bd4c)
Closes scylladb/scylladb#28729
Currently, the test assumes that when
'topology_coordinator_pause_before_processing_backlog: waiting' is
logged, the task for decommission must be there. This was based on the
assumption that topology coordinator is idle and decommission request
wakes it up. But if the server is slow enough, it may still be running
the load balancer in reaction to table creation, and block on that
injection point before decommission request was added.
Fix by waiting for the task to appear rather than the injection.
Fixes SCYLLADB-715
Only 2026.1 is affected.
- (cherry picked from commit e14eca46af)
- (cherry picked from commit 2454de4f8f)
- (cherry picked from commit d33d38139f)
Parent PR: #28688
Closes scylladb/scylladb#28750
* github.com:scylladb/scylladb:
test_tablets_parallel_decommission: Fix flakiness due to delayed task appearance
test: cluster: task_manager_client: Introduce wait_task_appears()
tests: pylib: util: Add exponential backoff to wait_for
Currently, the test assumes that when
'topology_coordinator_pause_before_processing_backlog: waiting' is
logged, the task for decommission must be there. This was based on the
assumption that topology coordinator is idle and decommission request
wakes it up. But if the server is slow enough, it may still be running
the load balancer in reaction to table creation, and block on that
injection point before decommission request was added.
Fix by waiting for the task to appear rather than the injection.
Fixes SCYLLADB-715
(cherry picked from commit d33d38139f)
Allows balancing the trade-off between fast execution in case the
condition is satisfied quickly and not adding load when it's not.
(cherry picked from commit e14eca46af)
Some assertions in the Raft-based topology are likely to cause crashes of
multiple nodes due to the consistent nature of the Raft-based code. If the
failing assertion is executed in the code run by each follower (e.g., the code
reloading the in-memory topology state machine), then all nodes can crash. If
the failing assertion is executed only by the leader (e.g., the topology
coordinator fiber), then multiple consecutive group0 leaders will chain-crash
until there is no group0 majority.
Crashing multiple nodes is much more severe than necessary. It's enough to
prevent the topology state machine from making more progress. This will
naturally happen after throwing a runtime error. The problematic fiber will be
killed or will keep failing in a loop. Note that it should be safe to block
the topology state machine, but not the whole group0, as the topology state
machine is mostly isolated from the rest of group0.
We replace some occurrences of `on_fatal_internal_error` and `SCYLLA_ASSERT`
with `on_internal_error`. These are not all occurrences, as some fatal
assertions make sense, for example, in the bootstrap procedure.
(cherry picked from commit e21ecf69de)
Improves performance of deserialization of vector data for calculating similarity functions.
Instead of deserializing vector data into a std::vector<data_value>, we deserialize directly into a std::vector<float>
and then pass it to similarity functions as a std::span<const float>.
This avoids overhead of data_value allocations and conversions.
Example QPS of `SELECT id, similarity_cosine({vector<float, 1536>}, {vector<float, 1536>}) ...`:
client concurrency 1: before: ~135 QPS, after: ~1005 QPS
client concurrency 20: before: ~280 QPS, after: ~2097 QPS
Measured using https://github.com/zilliztech/VectorDBBench (modified to call above query without ANN search)
Fixes https://scylladb.atlassian.net/browse/SCYLLADB-471
Closes scylladb/scylladb#28615
(cherry picked from commit 668d6fe019)
Closes scylladb/scylladb#28690
There is no point running repair for tables using RF=1. Row level
repair will skip it, but the auto repair scheduler will keep scheduling
such repairs since repair_time cannot be updated.
Skip such repairs at the scheduler level for auto repair.
If the request is issued by a user, we still have to schedule such a
repair; otherwise the user request would never finish.
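A hedged sketch of the scheduler-side check; the names are illustrative, not the actual scheduler code:
```
// In the auto-repair scheduling loop: skip tables whose replication factor
// is 1, since row-level repair would skip them anyway and their repair_time
// would never advance, causing endless rescheduling.
if (request.is_auto_repair() && effective_replication_factor(table) == 1) {
    continue; // user-issued requests are still scheduled so they can complete
}
```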
Fixes SCYLLADB-561
Closes scylladb/scylladb#28640
(cherry picked from commit 1be80c9e86)
Closes scylladb/scylladb#28714
The connection's `cpu_concurrency_t` struct tracks the state of a connection
to manage the admission of new requests and prevent CPU overload during
connection storms. When a connection holds units (allowed only 0 or 1), it is
considered to be in the "CPU state" and contributes to the concurrency limits
used when accepting new connections.
The bug stems from the fact that `counted_data_source_impl::get` and
`counted_data_sink_impl::put` calls can interleave during execution. This
occurs because of `should_parallelize` and `_ready_to_respond`, the latter being
a future chain that can run in the background while requests are being read.
Consequently, while reading request (N), the system may concurrently be
writing the response for request (N-1) on the same connection.
This interleaving allows `return_all()` to be called twice before the
subsequent `consume_units()` is invoked. While the second `return_all()` call
correctly returns 0 units, the matching `consume_units()` call would
mistakenly take an extra unit from the semaphore. Over time, a connection
blocked on a read operation could end up holding an unreturned semaphore
unit. If this pattern repeats across multiple connections, the semaphore
units are eventually depleted, preventing the server from accepting any
new connections.
The fix ensures that we always consume the exact number of units that were
previously returned. With this change, interleaved operations behave as
follows:
get() return_all — returns 1 unit
put() return_all — returns 0 units
get() consume_units — takes back 1 unit
put() consume_units — takes back 0 units
Logically, the networking phase ends when the first network operation
concludes. But more importantly, when a network operation
starts, we no longer hold any units.
Other solutions are possible but the chosen one seems to be the
simplest and safest to backport.
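A hedged sketch of the accounting rule after the fix; the struct and members are simplified assumptions built around the names in this message, not the real `cpu_concurrency_t`:
```
#include <seastar/core/semaphore.hh>
#include <seastar/core/future.hh>

struct cpu_concurrency_sketch {
    seastar::semaphore& sem;
    size_t held = 1;                 // a connection holds 0 or 1 unit

    // Release whatever is held and report how much that was; a second
    // interleaved call correctly reports 0.
    size_t return_all() {
        size_t returned = held;
        sem.signal(returned);
        held = 0;
        return returned;
    }

    // Take back exactly what the matching return_all() released, never more.
    seastar::future<> consume_units(size_t previously_returned) {
        return sem.wait(previously_returned).then([this, previously_returned] {
            held = previously_returned;
        });
    }
};
```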
Fixes SCYLLADB-485
Backport: all supported affected versions, bug introduced with initial feature implementation in: ed3e4f33fd
- (cherry picked from commit 0376d16ad3)
- (cherry picked from commit 3b98451776)
Parent PR: #28530
Closes scylladb/scylladb#28716
* github.com:scylladb/scylladb:
test: auth_cluster: add test for hanged AUTHENTICATING connections
transport: fix connection code to consume only initially taken semaphore units
The test creates a single node cluster, then creates 3 tables which remain empty. Then it adds another node with half the disk capacity of the first one, and then it waits for the balancer to migrate tablets to the newly added node by calling the quiesce topology API. The number of tablets on the smaller node should be exactly half the number of tablets on the larger node.
After waiting for quiesce topology, we could have a situation where we query the number of tablets from the node which still hasn't processed the last tablet migrations and updated system.tablets.
This patch adds a read barrier so that both nodes see the same tablets metadata before we query the number of tablets.
Fixes: SCYLLADB-603
The test is present in master and 2026.1, so we need to backport this.
- (cherry picked from commit 4ca40929ef)
Parent PR: #28598
Closes scylladb/scylladb#28638
* github.com:scylladb/scylladb:
test/cluster: Remove short_tablet_stats_refresh_interval injection
test: add read barrier to test_balance_empty_tablets
The connection's cpu_concurrency_t struct tracks the state of a connection
to manage the admission of new requests and prevent CPU overload during
connection storms. When a connection holds units (allowed only 0 or 1), it is
considered to be in the "CPU state" and contributes to the concurrency limits
used when accepting new connections.
The bug stems from the fact that `counted_data_source_impl::get` and
`counted_data_sink_impl::put` calls can interleave during execution. This
occurs because of `should_parallelize` and `_ready_to_respond`, the latter being
a future chain that can run in the background while requests are being read.
Consequently, while reading request (N), the system may concurrently be
writing the response for request (N-1) on the same connection.
This interleaving allows `return_all()` to be called twice before the
subsequent `consume_units()` is invoked. While the second `return_all()` call
correctly returns 0 units, the matching `consume_units()` call would
mistakenly take an extra unit from the semaphore. Over time, a connection
blocked on a read operation could end up holding an unreturned semaphore
unit. If this pattern repeats across multiple connections, the semaphore
units are eventually depleted, preventing the server from accepting any
new connections.
The fix ensures that we always consume the exact number of units that were
previously returned. With this change, interleaved operations behave as
follows:
get() return_all — returns 1 unit
put() return_all — returns 0 units
get() consume_units — takes back 1 unit
put() consume_units — takes back 0 units
Logically, the networking phase ends when the first network operation
concludes. But more importantly, when a network operation
starts, we no longer hold any units.
Other solutions are possible but the chosen one seems to be the
simplest and safest to backport.
Fixes SCYLLADB-485
(cherry picked from commit 0376d16ad3)
Paxos state tables are internal tables fully managed by Scylla;
they shouldn't be exposed to the user, nor should they be backed up.
This commit hides such tables from all listings, and if such a table
is directly described with `DESC ks."tbl$paxos"`, the description is generated
within a comment and a note for the user is added.
Fixes https://github.com/scylladb/scylladb/issues/28183
LWT on tablets and paxos state tables are present in 2025.4, so the patch should be backported to this version.
- (cherry picked from commit f89a8c4ec4)
- (cherry picked from commit 9baaddb613)
Parent PR: #28230
Closes scylladb/scylladb#28508
* github.com:scylladb/scylladb:
test/cqlpy: add reproducer for hidden Paxos table being shown by DESC
cql3/statements/describe_statement: hide paxos state tables
Hints destined for some other node can only be drained after the other node is no longer a replica of any vnode or tablet. In case when tablets are present, a node might still technically be a replica of some tablets after it moved to left state. When it no longer is a replica of any tablet, it becomes "released" and storage service generates a notification about it. Hinted handoff listens to this notification and kicks off draining hints after getting it.
The current implementation of the "released" notification would trigger every time raft topology state is reloaded and a left node without any tokens is present in the raft topology. Although draining hints is idempotent, generating duplicate notifications is wasteful and recently became very noisy after the verbosity of the draining-related log messages was increased in 44de563. The verbosity increase itself makes sense as draining is supposed to be a rare operation, but the duplicate notification bug now needs to be addressed.
Fix the duplicate notification problem by passing the list of previously released nodes to the `storage_service::raft_topology_update_ip` function and filtering based on it. If this function processes the topology state for the first time, it will not produce any notifications. This is fine as hinted handoff is prepared to detect "released" nodes during the startup sequence in main.cc and start draining the hints there, if needed.
Fixes: scylladb/scylladb#28301
Refs: scylladb/scylladb#25031
The log messages added in 44de563 cause a lot of noise during topology operations and tablet migrations, so the fix should be backported to all affected versions (2025.4 and 2026.1).
- (cherry picked from commit 10e9672852)
- (cherry picked from commit d28c841fa9)
- (cherry picked from commit 29da20744a)
Parent PR: #28367
Closes scylladb/scylladb#28612
* github.com:scylladb/scylladb:
storage_service: fix indentation after previous patch
raft topology: generate notification about released nodes only once
raft topology: extract "released" nodes calculation to external function
test_remove_node_violating_rf_rack_with_rack_list creates a cluster
with four nodes. One of the nodes is excluded, then another one is
stopped, excluded, and removed. If the two stopped nodes were both
voters, the majority is lost and the cluster loses its raft leader.
As a result, the node cannot be removed and the operation times out.
Add the 5th node to the cluster. This way the majority is always up.
Fixes: https://github.com/scylladb/scylladb/issues/28596.
Closes scylladb/scylladb#28610
(cherry picked from commit f955a90309)
Closes scylladb/scylladb#28639
Fixes #28398
Fixes #28399
When used as path elements in google storage paths, the object names need to be URL encoded. Due to
a.) tests not really using prefixes including non-URL-valid chars (e.g. / etc)
and
b.) the mock server used for most testing not enforcing this particular aspect,
this was missed.
Modified unit tests to use prefixing for all names, so when running against real GS, any errors like this will show.
"Real" GCS also behaves a bit differently when listing with a pager, compared to the mock;
the former will not give a pager token for the last page, only the penultimate one.
Adds handling for this.
Needs backport to the releases that have (though might not really use) the feature, as it is technically possible to use google storage for backup and whatnot there, and it should work as expected.
- (cherry picked from commit a896d8d5e3)
- (cherry picked from commit 87aa6c8387)
Parent PR: #28400
Closes scylladb/scylladb#28685
* github.com:scylladb/scylladb:
utils/gcp/object_storage: URL-encode object names in URL:s
utils::gcp::object_storage: Fix list object pager end condition detection
Fixes #28678
If the replenish loop exits the sleep condition with an empty queue
when `_shutdown` is already set, a waiter might get stuck, unsignalled,
waiting for segments, even though we are exiting.
Simply move the queue abort so it is always done on loop exit.
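An illustrative loop shape for the change (the real replenisher and helper differ); the point is that the abort now runs unconditionally on exit:
```
// A waiter blocked on the queue is always woken up on shutdown.
while (!_shutdown) {
    co_await replenish_some_segments(); // hypothetical helper
}
_queue.abort(std::make_exception_ptr(seastar::abort_requested_exception{}));
```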
Closes scylladb/scylladb#28679
(cherry picked from commit ab4e4a8ac7)
Closes scylladb/scylladb#28693
Introduce tests that validate the corrected multipart part-size
calculation, including boundary conditions and error cases.
(cherry picked from commit 6280cb91ca)
The previous calculation could produce more than 10,000 parts for large
uploads because we mixed values in bytes and MiB when determining the
part size. This could result in selecting a part size that still
exceeded the AWS multipart upload limit. The updated logic now ensures
the number of parts never exceeds the allowed maximum.
This change also aligns the implementation with the code comment: we
prefer a 50 MiB part size because it provides the best performance, and
we use it whenever it fits within the 10,000-part limit. If it does not,
we increase the part size (in bytes, aligned to MiB) to stay within the
limit.
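A self-contained sketch of the described logic; the 50 MiB preference and the 10,000-part cap come from this message, while the function name and exact rounding are assumptions:
```
#include <cstdint>

constexpr uint64_t MiB = 1024 * 1024;
constexpr uint64_t preferred_part_size = 50 * MiB; // best-performing size
constexpr uint64_t max_parts = 10'000;             // S3 multipart limit

uint64_t calc_part_size_sketch(uint64_t object_size) {
    // Prefer 50 MiB whenever it keeps the upload within the part limit.
    if (object_size <= preferred_part_size * max_parts) {
        return preferred_part_size;
    }
    // Otherwise grow the part size, computed in bytes and rounded up to a
    // whole MiB, so ceil(object_size / part_size) never exceeds max_parts.
    uint64_t minimal = (object_size + max_parts - 1) / max_parts;
    return (minimal + MiB - 1) / MiB * MiB;
}
```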
(cherry picked from commit 289e910cec)
Fixes #28398
When used as path elements in google storage paths, the object names
need to be URL encoded. Due to a.) tests not really using prefixes including
non-url valid chars (i.e. / etc) and the mock server used for most
testing not enforcing this particular aspect, this was missed.
Modified unit tests to use prefixing for all names, so when run
in real GS, any errors like this will show.
(cherry picked from commit 87aa6c8387)
Fixes #28399
When iterating with a pager, the mock server and real GCS behave differently.
The latter will not give a pager token for the last page, only the penultimate one.
This needs to be handled.
(cherry picked from commit a896d8d5e3)
Most likely, the root cause of the flaky test was that the TLS handshake hung for an extended period (60s). This caused
the test case to fail because the ANN request duration exceeded the test case timeout.
The PR introduces two changes:
* Mitigation of the hanging TLS handshake: This issue likely occurred because the test performed certificate rewrites
simultaneously with ANN requests that utilize those certificates.
* Production code fix: This addresses a bug where the TLS handshake itself was not covered by the connection timeout.
Since tls::connect does not perform the handshake immediately, the handshake only occurs during the first write
operation, potentially bypassing connect timeout.
Fixes: #28012
Backport to 2026.01 and 2025.04 is needed, as these branches are also affected and may experience CI flakiness due to this test.
- (cherry picked from commit aef5ff7491)
- (cherry picked from commit 079fe17e8b)
Parent PR: #28617
Closes scylladb/scylladb#28643
* github.com:scylladb/scylladb:
vector_search: Fix missing timeout on TLS handshake
vector_search: test: Fix flaky cert rewrite test
When `test_autoretrain_dict` was originally written, the default
`sstable_compression_user_table_options` was `LZ4Compressor`. The
test assumed (correctly) that initially the compression doesn't use
a trained dictionary, and later in the test scenario, it changed
the algorithm to one with a dictionary.
However, the default `sstable_compression_user_table_options` is now
`LZ4WithDictsCompressor`, so the old assumption is no longer correct.
As a result, the assertion that data is initially not compressed well
may or may not fail depending on dictionary training timing.
To fix this, this commit explicitly sets `ZstdCompressor`
as the initial `sstable_compression_user_table_options`, ensuring that
the assumption that initial compression is without a dictionary
is always met.
Note: `ZstdCompressor` differs from the former default `LZ4Compressor`.
However, it's a better choice — the test aims to show the benefit of
using a dictionary, not the benefit of Zstd over LZ4 (and the test uses
ZstdWithDictsCompressor as the algorithm with the dictionary).
Fixes: https://github.com/scylladb/scylladb/issues/28204
Backport: 2025.4, as test already failed there (and also backport to 2026.1 to make everything consistent).
- (cherry picked from commit e63cfc38b3)
- (cherry picked from commit 9ffa62a986)
Parent PR: #28625
Closes scylladb/scylladb#28667
* https://github.com/scylladb/scylladb:
test: explicitly set compression algorithm in test_autoretrain_dict
test: remove unneeded semicolons from python test
The test can currently fail like this:
```
> await cql.run_async(f"ALTER TABLE {ks}.test WITH tablets = {{'min_tablet_count': 1}}")
E cassandra.cluster.NoHostAvailable: ('Unable to complete the operation against any hosts', {<Host: 127.158.27.9:9042 datacenter1>: <Error from server: code=0000 [Server error] message="Failed to apply group 0 change due to concurrent modification">})
```
The following happens:
- node A is restarted and becomes the group0 leader,
- the driver sends the ALTER TABLE request to node B,
- the request hits group 0 concurrent modification error 10 times and fails
because node A performs tablet migrations at the same time.
What is unexpected is that even though the driver session uses the default
retry policy, the driver doesn't retry the request on node A. The request
is guaranteed to succeed on node A because it's the only node adding group0
entries.
The driver doesn't retry the request on node A because of a missing
`wait_for_cql_and_get_hosts` call. We add it in this commit. We also reconnect
the driver just in case to prevent hitting scylladb/python-driver#295.
Moreover, we can revert the workaround from
4c9efc08d8, as the fix from this commit also
prevents DROP KEYSPACE failures.
The commit has been tested in byo with `_concurrent_ddl_retries{0}` to
verify that node A really can't hit group 0 concurrent modification error
and always receives the ALTER TABLE request from the driver. All 300 runs in
each build mode passed.
Fixes #25938
Closes scylladb/scylladb#28632
(cherry picked from commit 0693091aff)
Closes scylladb/scylladb#28673
The test `test_size_based_load_balancing.py::test_balance_empty_tablets`
waits for tablet load stats to be refreshed and uses the
`short_tablet_stats_refresh_interval` injection to speed up the refresh
interval.
This injection has no effect; it was replaced by the
`tablet_load_stats_refresh_interval_in_seconds` config option (patch: 1d6808aec4),
so the test currently waits for 60 seconds (default refresh interval).
Use the config option. This reduces the execution time to ~8 seconds.
Fixes SCYLLADB-556.
Signed-off-by: Nikos Dragazis <nikolaos.dragazis@scylladb.com>
Closes scylladb/scylladb#28536
(cherry picked from commit 5d1e6243af)
The test creates a single node cluster, then creates 3 tables which
remain empty. Then it adds another node with half the disk capacity of
the first one, and then it waits for the balancer to migrate tablets to
the newly added node by calling the quiesce topology API. The number of
tablets on the smaller node should be exactly half the number of tablets
on the larger node.
After waiting for quiesce topology, we could have a situation where we
query the number of tablets from the node which still hasn't processed
the last tablet migrations and updated system.tablets.
This patch adds a read barrier so that both nodes see the same tablets
metadata before we query the number of tablets.
Fixes: SCYLLADB-603
Closes scylladb/scylladb#28598
(cherry picked from commit 4ca40929ef)
When `test_autoretrain_dict` was originally written, the default
`sstable_compression_user_table_options` was `LZ4Compressor`. The
test assumed (correctly) that initially the compression doesn't use
a trained dictionary, and later in the test scenario, it changed
the algorithm to one with a dictionary.
However, the default `sstable_compression_user_table_options` is now
`LZ4WithDictsCompressor`, so the old assumption is no longer correct.
As a result, the assertion that data is initially not compressed well
may or may not fail depending on dictionary training timing.
To fix this, this commit explicitly sets `ZstdCompressor`
as the initial `sstable_compression_user_table_options`, ensuring that
the assumption that initial compression is without a dictionary
is always met.
Note: `ZstdCompressor` differs from the former default `LZ4Compressor`.
However, it's a better choice — the test aims to show the benefit of
using a dictionary, not the benefit of Zstd over LZ4 (and the test uses
ZstdWithDictsCompressor as the algorithm with the dictionary).
Fixes: scylladb/scylladb#28204
(cherry picked from commit 9ffa62a986)
The test `test_sync_point` had a few shortcomings that made it flaky
or simply wrong:
1. We were verifying that hints were written by checking the size of
in-flight hints. However, that could potentially lead to problems
in rare situations.
For instance, if all of the hints failed to be written to disk, the
size of in-flight hints would drop to zero, but creating a sync point
would correspond to the empty state.
In such a situation, we should fail immediately and indicate what
the cause was.
2. A sync point corresponds to the hints that have already been written
to disk. The number of those is tracked by the metric `written`.
It's a much more reliable way to make sure that hints have been
written to the commitlog. That ensures that the sync point we'll
create will really correspond to those hints.
3. The auxiliary function `wait_for` used in the test works like this:
it executes the passed callback and looks at the result. If it's
`None`, it retries it. Otherwise, the callback is deemed to have
finished its execution and no further retries will be attempted.
Before this commit, we simply returned a bool, and so the code was
wrong. We improve it.
---
Note that this fixes scylladb/scylladb#28203, which was a manifestation
of scylladb/scylladb#25879. We created a sync point that corresponded
to the empty state, and so it immediately resolved, even when node 3
was still dead.
As a bonus, we rewrite the auxiliary code responsible for fetching
metrics and manipulating sync points. Now it's asynchronous and
uses the existing standard mechanisms available to developers.
Furthermore, we reduce the time needed for executing
`test_sync_point` by 27 seconds.
---
The total difference in time needed to execute the whole test file
(on my local machine, in dev mode):
Before:
CPU utilization: 0.9%
real 2m7.811s
user 0m25.446s
sys 0m16.733s
After:
CPU utilization: 1.1%
real 1m40.288s
user 0m25.218s
sys 0m16.566s
---
Refs scylladb/scylladb#25879
Fixes scylladb/scylladb#28203
Backport: This improves the stability of our CI, so let's
backport it to all supported versions.
- (cherry picked from commit 628e74f157)
- (cherry picked from commit ac4af5f461)
- (cherry picked from commit c5239edf2a)
- (cherry picked from commit a256ba7de0)
- (cherry picked from commit f83f911bae)
Parent PR: #28602
Closes scylladb/scylladb#28623
* github.com:scylladb/scylladb:
test: cluster: Reduce wait time in test_sync_point
test: cluster: Fix test_sync_point
test: cluster: Await sync points asynchronously
test: cluster: Create sync points asynchronously
test: cluster: Fetch hint metrics asynchronously
Currently the TLS handshake in the vector search client does not have a timeout.
This is because tls::connect does not perform handshake itself; the handshake
is deferred until the first read/write operation is performed. This can lead to long
hangs on ANN requests.
This commit calls tls::check_session_is_resumed() after tls::connect
to force the handshake to happen immediately and to run under with_timeout.
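A hedged sketch of that ordering; `creds`, `addr` and `connect_timeout` are assumed to be in scope, and only `tls::connect()` and `tls::check_session_is_resumed()` come from this commit:
```
auto deadline = seastar::lowres_clock::now() + connect_timeout;
auto socket = co_await seastar::with_timeout(deadline, seastar::tls::connect(creds, addr));
// tls::connect() defers the handshake to the first I/O, so poke the session
// here to force the handshake to run now, under the same timeout.
co_await seastar::with_timeout(deadline, seastar::tls::check_session_is_resumed(socket));
```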
(cherry picked from commit 079fe17e8b)
The test is flaky most likely because when TLS certificate rewrite
happens simultaneously with an ANN request, the handshake can hang for a
long time (~60s). This leads to a timeout in the test case.
This change introduces a checkpoint in the test so that it will
wait for the certificate rewrite to happen before sending an ANN request,
which should prevent the handshake from hanging and make the test more reliable.
Fixes: #28012
(cherry picked from commit aef5ff7491)
If everything is OK, the sync point will not resolve with node 3 dead.
As a result, the waiting will use all of the time we allocate for it,
i.e. 30 seconds. That's a lot of time.
There's no easy way to verify that the sync point will NOT resolve, but
let's at least reduce the waiting to 3 seconds. If there's a bug, it
should be enough to trigger it at some point, while reducing the average
time needed for CI.
(cherry picked from commit f83f911bae)
The test had a few shortcomings that made it flaky or simply wrong:
1. We were verifying that hints were written by checking the size of
in-flight hints. However, that could potentially lead to problems
in rare situations.
For instance, if all of the hints failed to be written to disk, the
size of in-flight hints would drop to zero, but creating a sync point
would correspond to the empty state.
In such a situation, we should fail immediately and indicate what
the cause was.
2. A sync point corresponds to the hints that have already been written
to disk. The number of those is tracked by the metric `written`.
It's a much more reliable way to make sure that hints have been
written to the commitlog. That ensures that the sync point we'll
create will really correspond to those hints.
3. The auxiliary function `wait_for` used in the test works like this:
it executes the passed callback and looks at the result. If it's
`None`, it retries it. Otherwise, the callback is deemed to have
finished its execution and no further retries will be attempted.
Before this commit, we simply returned a bool, and so the code was
wrong. We improve it.
Note that this fixes scylladb/scylladb#28203, which was a manifestation
of scylladb/scylladb#25879. We created a sync point that corresponded
to the empty state, and so it immediately resolved, even when node 3
was still dead.
Refs scylladb/scylladb#25879
Fixes scylladb/scylladb#28203
(cherry picked from commit a256ba7de0)
There's a dedicated HTTP API for communicating with the cluster, so
let's use it instead of yet another custom solution.
(cherry picked from commit c5239edf2a)
There's a dedicated HTTP API for communicating with the nodes, so let's
use it instead of yet another custom solution.
(cherry picked from commit ac4af5f461)
There's a dedicated API for fetching metrics now. Let's use it instead
of developing yet another solution that's also worse.
(cherry picked from commit 628e74f157)
Hints destined for some other node can only be drained after the other
node is no longer a replica of any vnode or tablet. In case when tablets
are present, a node might still technically be a replica of some tablets
after it moved to left state. When it no longer is a replica of any
tablet, it becomes "released" and storage service generates a
notification about it. Hinted handoff listens to this notification and
kicks off draining hints after getting it.
The current implementation of the "released" notification would trigger
every time raft topology state is reloaded and a left node without any
tokens is present in the raft topology. Although draining hints is
idempotent, generating duplicate notifications is wasteful and recently
became very noisy after the verbosity of the draining-related log messages
was increased in 44de563. The verbosity increase itself makes sense
as draining is supposed to be a rare operation, but the duplicate
notification bug now needs to be addressed.
Fix the duplicate notification problem by passing the list of previously
released nodes to the `storage_service::raft_topology_update_ip`
function and filtering based on it. If this function processes the
topology state for the first time, it will not produce any
notifications. This is fine as hinted handoff is prepared to detect
"released" nodes during the startup sequence in main.cc and start
draining the hints there, if needed.
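A simplified, standalone sketch of the deduplication idea (all types and names are stand-ins; the real logic lives in `storage_service::raft_topology_update_ip` and uses Scylla's host IDs and notification machinery):
```cpp
#include <functional>
#include <string>
#include <unordered_set>

using host_id = std::string; // stand-in for locator::host_id

// Notify only about nodes that became "released" since the previous reload
// of the raft topology state. On the very first reload we skip notifications
// entirely; hinted handoff detects already-released nodes during startup.
void notify_newly_released(const std::unordered_set<host_id>& previously_released,
                           const std::unordered_set<host_id>& now_released,
                           bool first_reload,
                           const std::function<void(const host_id&)>& notify) {
    if (first_reload) {
        return;
    }
    for (const auto& node : now_released) {
        if (!previously_released.contains(node)) {
            notify(node); // fire the "released" notification exactly once per node
        }
    }
}
```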
Fixes: #28301
Refs: #25031
(cherry picked from commit d28c841fa9)
In the following commits we will need to compare the set of released
nodes before and after reload of raft topology state. Moving the logic
that calculates such a set to a separate function will make it easier to
do.
(cherry picked from commit 10e9672852)
This reverts commit bcd1758911, reversing
changes made to b2c2a99741.
There is a design decision not to introduce an additional test
orchestration tool for scylladb.git (see the comments on #27499). One
commit has already been reverted in 55c7bc7. Recent CI runs showed the
validator test to be flaky, so it is time to remove all remaining validator tests.
It needs a backport to 2026.1 to remove remaining validator tests from there.
Fixes: VECTOR-497
Closes scylladb/scylladb#28568
(cherry picked from commit 81d11a23ce)
Closes scylladb/scylladb#28577
It turns out that the cdc driver requires permissions to two additional system tables. This patch adds them to VECTOR_SEARCH_INDEXING and modifies the unit tests. The integration with vector store was tested manually, integration tests will be added in vector-store repository in a follow up PR.
Fixes: SCYLLADB-522
Closes scylladb/scylladb#28519
(cherry picked from commit 6b9fcc6ca3)
Closes scylladb/scylladb#28538
We currently make the local node the only token owner (that owns the
whole ring) in maintenance mode, but we don't update the topology properly.
The node is present in the topology, but in the `none` state. That's how
it's inserted by `tm.get_topology().set_host_id_cfg(host_id);` in
`scylla_main`. As a result, the node started in maintenance mode crashes
in the following way in the presence of a vnodes-based keyspace with the
NetworkTopologyStrategy:
```
scylla: locator/network_topology_strategy.cc:207:
locator::natural_endpoints_tracker::natural_endpoints_tracker(
const token_metadata &, const network_topology_strategy::dc_rep_factor_map &):
Assertion `!_token_owners.empty() && !_racks.empty()' failed.
```
Both `_token_owners` and `_racks` are empty. The reason is that
`_tm.get_datacenter_token_owners()` and
`_tm.get_datacenter_racks_token_owners()` called above filter out nodes
in the `none` state.
This bug basically made maintenance mode unusable in customer clusters.
We fix it by changing the node state to `normal`.
We also extend `test_maintenance_mode` to provide a reproducer for #27988.
Fixes #27988
This PR must be backported to all branches, as maintenance mode is
currently unusable everywhere.
- (cherry picked from commit a08c53ae4b)
- (cherry picked from commit 9d4a5ade08)
- (cherry picked from commit c92962ca45)
- (cherry picked from commit 408c6ea3ee)
- (cherry picked from commit 53f58b85b7)
- (cherry picked from commit 867a1ca346)
- (cherry picked from commit 6c547e1692)
- (cherry picked from commit 7e7b9977c5)
Parent PR: #28322
Closes scylladb/scylladb#28499
* https://github.com/scylladb/scylladb:
test: test_maintenance_mode: enable maintenance mode properly
test: test_maintenance_mode: shutdown cluster connections
test: test_maintenance_mode: run with different keyspace options
test: test_maintenance_mode: check that group0 is disabled by creating a keyspace
test: test_maintenance_mode: get rid of the conditional skip
test: test_maintenance_mode: remove the redundant value from the query result
storage_proxy: skip validate_read_replica in maintenance mode
storage_service: set up topology properly in maintenance mode
This patch adds a reproducer test showing issue #28183 - that when LWT
is used, hidden tables "...$paxos" are created but they are unexpectedly
shown by DESC TABLES, DESC SCHEMA and DESC KEYSPACE.
The new test was failing (in three places) on Scylla, as those internal
(and illegally-named) tables are listed, and passes on Cassandra
(which doesn't add hidden tables for LWT).
The commit also contains another test, which verifies that a direct
description of the Paxos state table is wrapped in a comment.
Refs #28183.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
(cherry picked from commit 9baaddb613)
Paxos state tables are internal tables fully managed by Scylla;
they shouldn't be exposed to the user, nor should they be backed up.
This commit hides these tables from all listings, and if such a table
is directly described with `DESC ks."tbl$paxos"`, the description is generated
within a comment and a note for the user is added.
Fixes scylladb/scylladb#28183
(cherry picked from commit f89a8c4ec4)
The same issue as the one fixed in
394207fd69.
This one didn't cause real problems, but it's still cleaner to fix it.
(cherry picked from commit 7e7b9977c5)
We extend the test to provide a reproducer for #27988 and to avoid
similar bugs in the future.
The test slows down from ~14s to ~19s on my local machine in dev
mode. It seems reasonable.
(cherry picked from commit 867a1ca346)
In the following commit, we make the test run with multiple keyspaces,
and the old check becomes inconvenient. We also move it down to the
part of the code that won't be executed for each keyspace.
Additionally, we check if the error message is as expected.
(cherry picked from commit 53f58b85b7)
This skip has already caused trouble.
After 0668c642a2, the skip was always hit, and
the test was silently doing nothing. This made us miss #26816 for a long
time. The test was fixed in 222eab45f8, but we
should get rid of the skip anyway.
We increase the number of writes from 256 to 1000 to make the chance of not
finding the key on server A even lower. If that still happens, it must be
due to a bug, so we fail the test. We also make the test insert rows until
server A is a replica of one row. The expected number of inserted rows is
a small constant, so it should, in theory, make the test faster and cleaner
(we need one row on server A, so we insert exactly one such row).
It's possible to make the test fully deterministic, by e.g., hardcoding
the key and tokens of all nodes via `initial_token`, but I'm afraid it would
make the test "too deterministic" and could hide a bug.
(cherry picked from commit 408c6ea3ee)
In maintenance mode, the local node adds only itself to the topology. However,
the effective replication map of a keyspace with tablets enabled contains all
tablet replicas. It gets them from the tablets map, not the topology. Hence,
`network_topology_strategy::sanity_check_read_replicas` hits
```
throw std::runtime_error(format("Requested location for node {} not in topology. backtrace {}", id, lazy_backtrace()));
```
for tablet replicas other than the local node.
As a result, all requests to a keyspace with tablets enabled and RF > 1 fail
in debug mode (`validate_read_replica` does nothing in other modes). We don't
want to skip maintenance mode tests in debug mode, so we skip the check in
maintenance mode.
We move the `is_debug_build()` check because:
- `validate_read_replicas` is a static function with no access to the config,
- we want the `!_db.local().get_config().maintenance_mode()` check to be
dropped by the compiler in non-debug builds.
We also suppress `-Wunneeded-internal-declaration` with `[[maybe_unused]]`.
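A self-contained sketch of the resulting check ordering (all names below are stand-ins; the real code lives in storage_proxy and uses Scylla's config and `validate_read_replicas`):
```cpp
#include <vector>

// Stand-in for the build-mode check; in non-debug builds the whole body
// below compiles away, including the maintenance-mode config lookup.
constexpr bool is_debug_build() {
#ifndef NDEBUG
    return true;
#else
    return false;
#endif
}

struct config { bool maintenance_mode = false; };

// Static helper with no access to the config, so the maintenance-mode
// exception has to be handled by the caller.
static void validate_read_replicas(const std::vector<int>& /*replicas*/) {}

void maybe_validate_read_replicas(const config& cfg, const std::vector<int>& replicas) {
    if (!is_debug_build()) {
        return; // validation is debug-only
    }
    if (cfg.maintenance_mode) {
        return; // topology contains only the local node; tablet replicas
                // taken from the tablets map would fail the check
    }
    validate_read_replicas(replicas);
}
```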
(cherry picked from commit 9d4a5ade08)
We currently make the local node the only token owner (that owns the
whole ring) in maintenance mode, but we don't update the topology properly.
The node is present in the topology, but in the `none` state. That's how
it's inserted by `tm.get_topology().set_host_id_cfg(host_id);` in
`scylla_main`. As a result, the node started in maintenance mode crashes
in the following way in the presence of a vnodes-based keyspace with the
NetworkTopologyStrategy:
```
scylla: locator/network_topology_strategy.cc:207:
locator::natural_endpoints_tracker::natural_endpoints_tracker(
const token_metadata &, const network_topology_strategy::dc_rep_factor_map &):
Assertion `!_token_owners.empty() && !_racks.empty()' failed.
```
Both `_token_owners` and `_racks` are empty. The reason is that
`_tm.get_datacenter_token_owners()` and
`_tm.get_datacenter_racks_token_owners()` called above filter out nodes
in the `none` state.
This bug basically made maintenance mode unusable in customer clusters.
We fix it by changing the node state to `normal`. We also update its
rack, datacenter, and shard count. Rack and datacenter are present in the
topology somehow, but there is nothing wrong with updating them again.
The shard count is also missing, so we'd better update it to avoid other
issues.
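A rough, standalone sketch of the shape of the fix (all types are illustrative; the real code updates the local node's entry in the token metadata topology during maintenance-mode startup):
```cpp
#include <string>

enum class node_state { none, normal };

struct local_node_info {
    node_state state = node_state::none; // as left by set_host_id_cfg()
    std::string dc;
    std::string rack;
    unsigned shard_count = 0;
};

// In maintenance mode the local node is the only token owner, so it must be
// in the `normal` state; otherwise get_datacenter_token_owners() and
// get_datacenter_racks_token_owners() filter it out and the replication
// strategy asserts on empty owners/racks.
void setup_maintenance_topology(local_node_info& node,
                                const std::string& dc,
                                const std::string& rack,
                                unsigned shard_count) {
    node.state = node_state::normal;
    node.dc = dc;                   // already present, harmless to refresh
    node.rack = rack;
    node.shard_count = shard_count; // missing before the fix
}
```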
Fixes #27988
(cherry picked from commit a08c53ae4b)
When the topology coordinator refreshes load_stats, it caches load_stats for every node. In case the node becomes unresponsive, and fresh load_stats can not be read from the node, the cached version of load_stats will be used. This is to allow the load balancer to have at least some information about the table sizes and disk capacities of the host.
During load_stats refresh, we aggregate the table sizes from all the nodes. This procedure calls db.find_column_family() for each table_id found in load_stats. This function will throw if the table is not found. This will cause load_stats refresh to fail.
It is also possible for a table to have been dropped between the time load_stats has been prepared on the host, and the time it is processed on the topology coordinator. This would also cause an exception in the refresh procedure.
This fixes this problem by checking if the table still exists.
Fixes: #28359
- (cherry picked from commit 71be10b8d6)
- (cherry picked from commit 92dbde54a5)
Parent PR: #28440
Closes scylladb/scylladb#28471
* github.com:scylladb/scylladb:
test: add test and reproducer for load_stats refresh exception
load_stats: handle dropped tables when refreshing load_stats
This patch adds a test and reproducer for the issue where the load_stats
refresh procedure throws exceptions if any of the tables have been
dropped since load_stats was produced.
(cherry picked from commit 92dbde54a5)
When the topology coordinator refreshes load_stats, it caches load_stats
for every node. In case the node becomes unresponsive, and fresh
load_stats can not be read from the node, the cached version of
load_stats will be used. This is to allow the load balancer to
have at least some information about the table sizes and disk capacities
of the host.
During load_stats refresh, we aggregate the table sizes from all the
nodes. This procedure calls db.find_column_family() for each table_id
found in load_stats. This function will throw if the table is not found.
This will cause load_stats refresh to fail.
It is also possible for a table to have been dropped between the time
load_stats has been prepared on the host, and the time it is processed
on the topology coordinator. This would also cause an exception in the
refresh procedure.
This patch fixes this problem by checking if the table still exists.
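A minimal sketch of the aggregation with the dropped-table check (illustrative types; the real code uses `database::find_column_family`, which throws for unknown tables, hence the existence check first):
```cpp
#include <cstdint>
#include <map>
#include <string>
#include <unordered_set>

using table_id = std::string; // stand-in for Scylla's table_id

// Aggregate per-table sizes from one node's load_stats, skipping tables that
// were dropped between the moment the node produced the stats and the moment
// the topology coordinator processes them.
std::map<table_id, uint64_t>
aggregate_table_sizes(const std::map<table_id, uint64_t>& node_stats,
                      const std::unordered_set<table_id>& existing_tables) {
    std::map<table_id, uint64_t> totals;
    for (const auto& [id, size] : node_stats) {
        if (!existing_tables.contains(id)) {
            continue; // dropped since the stats were produced; skip instead of throwing
        }
        totals[id] += size;
    }
    return totals;
}
```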
(cherry picked from commit 71be10b8d6)
Explain what automatic repair is and how to configure it. While at it, improve the existing repair documentation a bit.
Fixes: SCYLLADB-130
This PR missed the 2026.1 branch date, so it needs backport to 2026.1, where the auto repair feature debuts.
- (cherry picked from commit a84b1b8b78)
- (cherry picked from commit 57b2cd2c16)
- (cherry picked from commit 1713d75c0d)
Parent PR: #28199
Closes scylladb/scylladb#28424
* github.com:scylladb/scylladb:
docs: add feature page for automatic repair
docs: inter-link incremental-repair and repair documents
docs: incremental-repair: fix curl example
test_alternator_proxy_protocol starts a node and connects via the alternator ports.
Starting a node, by default, waits until the CQL ports are up. This does not guarantee
that the alternator ports are up (they will be up very soon after this), so there is a short
window where a connection to the alternator ports will fail.
Fix by adding a ServerUpState=SERVING mode, which waits for the node to report
to its supervisor (systemd, which we are pretending to be) that its ports are open.
The test is then adjusted to request this new ServerUpState.
Fixes #28210
Fixes #28211
Flaky tests are only in master and branch-2026.1, so backporting there.
- (cherry picked from commit ebac810c4e)
- (cherry picked from commit 59f2a3ce72)
Parent PR: #28291
Closes scylladb/scylladb#28443
* github.com:scylladb/scylladb:
test: test_alternator_proxy_protocol: wait for the node to report itself as serving
test: cluster_manager: add ability to wait for supervisor STATUS=serving
Contains various improvements to tablet load balancer. Batched together to save on the bill for CI.
Most notably:
- Make plan summary more concise, and print info only about present elements.
- Print rack name in addition to DC name when making a per-rack plan
- Print "Not possible to achieve balance" only when this is the final plan with no active migrations
- Print per-node stats when "Not possible to achieve balance" is printed
- amortize metrics lookup cost
- avoid spamming logs with per-node "Node {} does not have complete tablet stats, ignoring"
Backport to 2026.1: since the changes enhance debuggability and are relatively low risk
Fixes #28423
Fixes #28422
- (cherry picked from commit 32b336e062)
- (cherry picked from commit df32318f66)
- (cherry picked from commit f2b0146f0f)
- (cherry picked from commit 0d090aa47b)
- (cherry picked from commit 12fdd205d6)
- (cherry picked from commit 615b86e88b)
- (cherry picked from commit 7228bd1502)
- (cherry picked from commit 4a161bff2d)
- (cherry picked from commit ef0e9ad34a)
- (cherry picked from commit 9715965d0c)
- (cherry picked from commit 8e831a7b6d)
Parent PR: #28337
Closes scylladb/scylladb#28428
* github.com:scylladb/scylladb:
tablets: tablet_allocator.cc: Convert tabs to spaces
tablets: load_balancer: Warn about incomplete stats once for all offending nodes
tablets: load_balancer: Improve node stats printout
tablets: load_balancer: Warn about imbalance only when there are no more active migrations
tablets: load_balancer: Extract print_node_stats()
tablet: load_balancer: Use empty() instead of size() where applicable
tablets: Fix redundancy in migration_plan::empty()
tablets: Cache pointer to stats during plan-making
tablets: load_balancer: Print rack in addition to DC when giving context
tablets: load_balancer: Make plan summary concise
tablets: load_balancer: Move "tablet_migration_bypass" injection point to make_plan()
The Vector Search feature needs to support creating vector indexes with additional
filtering columns. There will be two types of indexes: global, which indexes
vectors per table, and local, which indexes vectors per partition key. The new
syntaxes are based on ScyllaDB's Global Secondary Index and Local Secondary
Index. Vector indexes don't use secondary index functionality in any way -
all indexing, filtering and data processing will be done on the Vector Store side.
This patch allows creating vector indexes using this CQL syntax:
```
CREATE TABLE IF NOT EXISTS cycling.comments_vs (
commenter text,
comment text,
comment_vector VECTOR <FLOAT, 5>,
created_at timestamp,
discussion_board_id int,
country text,
lang text,
PRIMARY KEY ((commenter, discussion_board_id), created_at)
);
CREATE CUSTOM INDEX IF NOT EXISTS global_ann_index
ON cycling.comments_vs(comment_vector, country, lang) USING 'vector_index'
WITH OPTIONS = { 'similarity_function': 'DOT_PRODUCT' };
CREATE CUSTOM INDEX IF NOT EXISTS local_ann_index
ON cycling.comments_vs((commenter, discussion_board_id), comment_vector, country, lang)
USING 'vector_index'
WITH OPTIONS = { 'similarity_function': 'DOT_PRODUCT' };
```
Before this change, running these queries to create the indexes produced errors such as:
```
InvalidRequest: Error from server: code=2200 [Invalid query] message="Vector index can only be created on a single column"
InvalidRequest: Error from server: code=2200 [Invalid query] message="Local index definition must contain full partition key only. Redundant column: XYZ"
```
This commit refactors `vector_index::check_target` to correctly validate the
columns that make up the index. Vector-store currently supports filtering by
native types, so the types of the columns are checked. The first column on the
list must be a vector (the index is built on these vectors), so it is checked
as well. The allowed column types are native types, excluding counter (it is
not possible to create a table with both a counter and a vector) and duration
(durations cannot be compared correctly; this type is not allowed even in
secondary indexes).
This commit also adds a cqlpy test to check the errors reported while creating indexes.
Fixes: SCYLLADB-298
This needs to be backported to version 2026.1 as this is a fix for filtering support.
Closes scylladb/scylladb#28366
(cherry picked from commit f49c9e896a)
Closes scylladb/scylladb#28448
In production environments, we observed cases where the S3 client would repeatedly fail to connect due to DNS entries becoming stale. Because the existing logic only attempted the first resolved address and lacked a way to refresh DNS state, the client could get stuck in a failure loop.
Introduce RR TTL and connection failure retry to
- re-resolve the RR in a timely manner
- forcefully reset and re-resolve addresses
- add a special case when the TTL is 0 and the record must be resolved for every request
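A condensed, standalone sketch of the TTL/retry behavior described above (plain C++ with illustrative names; the real change lives in the seastar-based `dns_connection_factory` of the S3 client):
```cpp
#include <chrono>
#include <functional>
#include <stdexcept>
#include <string>
#include <vector>

struct resolved_addresses {
    std::vector<std::string> addrs;                    // all records, not just the first
    std::chrono::steady_clock::time_point expires_at;  // now + record TTL; TTL 0 => already expired
};

struct dns_factory_sketch {
    std::function<resolved_addresses()> resolve;       // re-resolves the resource record
    std::function<int(const std::string&)> connect;    // returns a handle, throws on failure
    resolved_addresses cached{};

    int connect_any() {
        for (const auto& a : cached.addrs) {
            try { return connect(a); } catch (const std::exception&) { /* try next address */ }
        }
        throw std::runtime_error("all resolved addresses failed");
    }

    int make_connection() {
        // Re-resolve when the record expired; a TTL of 0 means "resolve on every request".
        if (cached.addrs.empty() || std::chrono::steady_clock::now() >= cached.expires_at) {
            cached = resolve();
        }
        try {
            return connect_any();
        } catch (const std::exception&) {
            cached = resolve();      // forcefully reset and re-resolve the addresses
            return connect_any();    // retry once (and only once) before giving up
        }
    }
};
```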
Fixes: CUSTOMER-96
Fixes: CUSTOMER-139
Should be backported to 2025.3/4 and 2026.1 since we already encountered it in the production clusters for 2025.3
- (cherry picked from commit bd9d5ad75b)
- (cherry picked from commit 359d0b7a3e)
- (cherry picked from commit ce0c7b5896)
- (cherry picked from commit 5b3e513cba)
- (cherry picked from commit 66a33619da)
- (cherry picked from commit 6eb7dba352)
- (cherry picked from commit a05a4593a6)
- (cherry picked from commit 3a31380b2c)
- (cherry picked from commit 912c48a806)
Parent PR: #27891
Closes scylladb/scylladb#28405
* https://github.com/scylladb/scylladb:
connection_factory: includes cleanup
dns_connection_factory: refine the move constructor
connection_factory: retry on failure
connection_factory: introduce TTL timer
connection_factory: get rid of shared_future in dns_connection_factory
connection_factory: extract connection logic into a member
connection_factory: remove unnecessary `else`
connection_factory: use all resolved DNS addresses
s3_test: remove client double-close
Use the new ServerUpState=SERVING mechanism to wait for the alternator
ports to be up, rather than relying on the default waiting for CQL,
which happens earlier and therefore opens a window where a connection to
the alternator ports will fail.
(cherry picked from commit 59f2a3ce72)
When running under systemd, ScyllaDB sends a STATUS=serving message
to systemd. Co-opt this mechanism by setting up NOTIFY_SOCKET, thus
making the cluster manager pretend it is systemd. Users of the cluster
manager can now wait for the node to report itself up, rather than
having to parse log files or retry connections.
(cherry picked from commit ebac810c4e)
The test is currently flaky. It tries to get the host ID of the bootstrapping
node via the REST API after the node crashes. This can obviously fail. The
test usually doesn't fail, though, as it relies on the host ID being saved
in `ScyllaServer._host_id` at this point by `ScyllaServer.try_get_host_id()`
repeatedly called in `ScyllaServer.start()`. However, with a very fast crash
and unlucky timings, no such call may succeed.
We deflake the test by getting the host ID before the crash. Note that at this
point, the bootstrapping node must be serving the REST API requests because
`await log.wait_for("finished do_send_ack2_msg")` above guarantees that the
node has started the gossip shadow round, which happens after starting the REST
API.
Fixes #28385
Closes scylladb/scylladb#28388
(cherry picked from commit a2c1569e04)
Closes scylladb/scylladb#28417
Otherwise, it may be only a temporary situation due to lack of
candidates, and may be unnecessarily alerting.
Also, print node stats to allow assessing how bad the situation is on
the spot. Those stats can hint to a cause of imbalance, if balancing
is per-DC and racks have different capacity.
(cherry picked from commit 4a161bff2d)
Saves on lookup cost, especially for candidate evaluation. This showed up in
perf profiles in the past.
It also lays the groundwork for splitting stats per rack.
(cherry picked from commit 0d090aa47b)
Load balancing can now be per-rack instead of per-DC, so printing just
"in DC" is confusing. If we're balancing a rack, we should print which
rack it is.
(cherry picked from commit f2b0146f0f)
Before:
load_balancer - Prepared 1 migration plans, out of which there were 1 tablet migration(s) and 0 resize decision(s) and 0 tablet repair(s) and 0 rack-list colocation(s)
After:
load_balancer - Prepared plan: migrations: 1
We print only stats about elements which are present.
(cherry picked from commit df32318f66)
Explain what the feature is and how to configure it.
Inter-link all the repair related pages, so one can discover all about
repair, regardless of which page they land on.
(cherry picked from commit 1713d75c0d)
The user can now discover the general explanation of repair when reading
about incremental repair, useful if they don't know what repair is.
The user can now discover incremental repair while reading the generic
repair procedure document.
(cherry picked from commit 57b2cd2c16)
Previously we only inspected std::system_error inside
std::nested_exception to support a specific TLS-related failure
mode. However, nested exceptions may contain any type, including
other restartable (retryable) errors. This change unwraps one
nested exception per iteration and re-applies all known handlers
until a match is found or the chain is exhausted.
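A self-contained sketch of the unwrap-one-level-per-iteration loop (the function name and the handler list are illustrative, not the actual error-handling code):
```cpp
#include <exception>
#include <functional>
#include <vector>

// Returns true if any known handler recognizes the error as restartable,
// unwrapping one level of std::nested_exception per iteration until a
// match is found or the chain is exhausted.
bool is_restartable(std::exception_ptr ep,
                    const std::vector<std::function<bool(std::exception_ptr)>>& handlers) {
    while (ep) {
        for (const auto& h : handlers) {
            if (h(ep)) {
                return true;
            }
        }
        // Unwrap one nested exception, if any, and re-apply all handlers.
        try {
            std::rethrow_exception(ep);
        } catch (const std::nested_exception& nested) {
            ep = nested.nested_ptr();
        } catch (...) {
            ep = nullptr; // not nested: chain exhausted
        }
    }
    return false;
}
```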
Closes scylladb/scylladb#28240
(cherry picked from commit cb2aa85cf5)
Closes scylladb/scylladb#28345
Clean up the awkward move constructor that was declared in the header
but defaulted in a separate compilation unit, improving clarity and
consistency.
(cherry picked from commit 3a31380b2c)
If connecting to a provided address throws, renew the address list and
retry once (and only once) before giving up.
(cherry picked from commit a05a4593a6)
Add a TTL-based timer to connection_factory to automatically refresh
resolved host name addresses when they expire.
(cherry picked from commit 6eb7dba352)
Move state management from dns_connection_factory into state class
itself to encapsulate its internal state and stop managing it from the
`dns_connection_factory`
(cherry picked from commit 66a33619da)
`test_chunked_download_data_source_with_delays` was calling `close()` on a client twice; remove the unnecessary call.
(cherry picked from commit bd9d5ad75b)
Commit 0156e97560 ("storage_proxy: cas: reject for
tablets-enabled tables") marked a bunch of LWT tests as
XFAIL with tablets enabled, pending resolution of #18066.
But since that event is now in the past, we undo the XFAIL
markings (or in some cases, use an any-keyspace fixture
instead of a vnodes-only fixture).
Ref #18066.
Closes scylladb/scylladb#28336
(cherry picked from commit ec70cea2a1)
Closes scylladb/scylladb#28365
The class in question has internal implementations of in-memory data_sink_impl and data_source_impl. In seastar there are generic implementations of the same facilities. From the "code re-use" perspective it makes sense to use both. TODO comments in the Scylla code support that.
Using newer seastar facilities, not backporting.
Closes scylladb/scylladb#28321
* github.com:scylladb/scylladb:
sstable: Replace buffer_data_sink_impl with seastar::util::basic_memory_data_sink
sstables: Use seastar::util::as_input_stream() and remove buffer_data_source_impl
There are two checks for live endpoints performed in test_gossiper.py,
but one of them somehow sits in test_gossiper_unreachable_endpoints.
This patch moves the live-endpoints check into the live-endpoints test.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Closes scylladb/scylladb#28224
This series implements the rescoring algorithm.
Index options that enable this functionality were introduced in an earlier PR: https://github.com/scylladb/scylladb/pull/28165.
When a Vector Index has quantization enabled, Vector Store uses a reduced vector representation to save memory, but this may degrade the correctness of ANN queries. For a quantized index we can enable the rescoring algorithm, which recalculates the similarity score from the full vector representation stored in Scylla and reorders the returned result set.
It also works with oversampling - we fetch more candidates from Vector Store, rescore them in Scylla and return only the requested number of results.
Example:
Creating a Vector Index with Rescoring
```sql
-- Create a table with a vector column
CREATE TABLE ks.products (
id int PRIMARY KEY,
embedding vector<float, 128>
);
-- Create a vector index with rescoring enabled
CREATE INDEX products_embedding_idx ON ks.products (embedding)
USING 'vector_index'
WITH OPTIONS = {
'similarity_function': 'cosine',
'quantization': 'i8',
'oversampling': '2.0',
'rescoring': 'true'
};
```
1. **Quantization** (`i8`) compresses vectors in the index, reducing memory usage but introducing precision loss in distance calculations
2. **Oversampling** (`2.0`) retrieves 2× more candidates than requested from the vector store (e.g., `LIMIT 10` fetches 20 candidates)
3. **Rescoring** (`true`) recalculates similarity scores using full-precision (`f32`) vectors from the base table and re-ranks results
Query example:
```sql
-- Find 10 most similar products
SELECT id, similarity_cosine(embedding, [0.1, 0.2, ...]) AS score
FROM ks.products
ORDER BY embedding ANN OF [0.1, 0.2, ...]
LIMIT 10;
```
With rescoring enabled, the query:
1. Fetches 20 candidates from the quantized index (due to oversampling=2.0)
2. Reads full-precision embeddings from the base table
3. Recalculates similarity scores with full precision
4. Re-ranks and returns the top 10 results
In this implementation we use the CQL similarity function implementation to calculate the new score values and use them in post-query ordering. We add that column manually to the selection, but it has to be removed from the final response.
Follow-up https://github.com/scylladb/scylladb/pull/28165
Fixes https://scylladb.atlassian.net/browse/SCYLLADB-83
New feature - doesn't need backport.
Closes scylladb/scylladb#27769
* github.com:scylladb/scylladb:
vector_index: rescoring: Fetch oversampled rows
vector_index: rescoring: Sort by similarity column
select_statement: Modify `needs_post_query_ordering` condition
vector_index: rescoring: Add hidden similarity score column
vector_index: Refactor extracting ANN query information
The storage_proxy::stop() is not called by main (it is commented out due to #293), so the corresponding message injection is never hit. When the test releases paxos_state_learn_after_mutate, shutdown may already be in progress or even completed by the time we try to trigger the storage_proxy::stop injection, which makes the test flaky.
Fix this by completely removing the storage_proxy::stop injection. The injection is not required for test correctness. Shutdown must wait for the background LWT learn to finish, which is released via the paxos_state_learn_after_mutate injection. The shutdown process blocks on in-flight HTTP requests through seastar::httpd::http_server::stop and its _task_gate, so the HTTP request that releases paxos_state_learn_after_mutate is guaranteed to complete before the node is shut down.
Fixes scylladb/scylladb#28260
backport: 2025.4, the `test_lwt_shutdown` test was introduced in this version
Closes scylladb/scylladb#28315
* https://github.com/scylladb/scylladb:
storage_proxy: drop stop() method
test_lwt_shutdown: fix flakiness by removing storage_proxy::stop injection
It was observed twice that the test times out in debug mode.
Fix by increasing the timeout.
The test never expects a timeout, so increasing it won't increase
the test duration.
Fixes #28028
Closes scylladb/scylladb#28272
- Pass pytest request fixture into coro_task (used for scylla_tmp_dir
and core dump path)
- Rename duplicate `test_sstable_summary` that runs sstable-index-cache
to `test_sstable_index_cache` so both tests are collected
Refs https://github.com/scylladb/scylladb/issues/22501
Closes scylladb/scylladb#28286
The former accumulates sstable writer writes into a vector of temporary
buffers. In seastar there's a generic memory data sink that provides a
sink to accumulate a stream of bytes into any container.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The latter is used to wrap a vector of buffers into an input_stream.
Seastar already provides the very same functionality with the
convenience as_input_stream() helper.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
In 8df61f6d99 we changed the requirements for creating materialized
views and MV-based indexes - instead of requiring the
rf_rack_valid_keyspaces flag to be set, we now require the keyspace to
be RF-rack-valid at the time of creation, and it is enforced to remain
RF-rack-valid while the MV exists. This validation is done in the cql
create view/index statements.
The same should be done also for alternator - when creating a table with
GSI or LSI, or when adding a GSI to an existing table, previously we
required the flag rf_rack_valid_keyspaces to be set. Now we change it to
instead check if the keyspace is RF-rack-valid, and if not the operation
fails with an appropriate error.
Fixes https://github.com/scylladb/scylladb/issues/28214
backport to 2025.4 to add RF-rack-valid enforcements in alternator
Closes scylladb/scylladb#28154
* github.com:scylladb/scylladb:
locator: document the exception type of assert_rf_rack_valid_keyspace
alternator: don't require rf_rack flag for indexes, validate instead
When read or write operations are performed on a DC with RF=0 with LOCAL_QUORUM
or LOCAL_ONE consistency level, Cassandra throws `Unavailable` exception.
Scylla allowed such read operations and failed write operations with a cryptic
"broken promise" error. This occurred because the initial availability
check passed (quorum of 0 requires 0 replicas), but execution failed
later when no replicas existed to process the mutation.
This patch adds an explicit RF=0 validation for LOCAL_ONE and LOCAL_QUORUM that
throws before attempting operation execution.
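A simplified sketch of the added validation (plain C++ with stand-in names; the real check lives in the consistency-level code and throws Scylla's unavailable exception type):
```cpp
#include <cstddef>
#include <stdexcept>
#include <string>

enum class consistency_level { LOCAL_ONE, LOCAL_QUORUM };

struct unavailable_error : std::runtime_error {
    using std::runtime_error::runtime_error;
};

// Reject LOCAL_ONE / LOCAL_QUORUM operations up front when the local DC has
// RF=0, instead of passing the availability check (a quorum of 0 needs 0
// replicas) and failing later with "broken promise".
void validate_local_cl(consistency_level cl, size_t local_dc_rf, const std::string& dc) {
    bool is_local = cl == consistency_level::LOCAL_ONE || cl == consistency_level::LOCAL_QUORUM;
    if (is_local && local_dc_rf == 0) {
        throw unavailable_error("cannot satisfy a local consistency level in datacenter '"
                                + dc + "' with replication factor 0");
    }
}
```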
The change also requires `test_query_dc_with_rf_0_does_not_crash_db` to be
upgraded. This test case was asserting a somewhat similar scenario, but wasn't
taking into account the whole matrix of combinations:
- scenarios: successful vs unsuccessful operation outcome
- local consistency levels: LOCAL_QUORUM & LOCAL_ONE
- operations: SELECT (read) & INSERT (write)
and so it's been extended to cover both the pre-existing and the current issues
and the whole matrix of combinations.
Fixes: scylladb/scylladb#27893
A minor change, no need to backport.
Closes scylladb/scylladb#27894
* github.com:scylladb/scylladb:
db: fail reads and writes with local consistency level to a DC with RF=0
db: consistency_level: split `local_quorum_for()`
db: consistency_level: fix nrs -> nts abbreviation
storage_proxy::stop() is not called by main (it is commented out due to #293),
so the corresponding message injection is never hit. When the test releases
paxos_state_learn_after_mutate, shutdown may already be in progress or even
completed by the time we try to trigger the storage_proxy::stop injection,
which makes the test flaky.
Fix this by completely removing the storage_proxy::stop injection.
The injection is not required for test correctness. Shutdown must wait for the
background LWT learn to finish, which is released via the
paxos_state_learn_after_mutate injection.
The shutdown process blocks on in-flight api HTTP requests through
seastar::httpd::http_server::stop and its _task_gate, so the
shutdown will not prevent the HTTP request that released the
paxos_state_learn_after_mutate from completing successfully.
Fixes scylladb/scylladb#28260
During the rewrite, --extra-scylla-cmdline-options was missed and was not passed to the tests that use pytest. The issue was that there was no way to pass these parameters to Scylla via the command line, while the tests themselves were not affected because they were using the parameters from the yaml file.
This PR fixes this issue so it will be easier to modify the Scylla start parameters without modifying code.
No backport needed, only framework enhancement.
Closes scylladb/scylladb#28156
* github.com:scylladb/scylladb:
test.py: do not crash when there is no boost log
test.py: pass correctly extra cmd line arguments
Before this change, the test function `_verify_tasks_processed_metrics`
verified that after service level reconfiguration, a given number of
`scylla_scheduler_tasks_processed` were processed by a given scheduling
group. Moreover, the check verified that another scheduling group
didn't process a high number of requests. The second check was vulnerable
to flakiness, because sometimes additional load caused extensive work
in the second scheduling group (e.g. password hashing in `sl:driver`
due to new connections being created).
To avoid test failures, this commit changes which metric is verified:
instead of `scylla_scheduler_tasks_processed`, the metric
`scylla_transport_cql_requests_count` is checked. This prevents similar
problems, because there is no reason for a high number of
requests to be processed by the second scheduling group. Moreover,
it allows decreasing the number of requests that are sent for
verification, and thus speeds up the test.
Fixes: scylladb/scylladb#27715
Closes scylladb/scylladb#28318
In this PR we add a basic implementation of the strongly-consistent tables:
* generate raft group id when a strongly-consistent table is created
* persist it into system.tables table
* start raft groups on replicas when a strongly-consistent tablet_map reaches them
* add strongly-consistent version of the storage_proxy, with the `query` and `mutate` methods
* the `mutate` method submits a command to the tablets raft group, the query method reads the data with `raft.read_barrier()`
* strongly-consistent versions of the `select_statement` and `modification_statement` are added
* a basic `test_strong_consistency.py/test_basic_write_read` is added to check that we can write and read data in a strongly consistent fashion.
Limitations:
* for now the strongly consistent tables can have tablets only on shard zero. This is because we (ab/re) use the existing raft system tables which live only on shard0. In the next PRs we'll create separate tables for the new tablets raft groups.
* No Scylla-side proxying - the test has to figure out who is the leader and submit the command to the right node. This will be fixed separately.
* No tablet balancing -- migration/split/merges require separate complicated code.
The new behavior is hidden behind `STRONGLY_CONSISTENT_TABLES` feature, which is enabled when the `STRONGLY_CONSISTENT_TABLES` experimental feature flag is set.
Requirements, specs and general overview of the feature can be found [here](https://scylladb.atlassian.net/wiki/spaces/RND/pages/91422722/Strong+Consistency). Short term implementation plan is [here](https://docs.google.com/document/d/1afKeeHaCkKxER7IThHkaAQlh2JWpbqhFLIQ3CzmiXhI/edit?tab=t.0#heading=h.thkorgfek290)
One can check the strongly consistent writes and reads locally via cqlsh:
scylla.yaml:
```
experimental_features:
- strongly-consistent-tables
```
cqlsh:
```
CREATE KEYSPACE IF NOT EXISTS my_ks WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 1} AND tablets = {'initial': 1} AND consistency = 'local';
CREATE TABLE my_ks.test (pk int PRIMARY KEY, c int);
INSERT INTO my_ks.test (pk, c) VALUES (10, 20);
SELECT * FROM my_ks.test WHERE pk = 10;
```
Fixes SCYLLADB-34
Fixes SCYLLADB-32
Fixes SCYLLADB-31
Fixes SCYLLADB-33
Fixes SCYLLADB-56
backport: no need
Closes scylladb/scylladb#27614
* https://github.com/scylladb/scylladb:
test_encryption: capture stderr
test/cluster: add test_strong_consistency.py
raft_group_registry: disable metrics for non-0 groups
strong consistency: implement select_statement::do_execute()
cql: add select_statement.cc
strong consistency: implement coordinator::query()
cql: add modification_statement
cql: add statement_helpers
strong consistency: implement coordinator::mutate()
raft.hh: make server::wait_for_leader() public
strong_consistency: add coordinator
modification_statement: make get_timeout public
strong_consistency: add groups_manager
strong_consistency: add state_machine and raft_command
table: add get_max_timestamp_for_tablet
tablets: generate raft group_id-s for new table
tablet_replication_strategy: add consistency field
tablets: add raft_group_id
modification_statement: remove virtual where it's not needed
modification_statement: inline prepare_statement()
system_keyspace: disable tablet_balancing for strongly_consistent_tables
cql: rename strongly_consistent statements to broadcast statements
This patch adds links from the CQL documentation to the Vector Search
documentation that is hosted together with the Scylla Cloud docs.
It also makes the note about supported capabilities consistent and
removes the experimental label, as the feature is GA.
Fixes: SCYLLADB-371
Closes scylladb/scylladb#28312
The function assert_rf_rack_valid_keyspace uses the exception type
std::invalid_argument when the RF-rack validation fails. Document it and
change all callers to catch this specific exception type when checking
for RF-rack validation failures, so that other exception types can be
propagated properly.
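A minimal sketch of the caller-side pattern (the checker below is a stand-in for locator's `assert_rf_rack_valid_keyspace`, whose documented failure type is `std::invalid_argument`; the surrounding handler is illustrative):
```cpp
#include <iostream>
#include <stdexcept>

// Stand-in: throws std::invalid_argument when RF-rack validation fails.
void assert_rf_rack_valid_keyspace(bool rf_rack_valid) {
    if (!rf_rack_valid) {
        throw std::invalid_argument("keyspace is not RF-rack-valid");
    }
}

void try_create_view(bool rf_rack_valid) {
    try {
        assert_rf_rack_valid_keyspace(rf_rack_valid);
    } catch (const std::invalid_argument& e) {
        // RF-rack validation failure: report it to the user.
        std::cerr << "cannot create view: " << e.what() << "\n";
        return;
    }
    // Any other exception type propagates unchanged to the caller.
}
```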
In 8df61f6d99 we changed the requirements for creating materialized
views and MV-based indexes - instead of requiring the
rf_rack_valid_keyspaces flag to be set, we now require the keyspace to
be RF-rack-valid at the time of creation, and it is enforced to remain
RF-rack-valid while the MV exists. This validation is done in the cql
create view/index statements.
The same should be done also for alternator - when creating a table with
GSI or LSI, or when adding a GSI to an existing table, previously we
required the flag rf_rack_valid_keyspaces to be set. Now we change it to
instead check if the keyspace is RF-rack-valid, and if not the operation
fails with an appropriate error.
So far, with oversampling, the extended set of keys was returned from VS,
but the query to the base table was still limited by the query `limit`.
Now, for rescoring, we want to fetch rows for all the keys returned from VS.
However, we later need to restore the command limit to trim the result_set accordingly.
For non-rescoring scenarios we directly trim the key set returned from VS if it happens to exceed the query limit.
With this change rescoring validation tests (except `no_nulls_in_rescored_results`) pass fully.
Fixes https://scylladb.atlassian.net/browse/SCYLLADB-83
This patch implements the second part of rescoring - ordering results by the similarity column added in the earlier patch.
For this purpose we define `_ordering_comparator`, which enables the pre-existing post-query ordering functionality.
However, no additional test passes yet, as they all include oversampling, which will be the subject of the following patches.
Our plan for rescoring is to use the existing post-query ordering mechanism to sort (and trim) the result_set by the similarity column.
For the general SELECT case this ordering is permitted only for queries with IN on the partition key and an ORDER BY, which is checked in `needs_post_query_ordering`.
Recently this check was overridden for ANN queries in https://github.com/scylladb/scylladb/pull/28109 to enable IN queries handled by VS without excessive post-processing.
In this patch we revert that change - the ANN case will be handled by the general check.
However, we change the condition - we enable post-processing whenever `_ordering_comparator` is set.
In the current implementation `_ordering_comparator` is created only in `select_statement::prepare` with `get_ordering_comparator`,
only under the same conditions as were checked in `needs_post_query_ordering`, so this change should be transparent for general SELECT.
For an ANN query it is also not set (yet), so it will not influence ANN filtering, but we confirm that this functionality still works
by adding a filtering test: `test/vector_search/filter_test.cc::vector_store_client_test_filtering_ann_cql`.
Rescoring ordering for ANN queries will be enabled when we add `_ordering_comparator` in the following patch.
Rescoring consists of recalculating the similarity score and reordering results based on it.
In this patch we add the calculation of the similarity score as a hidden (non-serialized) column; the following patch will add the reordering.
Normal ordering uses `add_column_for_post_processing`, however this works only for regular columns, not functions.
So we create the column together with the user-requested columns (this also forces the use of `selection_with_processing`) and hide it later.
This also requires special handling for the 'SELECT *' case - we need to manually add all columns before adding the similarity column.
In case the user already asks for the similarity score in the SELECT clause, this value will be calculated twice - it should be optimized in future patches.
When setting up coredump handling, if there are old mounts in a deleted state (e.g. from an older installation),
systemd might fail to activate the new `.mount` unit properly because it assumes the path is already mounted.
Explicitly unmount `/var/lib/systemd/coredump` before proceeding with the setup to ensure a clean state.
Fix: scylladb/scylla-enterprise#5692
Closes scylladb/scylladb#28300
Allows other topology operations to execute while tablets are being
drained on decommission. In particular, bootstrap on scale-out. This
is important for elasticity.
Allows multiple decommission/removenode to happen in parallel, which
is important for efficiency.
Flow of decommission/removenode request:
1) pending and paused, has tablet replicas on target node.
Tablet scheduler will start draining tablets.
2) No tablets on target node, request is pending but not paused
3) Request is scheduled, node is in transition
4) Request is done
Nodes are considered draining as soon as there is a leave or remove
request on them. If there are tablet replicas present on the target
node, the request is in a paused state and will not be picked by
topology coordinator. The paused state is computed from topology state
automatically on reload.
When a request is not paused, its execution starts in the
write_both_read_old state. The old tablet_draining state is not
entered (it's deprecated now).
Tablet load balancing will yield the state machine as soon as some
request is no longer paused and ready to be scheduled, based on
standard preemption mechanics.
Fixes #21452
Closes scylladb/scylladb#24129
* https://github.com/scylladb/scylladb:
docs: Document parallel decommission and removenode and relevant task API
test: Add tests for parallel decommission/removenode
test: util: Introduce ensure_group0_leader_on()
test: tablets: Check that there are no migrations scheduled on draining nodes
test: lib: topology_builder: Introduce add_draining_request()
topology_coordinator, tablets: Fail draining operations when tablet migration fails due to critical disk utilization
tablets: topology_coordinator: Refactor to propagate reason for migration rollback
tablet_allocator: Skip co-location on draining nodes
node_ops: task_manager_module: Populate entity field also for active requests
tasks: node_ops: Put node id in the entity field
tasks, node_ops: Unify setting of task_stats in get_status() and get_stats()
topology: Protect against empty cancelation reason
tasks, topology: Make pending node operations abortable
doc: topology-over-raft.md: Fix diagram for replacing, tablet_draining is not engaged
raft_topology, tablets: Drain tablets in parallel with other topology operations
virtual_tables: Show draining and excluded fields in system.cluster_status and system.load_by_node
locator: topology: Add "draining" flag to a node
topology_coordinator: Extract generate_cancel_request_update()
storage_service: Drop dependency in topology_state_machine.hh in the header
locator: Extract common code in assert_rf_rack_valid_keyspace()
topology_coordinator, storage_service: Validate node removal/decommission at request submission time
When read or write operations are performed on a DC with RF=0 with LOCAL_QUORUM
or LOCAL_ONE consistency level, Cassandra throws `Unavailable` exception.
Scylla allowed such read operations and failed write operations with a cryptic
"broken promise" error. This occurred because the initial availability
check passed (quorum of 0 requires 0 replicas), but execution failed
later when no replicas existed to process the mutation.
This patch adds an explicit RF=0 validation for LOCAL_ONE and LOCAL_QUORUM that
throws before attempting operation execution.
The change also requires `test_query_dc_with_rf_0_does_not_crash_db` to be
upgraded. This test case was asserting a somewhat similar scenario, but wasn't
taking into account the whole matrix of combinations:
- scenarios: successful vs unsuccessful operation outcome
- local consistency levels: LOCAL_QUORUM & LOCAL_ONE
- operations: SELECT (read) & INSERT (write)
and so it's been extended to cover both the pre-existing and the current issues
and the whole matrix of combinations.
Fixes: scylladb/scylladb#27893
The core of `local_quorum_for()` has been extracted to
`get_replication_factor_for_dc()`, which is going to be used later,
while `local_quorum_for()` itself has been recreated using the extracted
part.
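A small sketch of the split, with illustrative types in place of Scylla's replication map (quorum here is the usual `rf / 2 + 1`):
```cpp
#include <cstddef>
#include <map>
#include <string>

// Extracted helper: look up the replication factor of a single datacenter.
size_t get_replication_factor_for_dc(const std::map<std::string, size_t>& rf_per_dc,
                                     const std::string& dc) {
    auto it = rf_per_dc.find(dc);
    return it == rf_per_dc.end() ? 0 : it->second;
}

// local_quorum_for() becomes a thin wrapper over the extracted part.
size_t local_quorum_for(const std::map<std::string, size_t>& rf_per_dc,
                        const std::string& dc) {
    return get_replication_factor_for_dc(rf_per_dc, dc) / 2 + 1;
}
```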
`network_topology_strategy` was abbreviated with `nrs`, and not `nts`. I
think someone incorrectly assumed it's 'network Replication strategy', hence
nrs.
The test is currently flaky. With `remove_dead_nodes_with == "remove"`,
it sends several ALTER KEYSPACE requests. The request performed just
after adding 3 new nodes can unexpectedly be sent twice to two
different nodes by the driver. The second receiver rejects the request
through the new guardrail added in 2e7ba1f8ce,
and the test fails.
This has been acknowledged as a bug in the Python driver. It shouldn't
retry non-idempotent requests with the default retry policy. There could
be one more bug in the driver, as it looks like the driver decides to
resend the request after it disconnects from the first receiver. The
first receiver has just bootstrapped, so the driver shouldn't disconnect.
We deflake the test by reconnecting the driver before performing the
problematic ALTER KEYSPACE request.
The change has been tested in byo, as the failure reproduces only in CI.
Without the change, the test fails once in ~250 runs in dev mode. With
the change, more than 1000 runs passed.
Fixes #27862
No backport needed as 2e7ba1f8ce is only
in master.
Closes scylladb/scylladb#28290
For the purpose of rescoring we need to know whether the query is an ANN query,
and we need access to the index options earlier in `select_statement::prepare` than before.
This patch refactors the extraction of this information into a new helper structure, `ann_ordering_info`,
and uses it consistently.
Add enforce_rack_list option. When the option is set to true,
all tablet keyspaces have rack list replication factor.
When the option is on:
- CREATE STATEMENT always auto-extends rf to rack lists;
- ALTER STATEMENT fails when there is numeric rf in any DC.
The flag is set to false by default and a node needs to be restarted
in order to change its value. Starting a node with enforce_rack_list
option will fail, if there are any tablet keyspaces with numeric rf
in any DC.
enforce_rack_list is a per-node option, and a user needs to ensure
that no tablet keyspace is altered or created while nodes in
the cluster have inconsistent values of it.
Mark rf_rack_valid_keyspaces as deprecated.
Fixes: https://github.com/scylladb/scylladb/issues/26399.
New feature; no backport needed
Closes scylladb/scylladb#28084
* github.com:scylladb/scylladb:
test: add test for enforce_rack_list option
db: mark rf_rack_valid_keyspaces as deprecated
config: add enforce_rack_list option
Revert "alternator: require rf_rack_valid_keyspaces when creating index"
Adds a "sstables" array member to manifest.json.
For each sstable, keep the following metadata:
id - a uuid for the sstable (the sstable identifier
if the use-sstable-identifier option was used, otherwise
the sstable uuid generation)
toc_name - the name of the TOC.txt file
data_size and index_size - in bytes
first_token and last_token - of the sstable first and last keys.
Fixes: SCYLLADB-196
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Add a table member to manifest.json with the keyspace_name,
table_name, table_id, and tablets_type. For tablets-enabled tables, get
the tablet_count on each shard and write the minimum to manifest.json.
For vnodes-based tables, tablet_count=0.
For now, `tablets_type` may be either `none` for vnodes tables, or
`powof2` for tablets tables. In the future, when we support arbitrary
tablet boundaries, this will be reflected here, and it is likely we
would back up the whole tablets map separately to get all tablet boundaries.
Fixes SCYLLADB-195
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
And keep the options for now in the local_snapshot_writer.
The options will be used by following patches to pass
extra metadata like the snapshot creation time, expiration time, etc.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
If tablets are enabled via db::config, add the `tablets = {'enabled': true}`
option when creating a keyspace, even if `cql_test_config.initial_tablets`
is disengaged.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Add metadata about the node: host_id, datacenter, and rack.
This enables dc- or rack- aware restore.
Today this information is "encoded" into the snapshot hierarchy
prefixes, but if all manifest files were stored in a flat
directory, we'd need to encode that metadata in the object name;
it's better for the manifest contents to be self-descriptive.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Add metadata about the manifest itself:
A version and the manifest scope (currently "node",
but in the future, may also be "shard", or "tablet")
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Validate the manifest.json format by loading it using rjson::parse
and then validate its contents to ensure it lists exactly the
SSTables present in the snapshot directory.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
The test is currently flaky. It tries to get the host ID of the bootstrapping
node via the REST API after the node crashes. This can obviously fail. The
test usually doesn't fail, though, as it relies on the host ID being saved
in `ScyllaServer._host_id` at this point by `ScyllaServer.try_get_host_id()`
repeatedly called in `ScyllaServer.start()`. However, with a very fast crash
and unlucky timings, no such call may succeed.
We deflake the test by getting the host ID before the crash. Note that at this
point, the bootstrapping node must be serving the REST API requests because
`await coordinator_log.wait_for("delay_node_bootstrap: waiting for message")`
above guarantees that the node has submitted the join topology request, which
happens after starting the REST API.
Fixes #28227
Closes scylladb/scylladb#28233
In PR 5b6570be52 we introduced the config option `sstable_compression_user_table_options` to allow adjusting the default compression settings for user tables. However, the new option was hooked into the CQL layer and applied only to CQL base tables, not to the whole spectrum of user tables: CQL auxiliary tables (materialized views, secondary indexes, CDC log tables), Alternator base tables, Alternator auxiliary tables (GSIs, LSIs, Streams).
This gap also led to inconsistent default compression algorithms after we changed the option’s default algorithm from LZ4 to LZ4WithDicts (adf9c426c2).
This series introduces a general “schema initializer” mechanism in `schema_builder` and uses it to apply the default compression settings uniformly across all user tables. This ensures that all base and aux tables take their default compression settings from config.
Fixes#26914.
Backport justification: LZ4WithDicts is the new default since 2025.4, but the config option exists since 2025.2. Based on severity, I suggest we backport only to 2025.4 to maintain consistency of the defaults.
Closes scylladb/scylladb#27204
* github.com:scylladb/scylladb:
db/config: Update sstable_compression_user_table_options description
schema: Add initializer for compression defaults
schema: Generalize static configurators into schema initializers
schema: Initialize static properties eagerly
db: config: Add accessor for sstable_compression_user_table_options
test: Check that CQL and Alternator tables respect compression config
The test is currently flaky. It incorrectly assumes that a read with
CL=LOCAL_ONE will see the data inserted by a preceding write with
CL=LOCAL_ONE in the same datacenter with RF=2.
The same issue has already been fixed for CL=ONE in
21edec1ace. The difference is that
for CL=LOCAL_ONE, only dc1 is problematic, as dc2 has RF=1.
We fix the issue for CL=LOCAL_ONE by skipping the check for dc1.
Fixes #28253
The fix addresses CI flakiness and only changes the test, so it
should be backported.
Closes scylladb/scylladb#28274
Add a basic test that creates a strongly consistent keyspace and table,
writes some data, and verifies that the same data can be read back.
Since Scylla-side request proxying is not yet implemented, writes are
handled only on the leader node. The test uses the existing
`/raft/leader_host` REST endpoint to determine the leader of the tablets
Raft group.
The `raft::server` registers metrics using the `server_id` label. When
both a group0 Raft server and the tablets Raft server are created on
the same node/shard, duplicate metrics cause conflicts.
This commit temporarily disables metrics for non-0 groups. A proper fix
will likely require adding a `group_id` label in the future.
We use decoration instead of inheritance, since inheritance already
serves to differentiate statement types (modification_statement has
update_statement and delete_statement as descendants). A better
solution would likely involve refactoring modification_statement and
extracting the mutation-generation logic into a reusable component
shared by both eventual and strongly consistent statements.
Introduce two helper methods that will be used for strongly consistent
select_statement and modification_statement.
redirect_statement() forwards the request to another shard or node.
Currently, only shard forwarding is implemented; node-level proxying
will be added in follow-up PRs.
is_strongly_consistent() will be used in the prepare() method of raw
statements to determine whether a strongly consistent statement should
be created for the given CQL statement.
To guarantee monotonic mutation timestamps, we compute the maximum
timestamp used so far for the current tablet. This is done by calling
read_barrier() on the tablet’s Raft group server and extracting the
maximum timestamp from the local database via
table::get_max_timestamp_for_tablet().
Because read_barrier() may take a while, we perform it proactively in a
dedicated fiber, leader_info_updater, rather than during the mutation
request. This fiber is started when the Raft group server starts for a
tablet. It reacts to wait_for_state_change(), computes the maximum
timestamp, and stores it per term.
The new groups_manager::begin_mutate() function checks whether the
maximum timestamp has already been computed for the current term. If
not, it asks the client to wait. This two-step interface (synchronous
begin_mutate() + asynchronous wait on the need_wait_for_leader future)
is needed because the term can change at any asynchronous point.
If begin_mutate() were asynchronous, the client would need to recheck
the term after `co_await begin_mutate()`.
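A rough, standalone sketch of this two-step contract (illustrative types; the real code uses seastar futures, the tablet's Raft server and Scylla's API timestamps):
```cpp
#include <cstdint>
#include <future>
#include <optional>
#include <unordered_map>

using api_timestamp = int64_t;
using raft_term = uint64_t;

struct begin_mutate_result {
    std::optional<api_timestamp> max_timestamp;    // known for the current term
    std::shared_future<void> need_wait_for_leader; // otherwise wait, then retry
};

class groups_manager_sketch {
    raft_term _current_term = 0;
    std::unordered_map<raft_term, api_timestamp> _max_ts_per_term;
    std::shared_future<void> _leader_ready; // resolved by the leader_info_updater fiber
public:
    explicit groups_manager_sketch(std::shared_future<void> leader_ready)
        : _leader_ready(std::move(leader_ready)) {}

    // Filled in by the updater fiber after read_barrier() + max-timestamp scan.
    void store_max_timestamp(raft_term term, api_timestamp ts) { _max_ts_per_term[term] = ts; }
    void set_term(raft_term t) { _current_term = t; }

    // Synchronous on purpose: the term can change at any asynchronous point,
    // so the caller either gets the timestamp now or waits and calls again.
    begin_mutate_result begin_mutate() const {
        if (auto it = _max_ts_per_term.find(_current_term); it != _max_ts_per_term.end()) {
            return {it->second, {}};
        }
        return {std::nullopt, _leader_ready};
    }
};
```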
We currently do not handle raft::commit_status_unknown. We rethrow it to
the CQL client, which must check whether the command was applied and
retry if necessary. Handling this inside Scylla would require persisting
a deduplication key after applying the mutation, which introduces write
amplification. Additionally, connection breaks between Scylla and the
driver can always occur, so the client must be prepared to verify the
command status regardless.
When a strongly consistent request arrives at a node, we
need to know which replica is the leader, since such requests
are generally executed only on the leader. If a leader has
not yet been elected, we must wait. This commit exposes
wait_for_leader() so it can be used for that purpose.
We cannot rely solely on wait_for_state_change(), because it does not
trigger when some other node becomes a leader.
Add the `coordinator` class, which will be responsible for coordinating
reads and writes to strongly consistent tables. This commit includes
only the boilerplate; the methods will be implemented in separate
commits.
These commands will be used by strongly consistent tablets to submit
mutations to Raft. A simple state_machine implementation is introduced
to apply these commands.
We apply commands in batches to reduce commitlog I/O overhead. The
batched variant of database::apply has known atomicity issues. For
example, it does not guarantee atomicity under memory pressure: some
mutations may be published to the memtable while others are blocked in
run_when_memory_available. We will address these issues later.
Strongly consistent writes require knowing the maximum timestamp of
locally applied mutations to guarantee monotonically increasing
timestamps for subsequent writes.
This commit adds a function that returns the maximum timestamp for a
given tablet.
Why it is safe to use this function with deleted cells:
* Tombstones are included in memtable.get_max_timestamp() calculations.
* The maximum timestamp of a memtable is used to initialize the maximum
timestamp of the resulting sstable.
* During compaction, a new sstable’s maximum timestamp is initialized as
the maximum of the contributing sstables.
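A minimal, self-contained sketch of the idea (illustrative names, not the actual table::get_max_timestamp_for_tablet() signature): the tablet's maximum timestamp is simply the maximum over the max-timestamps already tracked for its memtables and sstables, which per the notes above account for tombstones and survive flush and compaction.
```
#include <algorithm>
#include <cstdint>
#include <limits>
#include <vector>

using timestamp_type = int64_t;

// Hypothetical helper: the per-memtable and per-sstable maxima are assumed to
// come from memtable::get_max_timestamp() and the sstable stats metadata.
timestamp_type max_timestamp_for_tablet(const std::vector<timestamp_type>& memtable_max,
                                        const std::vector<timestamp_type>& sstable_max) {
    timestamp_type result = std::numeric_limits<timestamp_type>::min();
    for (auto ts : memtable_max) {
        result = std::max(result, ts);
    }
    for (auto ts : sstable_max) {
        result = std::max(result, ts);
    }
    return result;
}
```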
This commit adds a `consistency` field to `tablet_replication_strategy`.
In upcoming commits we'll use this field to determine if a
`raft_group_id` should be generated for a new table.
Add a `raft_group_id` column to `system.tablets` and to the `tablet_map`
class. The column is populated only when the
`strongly_consistent_tables` feature is enabled.
This feature is currently disabled by default and is enabled only when
the user sets the `STRONGLY_CONSISTENT_TABLES` experimental flag.
The `raft_group_id` column is added to `system.tablets` only when this
flag is set. This allows the schema to evolve freely while the feature
is experimental, without requiring complex migrations.
This is a refactoring/simplification commit.
There are many 'prepare' functions in this class that don't
meaningfully differ from each other. prepare_statement() adds
accidental complexity by introducing a level of indirection -- the reader
has to jump between the call site and the function body to reconstruct
the full picture.
In preparation for upcoming work on strongly consistent queries in
Scylla, this commit renames the existing `strongly_consistent`
statements to `broadcast_statements` to avoid confusion.
The old code paths are kept temporarily, as they may be useful for
reference or for copying parts during the implementation of the new
strongly consistent statements.
This patch changes the layout of user-facing scheduling groups from
/
`- statement
`- sl:default
`- sl:*
`- other groups (compaction, streaming, etc.)
into
/
`- user (supergroup)
`- statement
`- sl:default
`- sl:*
`- other groups (compaction, streaming, etc.)
The new supergroup has 1000 static shares and is nameless, in the sense
that it only has a variable in the code to refer to it and is not exported
via metrics (this should be fixed in seastar if we want that).
The moved groups don't change their names or shares, only move inside
the scheduling hierarchy.
The goal of the change is to improve resource consumption of sl:*
groups. Right now activities in low-shares service levels are scheduled
on par with e.g. streaming activity, which is considered a low-priority
one. Moving all sl:* groups into their own supergroup with 1000
shares changes the meaning of sl:* shares. From now on these share
values describe priorities of service levels relative to each other, and the
user activities compete with the rest of the system with 1000 shares,
regardless of how many service levels there are.
Unit tests keep their user groups under the root supergroup (for simplicity)
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Closesscylladb/scylladb#28235
Currently, raft-based node operations with streaming use topology guards, but repair-based don't.
Topology guards ensure that if a respective session is closed (the operation has finished), each leftover operation being a part of this session fails. Thanks to that we won't incorrectly assume that e.g. the old rpc received late belongs to the newly started operation. This is especially important if the operation involves writes.
Pass a topology_guard down from raft_topology_cmd_handler to repair tasks. Repair tasks already support topology guards.
Fixes: https://github.com/scylladb/scylladb/issues/27759
No topology_guard in any version; needs backport to all versions
Closesscylladb/scylladb#27839
* github.com:scylladb/scylladb:
service: use session variable for streaming
service: pass topology guard to RBNO
backup and restore tests. This made the testing times explode
with both cluster/object_store/test_backup.py and
cluster/test_refresh.py taking more than an hour each to complete
under test.py and around 14min under pytest directly.
This was painful especially in CI because it runs tests under test.py which
suffers from the issue of not being able to run test cases from within
the same file in parallel (a fix is attempted in 27618).
This patch reduces the dataset of these tests to the minimum and
gets rid of one of the tested topologies as it was redundant.
The test times are reduced to 2min under pytest and 14 mins under
test.py.
Signed-off-by: Robert Bindar <robert.bindar@scylladb.com>
Closesscylladb/scylladb#28280
This series introduces the `rescoring` index option.
There is no rescoring algorithm implementation yet.
This series prepares it by:
- adding new index option
- adding documentation
- adding tests for option handling
- adding tests for the rescoring implementation - at this point they report errors and are marked as expected failures, because rescoring is not implemented.
Follow-up https://github.com/scylladb/scylladb/pull/27677
Fixes https://scylladb.atlassian.net/browse/SCYLLADB-293
Fixes https://scylladb.atlassian.net/browse/SCYLLADB-294
No backporting - it is a new feature.
Closesscylladb/scylladb#28165
* github.com:scylladb/scylladb:
vector_search: Add more rescoring validation tests
vector_search: Add rescoring validation test
vector_search: doc: Document new index option
vector_search: test: Add `rescoring` index option test
vector_index: introduce rescoring option
vector_index: improve options validation
The streamed_mutation_freezer class uses a deque to avoid large
allocations, but fails as seen in the referenced issue when the
vector backing the deque grows too large. This may be a problem
in itself, but the issue doesn't provide enough information to tell.
Fix the immediate problem by switching to chunked_vector, which
is better at avoiding large allocations. We do lose some early-free
in serialize_mutation_fragments(), but since most of the memory should
be in the clustering row itself, not in the deque/chunked_vector holding
it, it should not be a problem.
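For illustration, a generic and much simplified chunked vector (this is not ScyllaDB's utils::chunked_vector): elements live in fixed-size, individually allocated chunks, so growing the container never needs one huge contiguous allocation for the payload; only the small index of chunk pointers stays contiguous.
```
#include <array>
#include <cstddef>
#include <memory>
#include <utility>
#include <vector>

template <typename T, std::size_t ChunkSize = 128>
class chunked_vector_sketch {
    // Contiguous index of chunk pointers; grows slowly compared to the payload.
    std::vector<std::unique_ptr<std::array<T, ChunkSize>>> _chunks;
    std::size_t _size = 0;
public:
    void push_back(T v) {
        if (_size % ChunkSize == 0) {
            _chunks.push_back(std::make_unique<std::array<T, ChunkSize>>());
        }
        (*_chunks.back())[_size % ChunkSize] = std::move(v);
        ++_size;
    }
    T& operator[](std::size_t i) { return (*_chunks[i / ChunkSize])[i % ChunkSize]; }
    std::size_t size() const { return _size; }
};
```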
Fixes#28275Closesscylladb/scylladb#28281
Auth v2 migration uses non-paged queries via `execute_internal` API.
This commit changes it to use `query_internal` instead, which uses
paging under the hood.
Fixes: https://github.com/scylladb/scylladb/issues/27577
A minor enhancement, no need to backport.
Closesscylladb/scylladb#25395
* github.com:scylladb/scylladb:
auth: use paged internal queries during migration
auth: move some code in migrate_to_auth_v2 up
auth: re-align pieces of migrate_to_auth_v2
cql: extend `query_internal` with `query_state` param
This reverts commit c8cff94a5a.
Re-enabling incremental repair on master with "Aborting on shard 0 during
scaleout + repair #26041" and "Failure to attach sstables in streaming consumer
leaves sealed sstables on disk #27414" fixed.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Closesscylladb/scylladb#28120
Since #28109 was merged, those tests started to pass as we allow
the filtering on primary key columns within ANN vector queries.
Closesscylladb/scylladb#28231
Adding tests for specific cases of rescoring processing:
- wildcard selection - "SELECT * ..." is a case with a slightly different path of rescoring processing. We want to confirm that it is handled correctly.
- calculating similarity with other vectors in the SELECT clause should not influence ANN ordering.
- NULL handling - results that for any reason have a NULL score should be filtered out.
As rescoring is not implemented yet, the tests use boost::unit_test::expected_failures
to indicate that the test reports errors.
Verify that vector store results will be correctly rescored and reordered
according to the rescoring algorithm.
As rescoring is not implemented yet, the tests use `boost::unit_test::expected_failures`
to indicate that they report errors.
The first test checks rescoring with a simple selection list.
The second makes sure that rescoring is not triggered for quantization=f32 - the full representation of vectors.
The third repeats the first one, but additionally returns the similarity score value.
This patch adds a vector index option that allows enabling rescoring - recalculation of the similarity metric and re-ranking of quantized VS candidates.
Quantization is a necessary condition to run rescoring - checked in the convenience function `is_rescoring_enabled`.
Rescoring itself is not implemented - it will come in the following patches.
Fixes https://scylladb.atlassian.net/browse/SCYLLADB-294
Add a `prepared_filter` class which handles the preparation, construction
and caching of the Vector Search filtering compatible JSON object.
If no bind markers are found in the SELECT statement, the JSON object is built
once at prepare time and cached for use during execution calls.
Adjust tests accordingly to use prepared filters.
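A hypothetical sketch of the caching scheme (illustrative names, not the real prepared_filter API): when the filter references no bind markers, its JSON form is computed once at prepare time and reused for every execution; otherwise it is rebuilt from the bound value.
```
#include <optional>
#include <string>
#include <utility>

class prepared_filter_sketch {
public:
    // 'literal' is engaged when the restriction uses a literal value rather
    // than a bind marker, so the JSON can be cached at prepare time.
    prepared_filter_sketch(std::string column, std::optional<std::string> literal)
            : _column(std::move(column)) {
        if (literal) {
            _cached = render(_column, *literal);
        }
    }

    // Called on every execution; the cached form short-circuits the rebuild.
    std::string filter_json(const std::string& bound_value) const {
        return _cached ? *_cached : render(_column, bound_value);
    }

private:
    static std::string render(const std::string& col, const std::string& val) {
        return "{\"" + col + "\": \"" + val + "\"}";
    }

    std::string _column;
    std::optional<std::string> _cached;
};
```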
Follow-up: #28109
Fixes: SCYLLADB-299
During the rewrite, --extra-scylla-cmdline-options was missed and was not
passed to the tests that use pytest. As a result, there was no way to pass
these parameters to Scylla via the command line; the tests themselves were
not affected because they were using the parameters from the yaml file.
This PR fixes the issue, making it easier to modify the Scylla start
parameters without modifying code.
Move the Vector Search filter functions from `cql3::restrictions` to
the `vector_search` namespace, as it is a better fit for their purpose.
The effective name has now changed from `cql3::restrictions::to_json`
to `vector_search::to_json` which clearly mentions that the JSON
object will be used for Vector Search.
Rename the auxiliary functions to use a `to_json` suffix instead of a
variety of verbs, as their logic focuses on building a JSON object from
different structures. The old naming emphasized too much distinction
between those functions, while they do pretty much the same thing.
Follow-up: #28109
The API contract in partition_version.hh states that when dealing with
evictable entries, a real cache tracker pointer has to be passed to all
methods that ask for it. The nonpopulating reader violates this, passing
a nullptr to the snapshot. This was observed to cause a crash when a
concurrent cache read accessed the snapshot with the null tracker.
A reproducer is included which fails before and passes after the fix.
Fixes: #26847Closesscylladb/scylladb#28163
Bound_weight and partition_region are defined in both paging_state.idl.hh and
position_in_partition.idl.hh. This isn't currently causing any issues, but if
a future RPC uses both the paging_state and position_in_partition, after
including both files we'll get a duplicate error.
In this patch we prevent this by removing the definitions from paging_state.idl.hh
and including position_in_partition.idl.hh in their place.
Closesscylladb/scylladb#28228
Use session that was retrieved at the beginning of the handler for
node operations with streaming to ensure that the session id won't
change in between.
Currently, raft-based node operations with streaming use topology
guards, but repair-based don't.
Topology guards ensure that if a respective session is closed
(the operation has finished), each leftover operation being a part
of this session fails. Thanks to that we won't incorrectly assume
that e.g. the old rpc received late belongs to the newly started
operation. This is especially important if the operation involves
writes.
Pass a topology_guard down from raft_topology_cmd_handler to repair
tasks. Repair tasks already support topology guards.
Fixes: https://github.com/scylladb/scylladb/issues/27759
Mark the rf_rack_valid_keyspaces option as deprecated. Users should
use the enforce_rack_list option instead.
The option can still be used and its behavior does not change.
Docs are updated accordingly.
Add enforce_rack_list option. When the option is set to true,
all tablet keyspaces have rack list replication factor.
When the option is on:
- CREATE STATEMENT always auto-extends rf to rack lists;
- ALTER STATEMENT fails when there is numeric rf in any DC.
The flag is set to false by default and a node needs to be restarted
in order to change its value. Starting a node with enforce_rack_list
option will fail, if there are any tablet keyspaces with numeric rf
in any DC.
enforce_rack_list is a per-node option and a user needs to ensure
that no tablet keyspace is altered or created while nodes in
the cluster do not have a consistent value.
Currently the suite generates the config in the old format, and only a single
test validates that using the new format "works".
This change updates the suite (mainly the MinioServer::create_conf()
method) to generate the endpoint config in the new format.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Closesscylladb/scylladb#28113
The datagram_channel::send() overload that sends net::packet-s is
deprecated in favor of the span<temporary_buffer> one. The auditing code
still uses the former -- it constructs a packet from a formatted
string by copying the string into the packet's fragment, then sends it.
This patch releases the string into a temporary_buffer and then passes a
one-element span to send().
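A hedged sketch of the approach; the exact span-based send() overload is assumed from the description above, and the helper name is made up. The string is moved to the heap and wrapped without copying, with a deleter keeping it alive until the send completes:
```
#include <seastar/core/deleter.hh>
#include <seastar/core/temporary_buffer.hh>
#include <seastar/net/api.hh>
#include <array>
#include <cstddef>
#include <memory>
#include <span>
#include <string>

seastar::future<> send_audit_line(seastar::net::datagram_channel& chan,
                                  const seastar::socket_address& dst,
                                  std::string line) {
    auto owned = std::make_unique<std::string>(std::move(line));
    char* data = owned->data();
    std::size_t size = owned->size();
    // The deleter keeps the string alive until the buffer is released.
    seastar::temporary_buffer<char> buf(data, size,
                                        seastar::make_object_deleter(std::move(owned)));
    std::array<seastar::temporary_buffer<char>, 1> bufs{std::move(buf)};
    // Assumed overload: send(destination, span of temporary_buffers).
    return chan.send(dst, std::span(bufs));
}
```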
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Closesscylladb/scylladb#28198
do_query() is a coroutine but uses some continuations to take
advantage of exceptions being propagated via future::then() without
being thrown. We can accomplish the same thing with a nested coroutine
and coroutine::try_future(), simplifying the code.
While this area isn't performance intensive, we're not adding allocations.
The coroutine frame may add an allocation, but since read_page()
certainly does not return immediately, the following then() will allocate
as well. Since we eliminated that then(), the change is at least neutral
allocation-wise.
Closesscylladb/scylladb#28258
Consider the following scenario:
1. Let nodes A,B,C form a cluster with RF=3
2. Write query with CL=QUORUM is submitted and is acknowledged by
nodes B,C
3. Follow-up read query with CL=QUORUM is sent to verify the write
from the previous step
4. Coordinator sends data/digest requests to the nodes A,B. Since the
node A is missing data, digest mismatches and data reconciliation
is triggered
5. The node A or B fails, becomes unavailable, etc
6. During reconciliation, data requests are sent to node A,B and fail
failing the entire read query
When the above scenario happens, the tests using `start_writes()` fail
with the following stacktrace:
```
...
> await finish_writes()
test/cluster/test_tablets_migration.py:259:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
test/pylib/util.py:241: in finish
await asyncio.gather(*tasks)
test/pylib/util.py:227: in do_writes
raise e
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
worker_id = 1
...
> rows = await cql.run_async(rd_stmt, [pk])
E cassandra.ReadFailure: Error from server: code=1300 [Replica(s) failed to execute read] message="Operation failed for test_1767777001181_bmsvk.test - received 1 responses and 1 failures from 2 CL=QUORUM." info={'consistency': 'QUORUM', 'required_responses': 2, 'received_responses': 1, 'failures': 1}
```
Note that when a node failure happens before/during a read query,
there is no test failure as the speculative retries are enabled
by default. Hence an additional data/digest read is sent to the third
remaining node.
However, the speculative read is cancelled the moment the read
query reaches CL, which may trigger a read-repair.
This change:
- Retries the verification read in start_writes() on failure to mitigate
races between reads and node failures
- Adds additional logging to correlate Python exceptions with Scylla logs
Fixes https://github.com/scylladb/scylladb/issues/27478
Fixes https://github.com/scylladb/scylladb/issues/27974
Fixes https://github.com/scylladb/scylladb/issues/27494
Fixes https://github.com/scylladb/scylladb/issues/23529
Note that this change addresses test flakiness observed during tablet transitions.
However, it serves as a workaround for a higher-level issue:
https://github.com/scylladb/scylladb/issues/28125
Closesscylladb/scylladb#28140
Auth v2 migration uses non-paged queries via `execute_internal` API.
This commit changes it to use `query_internal` instead, which uses
paging under the hood.
Fixes: scylladb/scylladb#27577
Just move the touched code above so the next commit is more readable.
But this has a drawback: previously, if the returned rows were empty,
this code was not executed, but now it is executed regardless of the query
results. This shouldn't be a big deal, though, as auth shouldn't be
empty.
This patch adds vector index options that allow enabling quantization and oversampling.
The specific quantization value will be used internally by the vector store.
In the current implementation, get_oversampling allows us to decide how many times more candidates
to retrieve from the vector store - the final response is still trimmed to the given limit.
It is a first step towards allowing rescoring - recalculation of the similarity metric and re-ranking.
Without rescoring, oversampling will also be further optimized to happen internally in the vector store.
`test/vector_search/rescoring_test.cc` implements basic tests of added functionality.
New options are documented in `docs/cql/secondary-indexes.rst`.
Fixes https://scylladb.atlassian.net/browse/SCYLLADB-82
Ref https://scylladb.atlassian.net/browse/SCYLLADB-83
New feature - no backporting
Closesscylladb/scylladb#27677
* github.com:scylladb/scylladb:
vector_search: doc: Document new index options
vector_search: test: Test oversampling
vector_search: test: Add rescoring index options test
vector_search: test: Extract Configure utility to shared header
vector_index: introduce `quantization` and `oversampling` options
Cassandra changed their system tables in 3.0. We migrated to the new system table layout in 2017, in ScyllaDB 2.0.
System tables introduced in Cassandra 3.0, as well as the 3.0 variant of pre-existing system tables were added to the db::system_table::v3 namespace.
We ended up adding some new ScyllaDB-only system tables to this namespace as well.
As the dust settled, most of the v3 system tables ended up being either simple aliases to non-v3 tables, or new tables.
Either way, the codebase has used just one variant of each table for a long time now, so the v3:: distinction is pointless.
Remove the v3 namespace and unify the table listing under the top-level db::system_keyspace scope.
Code cleanup, no backport
Closesscylladb/scylladb#28146
* github.com:scylladb/scylladb:
db/system_keyspace: move remining tables out of v3 keyspace
db/system_keyspace: relocate truncated() and commitlog_cleanups()
db/system_keyspace: drop v3::local()
db/system_keyspace: remove duplicate table names from v3
The loops in `ongoing_rf_change()` perform explicit yields, but they
also perform coroutine operations which can yield implicitly. The
explicit yields are redundant.
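A small illustration of the simplification (handle() is a hypothetical stand-in for the real per-item work): when the loop body already co_awaits a future, seastar checks for preemption at that point, so an extra explicit yield adds nothing.
```
#include <seastar/core/coroutine.hh>
#include <seastar/core/sleep.hh>
#include <chrono>
#include <vector>

// Hypothetical async per-item step standing in for the real work.
static seastar::future<> handle(int) {
    co_await seastar::sleep(std::chrono::microseconds(0));
}

seastar::future<> process(std::vector<int> items) {
    for (auto item : items) {
        co_await handle(item);  // implicit preemption point
        // co_await seastar::coroutine::maybe_yield();  // redundant explicit yield, removed
    }
}
```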
Signed-off-by: Nikos Dragazis <nikolaos.dragazis@scylladb.com>
`effective_capacity` is a value used in size based load balancing. It contains the sum of available disk space of a node and all the tablet sizes.
This change adds this value to the virtual table `system.load_per_node`. This can be useful for debugging size based load balancing.
Size based load balancing is currently only on master, so no backport is needed.
Closesscylladb/scylladb#28220
* github.com:scylladb/scylladb:
docs: add effective_capacity to system keyspace docs
virtual_table: add effective_capacity to load_per_node
This test has to be adjusted in lock-step with scylladb.git, due to changes in https://github.com/scylladb/scylladb/pull/27836. It is simpler to just take the time and import it, so https://github.com/scylladb/scylladb/pull/27836 can patch all the affected tests, including this one.
All code is imported verbatim, then patched later, such that the series remains bisectable.
dtest import, no backport needed
Closesscylladb/scylladb#28085
* github.com:scylladb/scylladb:
test/cluster/dtest: remove is_win() and users
test/cluster/dtest/scrub_test.py: add license blurb
test/cluster/dtest: import scrub_test.py
test/cluster/dtest/ccmlib: scylla_node.py: adapt run_scylla_sstable() at al
test/cluster/dtest/ccmlib: scylla_node.py: import run_scylla_sstable()
The original scrub test was done by the Cassandra project, hence there
are two license notices: one for the original work by Cassandra
(2015) and one for our modifications on top (2021).
And dependencies: get_sstables() and __gather_sstables().
The code is imported verbatim, but doesn't work yet (and has no users yet either).
It will be patched to work in the next commit.
The last remaining tables in the v3 keyspace are those that are genuinely
distinct -- added by Cassandra 3.0 or ScyllaDB >= 2.0.
Move these out of the v3 keyspace too; with this, the v3 keyspace is
defunct and is removed.
The name variables of these tables are outside the v3 namespace, but the
methods defining their schemas are in the v3 namespace. Relocate the
methods out of the v3 namespace, to the scope where the name variables
live.
The methods are moved to the private: part of system_keyspace, as they
don't have external users currently.
Remove those table names that are effectively just an alias of their
counterpart outside of the v3 namespace (struct).
scylla_local() is made public. Currently it is private, but it has
external users, working around the private designation by using the
public v3::scylla_local() alias. This change just makes the existing
status clear.
This patch adds vector index options that allow enabling quantization and oversampling.
The specific quantization value will be used internally by the vector store.
In the current implementation, `get_oversampling` allows us to decide how many times more candidates
to retrieve from the vector store - the final response is still trimmed to the given limit.
It is a first step towards allowing rescoring - recalculation of the similarity metric and re-ranking.
Without rescoring, oversampling will also be further optimized to happen internally in the vector store.
Fixes https://scylladb.atlassian.net/browse/SCYLLADB-82
Ref https://scylladb.atlassian.net/browse/SCYLLADB-83
Currently, we only know about long reclaims from lsa-timing stall
reports. Shorter reclaims can go under the radar.
Those metrics will help to assess increases in LSA activity, which
translate to a higher CPU cost of a workload.
`reclaim` tracks memory which goes to the standard allocator, e.g. when
entering an allocating_section or in the background reclaimer.
`evict`/`compact` count activity towards building the LSA reserve, on
allocating_section entry, or on naked LSA allocation.
Closesscylladb/scylladb#27774
The way that test.py runs test/cqlpy tests requires that tests end their
session with all keyspaces deleted. If we forget to delete a keyspace,
test.py suspects that some test failed and reports a failure. As reported in
issue #26291, the test file test/cqlpy/test_describe.py caused this check
to trigger, so this file was added to the blacklist "dirties_cluster"
in suite.yaml to force test.py to ignore this problem.
I believe the cause of the problem was as follows: test_describe.py
didn't really leave any undeleted keyspace. Rather, test_describe.py had
one test which used "USE" and this broke DESC KEYSPACES (Refs #26334) -
which test.py used to see which keyspaces remained.
We solved this problem not just once, but twice:
1. In pull request #26345, I fixed the test not to use "USE" on the main
CQL session.
2. In pull request #27971, I fixed DESC KEYSPACES implementation so even
if "USE" was in effect, it will return the correct results.
I checked manually, and after removing test_describe.py from the
dirties_cluster blacklist, all cqlpy tests now pass, without
spurious failures in the test following test_describe.py. So it's time
to remove it from the blacklist.
Fixes#26291
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closesscylladb/scylladb#27973
Commit d54d409 (audit: write out to both table and syslog) unified
create_audit and start_audit, which moved the audit service creation later
in the startup sequence. This broke startup when audit is enabled because
view_builder prepares CQL queries before start_audit runs, and
query preparation calls audit_instance().local_is_initialized()
which crashes on the non-existent sharded service.
Move start_audit to run before view_builder::start() and other components
that may prepare CQL queries during their initialization.
Fixes SCYLLADB-252
Closesscylladb/scylladb#28139
Our glossary is stuck in the past, still discussing token ownership in
terms of vnodes and cluster synchronization in terms of gossip.
This patch tries to improve this a bit, although much more work needs to
be done.
The term `Tablet` is added and the definition of `Token` and `Token
Range` is rephrased to be tablet inclusive.
The term `Cluster` is changed to mention raft as the synchronization
mechanism instead of gossip.
One outstanding problem is that our general architecture page describing
the ring architecture is still vnode-only. We have a separate Tablets
page, but the two don't link to each other and most documentation refers
only to the former. A casual reader might spend a lot of
time on our documentation pages without ever seeing the word: tablets.
Closesscylladb/scylladb#28170
The loop that unwraps nested exceptions rethrows the nested exception and saves a pointer to the temporary std::exception& inner on the stack, then continues. This pointer thus ends up pointing to a released temporary.
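An illustrative reconstruction of the bug pattern (not the actual ScyllaDB code): the exception object caught inside the loop only lives for the duration of that handler, so keeping a pointer to it past the catch block leaves it dangling.
```
#include <exception>

const std::exception* find_innermost_buggy(const std::exception& e) {
    const std::exception* cur = &e;
    // Unwrap nested exceptions by rethrowing and catching each level.
    while (auto nested = dynamic_cast<const std::nested_exception*>(cur)) {
        try {
            nested->rethrow_nested();
        } catch (const std::exception& inner) {
            cur = &inner;   // BUG: 'inner' may be destroyed when this handler exits
        }
        // The next loop iteration dereferences 'cur' again, compounding the bug.
    }
    return cur;             // can point at a released temporary
}
```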
Closesscylladb/scylladb#28143
reader_permit::release_base_resources() is a soft evict for the permit:
it releases the resources acquired during admission. This is used in
cases where a single process owns multiple permits, creating a risk of
deadlock, as is the case for repair. In this case,
release_base_resources() acts as a manual eviction mechanism to prevent
permits blocking each other from admission.
Recently we found a bad interaction between release_base_resources() and
permit eviction. Repair uses both mechanisms: it marks its permits as
inactive and later it also uses release_base_resources(). This practice
might be worth reconsidering, but the fact remains that there is a bug
in the reader permit which causes the base resources to be released
twice when release_base_resources() is called on an already evicted
permit. This is incorrect and is fixed in this patch.
Improve release_base_resources():
* make _base_resources const
* move signal call into the if (_base_resources_consumed()) { }
* use reader_permit::impl::signal() instead of
reader_concurrency_semaphore::signal()
* all places where base resources are released now call
release_base_resources()
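A minimal sketch of the resulting shape (illustrative, not the real reader_permit implementation): every path that gives back the base resources funnels through a single idempotent release.
```
class permit_sketch {
    const unsigned _base_resources = 1;    // made const by the fix
    bool _base_resources_consumed = true;  // true while we still hold them
    unsigned _semaphore_units = 0;         // stand-in for the semaphore
public:
    void release_base_resources() {
        if (_base_resources_consumed) {    // signal only inside the guard
            _base_resources_consumed = false;
            signal(_base_resources);       // resources returned exactly once
        }
    }
    void evict() {
        release_base_resources();          // eviction uses the same path
    }
private:
    void signal(unsigned units) {
        _semaphore_units += units;         // give the units back
    }
};
```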
A reproducer unit test is added, which fails before and passes after the
fix.
Fixes: #28083Closesscylladb/scylladb#28155
This is a translation of Cassandra's CQL unit test source file
validation/operations/InsertUpdateIfConditionTest.java into our cqlpy
framework.
This test file checks various LWT conditional updates. After that
file became too big, the Cassandra developers split parts from it -
moving tests for LWT with collections, UDTs, and static columns to
separate test files - which I already translated (pull request #13663).
This patch translates the remaining, main, LWT tests.
Strangely, this test file also has, in the middle of the file, several
tests for conditional schema changes, like CREATE KEYSPACE IF NOT EXISTS,
a feature which has *nothing* to do with LWT so really didn't belong in
this file. But I translated those as well.
These new tests all pass on both ScyllaDB and Cassandra, and have not
uncovered any new bug.
However these tests do demonstrate yet again something that users and
developers of ScyllaDB's LWT must be aware of: Whereas usually
ScyllaDB's goal has been compatibility with Cassandra's CQL, in LWT
this has *not* been the case: ScyllaDB deviated from Cassandra's
behavior in its LWT implementation in several places. These intentional
deviations were documented in docs/kb/lwt-differences.rst.
Accordingly, the tests here include almost a hundred (!) modifications
(search for "if is_scylla") to allow the same test to pass on both
ScyllaDB and Cassandra, as well as many comments explaining the types
of differences we're seeing.
Although these deviations from Cassandra compatibility are known and
intentional, it's worth listing here the ones re-discovered by these
new tests:
1. On a successful conditional write, Cassandra returns just true, Scylla
also returns the old contents of the row.
2. Similarly, in an IF EXISTS write that failed (the row did not exist),
Cassandra returns just false, Scylla also returns extra null values for
each and every column of the row.
3. Cassandra allows in "IF v IN (?, ?)" to bind individual values to
UNSET_VALUE and skips them, Scylla treats this as an error. Refs #13659.
4. When there are static columns, Scylla's LWT response returns the static
column first, Cassandra returns the modified column first. Since both
also say which columns they return, neither is more correct than the other,
and normally users will address specific columns by name, not by position.
5. docs/kb/lwt-differences.rst explains that "the returned result set
contains an old row for every conditional statement in the batch".
Beyond this difference, non-conditional updates in the batch will
also get a row in Scylla's result. Refs #27955.
6. For batch statements, ScyllaDB allows mixing `IF EXISTS`, `IF NOT EXISTS`,
and other conditions for the same row. Cassandra doesn't, so checks that
these combinations are not allowed were commented out.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closesscylladb/scylladb#27961
This PR marks system_replicated_keys as a system keyspace.
It was missing when the keyspace was added.
A side effect of that omission is that metrics that are not supposed to be reported are reported.
Fixes#27903Closesscylladb/scylladb#27954
* github.com:scylladb/scylladb:
distributed_loader: system_replicated_keys as system keyspace
replicated_key_provider: make KSNAME public
In storage_service::raft_topology_cmd_handler we pass a lambda
wrapped in coroutine::lambda to a function that creates streaming_task_impl.
The lambda is kept in streaming_task_impl, which invokes it in its run
method.
The lambda captures may be destroyed before the lambda is called, leading
to use-after-free.
Do not wrap the lambda passed to streaming_task_impl into coroutine::lambda.
Use `this auto` to dissociate the lambda's lifetime from the calling statement.
Fixes: https://github.com/scylladb/scylladb/issues/28200.
Closesscylladb/scylladb#28201
Currently, if an rf change request is paused, it immediately changes
the system_schema.keyspaces to use rack list for this keyspace.
If the request is aborted, the co-location might not be finished.
Hence, we can end up with inconsistent schema and tablet replica state.
Update the system_schema.keyspaces only after the co-location is done (and
not when it's started).
Fixes: https://github.com/scylladb/scylladb/issues/28167
No backport needed; the changes that introduced the bug are only on master
Closesscylladb/scylladb#28168
* github.com:scylladb/scylladb:
service: fin indentation
test: add test_numeric_rf_to_rack_list_conversion_abort
service: tasks: fix type of global_topology_request_virtual_task
service: do not change the schema while pausing the rf change
Refs #27429
Re-implement the dtest with the same name as a scylla pytest, using a Python-level network proxy instead of tcpdump etc.
Both to avoid sudo and also to ensure we don't race.
Juggles different listen_address and broadcast_address values to insert a proxy measuring RPC traffic.
Note: the measuring relies on Python network IO not splitting data chunks, since we don't really have a packet-level view of the connections.
Note that a scylla change is required to make the IP address magic work, otherwise the topology mechanism gets
confused. This should maybe at some point be looked into more, since we should be more resilient against various services in scylla binding to different addresses.
When this test is merged, we can drop the flaky test from dtest. And hope no new flakiness comes from this one...
Closesscylladb/scylladb#28133
* github.com:scylladb/scylladb:
test/cluster/test_internode_compression: Transpose test from dtest
gossiper/main: Extend special treatment of node ID resolve for rpc_address
All the tests under test/cqlpy/cassandra_tests/ were translated from
Cassandra's unit tests originally written in Java into our own test
framework, and accordingly carry a clear mention of their origin and
original license.
However, we did modify these original tests - even if the modification
was slight and mostly straightforward. Therefore I was asked to also
mention our own copyright (and license) for these modifications.
So this patch adds to every file in test/cqlpy/cassandra_tests/ text like:
# Modifications: Copyright 2026-present ScyllaDB
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
with the appropriate year instead of 2026.
Fixes#28215
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closesscylladb/scylladb#28216
Many tests want to assume that the group0 leader runs on a particular
server, typically the first server in the list.
They cannot easily be made to work with an arbitrary leader, because
they set up a particular topology and then stop particular nodes, and
want to assume the leader is stable. They open the leader's log and
expect things to appear in that log.
It's much easier to ensure a particular leader than to prepare tests to
handle failovers.
In case of decommission, it's not desirable because it's less urgent.
In case of removenode, it leads to failure of removenode operation
because scheduled co-locating migration will fail if the destination
is on the excluded node, and this failure will be interpreted as drain
failure and coordinator will cancel the request.
Not a problem before "parallel decommission" because this failure is
only a streaming failure, not a barrier failure, so exception doesn't
escape into the catch clause in transition stage handler, and the
migration is simply rolled back. Once draining happens in the tablet
migration track, streaming failure will be interpreted as drain
failure and cancel the request.
Reaching critical disk utilization on the destination means the draining
either caused it, or at least works against relieving it. So it's
better to cancel those requests. In case of decommission, if critical
disk utilization was caused by it due to insufficient capacity, aborting the
decommission will bring capacity back to the system and rebalancing
will relieve the critical disk utilization.
In case of decommission, it's not desirable because it's less
urgent.
In case of removenode, it leads to failure of removenode operation
because scheduled co-locating migration will fail if the destination
is on the excluded node, and this failure will be interpreted as drain
failure and coordinator will cancel the request.
Not a problem before "parallel decommission" because this failure is
only a streaming failure, not a barrier failure, so exception doesn't
escape into the catch clause in transition stage handler, and the
migration is simply rolled back. Once draining happens in the tablet
migration track, streaming failure will be interpreted as drain
failure and cancel the request.
We want to be able to cancel decommission when it's still in the
tablet draining phase. Such a request is in a pending and paused
state, and can be safely canceled. We set the node's "draining" flag
back to false.
Allows other topology operations to execute while tablets are being
drained on decommission. In particular, bootstrap on scale-out. This
is important for elasticity.
Allows multiple decommission/removenode to happen in parallel, which
is important for efficiency.
Flow of decommission/removenode request:
1) pending and paused, has tablet replicas on target node.
Tablet scheduler will start draining tablets.
2) No tablets on target node, request is pending but not paused
3) Request is scheduled, node is in transition
4) Request is done
Nodes are considered draining as soon as there is a leave or remove
request on them. If there are tablet replicas present on the target
node, the request is in a paused state and will not be picked by
topology coordinator. The paused state is computed from topology state
automatically on reload.
When request is not paused, its execution starts in
write_both_read_old state. The old tablet_draining state is not
entered (it's deprecated now).
Tablet load balancing will yield the state machine as soon as some
request is no longer paused and ready to be scheduled, based on
standard preemption mechanics.
The test case test_explicit_tablet_movement_during_decommission is
removed. It verifies that tablet move API works during tablet draining
transition. After this PR, we no longer enter this transition, so the
test doesn't work. It loses its purpose, because movement during
normal tablet balancing is not special and tested elsewhere.
They are being drained of tablet replicas, tablet scheduler works to
move replicas away from such nodes. This state is set at the
beginning of decommission and removenode operations.
After parallel tablet draining, the validation at the time the request
starts executing is too late: tablets will already be drained.
This trips tests which expect validation failure, but get tablet
draining failure instead.
Also, in case of decommission, it's a waste to go through draining
only to discover that the operation has to be rolled back due to
validation.
So avoid submitting a request altogether if it's invalid.
The validation at request execution start remains, for extra safety.
validate_removing_node() was extracted out of topology_coordinator,
so that it can be called by storage_service on a non-coordinator node.
Some tests need adjusting for the fact that after a failed removenode
the node may still not be marked as excluded, so we need to explicitly
exclude it or add it to the list of ignored nodes in the next removenode
operation.
Since the Vector Store service filtering API has been implemented (scylladb/vector-store#334), the Scylla-side part needs to be implemented.
This patch implements parsing of `statement_restrictions` into Vector Store filtering API compatible JSON objects.
Those objects are added to ANN query vector POST requests as the `filter` object.
After this patch, the subset of all operations ([Vector Search Filtering Milestone 1](https://scylladb.atlassian.net/wiki/spaces/RND/pages/156729450/Vector+Search+Filtering+Design+Document#Milestone-1)) happy path should be completed, allowing users to filter on primary key columns with single column `=` and `IN` or multiple column `()=()` and `() IN ()`.
The restrictions for other operations should be implemented in a PR on Vector Store service side.
---
This PR implements parsing the `statement_restrictions` into Vector Store filtering API compatible JSON objects.
The JSON objects are created and used in ANN vector queries with filtering.
It closes the Scylla side implementation of Vector Search filtering milestone 1.
Unit tests for `statement_restrictions` parsing are added. Integration tests will be added on Vector Store service side PR.
---
Fixes: SCYLLADB-249
New feature, should land into 2026.1
Closesscylladb/scylladb#28109
* github.com:scylladb/scylladb:
docs: update documentation on filtering with vector queries
test/vector_search: add test for filtered ANN with VS mock
test/vector_search: add restriction to JSON conversion unit tests
vector_search: cql: construct and use filter in ANN vector queries
select_statement: do not require post query ordering for vector queries
vector_search: add `statement_restrictions` to JSON parsing
seastar dd46b6f..e00f1513
```
e00f1513 Merge 'net: Add DNS TTL to the net::hostent' from Ernest Zaslavsky
8a69e1f4 net: extract common implementation of inet_address::find_all
cb469fd1 net: deprecate the addr_list in hostent
1d59c0ca net: expose DNS TTL via net::hostent
3c6d919f http: add virtual close() to connection_factory
bbd0001a Revert "net: expose DNS TTL via net::hostent"
```
Closesscylladb/scylladb#28147
Workload prioritization was added in scylladb/scylladb#22031.
The functionality of updating service levels was implemented as
a lambda coroutine, leaving room for the lambda coroutine fiasco.
The problem was noticed and addressed in scylladb/scylladb#26404.
There are currently three functions that call switch_tenant:
- update_user_scheduling_group_v1 and update_user_scheduling_group_v2
use the deducing this (this auto self) to ensure the proper
lifecycle of the lambda capture.
- update_control_connection_scheduling_group doesn’t use the deducing
this, but the lambda captures only `this`, which is used before
the first possible coroutine preemption. Therefore, it doesn’t seem
that any memory corruption or undefined behavior is possible here.
Nevertheless, it seems better to start using the deducing this in
update_control_connection_scheduling_group as well, to avoid problems
in the future if someone modifies the code and forgets to add it.
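For illustration, a minimal example of the pattern referred to above (C++23 deducing this), with made-up names. A plain capturing lambda coroutine keeps its captures in the closure object; if the closure is a temporary, the coroutine frame dangles after the first suspension. Taking the closure by value as the explicit object parameter copies the captures into the coroutine frame.
```
#include <seastar/core/coroutine.hh>
#include <seastar/core/sleep.hh>
#include <chrono>

seastar::future<> switch_tenant_sketch(int tenant_id) {
    // Unsafe variant (kept as a comment): the temporary closure holding
    // tenant_id dies at the end of the full expression, before the inner
    // coroutine resumes, so using the capture after co_await is a
    // use-after-free.
    //
    //   co_await [tenant_id]() -> seastar::future<> {
    //       co_await seastar::sleep(std::chrono::milliseconds(1));
    //       consume(tenant_id);   // dangling capture
    //   }();

    // Safe variant: 'this auto self' is deduced by value, so a copy of the
    // closure (including tenant_id) lives in the coroutine frame.
    co_await [tenant_id](this auto self) -> seastar::future<> {
        co_await seastar::sleep(std::chrono::milliseconds(1));
        (void)tenant_id;          // accessed through the frame-resident copy
    }();
}
```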
Fixes: SCYLLADB-284
Closesscylladb/scylladb#28158
The `make_key` lambda erroneously allocates a fixed 8-byte buffer
(`sizeof(s.size())`) for variable-length strings, potentially causing
uninitialized bytes to be included. If such bytes exist and they are
not valid UTF-8 characters, deserialization fails:
```
ERROR 2026-01-16 08:18:26,062 [shard 0:main] testlog - snapshot_list_contains_dropped_tables: cql env callback failed, error: exceptions::invalid_request_exception (Exception while binding column p1: marshaling error: Validation failed - non-UTF8 character in a UTF8 string, at byte offset 7)
```
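An illustrative reconstruction of the bug class (not the actual test code): sizeof(s.size()) is sizeof(size_t), a constant 8, not the string's length, so the key buffer is always 8 bytes; in the original code that buffer was uninitialized, so short strings left junk in the tail (tripping the UTF-8 validation above) and longer strings were truncated.
```
#include <algorithm>
#include <cstring>
#include <string>

std::string make_key_buggy(const std::string& s) {
    std::string buf;
    buf.resize(sizeof(s.size()));        // BUG: always 8 bytes, not s.size()
    std::memcpy(buf.data(), s.data(), std::min(buf.size(), s.size()));
    return buf;                          // tail bytes are not from s; long s truncated
}

std::string make_key_fixed(const std::string& s) {
    return s;                            // exactly s.size() bytes, nothing else
}
```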
Fixes#28195.
Signed-off-by: Nikos Dragazis <nikolaos.dragazis@scylladb.com>
Closesscylladb/scylladb#28197
Continuing the read once it is aborted (e.g. due to timeout) is a waste
of resources, as the produced results will be discarded.
Poll the permit's abort exception in the memtable and cache reader's
fill_buffer(). This results in one poll per buffer filled (8KB of data).
We already have similar poll for sstable readers, as disk reads are
usually much heavier and therefore it is more important to stop them
ASAP after abort. Cache and memtable reads are usually quick but not
always, hence it is important to also have polling in the cache and
memtable readers.
Refs: #11469Fixes: #28148Closesscylladb/scylladb#28149
0ede8d154b introduced the dev doc for size
based load balancing, but also added spelling errors.
This PR fixes these errors.
Closesscylladb/scylladb#28196
Currently, the type of global_topology_request_virtual_task isn't
taken out of the std::variant before printing, which results in
a task of type variant(actual_type).
Retrieve the type from the variant before passing it to the task type.
Currently, if an rf change request is paused, it immediately changes
the system_schema.keyspaces to use rack list for this keyspace.
If the request is aborted, the co-location might not be finished.
Hence, we can end up with inconsistent schema and tablet replica state.
Update the system_schema.keyspaces only after the co-location is done (and
not when it's started).
We switched to size-based load balancing, which has more
strict requirements for load stats. We now need not only per-node
stats, but also per-tablet stats.
Bootstrapping a node triggers stats refresh, but allocating tablets on
table creation didn't. So after creating a table, load balancer
couldn't make progress for up to 60s (stats refresh period).
This makes tests take longer, and can even cause failures if tests are
using a low-enough timeout.
Fixes https://github.com/scylladb/scylladb/issues/27921
No backport because only master is vulnerable (size-based load balancing).
Closesscylladb/scylladb#27926
* https://github.com/scylladb/scylladb:
test: cluster: Add reproducer for missed notification in topology coordinator
topology_coordinator: Wake up the state machine after stats refresh
topology_coordinator: Move tablet_load_stats_refresh_before_rebalancing injection earlier
topology_coordinator: Fix potential missed notification
topology_coordinator: Refresh load stats after table is created or altered
tablets: Do a group0 read barrier on tablet load stats refresh
topology_coordinator: Ensure stats are refreshed in the gossip scheduling group
test: Use ManagerClient.{disable,enable}_tablet_balancing()
test: Add missing calls to disable_tablet_balancing() in tests which use move_tablet() API
test: pylib: Introduce ManagerClient.{disable,enable}_tablet_balancing()
Add a description of available filtering options with ANN vector queries.
Provide an example of such query and a reference to `WHERE` clause restrictions.
Add `filter` option in `ann()` function to write the filter JSON
object as the POST request in ANN vector queries.
Adjust existing `vector_store_client_test` tests accordingly.
As there is only one `ORDER BY` clause with `ANN OF` ordering supported
in ANN vector queries, there is no need to require post query ordering
for the ANN vector queries. The standard ordering is not allowed here.
In fact the ordering is done on the Vector Store service side within
the ANN search, so that the returned primary keys are already sorted
accordingly.
If left unchanged, the filtering with `IN` clauses would cause
a `bad_function_call` server error, as filtering with `IN` clauses
requires post-query ordering in the standard case.
Such a rebuild has no read_from replica, but we know the tablet size will be 0.
If we don't account for that, stats will be incomplete until the next refresh.
This is important for test cases which do removenode or replace while
all replicas are down. So for example test_replace from
test_tablets_removenode.py, which uses RF=1 and replaces a node.
Without this, the test waits for 60s needlessly after the first round
of rebuilding migrations before scheduling more migrations. This can
cause the test to time out.
Fixes#28115Closesscylladb/scylladb#28121
Most likely 817fdad uncovered the fact that our choice of primary replica was resonating with tablet allocation and we were ending up picking the same replica as primary within a scope instead of rotating primaryship among all replicas in the scope.
This created situations where, for instance, restoring into a 9-node cluster with primary_replica_only=true would put all data into 3 nodes, leaving the other 6 unused. The balancing of the dataset was performed by the subsequent repair step.
This PR fixes this by changing the formula for picking the primary replica out of a set of eligible replicas from within the passed scope.
The PR also extends the testing scenarios in `test_backup.py` so we get to run restore for a set of topologies, for all combinations of scope, primary_replica_only and min_tablet_counts.
Most of the work was done by @bhalevy [here](https://github.com/scylladb/scylladb/compare/master...bhalevy:scylla:load-balance-primary-replica), this PR just split it and did touchups here and there.
Fixes#27281Closesscylladb/scylladb#27397
* github.com:scylladb/scylladb:
test: reduce dataset and number of test cases or debug builds
test: bump repair timeout up, it's sometimes not enough in CI
test: refactor test_refresh.py to match test_restore_with_streaming_scopes.
test: extend test_restore_with_streaming_scopes
test: Adjust test_restore_primary_replica_different_dc_scope_all
test: Refactor restoring code in test_backup to match SM pattern
test: add check_mutation_replicas calls after fresh creation of dataset
test: extend create_dataset to accept consistency_level
test: refactor check_mutation_replicas so it's more readable
test: make create_dataset async and refactor so it's configurable
test: use defaultdict in collect_mutations
test: add log marks to facilitate reusing server for restore
locator: tablets: Distribute data evenly among primary replicas during restore
To configure S3 storage, one needs to do
```
object_storage_endpoints:
- name: s3.us-east-1.amazonaws.com
port: 443
https: true
aws_region: us-east-1
```
and for GCS it's
```
object_storage_endpoints:
- name: https://storage.googleapis.com:433
type: gs
credentials_file: <gcp account credentials json file>
```
This PR updates the S3 part to look like
```
object_storage_endpoints:
- name: https://s3.us-east-1.amazonaws.com:443
aws_region: us-east-1
```
fixes: #26570
This is the 2nd attempt; the previous one (#27360) was reverted because it always reported endpoint configs in the new format via API and CQL, even if the endpoint was configured in the old way. This "broke" scylla manager and some dtests. This version has that bug fixed, and endpoints are reported in the same format as they were configured with.
About correctness of the changes.
No modifications to existing tests are made here, so the old format is respected correctly (as far as it's covered by tests). To prove the new format works, test_get_object_store_endpoints is extended to validate both options. Some preparations for this test come on their own with PR #28111, to show that they are valid and pass before changing the core code.
Enhancing the way configuration is made, likely no need to backport.
Closesscylladb/scylladb#28112
* github.com:scylladb/scylladb:
test: Validate S3 endpoints new format works
docs: Update docs according to new endpoints config option format
object_storage: Create s3 client with "extended" endpoint name
s3/storage: Tune config updating
sstable: Shuffle args for s3_client_wrapper
test: Rename badconf variable into objconf
test: Split the object_store/test_get_object_store_endpoints test
When a counter write times out (due to rpc::timeout_error or timed_out_error),
the code was throwing mutation_write_timeout_exception but not marking the
write_timeouts metric. This resulted in counter write timeouts not being
counted in the scylla_storage_proxy_coordinator_write_timeouts metric.
Regular writes go through mutate_internal -> mutate_end, which catches
mutation_write_timeout_exception and marks the metric. However, counter
writes use a separate code path (mutate_counters) that has its own
exception handling but was missing the metric update.
This fix adds get_stats().write_timeouts.mark() before throwing the
timeout exception in the counter write path, consistent with how the
CAS path handles cas_write_timeouts.
Refs: https://scylladb.atlassian.net/browse/SCYLLADB-245
Signed-off-by: Yaniv Kaul <yaniv.kaul@scylladb.com>
Closesscylladb/scylladb#28019
A replaced node may have a pending operation on it. The replace operation
will move the node into the 'left' state and the request will never be
completed. Moreover, the code does not expect a left node to have a
request. It will try to process the request and will crash because the
node for the request will not be found.
The patch checks if the replaced node has a pending request and completes
it with failure. It also changes the topology loading code to skip requests
for nodes that are in the left state. This is not strictly needed, but
makes the code more robust.
Fixes#27990Closesscylladb/scylladb#28009
Refs #22733.
Adds runtime warning and docs info that replicated provider is deprecated and will be removed.
Fixes#27292Closesscylladb/scylladb#27270
* github.com:scylladb/scylladb:
docs::encryption: Add warning that replicated provider is deprecated
ent::encryption: Switch default key provider from replicated to local
replicated_key_provider: Add deprecation warning on usage
Refs #27429
re-implement the dtest with the same name as a scylla pytest, using
a Python-level network proxy instead of tcpdump etc. Both to avoid
sudo and also to ensure we don't race.
v2:
* Included positive test (mode=all)
`test_schema_versioning_with_recovery` is currently flaky. It performs
a write with CL=ALL and then checks if the schema version is the same on
all nodes by calling `verify_table_versions_synced`. All nodes are expected
to sync their schema before handling the replica write. The node in
RECOVERY mode should do it through a schema pull, and other nodes should do
it through a group 0 read barrier.
The problem is in `verify_local_schema_versions_synced` that compares the
schema versions in `system.local`. The node in RECOVERY mode updates the
schema version in `system.local` after it acknowledges the replica write
as completed. Hence, the check can fail.
We fix the problem by making the function wait until the schema versions
match.
Note that RECOVERY mode is about to be retired together with the whole
gossip-based topology in 2026.2. So, this test is about to be deleted.
However, we still want to fix it, so that it doesn't bother us in older
branches.
Fixes#23803Closesscylladb/scylladb#28114
Problem
-------
Secondary indexes are implemented via materialized views under the
hood. The way an index behaves is determined by the configuration
of the view. Currently, it can be modified by performing the CQL
statement `ALTER MATERIALIZED VIEW` on it. However, that raises some
concerns.
Consider, for instance, the following scenario:
1. The user creates a secondary index on a table.
2. In parallel, the user performs writes to the base table.
3. The user modifies the underlying materialized view, e.g. by setting
the `synchronous_updates` to `true` [1].
Some of the writes that happened before step 3 used the default value
of the property (which is `false`). That had an actual consequence
on what happened later on: the view updates were performed
asynchronously. Only after step 3 had finished did it change.
Unfortunately, as of now, there is no way to avoid a situation like
that. Whenever the user wants to configure a secondary index they're
creating, they need to do it in another schema change. Since it's
not always possible to control how the database is manipulated in
the meantime, it leads to problems like the one described.
That's not all, though. The fact that it's not possible to configure
secondary indexes is inconsistent with other schema entities. When
it comes to tables or materialized views, the user always has a means
to set some or even all of the properties during their creation.
Solution
--------
The solution to this problem is extending the `CREATE INDEX` CQL
statement by view properties. The syntax is of form:
```
> CREATE INDEX <index name>
> .. ON <keyspace>.<table> (<columns>)
> .. WITH <properties>
```
where `<properties>` corresponds to both index-specific and view
properties [2, 3]. View properties can only be used with indexes
implemented with materialized views; for example, it will be impossible
to create a vector index when specifying any view property (see
examples below).
When a view property is provided, it will be applied when creating the
underlying materialized view. The behavior should be similar to how
other CQL statements responsible for creating schema entities work.
High-level implementation strategy
----------------------------------
1. Make auxiliary changes.
2. Introduce data structures representing the new set of index
properties: both index-specific and those corresponding to the
underlying view.
3. Extend `CREATE INDEX` to accept view properties.
4. Extend `DESCRIBE INDEX` and other `DESCRIBE` statements to include
view properties in their output.
User documentation is also updated in these steps to reflect the
corresponding changes.
Implementation considerations
-----------------------------
There are a number of schema properties that are now obsolete. They're
accepted by other CQL statements, but they have no effect. They
include:
* `index_interval`
* `replicate_on_write`
* `populate_io_cache_on_flush`
* `read_repair_chance`
* `dclocal_read_repair_chance`
If the user tries to create a secondary index specifying any of those
keywords, the statement will fail with an appropriate error (see
examples below).
Unlike materialized views, we forbid specifying the clustering order
when creating a secondary index [4]. This limitation may be lifted
later on, but it's a detail that may or may not prove troublesome. It's
better to postpone covering it to when we have a better perspective on
the consequences it would bring.
Examples
--------
Good examples
```
> CREATE INDEX idx ON ks.t (v);
> CREATE INDEX idx ON ks.t (v) WITH comment = 'ok view property';
> CREATE INDEX idx ON ks.t (v)
.. WITH comment = 'multiple view properties are ok'
.. AND synchronous_updates = true;
> CREATE INDEX idx ON ks.t (v)
.. WITH comment = 'default value ok'
.. AND synchronous_updates = false;
```
Bad examples
```
> CREATE INDEX idx ON ks.t (v) WITH replicate_on_write = true;
SyntaxException: Unknown property 'replicate_on_write'
> CREATE INDEX idx ON ks.t (v)
.. WITH OPTIONS = {'option1': 'value1'}
.. AND comment = 'some text';
InvalidRequest: Error from server: code=2200 [Invalid query]
message="Cannot specify options for a non-CUSTOM index"
> CREATE CUSTOM INDEX idx ON ks.t (v)
.. WITH OPTIONS = {'option1': 'value1'}
.. AND comment = 'some text';
InvalidRequest: Error from server: code=2200 [Invalid query]
message="CUSTOM index requires specifying the index class"
> CREATE CUSTOM INDEX idx ON ks.t (v)
.. USING 'vector_index'
.. WITH OPTIONS = {'option1': 'value1'}
.. AND comment = 'some text';
InvalidRequest: Error from server: code=2200 [Invalid query]
message="You cannot use view properties with a vector index"
> CREATE INDEX idx ON ks.t (v) WITH CLUSTERING ORDER BY (v ASC);
InvalidRequest: Error from server: code=2200 [Invalid query]
message="Indexes do not allow for specifying the clustering order"
```
and so on. For more examples, see the relevant tests.
References:
[1] https://docs.scylladb.com/manual/branch-2025.4/cql/cql-extensions.html#synchronous-materialized-views
[2] https://docs.scylladb.com/manual/branch-2025.4/cql/secondary-indexes.html#create-index
[3] https://docs.scylladb.com/manual/branch-2025.4/cql/mv.html#mv-options
[4] https://docs.scylladb.com/manual/branch-2025.4/cql/dml/select.html#ordering-clause
Fixes scylladb/scylladb#16454
Backport: not needed. This is an enhancement.
Closes scylladb/scylladb#24977
* github.com:scylladb/scylladb:
cql3: Extend DESC INDEX by view properties
cql3: Forbid using CLUSTERING ORDER BY when creating index
cql3: Extend CREATE INDEX by MV properties
cql3/statements/create_index_statement: Allow for view options
cql3/statements/create_index_statement: Rename member
cql3/statements/index_prop_defs: Re-introduce index_prop_defs
cql3/statements/property_definitions: Add extract_property()
cql3/statements/index_prop_defs.cc: Add namespace
cql3/statements/index_prop_defs.hh: Rename type
cql3/statements/view_prop_defs.cc: Move validation logic into file
cql3/statements: Introduce view_prop_defs.{hh,cc}
cql3/statements/create_view_statement.cc: Move validation of ID
schema/schema.hh: Do not include index_prop_defs.hh
A data_sink that stores buffers into an in-memory collection recently
appeared in seastar. In Scylla there's a similar thing that uses
memory_data_sink_buffer as a container, so it's possible to drop the
data_sink_impl itself in favor of the seastar implementation.
For that to work there should be an append_buffers() overload for the
aforementioned container. For a clean implementation the container, in
turn, needs to get a push_back() method and a value_type trait. The method
already exists, but is called put(), so just rename it. There's one more
user of this method in the S3 client, and it can enjoy the added
append_buffers() helper.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Closes scylladb/scylladb#28124
The instructions for building optimized clang neglected to mention
that the clang version to be built must be specified. Correct that.
Closes scylladb/scylladb#28135
fmt::localtime() is now deprecated; users should migrate to equivalents
from the standard libraries.
std::localtime is not thread-safe, so a local wrapper is introduced,
based on the thread-safe localtime_r() (from libc).
Closes scylladb/scylladb#27821
In PR 5b6570be52 we introduced the config option
`sstable_compression_user_table_options` to allow adjusting the default
compression settings for user tables. However, the new option was hooked
into the CQL layer and applied only to CQL base tables, not to the whole
spectrum of user tables: CQL auxiliary tables (materialized views,
secondary indexes, CDC log tables), Alternator base tables, Alternator
auxiliary tables (GSIs, LSIs, Streams).
Fix this by moving the logic into the `schema_builder` via a schema
initializer. This ensures that the default compression settings are
applied uniformly regardless of how the table is created, while also
keeping the logic in a central place.
Register the initializer at startup in all executables where schemas are
being used (`scylla_main()`, `scylla_sstable_main()`, `cql_test_env`).
Finally, remove the ad-hoc logic from `create_table_statement`
(redundant as of this patch), remove the xfail markers from the relevant
tests and adjust `test_describe_cdc_log_table_create_statement` to
expect LZ4WithDicts as the default compressor.
Fixes #26914.
Signed-off-by: Nikos Dragazis <nikolaos.dragazis@scylladb.com>
Extend the `static_configurator` mechanism to support initialization of
arbitrary schema properties, not only static ones, by passing a
`schema_builder` reference to the configurator interface.
As part of this change, rename `static_configurator` to
`schema_initializer` to better reflect its broader responsibility.
Add a checkpoint/restore mechanism to allow de-registering an
initializer (useful for testing; will be used in the next patch).
Signed-off-by: Nikos Dragazis <nikolaos.dragazis@scylladb.com>
Schemas maintain a set of so-called "static properties". These are not
user-visible schema properties; they are internal values carried by
in-memory `schema` objects for convenience (349bc1a9b6,
https://github.com/scylladb/scylladb/pull/13170#issuecomment-1469848086).
Currently, the initialization of these properties happens when a
`schema_builder` builds a schema (`schema_builder::build()`), by
invoking all registered "static configurators".
This patch moves the initialization of static properties into the
`schema_builder` constructor. With this change, the builder initializes
the properties once, stores them in a data member, and reuses them for
all schema objects that it builds. This doesn't affect correctness as
the values produced by static configurators are "static" by
nature; they do not depend on runtime state.
In the next patch, we will replace the "static configurator" pattern
with a more general pattern that also supports initialization of regular
schema properties, not just static ones. Regular properties cannot be
initialized in `build()` because users may have already explicitly set
values via setters, and there is no way to distinguish between default
values and explicitly assigned ones.
Signed-off-by: Nikos Dragazis <nikolaos.dragazis@scylladb.com>
The `sstable_compression_user_table_options` config option determines
the default compression settings for user tables.
In patch 2fc812a1b9, the default value of this option was changed from
LZ4 to LZ4WithDicts and a fallback logic was introduced during startup
to temporarily revert the option to LZ4 until the dictionary compression
feature is enabled.
Replace this fallback logic with an accessor that returns the correct
settings depending on the feature flag. This is cleaner and more
consistent with the way we handle the `sstable_format` option, where the
same problem appears (see `get_preferred_sstable_version()`).
As a consequence, the configuration option must always be accessed
through this accessor. Add a comment to point this out.
Signed-off-by: Nikos Dragazis <nikolaos.dragazis@scylladb.com>
In patches 11f6a25d44 and 7b9428d8d7 we added tests to verify that
auxiliary tables for both CQL and Alternator have the same default
compression settings as their base tables. These tests do not check
where these defaults originate from; they just verify that they are
consistent.
Add some more tests to verify the actual source of the defaults, which
is expected to be the `sstable_compression_user_table_options`
from the configuration. Unlike the previous tests, these tests require
dedicated Scylla instances with custom configuration, so they must be
placed under `test/cluster/`.
Mark them as xfail-ing. The marker will be removed later in this series.
Signed-off-by: Nikos Dragazis <nikolaos.dragazis@scylladb.com>
Read timeouts are a common occurrence and they typically occur when the replica is overloaded. So throwing exceptions for read timeouts is very harmful. Be careful not to throw exceptions while propagating them up the future chain. Add a test to enforce this and detect regressions.
Fixes: scylladb/scylladb#25062
Improvement, normally not a backport candidate, but we may decide to backport if customer(s) are found to suffer from this.
Closes scylladb/scylladb#25068
* github.com:scylladb/scylladb:
reader_permit: remove check_abort()
test/boost/database_test: add test for read timeout exceptions
sstables/mx/reader: don't throw exceptions on the read-path
readers/multishard: don't throw exceptions on the read-path
replica/table: don't throw exceptions on the read-path
multishard_mutation_query: fix indentation
multishard_mutation_query: don't throw exceptions on the read-path
service/storage_proxy: don't throw exceptions on the full-scan path
cql3/query_processor: don't throw exceptions on the read-path
reader_permit: add get_abort_exception()
Refs #27429
If running with broadcast_address != listen/cql/rpc address, topology
gets confused about the varying addresses. We need to special-case
resolving both addresses as "self", i.e. extend the broadcast_address
treatment to cql_address as well.
Added export of this via gossiper for symmetry.
At the end of the test case, the framework greps logs for errors and
backtraces. The servers are still running at this point. Some test
cases enable debug-level logging. If servers manage to produce new
lines faster than the python script processes them, the grep will never
return.
Protect against this by grepping over a file snapshot.
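A minimal sketch of the idea, assuming `log_path` points at a server log file; the helper name is illustrative rather than the framework's actual API:
```python
import re
from pathlib import Path

def grep_log_snapshot(log_path: str, pattern: str) -> list[str]:
    # Read the whole file once; lines appended by the still-running server
    # after this point cannot keep the scan going forever.
    snapshot = Path(log_path).read_text(errors="replace").splitlines()
    regex = re.compile(pattern)
    return [line for line in snapshot if regex.search(line)]
```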
Fixes #28086
Closes scylladb/scylladb#28088
This patch adds a new document, docs/alternator/network.md,
explaining the various mechanisms that can be used to reduce
network usage in Alternator. It explains compression of requests
and responses, header reduction, rack-aware routing, and RPC compression.
Many of these topics - especially support in the client libraries -
are work in progress, so some details are still missing in the new
document. Still, I think it is a good start that can be improved
later.
Fixes #27915.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes scylladb/scylladb#27927
Extend the test_get_object_store_endpoints() test to configure S3
endpoints in full-url format and check that they are rendered properly
via API/CQL.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
For this, add the s3::client::make(endpoint, ...) overload that accepts
endpoint in proto://host:port format. Then it parses the provided url
and calls the legacy one, that accepts raw host string and config with
port, https bit, etc.
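Roughly, the new overload has to derive from the URL the same pieces of information the legacy config carried; a small Python sketch of that parsing (the real code does the equivalent in C++ inside s3::client::make()):
```python
from urllib.parse import urlsplit

def parse_endpoint(url: str):
    # e.g. "https://s3.us-east-1.amazonaws.com" or "http://minio:9000"
    parts = urlsplit(url)
    use_https = parts.scheme == "https"
    port = parts.port or (443 if use_https else 80)
    return parts.hostname, port, use_https

assert parse_endpoint("http://minio:9000") == ("minio", 9000, False)
```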
The generic object_storage_endpoint_param no longer needs to carry the
internal s3::endpoint_config, the config option parsing changes
respectively.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Don't prepare s3::endpoint_config from generic code, just pass the region
and iam_role_arn (those that can potentially change) to the callback.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Make it construct like gs_client_wrapper -- with generic endpoint param
reference and make the storage-specific casts/gets/whatever internally.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
It tests two things -- the way object storage config is represented via
API and CQL (from system.config) and that updating config affects CREATE
KEYSPACE CQL (with keyspace storage options)
It's better to split the test, as its former part is going to be
extended to validate old/new config formats (see #26570)
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
to test restoring with a different min_tablet_count
than the schema was originally created with.
Signed-off-by: Robert Bindar <robert.bindar@scylladb.com>
This patch refactors the restoring code in cluster/test_backup.py
so it matches better the way SM works.
The patch also refactors test_restore_with_streaming_scopes so to
facilitate running restore scenarios under all supported scopes
with or w/o primary_replica_only enabled by reusing the servers
and backups for a topology. This allows us to test a lot more scenarios
without making the test impossibly slow.
split from bhalevy/load-balance-primary-replica
Signed-off-by: Robert Bindar <robert.bindar@scylladb.com>
to validate that mutation assertions are sane
split from bhalevy/load-balance-primary-replica
Signed-off-by: Robert Bindar <robert.bindar@scylladb.com>
Most likely 817fdad uncovered the fact that our choice of
primary replica was resonating with tablet allocation and we were ending up
picking the same replica as primary within a scope instead of rotating
primaryship among all replicas in the scope.
This created situations where, for instance, restoring into a 9-node cluster
with primary_replica_only=true would put all data into 3 nodes, leaving
the other 6 unused. The balancing of the dataset was performed by the
subsequent repair step.
split from bhalevy/load-balance-primary-replica
Fixes #27281
Signed-off-by: Robert Bindar <robert.bindar@scylladb.com>
Address all errors reported by CodeQL on https://github.com/scylladb/scylladb/security/quality.
This is a mixed bag, with some harmless issues, while others are severe problems which will result in the code breaking (if it is even run). I suspect some of the more severe problems were found in dead code that is not used at all -- hence nobody noticed.
Still, these issues are good to fix, so we can reduce noise in the reports and improve the maintainability of the code.
Code cleanup, no backport
Closes scylladb/scylladb#27838
* github.com:scylladb/scylladb:
pgo/pgo.py: don't mutate input params
test/pylib/coverage_utils.py: profdata_to_lcov: don't mutate defaulted param
test/cluster/dtest/tools/misc.py: add type annotations to list_to_hashed_dict()
idl-compiler.py: raise TypeError instead of raw str
test/pylib/lcov_utils.py: don't call set when iterating over it
configure.py: move away from .format(**locals())
test/cluster/object_store/conftest.py: add missing call to parent constructor
idl-compiler.py: add missing call to parent class constructor
tools/scyllatop/fake.py: pass correct number of args to _add_metric
This patch adds a second reproducer for issue #25839, which is about
scanning a secondary index which returns partial results. The new test
uses count(*) without requesting the rows themselves, but still has the
same problem of counting only part of the rows. This is the problem that
a user reported in issue #28026.
Unlike the previous test, this test works correctly on older versions
of Scylla - by using larger data, like on Cassandra - without changing
a configuration variable that did not yet exist. So with this test we
can confirm that this bug is a Scylla 5.2 regression:
test/cqlpy/run --release 5.1 test_secondary_index.py::test_short_count
passes, while
test/cqlpy/run --release 5.2 test_secondary_index.py::test_short_count
fails. It also fails on master, so the new test is marked "xfail".
Refs #25839
Refs #28026
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes scylladb/scylladb#28108
Addresses outstanding review comments from PR #22961 where SSL field
collection was refactored into generic_server::connection base class.
This patch consists of minor cosmetic enhancements for increased
readability, mainly, with some minor fixups explained in specific
commits.
Cosmetic changes, no need to backport.
Closes scylladb/scylladb#27575
* github.com:scylladb/scylladb:
test_ssl: fix indentation
generic_server: improve logging broken TLS connection
test_ssl: improve timeout and readability
alternator/server: update SSL comment
This method can cause performance regressions if used in the wrong place
-- namely if it is used to abort reads by throwing the abort exception.
Exceptions should be propagated during reads without throwing them,
otherwise they cause extra CPU load, making a bad situation worse.
Remove this method, so it doesn't accidentally get more users, migrate
remaining users to get_abort_exception().
Read timeouts shouldn't trigger exceptions being thrown; exceptions should be
solely propagated via futures, otherwise they put extra strain on the
system at the worst possible time: when it is already overloaded enough
that reads have started to time out.
The test covers both single partition reads and full scans, with two
scenarios:
* timeout while the read is queued
* timeout when the read is already ongoing
If the read is aborted via the permit (due to timeout) don't throw the
abort exception, instead propagate it via the future chain.
Also, use try_catch<> instead of try ... catch to decorate
malformed_sstable_exception with the file name.
Use coroutine::try_future() to avoid exceptions taking flight and
triggering expensive stack-unwinding.
Especially bad for common exceptions like timeouts.
Use coroutine::as_future() to avoid exceptions taking flight and
triggering expensive stack-unwinding.
Especially bad for common exceptions like timeouts.
Not using coroutine::try_future(), because on the error path, the
querier has to be closed.
Use coroutine::try_future() to avoid exceptions taking flight and
triggering expensive stack-unwinding.
Especially bad for common exceptions like timeouts.
Use coroutine::try_future() to avoid exceptions taking flight and
triggering expensive stack-unwinding.
Especially bad for common exceptions like timeouts.
Use coroutine::try_future() to avoid exceptions taking flight and
triggering expensive stack-unwinding.
Especially bad for common exceptions like timeouts.
Will replace check_abort(). The latter throws an exception which is
something we want to avoid when a read is aborted, in particular when it
times out.
Also add a convenience get_abort_exception() method to mutation_reader.
We add a test that validates that indexed queries
do not throw a warning related to vector search paging
Fixes: SCYLLADB-248
Closes scylladb/scylladb#28077
We have a test in test_compressed_response.py that reproduces a bug
where in Alternator's signature checking code, if a header had multiple
consecutive spaces its signature isn't checked correctly.
This patch fixes this and that xfailing test begins to pass.
But it turns out that the handling of multiple consecutive spaces in
headers when calculating the authentication signature is just one example
of "header canonization" that the AWS Signature V4 specification requires
us to do. There are additional types of header canonization that Alternator
must do, and this patch also adds new tests in test_authorization.py for
checking *all* the types of canonization.
Fortunately, for all other types of canonizations, we already handled
them correctly - Alternator already lowercases header names, sorts them
alphabetically and removes leading and trailing spaces before calculating
the signature. So most of the new tests added pass also without this patch,
and only one of them, test_canonization_middle_whitespace, needs this
patch to pass. As usual, all the new tests also pass on DynamoDB.
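For reference, a rough Python sketch of the header canonicalization steps the AWS Signature V4 spec calls for (lowercase names, sorted order, trimmed values, internal whitespace runs collapsed); this is a simplification, not Alternator's actual implementation:
```python
import re

def canonical_headers(headers: dict[str, str]) -> str:
    canonical = {
        name.lower(): re.sub(r"[ \t]+", " ", value.strip())
        for name, value in headers.items()
    }
    # SigV4 canonical form: "name:value\n" pairs, sorted by header name.
    return "".join(f"{name}:{canonical[name]}\n" for name in sorted(canonical))

# Both spellings canonicalize identically, so they must sign identically too.
assert canonical_headers({"X-Amz-Target": "DynamoDB_20120810.Scan"}) == \
       canonical_headers({"x-amz-target": "  DynamoDB_20120810.Scan "})
```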
Fixes #27775
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes scylladb/scylladb#28102
With the new UI, Jenkins escapes HTML tags during rendering to prevent
XSS. This will show just the link, without a custom name, as a string that can
be copied and then pasted to navigate to the failed directory.
Closes scylladb/scylladb#28062
To fix the problem, we need to remove the first, redundant definition of
test_gossiper_unreachable_endpoints (lines 19-24). The second definition
(lines 25-40) should be retained as it has more substantial test logic.
No other code changes or imports are needed, as the test logic is
preserved fully in the retained definition.
Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
Closes scylladb/scylladb#27632
Potential fix for code scanning alert no. 167: Workflow does not contain permissions
Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
Closes scylladb/scylladb#27819
Potential fix for code scanning alert no. 145: Workflow does not contain permissions
Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
Closes scylladb/scylladb#27808
Try to teach CoPi a bit about how we'd like to see it implement tests, according to this repo best practices.
Signed-off-by: Yaniv Kaul <yaniv.kaul@scylladb.com>
Closes scylladb/scylladb#28032
Following 954f2cbd2f, which added proxy protocol v2 listeners
for CQL, we do the same for alternator. We add two optional ports
for plain and TLS-wrapped HTTP.
We test each new port, that the old ports still work, and that
mixing up a port with no proxy protocol and a connection with proxy
protocol (or the opposite) fails. The latter serves to show
that the testing strategy is valid and doesn't just pass whatever
happens. We also verify that the correct addresses (and TLS mode)
show up in system.clients.
Closes scylladb/scylladb#27889
Currently Alternator supports compressed requests in the gzip format
with "Content-Encoding: gzip". We did not support any other compression
formats.
It turns out that DynamoDB also supports the "deflate" encoding.
The "deflate" format is just a small variant of gzip and also supported
by the same zlib library that we already use, so it is very easy
to add support for it as well. So this patch adds it.
Beyond compatibility with DynamoDB, another benefit of this patch is
symmetry with our response compression support (PR #27454), where
we supported both gzip and deflate compression of responses - so
we should support the same for requests.
This patch also adds tests for Content-Encoding: deflate, which pass
on DynamoDB (proving that "deflate" is indeed supported there).
On Alternator the new tests failed before this patch and pass with
this patch.
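A hedged sketch of what such a request looks like on the wire (the endpoint, table name and the missing SigV4 auth headers are placeholders; the real tests go through boto3 rather than raw requests):
```python
import json
import zlib

import requests

body = json.dumps({"TableName": "tbl"}).encode()
resp = requests.post(
    "http://localhost:8000",                     # placeholder Alternator URL
    data=zlib.compress(body),                    # "deflate" is the zlib format
    headers={
        "Content-Encoding": "deflate",
        "Content-Type": "application/x-amz-json-1.0",
        "X-Amz-Target": "DynamoDB_20120810.DescribeTable",
    },
)
```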
Refs #27243 (which asks to support more compression formats).
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes scylladb/scylladb#27917
This is a translation of Cassandra's CQL unit test source file
validation/operations/InsertInvalidateSizedRecordsTest.java into our
cqlpy framework.
This is one of the tests added to Cassandra as part of the vector
search work, but actually has nothing to do with vector search -
it checks what happens when key columns of different types exceed
their maximum size (64KB).
Unfortunately, each one of the tests added here *fails* on ScyllaDB,
providing more reproducers for two already known issues (which
already had plenty of reproducers...):
Refs #8627 Cleanly reject updates with indexed values where value > 64k
Refs #12247 Better error reporting for oversized keys during INSERT
One of the tests also fails on Cassandra, due to CASSANDRA-19270.
It is not clear to me how this unit test actually passed on Cassandra,
I can only guess that the Python driver somehow makes the request
differently than what the Java unit tests use to make requests to
Cassandra.
One of the tests in the original Cassandra source file I did not
translate, readingEmptyStringsForDifferentTypes, because it tests
cqlsh, not pure CQL.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes scylladb/scylladb#27944
It is considered a dangerous practice with possible unintended
side-effects, affecting later calls to the same function.
Found by CodeQL "Modification of parameter with default".
It is considered a dangerous practice as it creates a side-effect for
later calls to the same function.
Create a new variable instead and mutate that. Also remove the unused
update_known_ids parameter, which defaults to True and no caller changes
it. Passing False to this param also seems to have no effect. Instead of
trying to guess what the desired effect of passing False is and fixing
it, just remove this unused param.
Found by CodeQL "Modification of parameter with default".
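A generic sketch of the anti-pattern and the shape of the fix (names are illustrative, not the actual function from the patch):
```python
def collect_ids(new_ids, known_ids=()):
    # BAD (flagged by CodeQL): mutating `known_ids` in place would also
    # mutate the shared default object, affecting every later call.
    merged = list(known_ids)      # copy into a fresh variable instead
    merged.extend(new_ids)
    return merged
```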
To hopefully shut up CodeQL "Iterable can be either a string or a sequence".
This change makes the code more readable anyway, so it is more than just
a gratuitous change to make some code-scanner happy.
Unlike in C++, in Python one can only throw objects which inherit from
Exception. The message complains about wrong type so wrap it in
TypeError before passing to raise.
Found by CodeQL "Illegal raise".
Use f-strings instead; they are just as convenient, with the added bonus
of editors providing syntax highlighting for them.
Additionally, this shuts up CodeQL complaint about "Suspicious unused
loop iteration variable" in loops where the loop variable was passed to
format indirectly via **locals().
Replace manual init of parent fields.
Found by CodeQL: "Missing call to superclass `__init__` during object
initialization".
The secret_key is now initialized from server.secret_key instead of
server.access_key. This probably fixes a (benign) bug.
Since Scylla 6.0, service levels are managed by Raft group0.
This patch updates the table name used by service levels and adds a
paragraph describing service levels on raft.
Fixes scylladb/scylladb#18177
Closes scylladb/scylladb#26556
Write the boost logs to stdout in HRF format and in XML to the file. The XML file will be used for parsing and providing the error information in the summary section of the failure.
Fixes: https://github.com/scylladb/scylladb/issues/28045
Framework enhancements, no need to backport.
Closes scylladb/scylladb#28107
* github.com:scylladb/scylladb:
test.py: remove XML log from fail summary
test.py: fix truncated boost output to stdout file
Otherwise, the coordinator may not react to changing stats after explicit
calls to trigger_load_stats_refresh() done on node replace or table
creation, if stats take longer to refresh than it takes the
coordinator to go idle.
The periodic refresh does wake up the topology coordinator, so the
issue is not dramatic in production, but it's annoying in tests, which
take longer because of that.
Fixes #25163
Refreshing stats will signal _topo_sm.event, so do it before waiting
for the event, to avoid busy-looping in the coordinator.
This will produce lots of logs in test cases which enable debug-level
logging in the raft logger.
Refs #28086
Checking for work is not atomic, so there is room for a missed
notification, especially since notifications are not always triggered
from fibers which take the group0 guard.
Fix by subscribing for the event before checking for work.
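A minimal asyncio sketch of the ordering (names are illustrative; the coordinator itself is C++, this only shows the pattern):
```python
import asyncio

async def coordinator_loop(event: asyncio.Event, has_work, do_work):
    while True:
        event.clear()          # subscribe to notifications first ...
        if has_work():         # ... and only then check for work
            await do_work()
            continue
        await event.wait()     # a signal arriving after clear() is not lost
```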
Fixes #27958
We switched to the size-based load balancing, which now has stricter
requirements for load stats. We need not only per-node
stats (capacity), but also per-tablet stats.
Bootstrapping a node triggers stats refresh, but allocating tablets on
table creation didn't. So after creating a table, load balancer
couldn't make progress for up to 60s (stats refresh period).
This makes tests take longer, and can even cause failures if tests are
using a low-enough timeout.
Fixes #27921
Stats refresh will be triggered on topology coordinator by events like
allocating new tablets on table creation. For refresh to be effective,
all replicas must see the new tablets, otherwise stats will be
incomplete.
If a test tries to move a tablet, it assumes the tablets are stable.
This fixes flakiness exposed by size-based load-balancing and a later
change to refresh stats sooner.
It's a global operation, so we can use any server.
It's not only convenient. The call via api.disable_tablet_balancing()
confuses people into thinking that it's a per-server operation. This leads
to proliferation of code which does it needlessly on all servers.
Change the behavior of catching the boost log output. With this
change boost will output its logging to stdout in HRF format and to
the tempfile in XML format. This will make debugging easier, since all
messages will be in the output file and still in the fail summary.
This moves responsibility for running python tests to pytest, in the same manner as was already done for boost tests. From this commit, test.py is no longer responsible for running python tests and relies completely on pytest.
This is another step towards unification of test execution.
Convert the skip_mode function to a `pytest.mark`, to be able to use it to annotate the whole module instead of each test explicitly.
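A small sketch of what annotating a whole module looks like (the mark name and arguments are illustrative, not necessarily the framework's exact spelling):
```python
import pytest

# Applies the skip-mode mark to every test in this module, instead of
# decorating each test function individually.
pytestmark = pytest.mark.skip_mode("debug", reason="too slow in debug builds")
```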
NOTE: this is a breaking change. From this commit, several directories with tests will require a path to the file to launch the test. Affected directories
test/alternator
test/broadcast_tables
test/cql
test/cqlpy
test/rest_api
Changes only in framework, so no backport.
This PR will increase the number of tests by 30, due to differences in how test.py and pytest discover tests. test.py counts a file as a test, and when skip is used in suite.yaml it excludes the tests from discovery completely.
pytest, on the other hand, counts each test function as a test and uses the skip_mode mark: it will discover the tests but skip them during execution, hence the difference
test.py output before PR:
```bash
> ./test.py --mode=release rest_api/test_compaction_task rest_api/test_task_manager --list --no-gather-metrics
```
test.py output in this PR:
```bash
> ./test.py --mode=release test/rest_api/test_compaction_task.py test/rest_api/test_task_manager.py --list
rest_api/test_compaction_task.py::test_global_major_keyspace_compaction_task.release.1
rest_api/test_compaction_task.py::test_major_keyspace_compaction_task.release.1
rest_api/test_compaction_task.py::test_cleanup_keyspace_compaction_task.release.1
rest_api/test_compaction_task.py::test_offstrategy_keyspace_compaction_task.release.1
rest_api/test_compaction_task.py::test_rewrite_sstables_keyspace_compaction_task.release.1
rest_api/test_compaction_task.py::test_reshaping_compaction_task.release.1
rest_api/test_compaction_task.py::test_resharding_compaction_task.release.1
rest_api/test_compaction_task.py::test_regular_compaction_task.release.1
rest_api/test_compaction_task.py::test_compaction_task_abort.release.1
rest_api/test_compaction_task.py::test_major_keyspace_compaction_task_async.release.1
rest_api/test_compaction_task.py::test_cleanup_keyspace_compaction_task_async.release.1
rest_api/test_compaction_task.py::test_offstrategy_keyspace_compaction_task_async.release.1
rest_api/test_compaction_task.py::test_rewrite_sstables_keyspace_compaction_task_async.release.1
rest_api/test_compaction_task.py::test_compaction_progress[major_keyspace_compaction_task_impl_run_fail].release.1
rest_api/test_compaction_task.py::test_compaction_progress[shard_major_keyspace_compaction_task_impl_run_fail].release.1
rest_api/test_compaction_task.py::test_compaction_progress[table_major_keyspace_compaction_task_impl_run_fail].release.1
rest_api/test_task_manager.py::test_task_manager_modules.release.1
rest_api/test_task_manager.py::test_task_manager_tasks.release.1
rest_api/test_task_manager.py::test_task_manager_status_running.release.1
rest_api/test_task_manager.py::test_task_manager_status_done.release.1
rest_api/test_task_manager.py::test_task_manager_status_failed.release.1
rest_api/test_task_manager.py::test_task_manager_not_abortable.release.1
rest_api/test_task_manager.py::test_task_manager_wait.release.1
rest_api/test_task_manager.py::test_task_manager_ttl.release.1
rest_api/test_task_manager.py::test_task_manager_user_ttl.release.1
rest_api/test_task_manager.py::test_task_manager_sequence_number.release.1
rest_api/test_task_manager.py::test_task_manager_recursive_status.release.1
rest_api/test_task_manager.py::test_module_not_exists.release.1
rest_api/test_task_manager.py::test_task_folding.release.1
rest_api/test_task_manager.py::test_abort_on_unregistered_task.release.1
```
Fixes: https://github.com/scylladb/scylladb/issues/27716
Closes scylladb/scylladb#26395
* github.com:scylladb/scylladb:
test.py: fix test_vector_similarity.py
docs: add directories excluded from test.py
test.py: prevent file descriptors leaking
test.py: capture print inside the test
test.py: do not print header for collection with test.py
test.py: remove not supported functionality
test.py: switch of execution of several test directories by test.py runner
test.py: integrate python tests to be executed with pytest runner
test.py: fix test/vector_search_validator to be able to run with pytest
test.py: prepare base class for migration
test.py: move environment preparation to one method
test.py: introduce new environment variable TESTPY_PREPARED_ENVIRONMENT
repair: Implement auto repair for tablet repair
This patch implements the basic auto repair support for tablet repair.
It was decided to add no per table configuration for the initial
implementation, so two scylla yaml config options are introduced to set
the default auto repair configs for all the tablet tables.
- auto_repair_enabled_default
Set true to enable auto repair for tablet tables by default. The value
will be overridden by the per keyspace or per table configuration which
is not implemented yet.
- auto_repair_threshold_default_in_seconds
Set the default time in seconds for the auto repair threshold for tablet
tables. If the time since last repair is bigger than the configured
time, the tablet is eligible for auto repair. The value will be
overridden by the per keyspace or per table configuration which is not
implemented yet.
The following metrics are added:
- auto_repair_needs_repair_nr
The number of tablets with auto repair enabled that need repair
- auto_repair_enabled_nr
The number of tablets with auto repair enabled
The metrics are useful to tell if auto repair is falling behind.
In the future, more auto repair scheduling will be added, e.g.,
scheduling based on the repaired and unrepaired sstable set size,
tombstone ratio and so on, in addition to the time based scheduling.
Fixes SCYLLADB-99
New feature. No backport.
Closes scylladb/scylladb#27534
* github.com:scylladb/scylladb:
topology_coordinator: Add metrics for tablet repair
repair: Implement auto repair for tablet repair
This PR:
- Replaces a fixed version name with the variable for the current version in the instructions for installing a non-default version with Web Installer. This will make using the installer more user-friendly.
- Removes the instruction for Open Source from the Web Installer docs.
Fixes https://github.com/scylladb/scylladb/issues/28005
Fixes https://github.com/scylladb/scylladb/issues/28079
Closes scylladb/scylladb#28046
* github.com:scylladb/scylladb:
doc: remove the instruction for Open Source from the Web Installer docs
doc: add the version variable to the Web Installer instructions
- scylla_tablet_ops_failed
Number of failed tablet {auto, user} repair
- scylla_tablet_ops_succeeded
Number of succeeded tablet {auto, user} repair
Currently auto_repair and user_repair tablet tasks are added. We can add
more tablet tasks later, e.g., rebuild, migration.
Create and drop view operations are currently performed on all shards, and their execution is not fully serialized. On slower processors this can lead to interleavings that leave stale entries in `system.scylla_views_builds_in_progress`.
A problematic sequence looks like this:
* `on_create_view()` runs on shard 0 → entries for shard 0 and shard 1 are created
* `on_drop_view()` runs on shard 0 → entry for shard 0 is removed
* `on_create_view()` runs on shard 1 → entries for shard 0 and shard 1 are created again
* `on_drop_view()` runs on shard 1 → entry for shard 1 is removed, while the shard 0 entry remains
This results in a leftover row in `system.scylla_views_builds_in_progress`, causing `view_build_test.cc` to get stuck indefinitely in an eventual state and eventually be terminated by CI.
This patch fixes the issue by fully serializing all view create and drop operations through shard 0. Shard 0 becomes the single execution point and notifies other shards to perform their work in order. Requests originating.
new process:
- view_builder::on_create_view(...) runs only on shard 0 and kicks off dispatch_create_view(...) in the background.
- dispatch_create_view(...) (shard 0) first checks should_ignore_tablet_keyspace(...) and returns early if needed.
- dispatch_create_view(...) calls handle_seed_view_build_progress(...) on shard 0. That:
- writes the global “build progress” row across all shards via _sys_ks.register_view_for_building_for_all_shards(...).
- After seeding, dispatch_create_view(...) broadcasts to all shards with container().invoke_on_all(...).
- Each shard runs handle_create_view_local(...), which:
- waits for pending base writes/streams, flushes the base,
- resets the reader to the current token and adds the new view,
- handles errors and triggers _build_step to continue processing.
Drop view
- view_builder::on_drop_view(...) runs only on shard 0 and kicks off dispatch_drop_view(...) in the background.
- dispatch_drop_view(...) (shard 0) first checks should_ignore_tablet_keyspace(...) and returns early if needed.
- It broadcasts handle_drop_view_local(...) to all shards with invoke_on_all(...).
- Each shard runs handle_drop_view_local(...), which:
- removes the view from local build state (_base_to_build_step and _built_views) by scanning existing steps,
- ignores missing keyspace cases.
- After all shards finish local cleanup, shard 0 runs handle_drop_view_global_cleanup(...), which:
- removes global build progress, built‑view state, and view build status in system tables,
Shutdown
- drain() waits on _view_notification_sem before _sem so in‑flight dispatches finish before bookkeeping is halted.
In addition, the test is adjusted to remove the long eventual wait (596.52s / 30 iterations) and instead rely on the default wait of 17 iterations (~4.37 minutes), eliminating unnecessary delays while preserving correctness.
Fixes: https://github.com/scylladb/scylladb/issues/27898
Backport: not required as the problem happens on master
Closes scylladb/scylladb#27929
To prepare for implementation of filtering we skip validation
of where clauses in vector search queries. All queries that would
be blocked by the lack of ALLOW FILTERING now will pass through.
Fixes: VECTOR-410
Closes scylladb/scylladb#27758
We can run Alternator's tests against DynamoDB with `test/alternator/run --aws`, and our intention is that all except a few specially marked should pass on DynamoDB - indicating that the test itself is correct and checks compatibility with DynamoDB and not with some misunderstood spec.
Before this patch series, almost two dozen Alternator's tests failed on DynamoDB. This series fixes most of them.
Refs #26079 (it fixes almost all the problems but probably not all of them so let's keep the issue open for a while longer)
Closes scylladb/scylladb#27995
* github.com:scylladb/scylladb:
test/alternator: fix some expected error messages to fit DynamoDB
test/alternator: fix compressed request test on non-us-east1
test/alternator: fix test's expected error message on DynamoDB
test/alternator: mark Alternator-only test scylla_only
test/alternator: fix test on DynamoDB
test/alternator: increase wait_for_gsi() timeout
test/alternator: fix test passing a spurious parameter
These tools are deprecated and no longer shipped by ScyllaDB packages.
They no longer support the latest SSTable versions and ScyllaDB-only
features, like encryption and dictionary based compression.
Remove them from the documentation.
Closes scylladb/scylladb#27608
The interface of Jenkins has changed, and the instructions for creating
a token are out-of-date. This commit updates them.
Closes scylladb/scylladb#28054
There is a known limitation of xdist.
It performs test discovery in each worker and then compares the result with the master's; the discovered lists of tests must be identical. Sets do not guarantee ordering, so they should not be used for parametrized testing, because test discovery with xdist will then fail.
This PR just converts the set to a list, to eliminate the issue mentioned above.
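A small sketch of the difference (names are illustrative): parametrizing over a deterministically ordered sequence keeps the collected test ids identical on every xdist worker.
```python
import pytest

MODES = {"release", "debug", "dev"}    # a set: iteration order may differ per worker

@pytest.mark.parametrize("mode", sorted(MODES))   # stable ordering on every worker
def test_mode_is_known(mode):
    assert mode in MODES
```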
Unused imports, unused variables and such.
Initially, there were no functional changes, just to get rid of some standard CodeQL warnings.
I've then broken the CI, as apparently there's an install-time(!?) Python script creation for the sole purpose of product
naming. I changed it - we have it in etcdir, as SCYLLA-PRODUCT-FILE.
So added (copied from a different script) a get_product() helper function in scylla_util.py and used it instead.
While at it, also fixed the too broad import from scylla_util, which 'forced' me to also fix other specific imports (such as shutil).
Improvement - no need to backport.
Signed-off-by: Yaniv Kaul <yaniv.kaul@scylladb.com>
Closes scylladb/scylladb#27883
We currently do it only for a bootstrapping node, which is a bug. The
missing IP can cause an internal error, for example, in the following
scenario:
- replace fails during streaming,
- all live nodes are shut down before the rollback of replace completes,
- all live nodes are restarted,
- live nodes start hitting internal error in all operations that
require IP of the replacing node (like client requests or REST API
requests coming from nodetool).
We fix the bug here, but we do it separately for replace with different
IP and replace with the same IP.
For replace with different IP, we persist the IP -> host ID mapping
in `system.peers` just like for bootstrap. That's necessary, since there
is no other way to determine IP of the replacing node on restart.
For replace with the same IP, we can't do the same. This would require
deleting the row corresponding to the node being replaced from
`system.peers`. That's fine in theory, as that node is permanently
banned, so its IP shouldn't be needed. Unfortunately, we have many
places in the code where we assume that IP of a topology member is always
present in the address map or that a topology member is always present in
the gossiper endpoint set. Examples of such places:
- nodetool operations,
- REST API endpoints,
- `db::hints::manager::store_hint`,
- `group0_voter_handler::update_nodes`.
We could fix all those places and verify that drivers work properly when
they see a node in the token metadata, but not in `system.peers`.
However, that would be too risky to backport.
We take a different approach. We recover IP of the replacing node on
restart based on the state of the topology state machine and
`system.peers` just after loading `system.peers`.
We rely on the fact that group 0 is set up at this point. The only case
where this assumption is incorrect is a restart in the Raft-based
recovery procedure. However, hitting this problem then seems improbable,
and even if it happens, we can restart the node again after ensuring
that no client and REST API requests come before replace is rolled back
on the new topology coordinator. Hence, it's not worth complicating the
fix (by e.g. looking at the persistent topology state instead of the
in-memory state machine).
Fixes #28057
Backport this PR to all branches as it fixes a problematic bug.
Closes scylladb/scylladb#27435
* github.com:scylladb/scylladb:
gossiper: add_saved_endpoint: make generations of excluded nodes negative
test: introduce test_full_shutdown_during_replace
utils: error_injection: allow aborting wait_for_message
raft topology: preserve IP -> ID mapping of a replacing node on restart
Fixes #27992
When doing a commit log oversized allocation, we lock out all other writers by grabbing
the _request_controller semaphore fully (max capacity).
We thereafter assert that the semaphore is in fact zero. However, due to how the
bookkeeping works here, the semaphore can in fact become negative (some paths will not
actually wait for the semaphore, because this could deadlock).
Thus, if, after we grab the semaphore and execution actually returns to us (task schedule),
new_buffer via segment::allocate is called (due to a non-fully-full segment), we might
in fact grab the segment overhead from zero, resulting in a negative semaphore.
The same problem applies later when we try to sanity check the return of our permits.
The fix is trivial: just accept less-than-zero values, and take the same possible
less-than-zero value into account in the exit check (when returning units).
Added whitebox (special callback interface for sync) unit test that provokes/creates
the race condition explicitly (and reliably).
Closes scylladb/scylladb#27998
The current code:
```
try:
cql.execute(f"INSERT INTO {cf} (pk, t) VALUES (-1, 'x')", host=host[0], execution_profile=cl_one_profile).result()
except Exception:
pass
```
contains a typo: `host=host[0]`, which throws an exception because a Host
object is not subscriptable. The test does not fail because the except
block is too broad and suppresses all exceptions.
Fixing the typo alone is insufficient. The write still succeeds because
the remaining nodes are UP and the query uses CL=ONE, so no failure
should be expected.
Another source of flakiness is data verification:
```
SELECT * FROM {cf} WHERE pk = 0;
```
Even when a coordinator is explicitly provided, using CL=ONE does not
guarantee a local read. The coordinator may forward the read request to
another replica, causing the verification to fail nondeterministically.
This patch rewrites the tests to address these issues:
- Fix the typo: `host[0]` to `hosts[0]`
- Verify data using `MUTATION_FRAGMENTS({cf})` which guarantees a local
read on the coordinator node
- Reconnect the driver after node restart
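A rough sketch of the reworked verification, assuming a driver session `cql`, a `hosts` list and a table name in `cf` as in the snippet above; the actual statements in the test may differ:
```python
rows = cql.execute(
    f"SELECT * FROM MUTATION_FRAGMENTS({cf})",
    host=hosts[0],                     # note: hosts[0], not host[0]
    execution_profile=cl_one_profile,
)
# MUTATION_FRAGMENTS() reads only the coordinator's local data, so this
# checks the replica we actually care about rather than a forwarded read.
assert any(getattr(r, "pk", None) == 0 for r in rows)
```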
Fixes https://github.com/scylladb/scylladb/issues/27933
Closes scylladb/scylladb#27934
With the migration to pytest, file descriptors would hang around for the whole life of the process. Previously this was not an issue, because test.py executed each file in a separate process with Popen, so the descriptors were freed when the process finished. With the new approach they stay blocked. This change eliminates that.
Fix an issue where getting a cluster failed and we then tried to mark it dirty while it was None.
Put the cluster into the pool only if it was created
In its current state pytest does not support ordering of execution, so this parameter is removed. There is no big need for it, due to differences in what pytest and test.py count as a test: pytest runs test functions in the threads, while test.py executed test files in the threads. That's why pytest's way is more granular and allows filling the threads better.
Remove the skip node, since it is already added as a pytest mark for each test in the file.
Remove pool_size, since it is not used by pytest at all. Pytest uses
xdist to set the number of threads instead of the pool_size used by test.py
With this commit test.py loses the ability to run tests by itself, always bypassing execution to pytest.
NOTE: this is a breaking change. From this commit, several directories
with tests will require a path to the file to launch the test.
Affected directories
test/alternator
test/broadcast_tables
test/cql
test/cqlpy
test/rest_api
With this commit test.py will bypass test execution to pytest. However, it will still be able to run tests by itself.
When a test name like `broadcast_tables/test_broadcast_tables` is provided, it will execute the test with the test.py runner, but if a path to the file is provided, like `test/broadcast_tables/test_broadcast_tables.py`, it will bypass execution to pytest.
`--test-py-init` tells pytest to run the session in test.py-compatible mode
Update the help text for the name parameter of test.py about the changes
in how it works and which directories are served by pytest
The build_mode fixture has a dynamic scope; it depends on how pytest is
executed. When it is executed through test.py the scope will be session, and
since that is broader than package, everything works fine. With pure
pytest it will fail because build_mode will have module scope.
This fix allows running tests with pure pytest, which is needed for migrating
tests to be executed by the pytest runner instead of test.py.
Since all tests share the same base class and some of the tests are executed by test.py and some with pytest, we need to handle two cases for where the configuration is located: suite.yaml and test_config.yaml
After the full migration the suite.yaml case will be removed
Since these two methods should anyway be called one after the other in two different cases (when test.py executes a test and when pytest executes a test), merge them into one. Additionally, set an environment variable to show the underlying pytest process that the environment was already prepared and there is no need to clean directories or start additional services.
Introduce a new environment variable that will be used to signal to the pytest runner that the environment was already prepared by test.py. This is needed to be able to run the test with pytest and with test.py (which actually runs pytest underneath).
Previously we were logging a broken TLS connection and then this was
logged again later, so now instead of logging we're constructing an
exception with a message extended with TLS info, which will later be
caught with its full message still logged.
1. With this change the test really waits 10s, previously (in case
something went wrong), the timeout could take way more than that.
2. Added `else` to above `if` to increase clarity of execution flow -
it doesn't change logic, but makes it more clear.
Allow creating materialized views and secondary indexes in a tablets keyspace only if it's RF-rack-valid, and enforce RF-rack-validity while the keyspace has views by restricting some operations:
* Altering a keyspace's RF if it would make the keyspace RF-rack-invalid
* Adding a node in a new rack
* Removing / Decommissioning the last node in a rack
Previously the config option `rf_rack_valid_keyspaces` was required for creating views. We now remove this restriction - it's not needed because we always maintain RF-rack-validity for keyspaces with views.
The restrictions are relevant only for keyspaces with numerical RF. Keyspaces with rack-list-based RF are always RF-rack-valid.
Fixes scylladb/scylladb#23345
Fixes https://github.com/scylladb/scylladb/issues/26820
backport to relevant versions for materialized views with tablets since it depends on rf-rack validity
Closes scylladb/scylladb#26354
* github.com:scylladb/scylladb:
docs: update RF-rack restrictions
cql3: don't apply RF-rack restrictions on vector indexes
cql3: add warning when creating mv/index with tablets about rf-rack
service/tablet_allocator: always allow tablet merge of tables with views
locator: extend rf-rack validation for rack lists
test: test rf-rack validity when creating keyspace during node ops
locator: fix rf-rack validation during node join/remove
test: test topology restrictions for views with tablets
test: add test_topology_ops_with_rf_rack_valid
topology coordinator: restrict node join/remove to preserve RF-rack validity
topology coordinator: add validation to node remove
locator: extend rf-rack validation functions
view: change validate_view_keyspace to allow MVs if RF=Racks
db: enforce rf-rack-validity for keyspaces with views
replica/db: add enforce_rf_rack_validity_for_keyspace helper
db: remove enforce parameter from check_rf_rack_validity
test: adjust test to not break rf-rack validity
Disabling of balancing waits for the topology state machine to become idle, to guarantee that no migrations are happening or will happen after the call returns. But it doesn't interrupt the scheduler, which means the call can take an arbitrary amount of time. It may wait for a tablet repair to finish, which can take many hours.
We should do it via topology request, which will interrupt the tablet scheduler.
Enabling of balancing can be immediate.
Fixes https://github.com/scylladb/scylladb/issues/27647
Fixes #27210
Closes scylladb/scylladb#27736
* https://github.com/scylladb/scylladb:
test: Verify that repair doesn't block disabling of tablet load balancing
tablets: Make balancing disabling call preempt tablet transitions
VECTOR_SEARCH_INDEXING permission didn't work on cdc tables as we mistakenly checked for vector indexes on the cdc table instead of the base table.
This patch fixes that and adds a test that validates this behavior.
Fixes: VECTOR-476
Closes scylladb/scylladb#28050
Call discover_staging_sstables in view_update_generator::start() instead
of in the constructor, because the constructor is called during
initialization before sstables are loaded.
The initialization order was changed in 5d1f74b86a and caused this
regression. It means the view update generator won't discover staging
sstables on startup and view updates won't be generated for them. It
also causes issues in sstable cleanup.
view_update_generator::start() is called in a later stage of the
initialization, after sstable loading, so do the discovery of staging
sstables there.
Fixes scylladb/scylladb#27956
Closes scylladb/scylladb#27970
Currently, database::truncate_table_on_all_shards calls the table::can_flush only on the coordinator shard
and therefore it may miss shards with dirty data if the coordinator shard happens to have empty memtables, leading to clearing the memtables with dirty data rather than flushing them.
This change fixes that by making flush safe to be called, even if the memtable list is empty, and calling it on every shard that can flush (i.e. seal_immediate_fn is engaged).
Also, change database_test::do_with_some_data to use random keys instead of hard-coded key names, to reproduce this issue with `snapshot_list_contains_dropped_tables`.
Fixes #27639
* The issue has existed since forever and might cause data loss due to wrongly clearing the memtable, so it needs backport to all live versions
Closes scylladb/scylladb#27643
* github.com:scylladb/scylladb:
test: database_test: do_with_some_data: randomize keys
database: truncate_table_on_all_shards: drop outdated TODO comment
database: truncate_table_on_all_shards: consider can_flush on all shards
memtable_list: unify can_flush and may_flush
test: database_test: add test_flush_empty_table_waits_on_outstanding_flush
replica: table, storage_group, compaction_group: add needs_flush
test: database_test: do_with_some_data_in_thread: accept void callback function
2-DC cluster parallel non-RBNO rebuild failure when expanding RF in DC2.
Steps to reproduce:
1. Provision a cluster with 2 datacenters and at least 2 nodes in the second datacenter.
2. Let’s assume datacenter names are "dc1" and "dc2".
3. Create a keyspace ("keyspace1") with RF=0 in dc2.
4. Populate some data into dc1.
5. Change keyspace1 replication in dc2 to 2.
6. On 2 nodes in dc2 run the following command in parallel:
nodetool rebuild --source-dc dc1
Parallel execution of rebuilds is not possible with RBNO enabled.
This test is the repro for #27804
Closes scylladb/scylladb#27747
Currently the function uses a regular expression
to check the system log for a specific message.
This is tangential to the ability to cleanly abort the restore task, plus the regular expression has a syntax error:
```
test/cluster/object_store/test_backup.py:534
/home/bhalevy/dev/scylla/test/cluster/object_store/test_backup.py:534: SyntaxWarning: "\(" is an invalid escape sequence. Such sequences will not work in the future. Did you mean "\\("? A raw string is also an option.
await wait_for_first_completed([l.wait_for("Failed to handle STREAM_MUTATION_FRAGMENTS \(receive and distribute phase\) for .+: Streaming aborted", timeout=10) for l in logs])
```
This change modernizes the implementation by:
- using auto_dc_rack for manager.servers_add
- using new_test_keyspace to generate and auto-delete the keyspace
- using asyncio.gather and a prepared statement to insert the data
- simplifying the keys and values by NOT using os.urandom (which is notoriously slow)
- inserting fewer keys in debug mode
- removing the log check
With that, the test can be reenabled in all modes.
* No backport needed since the test was disabled
Closes scylladb/scylladb#27892
* github.com:scylladb/scylladb:
test_backup: do_abort_restore: reduce data footprint
test_backup: do_abort_restore: use error injection
test_backup: do_abort_restore: use asyncio for cql
test_backup: do_abort_restore: use new_test_keyspace
test_backup: do_abort_restore: use logger rather than print
test_backup: do_abort_restore: pass auto_rack_dc to servers_add
This patch adds tablet repair progress report support so that the user
can use the /task_manager/task_status API to query the progress.
In order to support this, a new system table is introduced to record the
user-request-related info, i.e., the start and the end of the request.
The progress stays accurate even when a tablet split or merge happens in the
middle of the request, since the tokens of the tablet are recorded when
the request is started and when repair of each tablet is finished. The
original tablet repair is considered finished when the finished
ranges cover the original tablet token ranges.
After this patch, the /task_manager/task_status API will report correct
progress_total and progress_completed.
Fixes #22564
Fixes #26896
Closes scylladb/scylladb#27679
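As a rough illustration of how the progress can now be queried, assuming the node's REST API listens on the default port 10000 and that `task_id` identifies the tablet repair task (names here are illustrative):
```
import requests

def get_repair_progress(node_ip: str, task_id: str) -> tuple[float, float]:
    resp = requests.get(f"http://{node_ip}:10000/task_manager/task_status/{task_id}")
    resp.raise_for_status()
    status = resp.json()
    # After this patch, these fields reflect the token ranges recorded when
    # the request started vs. the ranges repaired so far.
    return status["progress_completed"], status["progress_total"]
```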
It was observed:
```
test_repair_disjoint_row_2nodes_diff_shard_count was spuriously failing due to
segfault.
backtrace pointed to a failure when allocating an object from the chain of
freed objects, which indicates memory corruption.
(gdb) bt
at ./seastar/include/seastar/core/shared_ptr.hh:275
at ./seastar/include/seastar/core/shared_ptr.hh:430
Usual suspect is use-after-free, so ran the reproducer in the sanitize mode,
which indicated shared ptr was being copied into another cpu through the
multi shard writer:
seastar - shared_ptr accessed on non-owner cpu, at: ...
--------
seastar::smp_message_queue::async_work_item<mutation_writer::multishard_writer::make_shard_writer...
```
The multishard writer itself was fine; the problem was in the streaming consumer
for repair copying a shared ptr. It could work fine with the same smp setting, since
there will be only 1 shard in the consumer path, from the rpc handler all the way
to the consumer. But with a mixed smp setting, the ptr would be copied to the
cpus involved, and since the shared ptr is not cpu-safe, the refcount changes
can go wrong, causing double free and use-after-free.
To fix this, we pass a generic incremental repair handler to the streaming
consumer. The handler is safe to copy to different shards. It is
a no-op if incremental repair is not enabled or on a different shard.
A reproducer test is added. The test could reproduce the crash
consistently before the fix and work well after the fix.
Fixes #27666
Closes scylladb/scylladb#27870
The skip_mode function works only on functions and only in cluster tests. This is OK
when we need to skip one test, but it's not possible to use it with pytestmark
to automatically mark all tests in a file. The goal of this PR is to migrate
skip_mode to a dynamic pytest.mark that can be used as an ordinary mark.
Closes scylladb/scylladb#27853
[avi: apply to test/cluster/test_tablets.py::test_table_creation_wakes_up_balancer]
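For illustration, the difference might look like the following sketch (the exact skip_mode signature is assumed here from its usual decorator form):
```
import pytest

# Before: only usable as a per-function decorator.
# @skip_mode('release', 'error injections are not supported in release mode')
# async def test_something(manager): ...

# After: a dynamic pytest mark, so a whole module can be marked at once.
pytestmark = pytest.mark.skip_mode('release', 'error injections are not supported in release mode')
```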
The test is flaky, with failures in:
for server in servers:
> await check_node_log_for_failed_mutations(manager, server)
test/cluster/test_topology_ops_encrypted.py:84:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
manager = <test.pylib.manager_client.ManagerClient object at 0xffff602e8590>
server = ServerInfo(server_id=1769, ip_addr='127.82.127.43', rpc_address='127.82.127.43', datacenter='DEFAULT_DC', rack='DEFAULT_RACK', pid=186578)
async def check_node_log_for_failed_mutations(manager: ManagerClient, server: ServerInfo):
logging.info(f"Checking that node {server} had no failed mutations")
log = await manager.server_open_log(server.server_id)
occurrences = await log.grep(expr="Failed to apply mutation from", filter_expr="(TRACE|DEBUG|INFO)")
> assert len(occurrences) == 0
E AssertionError
test/cluster/util.py:319: AssertionError
As diagnosed by Gleb in https://github.com/scylladb/scylladb/issues/27942#issuecomment-3710013625:
"The fencing errors here look legit given that we do not wait for all
requests to complete while shutting down the storage proxy. The
scenario is this:
Test does writes to rf=3 keyspace with cl=one. One node is shutting
down while there is a tablet migration. Tablet migration executes
barrier and drain, which fails on a node that is being shut down. The
topology coordinator proceeds fencing the old topology, but there
still can be un-handled mutation requests from the shutting down node
on other nodes and they will generate fencing errors like they should.
The way to avoid it (though it is benign) is to wait for all outgoing
storage proxy requests to complete during shutdown, but even then the
error may still happen since a request may timeout before it is
processed by the other side, so it may be completed by a storage proxy
coordinator side, but still not handled by the replica side. This is what we
have fencing for in the first place."
Fix by disabling background tablet migrations, so that we have no
topology barriers concurrent with node shutdown.
Fixes #27942
Closes scylladb/scylladb#28034
The driver must see server_c before we stop server_a, otherwise
there will be no live host in the pool when we attempt to drop
the keyspace:
```
@pytest.mark.asyncio
async def test_not_enough_token_owners(manager: ManagerClient):
"""
Test that:
- the first node in the cluster cannot be a zero-token node
- removenode and decommission of the only token owner fail in the presence of zero-token nodes
- removenode and decommission of a token owner fail in the presence of zero-token nodes if the number of token
owners would fall below the RF of some keyspace using tablets
"""
logging.info('Trying to add a zero-token server as the first server in the cluster')
await manager.server_add(config={'join_ring': False},
property_file={"dc": "dc1", "rack": "rz"},
expected_error='Cannot start the first node in the cluster as zero-token')
logging.info('Adding the first server')
server_a = await manager.server_add(property_file={"dc": "dc1", "rack": "r1"})
logging.info('Adding two zero-token servers')
# The second server is needed only to preserve the Raft majority.
server_b = (await manager.servers_add(2, config={'join_ring': False}, property_file={"dc": "dc1", "rack": "rz"}))[0]
logging.info(f'Trying to decommission the only token owner {server_a}')
await manager.decommission_node(server_a.server_id,
expected_error='Cannot decommission the last token-owning node in the cluster')
logging.info(f'Stopping {server_a}')
await manager.server_stop_gracefully(server_a.server_id)
logging.info(f'Trying to remove the only token owner {server_a} by {server_b}')
await manager.remove_node(server_b.server_id, server_a.server_id,
expected_error='cannot be removed because it is the last token-owning node in the cluster')
logging.info(f'Starting {server_a}')
await manager.server_start(server_a.server_id)
logging.info('Adding a normal server')
await manager.server_add(property_file={"dc": "dc1", "rack": "r2"})
cql = manager.get_cql()
await wait_for_cql_and_get_hosts(cql, [server_a], time.time() + 60)
> async with new_test_keyspace(manager, "WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 2} AND tablets = { 'enabled': true }") as ks_name:
test/cluster/test_not_enough_token_owners.py:57:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/usr/lib64/python3.14/contextlib.py:221: in __aexit__
await anext(self.gen)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
manager = <test.pylib.manager_client.ManagerClient object at 0x7f37efe00830>
opts = "WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 2} AND tablets = { 'enabled': true }"
host = None
@asynccontextmanager
async def new_test_keyspace(manager: ManagerClient, opts, host=None):
"""
A utility function for creating a new temporary keyspace with given
options. It can be used in a "async with", as:
async with new_test_keyspace(ManagerClient, '...') as keyspace:
"""
keyspace = await create_new_test_keyspace(manager.get_cql(), opts, host)
try:
yield keyspace
except:
logger.info(f"Error happened while using keyspace '{keyspace}', the keyspace is left in place for investigation")
raise
else:
> await manager.get_cql().run_async("DROP KEYSPACE " + keyspace, host=host)
E cassandra.cluster.NoHostAvailable: ('Unable to complete the operation against any hosts', {<Host: 127.69.108.39:9042 dc1>: ConnectionException('Pool for 127.69.108.39:9042 is shutdown')})
test/cluster/util.py:544: NoHostAvailable
```
Fixes #28011
Closes scylladb/scylladb#28040
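A sketch of the kind of fix this implies, with `server_c` as an illustrative name for the normal server added near the end of the test; the import path of wait_for_cql_and_get_hosts is assumed:
```
import time

from test.pylib.util import wait_for_cql_and_get_hosts


async def finish_test(manager, server_a):
    server_c = await manager.server_add(property_file={"dc": "dc1", "rack": "r2"})
    cql = manager.get_cql()
    # Make sure the driver sees the new token owner as well, so the pool
    # still has a live host for the DROP KEYSPACE after server_a is stopped.
    await wait_for_cql_and_get_hosts(cql, [server_a, server_c], time.time() + 60)
```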
This commit replaces a fixed version name with the variable for the current version
in the instructions for installing a non-default version with the Web Installer.
This makes the instructions more user-friendly.
Fixes https://github.com/scylladb/scylladb/issues/28005
With randomized keys, and since we're inserting only 2 keys,
it is possible that they would end up owned only by a single shard,
reproducing #27639 in snapshot_list_contains_dropped_tables.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
The comment was added in 83323e155e
Since then, table::seal_active_memtable was improved to guarantee
waiting on outstanding flushes on success (see d55a2ac762), so
we can remove this TODO comment (it is also not covered by any issue,
so nobody plans to ever work on it).
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
can_flush might return a different value for each shard
so check it right before deciding whether to flush or clear a memtable
shard.
Note that under normal conditions can_flush would always return true
now that it checks only the presence of the seal memtable function
rather than checking memtable_list::empty().
Fixes #27639
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Now that we have a unit test proving that it's safe to flush an
empty memtable list there is no need to distinguish between
may_flush and can_flush.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Table needs flush if not all its memtable lists are empty.
To be used in the next patch for a unit test.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Many test cases already assume `func` is being called in a seastar
thread, and although the function they pass returns a (ready) future,
it serves no purpose other than to conform to the interface.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
This `raft_topology_update_ip` call always returns after `t.find(raft_id)`
returns `nullptr`, so it effectively does nothing. It's not a bug, since
there is no reason to update `system.peers` for left nodes anyway. We
delete the rows corresponding to left nodes in `process_left_node` (called
just above).
Closes scylladb/scylladb#27899
The service levels cache is empty after upgrade to consistent topology
if no mutations are committed to `system.service_levels_v2` or a rolling
restart is not done.
To fix the bug, this commit adds service levels cache reloading after
upgrading the SL data accessor to v2 in `storage_service::topology_state_load()`.
Fixes SCYLLADB-90
before doing migration to raft
There is no need to call `service_level_controller::upgrade_to_v2()`
on every topology state load; we only need to do it once.
All tests I am fixing in this patch do pass for me on DynamoDB, but
other developers report that they fail because some DynamoDB servers
apparently use slightly different error messages, with less detail about
the cause of an error. For example, some of our tests currently expect
an error message that looks like:
An error occurred (ValidationException) when calling the Query
operation: Invalid operator used in KeyConditionExpression:
attribute_exists
But some servers don't report the ": attribute_exists" at the end, so
we can't use the word "attribute_exists" in the test to recognize
the correct error, and need to use a different word (which both
versions of DynamoDB and Alternator print).
As another example, the good old DynamoDB error:
An error occurred (ValidationException) when calling the Query
operation: 1 validation error detected: Value 'DOG' at
'conditionalOperator' failed to satisfy constraint: Member must
satisfy enum value set: [OR, AND]
Got replaced by the following less informative message:
An error occurred (ValidationException) when calling the Query
operation: Failed to satisfy constraint: Member must satisfy enum
value set: [ALL, OR]'
So we need to fix the test to allow it too.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
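For example, the check can be loosened roughly like this (the fixture name and regex are illustrative, not the exact ones used in the tests):
```
import pytest
from botocore.exceptions import ClientError

def test_query_invalid_key_condition_operator(test_table_sb):
    # Match only words that every server variant prints, instead of the
    # trailing ": attribute_exists" detail that some servers omit.
    with pytest.raises(ClientError, match='ValidationException.*KeyConditionExpression'):
        test_table_sb.query(KeyConditionExpression='attribute_exists(p)')
```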
The test test_compressed_request.py::test_compressed_request coerces
boto3 to send a compressed request, and wrongly used region_name=us-east-1
to set up the connection. Theoretically, this doesn't matter because
we also set the correct URL (for either Alternator or the desired region
in AWS). But in fact it does matter, because region name is part of the
request's signature, and DynamoDB refuses the request if it comes to
a different region than it is signed for. So this test fails when run
on DynamoDB on any other region except us-east-1.
The fix is simple - don't use the constant "us-east-1", but pick up the
correct region name from the original connection.
The functions new_dynamodb_session(), new_dynamodb() and
new_dynamodb_streams() had the same bug and we fix it too, but it didn't
break any test because the only tests using these functions were
Scylla-only, so the AWS region problem didn't apply to them.
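A minimal sketch of the fix, assuming the original boto3 resource is available as `dynamodb` (the helper shape is illustrative): reuse its region instead of a hard-coded "us-east-1", so the request signature matches the region the endpoint expects.
```
import boto3

def new_dynamodb_session(dynamodb, url):
    # Region of the original connection, not a hard-coded constant.
    region = dynamodb.meta.client.meta.region_name
    session = boto3.Session(region_name=region)
    return session.resource('dynamodb', endpoint_url=url, region_name=region)
```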
The Alternator test test_tag.py::test_tag_lsi_gsi expects to see an
error - it's not allowed to set a tag on a GSI or LSI - but the error
message that DynamoDB prints recently changed - instead of saying
"ResourceArn" the new error message says "resource arn".
Change the test to allow both forms, so it will pass on both Alternator
(which still uses the word ResourceArn - which is the name of the
parameter) and on DynamoDB (which uses "resource arn").
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
The test test_batch.py::test_batch_write_item_large_broken_connection
failed on DynamoDB (Refs #26079). It turns out this test has many
problems:
1. This test wrongly assumes a batch write needs to complete in one
attempt - and this fails on DynamoDB with low WCU capacity where
the batch needs to be resumed in multiple requests. Using boto3's
batch_writer() fixes this problem.
2. This test has NOTHING to do with batches - so is mis-named and
mis-placed. The batch write is just a way to prepare some data
in the table, and the real test is about Query'ing the data back
and observing the long response and reproducing issue #14454.
I did not rename or move the test, but left a comment explaining
the situation.
3. This test is written to assume the Query's response uses HTTP
chunked encoding. Which isn't actually true for DynamoDB, at least
not at the time of this writing. So the test fails on DynamoDB.
For the last reason, I made this test scylla_only. This test can't
really be run on DynamoDB without rewriting it.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
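As an aside, point 1 above amounts to something like the following hedged sketch (attribute names are illustrative): boto3's batch_writer() resends unprocessed items automatically, so the write survives low WCU capacity.
```
def populate_with_batch_writer(table, count, long_value):
    # batch_writer() buffers the puts into BatchWriteItem calls and
    # automatically resends whatever comes back as UnprocessedItems.
    with table.batch_writer() as batch:
        for i in range(count):
            batch.put_item(Item={'p': f'key{i}', 'c': 'x', 'content': long_value})
```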
The test test_batch.py::test_batch_write_item_large often fails when
running on DynamoDB, and this patch fixes it. The test checks that a
large, but not over-the-limits, batch works. However, "works" only
means that the batch is not an error - it doesn't guarantee that all the
items in the batch are performed. If the WCU limits of the table are
exceeded, DynamoDB may perform only part of the batch and return the
remaining items as UnprocessedItems. This not only can happen, it
usually does happen on DynamoDB - because a new on-demand-billing table
always starts with a very low WCU capacity.
So in this patch we update the test to recognize and perform the
UnprocessedItems, instead of assuming it needs to be empty.
The test continues to pass on Alternator, and finally passes on
DynamoDB.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
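The retry loop is conceptually as simple as the following sketch (using the low-level client API; the real test may differ in details):
```
def batch_write_all(client, request_items):
    # Keep resending whatever DynamoDB reports as unprocessed until the
    # whole batch has been applied.
    while request_items:
        response = client.batch_write_item(RequestItems=request_items)
        request_items = response.get('UnprocessedItems', {})
```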
In Alternator tests, the wait_for_gsi() utility function is used in
tests that add a GSI to an existing table, to wait for this new GSI
to become ready. Although this takes a fraction of a second on
Alternator, we noticed that this takes many minutes (!) on DynamoDB
so we used an absurdly high 10 minute timeout to allow tests to also
pass on DynamoDB.
But it turns out that 10 minutes wasn't absurdly high enough, and
tests using it in test_gsi_updatetable.py started to fail on DynamoDB.
Empirically, 10 minutes was enough in the past but it seems that today
adding a GSI to an empty table routinely takes as much as 20 minutes.
So this patch increases the wait_for_gsi() timeout to a whopping 30
minutes. After this patch, the tests in test_gsi_updatetable.py which
used to fail - test_gsi_backfill_with_lsi,
test_gsi_backfill_with_real_column, test_gsi_creates_and_deletes and
test_gsi_backfill_oversized_key now all pass on DynamoDB - but each
takes more than 20 minutes to pass.
To allow the test to fail much more quickly on Alternator (where
creating a GSI takes a fraction of a second), we set a much lower
but still very high timeout when running on Alternator - 60 seconds.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
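The shape of such a wait is roughly the following (the `is_aws` flag and exact timeouts are illustrative; the real helper may poll differently):
```
import time

def wait_for_gsi(table, gsi_name, is_aws):
    timeout = 30 * 60 if is_aws else 60   # seconds
    deadline = time.time() + timeout
    while time.time() < deadline:
        table.reload()
        gsis = table.global_secondary_indexes or []
        if any(g['IndexName'] == gsi_name and g['IndexStatus'] == 'ACTIVE' for g in gsis):
            return
        time.sleep(1)
    raise TimeoutError(f'GSI {gsi_name} did not become ACTIVE within {timeout}s')
```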
This patch modifies RESTful API handler which disables tablet
balancing to use topology request to wait for already running tablet
transitions. Before, it was just waiting for topology to be idle, so
it could wait much longer than necessary, also for operations which
are not affected by the flag, like repair. And repair can take hours.
New request type is introduced for this synchronization: noop_request.
It will preempt the tablet scheduler, and when the request executes,
we know all later tablet transitions will respect the "balancing
disabled" flag, and only things which are unaffected by the flag,
like repair, will be scheduled.
Fixes #27647
The test test_streams.py::test_streams_putitem_new_item_overrides_old_lsi
failed on DynamoDB (Refs #26079) because we passed an unused parameter
NonKeyAttributes to the Projection setting an LSI. NonKeyAttributes is
only allowed when ProjectionType=INCLUDE, but we used ProjectionType=ALL.
DynamoDB refuses to create an LSI with such inconsistent parameters,
and we just need to remove this unnecessary parameter from this test.
The reason why this test didn't fail on Alternator is that Alternator
doesn't yet support or even parse the Projection parameter (Refs #5036).
We also add an xfailing test (passes on DynamoDB, fails on Alternator)
checking that a spurious NonKeyAttributes parameter is rejected. When
we get around to implementing the projection feature (#5036), this will
be yet another acceptance test for this feature.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
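To illustrate the rule (attribute names here are made up): NonKeyAttributes may only appear together with ProjectionType=INCLUDE, so with ProjectionType=ALL it has to be dropped.
```
lsi_all = {
    'IndexName': 'lsi_all',
    'KeySchema': [{'AttributeName': 'p', 'KeyType': 'HASH'},
                  {'AttributeName': 'x', 'KeyType': 'RANGE'}],
    # ALL projects everything, so NonKeyAttributes is not allowed here.
    'Projection': {'ProjectionType': 'ALL'},
}
lsi_include = {
    'IndexName': 'lsi_include',
    'KeySchema': [{'AttributeName': 'p', 'KeyType': 'HASH'},
                  {'AttributeName': 'y', 'KeyType': 'RANGE'}],
    # NonKeyAttributes is only valid with INCLUDE.
    'Projection': {'ProjectionType': 'INCLUDE', 'NonKeyAttributes': ['a', 'b']},
}
```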
To make the test fast, in particular in debug mode,
insert fewer keys and do not rely on os.urandom,
which is notoriously slow.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Currently the test depends on timing and enough inserted
data to abort the restore tasks at exactly the right time.
This is flaky in nature, so instead, use error injection
to synchronize the abort with mutation streaming.
Note that with that we no longer get the STREAM_MUTATION_FRAGMENTS
log message, so waiting for it is dropped from the test.
The most important thing is that some restore tasks must fail.
(We cannot guarantee all would fail unfortunately)
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
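A rough sketch of the synchronization pattern, assuming the usual error-injection REST helpers of the cluster-test framework; the injection name is made up for illustration, and the restore tasks would be aborted between the two calls:
```
async def pause_streaming(manager, servers):
    # Hold mutation streaming on the target nodes so that the restore tasks
    # are still streaming when they get aborted.
    for s in servers:
        await manager.api.enable_injection(s.ip_addr, "stream_mutation_fragments_pause", one_shot=False)

async def resume_streaming(manager, servers):
    for s in servers:
        await manager.api.disable_injection(s.ip_addr, "stream_mutation_fragments_pause")
```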
Use the more modern asyncio facility to run cql queries
and a prepared statement to insert data into the table.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
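In sketch form (relying on the test suite's run_async() wrapper seen elsewhere in this log; table and column names are illustrative):
```
import asyncio

async def populate(cql, ks, nkeys):
    stmt = cql.prepare(f"INSERT INTO {ks}.test (pk, value) VALUES (?, ?)")
    # Fire the inserts concurrently instead of one blocking execute at a time.
    await asyncio.gather(*(cql.run_async(stmt, [i, f"value-{i}"]) for i in range(nkeys)))
```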
To generate a multi-rack cluster, otherwise we get the following error:
```
E cassandra.protocol.ConfigurationException: <Error from server: code=2300 [Query invalid because of configuration issue] message="Replication factor 3 exceeds the number of racks (1) in dc datacenter1">
```
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
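The equivalent explicit setup would be something like the sketch below (the actual change uses the auto rack/dc helper mentioned in the commit subject; rack names are illustrative):
```
async def add_three_rack_cluster(manager):
    # One node per rack, so an RF=3 keyspace does not exceed the number of
    # racks in the datacenter.
    return [await manager.server_add(property_file={"dc": "dc1", "rack": rack})
            for rack in ("r1", "r2", "r3")]
```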
Move KSNAME constant from internal static to public member of
replicated_key_provider_factory class.
It will be used to identify it as a system keyspace.
Signed-off-by: Amnon Heiman <amnon@scylladb.com>
The explanation is in the new comment in `gossiper::add_saved_endpoint`.
We add a test for this change. It's "extremely white-box", but it's better
than nothing.
We currently do it only for a bootstrapping node, which is a bug. The
missing IP can cause an internal error, for example, in the following
scenario:
- replace fails during streaming,
- all live nodes are shut down before the rollback of replace completes,
- all live nodes are restarted,
- live nodes start hitting internal error in all operations that
require IP of the replacing node (like client requests or REST API
requests coming from nodetool).
We fix the bug here, but we do it separately for replace with different
IP and replace with the same IP.
For replace with different IP, we persist the IP -> host ID mapping
in `system.peers` just like for bootstrap. That's necessary, since there
is no other way to determine IP of the replacing node on restart.
For replace with the same IP, we can't do the same. This would require
deleting the row corresponding to the node being replaced from
`system.peers`. That's fine in theory, as that node is permanently
banned, so its IP shouldn't be needed. Unfortunately, we have many
places in the code where we assume that IP of a topology member is always
present in the address map or that a topology member is always present in
the gossiper endpoint set. Examples of such places:
- nodetool operations,
- REST API endpoints,
- `db::hints::manager::store_hint`,
- `group0_voter_handler::update_nodes`.
We could fix all those places and verify that drivers work properly when
they see a node in the token metadata, but not in `system.peers`.
However, that would be too risky to backport.
We take a different approach. We recover IP of the replacing node on
restart based on the state of the topology state machine and
`system.peers` just after loading `system.peers`.
We rely on the fact that group 0 is set up at this point. The only case
where this assumption is incorrect is a restart in the Raft-based
recovery procedure. However, hitting this problem then seems improbable,
and even if it happens, we can restart the node again after ensuring
that no client and REST API requests come before replace is rolled back
on the new topology coordinator. Hence, it's not worth complicating the
fix (by e.g. looking at the persistent topology state instead of the
in-memory state machine).
Update the documentation about restrictions to tablets keyspaces related
to RF-rack.
* MV/SI require the keyspace to be RF-rack-valid
* topology operations are restricted if a keyspace has views to preserve
RF-rack-validity
When creating an index we validate that the keyspace is RF-rack-valid
and print a warning that the keyspace must remain RF-rack-valid.
This should apply only to indexes that are based on materialized views
for which there are consistency concerns when the keyspace is not
RF-rack-valid.
vector indexes are not based on materialized views, hence these
restrictions should not apply to them.
Creating a MV or index in a tablets-based keyspace now forces additional
restrictions on the keyspace. The keyspace must be RF-rack-valid and it
must remain RF-rack-valid while the view exists.
Add a CQL warning about these restrictions.
allow tablet merge of tables with views even if the
rf_rack_valid_keyspaces option is not set, because now keyspaces that
have views are enforced to always be rf-rack-valid, regardless of the
option value.
Extend the RF-rack validation in `assert_rf_rack_valid_keyspace` to
validate rack-list-based replication as well. Previously, validation was
done only for numeric replication.
If the replication is based on a rack list, we validate that all racks
that are required for replication are present in the topology rack map.
If some rack is needed for replication but is missing, or it doesn't
have normal token owner nodes, the validation fails with an error.
add tests that attempt to create a keyspace during different stages of
node join or remove, and verify that the rf-rack condition can't be
broken - either creating the keyspace should fail or the node operation
should fail, depending on the stage.
If a keyspace is created while a node is joining or being removed, it could
break the rf-rack invariant. For example:
1. We have 3 nodes in 3 racks, no keyspaces
2. A new node starts to join in a new rack - passes validation because
there are no keyspaces
3. Create a keyspace with rf=3 - passes validation because the joining
node is not a normal token owner yet
4. The new node becomes a normal token owner
5. The rf-rack invariant is broken. We have rf=3 and 4 racks
To fix this, we change the rf-rack check to consider a node as a token
owner if it's either a normal token owner or it has bootstrap tokens and
is about to become a normal token owner.
Now the condition can't be broken. Consider keyspace creation at
different stages of adding a node in our example:
* Before the node is assigned bootstrap tokens: the node is not
considered. We can create a keyspace with rf=3 as if the node doesn't
exist, and then node join will fail in the group0 operation that
assigns bootstrap tokens, because during this operation we check
rf-rack validity.
* Assigning bootstrap tokens is a single group0 operation that is
serialized with keyspace creation. During this operation we check that
adding the node as a token owner will maintain rf-rack validity for all
keyspaces.
* After the node is assigned bootstrap tokens and until it becomes a
normal token owner: it is considered as a transitioning token owner by
the rf-rack check and the rack is considered a transitioning rack. We
can't count the rack as a normal rack because the node join may still
fail and rollback. Trying to create a keyspace with either rf=3 or
rf=4 will fail because we can end up with either 3 or 4 racks.
Similarly, when removing a node, we validate that removing the node will
maintain rf-rack validity in the same group0 operation that changes the
node state to removing/decommissioning, after which the node becomes a
leaving endpoint, and it's not considered a normal token owner anymore
for the rf-rack check.
Add tests that verify the restrictions on topology operations when there
are keyspaces with tablets and materialized views.
For such keyspaces, RF=Racks must be enforced while they have
materialized views, therefore adding a node in a new rack or removing a
node that would eliminate a rack should be rejected.
add new tests for testing that RF-rack validity is maintained when doing
topology operations that may break them, such as adding nodes in new
racks or removing nodes.
when a new node joins or an existing node is removed / decommissioned,
check if the operation would violate the RF-rack-validity of some
keyspace. if so - reject the operation in order to preserve
RF-rack-validity.
Fixes scylladb/scylladb#23345
Fixes scylladb/scylladb#26820
add validation to node remove / decommission, similar to node validation
when a node joins.
when starting node remove or decommission, the validation function
checks if the operation is valid and can proceed. if not, it's aborted
with an error message.
we change the return type of validate_joining_node so that it will be
similar and consistent with the new validate_removing_node.
Extend the locator function assert_rf_rack_valid_keyspace to accept
arbitrary topology dc-rack maps and nodes instead of using the current
token metadata.
This allows us to add a new variant of the function that checks rf-rack
validity given a topology change that we want to apply. We will use it
to check that rf-rack validity will be maintained before applying the
topology change.
The possible topology changes for the check are node add and node remove
/ decommission. These operations can change the number of normal racks -
if a new node is added to a new rack, or the last node is removed from a
rack.
The function validate_view_keyspace checks if a keyspace is eligible for
having materialized views, and it is used for validation when creating a
MV or a MV-based index.
Previously, it was required that the rf_rack_valid_keyspaces option is
set in order for tablets-based keyspaces to be considered eligible, and
the RF-rack condition was enforced when the option is set.
Instead of this, we change the validation to allow MVs in a keyspace if
the RF-rack condition is satisfied for the keyspace - regardless of the
config option.
We remove the config validation for views on startup that validates the
option `rf_rack_valid_keyspaces` is set if there are any views with
tablets, since this is not required anymore.
We can do this without worrying about upgrades because this change will
be effective from 2025.4 where MVs with tablets are first out of
experimental phase.
We update the test for MV and index restrictions in tablets keyspaces
according to the new requirements.
* Create MV/index: previously the test checked that it's allowed only if
the config option `rf_rack_valid_keyspaces` is set. This is changed
now so it's always allowed to create MV/index if the keyspace is
RF-rack-valid. Update the test to verify that we can create MV/index
when the keyspace is RF-rack-valid, even if the rf_rack option is not
set, and verify that it fails when the keyspace is RF-rack-invalid.
* Alter: Add a new test to verify that while a keyspace has views, it
can't be altered to become RF-rack-invalid.
Extend the RF-rack-validity enforcement to keyspaces that have views,
regardless of the option `rf_rack_valid_keyspaces`.
Previously, RF-rack-validity was enforced when `rf_rack_valid_keyspaces`
was set for all keyspaces. Now we want to allow creating MVs in tablet
keyspaces that are RF-rack-valid and enforce the RF-rack-validity even
if the config option is not set.
Add the helper function enforce_rf_rack_validity_for_keyspace that
returns true if RF-rack-validity should be enforced for a keyspace, and
use it wherever we need to check this instead of checking the config
option directly.
This is useful because this condition is used in multiple places, and
having it defined in a single helper function will make it easier to
see and change the enforcement conditions.
simple refactoring: the enforce parameter is always given the value of
the `rf_rack_valid_keyspaces` option. remove the parameter and use the
option value directly from the db config.
this will be useful for a later change to the enforcement conditions.
the test test_unfinished_writes_during_shutdown starts 3 nodes in 3
racks and creates a keyspace with RF=3, then adds a new node in a 4th
rack. this breaks rf-rack validity for the keyspace.
we change it instead to add the new node in an existing rack. it doesn't
matter for the test - the test only wants to add a new node to trigger
some topology change.
We're extending the logic of DESCRIBE INDEX to include properties of the
underlying materialized view. Tests are provided to ensure the
implementation works as intended.
This is a temporary solution as handling this property may require
a bit more attention or at least a bit more focus. For now, let's
forbid using it so it's clear it won't get applied. A simple test
is provided to cover it.
We document the restriction.
After the previous patch that extended the grammar and provided
basic functionalities to accommodate properties of materialized views
in indexes, this commit takes another step and actually applies them
to the underlying view when it's being created.
We're providing validation tests for each property, with the single
exception of CLUSTERING ORDER BY. That one will be handled separately
in an upcoming commit.
We also update the user documentation.
We're allowing CREATE INDEX to accept the same set of properties as
materialized views do. Our goal is to give the user an ability to
configure the underlying materialized view of an index directly,
when creating it.
This commit doesn't do anything except for extending the grammar
and passing the right pieces of information to the right destinations.
There's no validation and the options have no effect yet. That will
be done in the following patch.
The type represents a mix of both index-specific and view properties.
Since we cannot easily distinguish which properties belong to which
entity, let's use this abstraction and filter them from the C++ level.
This is a prerequisite for extending the capabilities of CREATE INDEX
by allowing it to configure the underlying materialized view.
We rename the type `index_prop_defs` to `index_specific_prop_defs`.
The rationale for the change is to distinguish between properties
related directly to a index and properties related to the underlying
view (if applicable).
The type `index_prop_defs` will be re-introduced in an upcoming commit
where it'll encompass both index-related and view-related properties.
This is a prerequisite for it.
We're introducing a new type wrapping properties that can be used with
materialized views. Doing that, we achieve the following things:
(1) We can keep validation logic in one place.
(2) We differentiate between properties of a regular table and
properties of a materialized view.
(3) It provides better modularization and allows for reusing the code.
(4) It gets rid of inconsistencies in the existing code, e.g.
CREATE MV using one type for properties, while ALTER MV another.
The actual end goal of this commit is to be able to reuse at least part
of the validation logic of MVs in CREATE INDEX and, when it gets added,
ALTER INDEX: we want to endow those statements with an ability to modify
the underlying materialized view without having to modify it directly.
This patch does NOT implement the whole validation logic yet. It will be
done in a following commit.
Refs scylladb/scylladb#16454
The end goal we have in mind in this commit is to extract the validation
logic of the options used for creating and altering an MV to a separate
place and be able to call it from different places in the code.
It will be useful when extending the capabilities of the CREATE INDEX
statement.
In this patch, we move the part of validation responsible for checking
the ID option to keep it close to the other parts of validation of the
options in their "raw" form.
One of the upcoming commits will lead to a cyclic dependency
of headers because `schema.hh` includes `index_prop_defs.hh`.
To prevent that, we remove the include and replace it with
a manually added alias.
This is not a perfect solution, but doing it properly would
require comprehensive changes. We can do that in a separate
task.
- Strive for simplicity and clarity, add complexity only when clearly justified
- Question requests: don't blindly implement requests - evaluate trade-offs, identify issues, and suggest better alternatives when appropriate
- Consider different approaches, weigh pros and cons, and recommend the best fit for the specific context
## Test Philosophy
- Performance matters. Tests should run as quickly as possible. Sleeps in the code are highly discouraged and should be avoided, to reduce run time and flakiness.
- Stability matters. Tests should be stable. New tests should be executed 100 times at least to ensure they pass 100 out of 100 times. (use --repeat 100 --max-failures 1 when running it)
- Unit tests should ideally test one thing and one thing only.
- Tests for bug fixes should run before the fix - and show the failure and after the fix - and show they now pass.
- Tests for bug fixes should have in their comments which bug fixes (GitHub or JIRA issue) they test.
- Tests in debug are always slower, so if needed, reduce number of iterations, rows, data used, cycles, etc. in debug mode.
- Tests should strive to be repeatable, and not use random input that will make their results unpredictable.
- Tests should consume as little resources as possible. Prefer running tests on a single node if it is sufficient, for example.
co_return api_error::validation("GlobalSecondaryIndexes and LocalSecondaryIndexes with tablets require the rf_rack_valid_keyspaces option to be enabled.");
// Creating an index in tablets mode requires the keyspace to be RF-rack-valid.
// GSI and LSI indexes are based on materialized views which require RF-rack-validity to avoid consistency issues.
"description":"Set the incremental repair mode. Can be 'disabled', 'incremental', or 'full'. 'incremental': The incremental repair logic is enabled. Unrepaired sstables will be included for repair. Repaired sstables will be skipped. The incremental repair states will be updated after repair. 'full': The incremental repair logic is enabled. Both repaired and unrepaired sstables will be included for repair. The incremental repair states will be updated after repair. 'disabled': The incremental repair logic is disabled completely. The incremental repair states, e.g., repaired_at in sstables and sstables_repaired_at in the system.tablets table, will not be updated after repair. When the option is not provided, it defaults to 'disabled' mode.",
"description":"Set the incremental repair mode. Can be 'disabled', 'incremental', or 'full'. 'incremental': The incremental repair logic is enabled. Unrepaired sstables will be included for repair. Repaired sstables will be skipped. The incremental repair states will be updated after repair. 'full': The incremental repair logic is enabled. Both repaired and unrepaired sstables will be included for repair. The incremental repair states will be updated after repair. 'disabled': The incremental repair logic is disabled completely. The incremental repair states, e.g., repaired_at in sstables and sstables_repaired_at in the system.tablets table, will not be updated after repair. When the option is not provided, it defaults to incremental mode.",
,ignore_dead_nodes_for_replace(this,"ignore_dead_nodes_for_replace",value_status::Used,"","List dead nodes to ignore for replace operation using a comma-separated list of host IDs. E.g., scylla --ignore-dead-nodes-for-replace 8d5ed9f4-7764-4dbd-bad8-43fddce94b7c,125ed9f4-7777-1dbn-mac8-43fddce9123e")
,override_decommission(this,"override_decommission",value_status::Deprecated,false,"Set true to force a decommissioned node to join the cluster (cannot be set if consistent-cluster-management is enabled).")
,enable_repair_based_node_ops(this,"enable_repair_based_node_ops",liveness::LiveUpdate,value_status::Used,true,"Set true to use enable repair based node operations instead of streaming based.")
,allowed_repair_based_node_ops(this,"allowed_repair_based_node_ops",liveness::LiveUpdate,value_status::Used,"replace,removenode,rebuild,bootstrap,decommission","A comma separated list of node operations which are allowed to enable repair based node operations. The operations can be bootstrap, replace, removenode, decommission and rebuild.")
,allowed_repair_based_node_ops(this,"allowed_repair_based_node_ops",liveness::LiveUpdate,value_status::Used,"replace,removenode,rebuild","A comma separated list of node operations which are allowed to enable repair based node operations. The operations can be bootstrap, replace, removenode, decommission and rebuild.")
,enable_compacting_data_for_streaming_and_repair(this,"enable_compacting_data_for_streaming_and_repair",liveness::LiveUpdate,value_status::Used,true,"Enable the compacting reader, which compacts the data for streaming and repair (load'n'stream included) before sending it to, or synchronizing it with peers. Can reduce the amount of data to be processed by removing dead data, but adds CPU overhead.")
"If the compacting reader is enabled for streaming and repair (see enable_compacting_data_for_streaming_and_repair), allow it to garbage-collect tombstones."
"SELECT statements with aggregation or GROUP BYs or a secondary index may use this page size for their internal reading data, not the page size specified in the query options.")
,alternator_port(this,"alternator_port",value_status::Used,0,"Alternator API port.")
,alternator_https_port(this,"alternator_https_port",value_status::Used,0,"Alternator API HTTPS port.")
"Port on which the Alternator HTTPS API listens for clients using proxy protocol v2. Disabled (0) by default.")
,alternator_address(this,"alternator_address",value_status::Used,"0.0.0.0","Alternator API listening address.")
,alternator_enforce_authorization(this,"alternator_enforce_authorization",liveness::LiveUpdate,value_status::Used,false,"Enforce checking the authorization header for every request in Alternator.")
,alternator_warn_authorization(this,"alternator_warn_authorization",liveness::LiveUpdate,value_status::Used,false,"Count and log warnings about failed authentication or authorization")
"\tdisabled: New keyspaces use vnodes by default, unless enabled by the tablets={'enabled':true} option\n"
"\tenabled: New keyspaces use tablets by default, unless disabled by the tablets={'enabled':false} option\n"
"\tenforced: New keyspaces must use tablets. Tablets cannot be disabled using the CREATE KEYSPACE option")
,auto_repair_enabled_default(this,"auto_repair_enabled_default",liveness::LiveUpdate,value_status::Used,false,"Set true to enable auto repair for tablet tables by default. The value will be overridden by the per keyspace or per table configuration which is not implemented yet.")
,auto_repair_threshold_default_in_seconds(this,"auto_repair_threshold_default_in_seconds",liveness::LiveUpdate,value_status::Used,24*3600,"Set the default time in seconds for the auto repair threshold for tablet tables. If the time since last repair is bigger than the configured time, the tablet is eligible for auto repair. The value will be overridden by the per keyspace or per table configuration which is not implemented yet.")
,enable_create_table_with_compact_storage(this,"enable_create_table_with_compact_storage",liveness::LiveUpdate,value_status::Used,false,"Enable the deprecated feature of CREATE TABLE WITH COMPACT STORAGE. This feature will eventually be removed in a future version.")