Enable prometheus_allow_protobuf by default

Change the default value of the prometheus_allow_protobuf configuration option to true. This allows the ScyllaDB server to serve metrics in the Prometheus protobuf format (which enables native histogram support) when the monitoring server requests it. Update the config help text and docs to reflect protobuf support (drop the “experimental” wording). Add cluster tests to validate that the option is enabled by default, that it can be overridden, and that /metrics returns protobuf when requested via the Accept header (and falls back to text when the option is disabled). Fixes #27817 co-Author: mykaul <mykaul@scylladb.com> Signed-off-by: Amnon Heiman <amnon@scylladb.com>
2026-05-21 23:32:15 +00:00 · 2026-01-19 09:40:49 +02:00
524 changed files with 4550 additions and 18072 deletions
--- a/.github/workflows/add-label-when-promoted.yaml
+++ b/.github/workflows/add-label-when-promoted.yaml
@@ -10,9 +10,6 @@ on:
    types: [labeled, unlabeled]
    branches: [master, next, enterprise]

-env:
-  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
-
 jobs:
  check-commit:
    runs-on: ubuntu-latest
@@ -33,7 +30,7 @@ jobs:
            echo "DEFAULT_BRANCH=master" >> $GITHUB_ENV
          fi
      - name: Checkout repository
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@v4
        with:
          repository: ${{ github.repository }}
          ref: ${{ env.DEFAULT_BRANCH }}
--- a/.github/workflows/backport-pr-fixes-validation.yaml
+++ b/.github/workflows/backport-pr-fixes-validation.yaml
@@ -5,15 +5,12 @@ on:
    types: [opened, reopened, edited]
    branches: [branch-*]

-env:
-  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
-
 jobs:
  check-fixes-prefix:
    runs-on: ubuntu-latest
    steps:
      - name: Check PR body for "Fixes" prefix patterns
-        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
+        uses: actions/github-script@v7
        with:
          script: |
            const body = context.payload.pull_request.body;
@@ -21,7 +18,7 @@ jobs:
            
            // Regular expression pattern to check for "Fixes" prefix
            // Adjusted to dynamically insert the repository full name
-            const pattern = `Fixes:? ((?:#|${repo.replace('/', '\\/')}#|https://github\\.com/${repo.replace('/', '\\/')}/issues/)(\\d+)|(?:https://scylladb\\.atlassian\\.net/browse/)?([A-Z]+-\\d+))`;
+            const pattern = `Fixes:? ((?:#|${repo.replace('/', '\\/')}#|https://github\\.com/${repo.replace('/', '\\/')}/issues/)(\\d+)|([A-Z]+-\\d+))`;
            const regex = new RegExp(pattern);
            
            if (!regex.test(body)) {
--- a/.github/workflows/build-scylla.yaml
+++ b/.github/workflows/build-scylla.yaml
@@ -12,9 +12,6 @@ on:
        description: 'the md5sum for scylla executable'
        value: ${{ jobs.build.outputs.md5sum }}

-env:
-  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
-
 jobs:
  read-toolchain:
    uses: ./.github/workflows/read-toolchain.yaml
@@ -27,7 +24,7 @@ jobs:
    outputs:
      md5sum: ${{ steps.checksum.outputs.md5sum }}
    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - name: Generate the building system
--- a/.github/workflows/call_backport_with_jira.yaml
+++ b/.github/workflows/call_backport_with_jira.yaml
@@ -1,53 +0,0 @@
-name: Backport with Jira Integration
-
-on:
-  push:
-    branches:
-      - master
-      - next-*.*
-      - branch-*.*
-  pull_request_target:
-    types: [labeled, closed]
-    branches: 
-      - master
-      - next
-      - next-*.*
-      - branch-*.*
-
-jobs:
-  backport-on-push:
-    if: github.event_name == 'push'
-    uses: scylladb/github-automation/.github/workflows/backport-with-jira.yaml@main
-    with:
-      event_type: 'push'
-      base_branch: ${{ github.ref }}
-      commits: ${{ github.event.before }}..${{ github.sha }}
-    secrets:
-      gh_token: ${{ secrets.AUTO_BACKPORT_TOKEN }}
-      jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
-
-  backport-on-label:
-    if: github.event_name == 'pull_request_target' && github.event.action == 'labeled'
-    uses: scylladb/github-automation/.github/workflows/backport-with-jira.yaml@main
-    with:
-      event_type: 'labeled'
-      base_branch: refs/heads/${{ github.event.pull_request.base.ref }}
-      pull_request_number: ${{ github.event.pull_request.number }}
-      head_commit: ${{ github.event.pull_request.base.sha }}
-      label_name: ${{ github.event.label.name }}
-      pr_state: ${{ github.event.pull_request.state }}
-    secrets:
-      gh_token: ${{ secrets.AUTO_BACKPORT_TOKEN }}
-      jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
-
-  backport-chain:
-    if: github.event_name == 'pull_request_target' && github.event.action == 'closed' && github.event.pull_request.merged == true
-    uses: scylladb/github-automation/.github/workflows/backport-with-jira.yaml@main
-    with:
-      event_type: 'chain'
-      base_branch: refs/heads/${{ github.event.pull_request.base.ref }}
-      pull_request_number: ${{ github.event.pull_request.number }}
-      pr_body: ${{ github.event.pull_request.body }}
-    secrets:
-      gh_token: ${{ secrets.AUTO_BACKPORT_TOKEN }}
-      jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
--- a/.github/workflows/check-license-header.yaml
+++ b/.github/workflows/check-license-header.yaml
@@ -9,7 +9,6 @@ env:
  HEADER_CHECK_LINES: 10
  LICENSE: "LicenseRef-ScyllaDB-Source-Available-1.0"
  CHECKED_EXTENSIONS: ".cc .hh .py"
-  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true

 jobs:
  check-license-headers:
@@ -20,7 +19,7 @@ jobs:

    steps:
      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@v4
        with:
          fetch-depth: 0

@@ -41,7 +40,7 @@ jobs:

      - name: Comment on PR if check fails
        if: failure()
-        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
+        uses: actions/github-script@v7
        with:
          script: |
            const license = '${{ env.LICENSE }}';
--- a/.github/workflows/clang-nightly.yaml
+++ b/.github/workflows/clang-nightly.yaml
@@ -9,7 +9,6 @@ env:
  # use the development branch explicitly
  CLANG_VERSION: 21
  BUILD_DIR: build
-  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true

 permissions: {}

@@ -33,7 +32,7 @@ jobs:
    steps:
      - run: |
          sudo dnf -y install git
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      - uses: actions/checkout@v4
        with:
          submodules: true
      - name: Install build dependencies
--- a/.github/workflows/clang-tidy.yaml
+++ b/.github/workflows/clang-tidy.yaml
@@ -18,7 +18,6 @@ env:
  BUILD_TYPE: RelWithDebInfo
  BUILD_DIR: build
  CLANG_TIDY_CHECKS: '-*,bugprone-use-after-move'
-  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true

 permissions: {}

@@ -43,7 +42,7 @@ jobs:
          IMAGE: ${{ needs.read-toolchain.image }}
        run: |
          echo ${{ needs.read-toolchain.image }}
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      - uses: actions/checkout@v4
        with:
          submodules: true
      - run: |
--- a/.github/workflows/codespell.yaml
+++ b/.github/workflows/codespell.yaml
@@ -4,15 +4,13 @@ on:
    branches:
      - master
 permissions: {}
-env:
-  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
 jobs:
  codespell:
    name: Check for spelling errors
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
-      - uses: codespell-project/actions-codespell@8f01853be192eb0f849a5c7d721450e7a467c579 # v2.2
+      - uses: actions/checkout@v4
+      - uses: codespell-project/actions-codespell@master
        with:
          only_warn: 1
          ignore_words_list: "ans,datas,fo,ser,ue,crate,nd,reenable,strat,stap,te,raison,iif,tread"
--- a/.github/workflows/conflict_reminder.yaml
+++ b/.github/workflows/conflict_reminder.yaml
@@ -12,16 +12,13 @@ on:
  schedule:
    - cron: '0 10 * * 1'  # Runs every Monday at 10:00am

-env:
-  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
-
 jobs:
  notify_conflict_prs:
    runs-on: ubuntu-latest

    steps:
      - name: Notify PR Authors of Conflicts
-        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
+        uses: actions/github-script@v7
        with:
          script: |
            console.log("Starting conflict reminder script...");
--- a/.github/workflows/differential-shellcheck.yaml
+++ b/.github/workflows/differential-shellcheck.yaml
@@ -13,9 +13,6 @@ on:
 permissions:
  contents: read

-env:
-  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
-
 jobs:
  lint:
    runs-on: ubuntu-latest
@@ -24,12 +21,12 @@ jobs:
      security-events: write

    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Differential ShellCheck
-        uses: redhat-plumbers-in-action/differential-shellcheck@d965e66ec0b3b2f821f75c8eff9b12442d9a7d1e # v5.5.6
+        uses: redhat-plumbers-in-action/differential-shellcheck@v5
        with:
          severity: warning
          token: ${{ secrets.GITHUB_TOKEN }}
--- a/.github/workflows/docs-pages.yaml
+++ b/.github/workflows/docs-pages.yaml
@@ -5,7 +5,6 @@ name: "Docs / Publish"
 env:
  FLAG: ${{ github.repository == 'scylladb/scylla-enterprise' && 'enterprise' || 'opensource' }}
  DEFAULT_BRANCH: ${{ github.repository == 'scylladb/scylla-enterprise' && 'enterprise' || 'master' }}
-  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true

 on:
  push:
@@ -24,13 +23,13 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@v4
        with:
          ref: ${{ env.DEFAULT_BRANCH }}
          persist-credentials: false
          fetch-depth: 0
      - name: Set up Python
-        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+        uses: actions/setup-python@v5
        with:
          python-version: "3.10"
      - name: Set up env
--- a/.github/workflows/docs-pr.yaml
+++ b/.github/workflows/docs-pr.yaml
@@ -7,7 +7,6 @@ permissions:

 env:
  FLAG: ${{ github.repository == 'scylladb/scylla-enterprise' && 'enterprise' || 'opensource' }}
-  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true

 on:
  pull_request:
@@ -23,12 +22,12 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@v4
        with:
          persist-credentials: false
          fetch-depth: 0
      - name: Set up Python
-        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
+        uses: actions/setup-python@v5
        with:
          python-version: "3.10"
      - name: Set up env
--- a/.github/workflows/docs-validate-metrics.yml
+++ b/.github/workflows/docs-validate-metrics.yml
@@ -3,9 +3,6 @@ name: Docs / Validate metrics
 permissions:
  contents: read

-env:
-  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
-
 on:
  pull_request:
    branches:
@@ -24,12 +21,12 @@ jobs:

    steps:
    - name: Checkout code
-      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      uses: actions/checkout@v4
      with:
        submodules: true

    - name: Set up Python
-      uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+      uses: actions/setup-python@v6
      with:
        python-version: '3.10'

--- a/.github/workflows/iwyu.yaml
+++ b/.github/workflows/iwyu.yaml
@@ -13,7 +13,6 @@ env:
  # supposed to be processed by idl-compiler.py, so we don't check them using the cleaner
  CLEANER_DIRS: test/unit exceptions alternator api auth cdc compaction db dht gms index lang message mutation mutation_writer node_ops raft redis replica service
  SEASTAR_BAD_INCLUDE_OUTPUT_PATH: build/seastar-bad-include.log
-  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true

 permissions: {}

@@ -32,7 +31,7 @@ jobs:
    runs-on: ubuntu-latest
    container: ${{ needs.read-toolchain.outputs.image }}
    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      - uses: actions/checkout@v4
        with:
          submodules: true
      - run: |
@@ -91,7 +90,7 @@ jobs:
            | tee "$SEASTAR_BAD_INCLUDE_OUTPUT_PATH"
      - run: |
          echo "::remove-matcher owner=seastar-bad-include::"
-      - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
+      - uses: actions/upload-artifact@v4
        with:
          name: Logs
          path: |
--- a/.github/workflows/make-pr-ready-for-review.yaml
+++ b/.github/workflows/make-pr-ready-for-review.yaml
@@ -7,7 +7,6 @@ on:

 env:
  DEFAULT_BRANCH: 'master'
-  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true

 jobs:
  mark-ready:
@@ -18,7 +17,7 @@ jobs:

    steps:
      - name: Checkout repository
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@v4
        with:
          repository: ${{ github.repository }}
          ref: ${{ env.DEFAULT_BRANCH }}
--- a/.github/workflows/pr-require-backport-label.yaml
+++ b/.github/workflows/pr-require-backport-label.yaml
@@ -5,8 +5,6 @@ on:
    branches:
      - master
      - next
-env:
-  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
 jobs:
  label:
    if: github.event.pull_request.draft == false
@@ -17,7 +15,7 @@ jobs:
    steps:
      - name: Wait for label to be added
        run: sleep 1m
-      - uses: mheap/github-action-required-labels@0ac283b4e65c1fb28ce6079dea5546ceca98ccbe # v5.5.2
+      - uses: mheap/github-action-required-labels@v5
        with:
          mode: minimum
          count: 1
--- a/.github/workflows/read-toolchain.yaml
+++ b/.github/workflows/read-toolchain.yaml
@@ -7,9 +7,6 @@ on:
        description: "the toolchain docker image"
        value: ${{ jobs.read-toolchain.outputs.image }}

-env:
-  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
-
 jobs:
  read-toolchain:
    runs-on: ubuntu-latest
@@ -18,7 +15,7 @@ jobs:
    outputs:
      image: ${{ steps.read.outputs.image }}
    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      - uses: actions/checkout@v4
        with:
          sparse-checkout: tools/toolchain/image
          sparse-checkout-cone-mode: false
--- a/.github/workflows/seastar.yaml
+++ b/.github/workflows/seastar.yaml
@@ -13,7 +13,6 @@ concurrency:

 env:
  BUILD_DIR: build
-  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true

 jobs:
  read-toolchain:
@@ -30,12 +29,12 @@ jobs:
          - RelWithDebInfo
          - Dev
    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      - uses: actions/checkout@v4
        with:
          submodules: true
      - run: |
          rm -rf seastar
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      - uses: actions/checkout@v4
        with:
          repository: scylladb/seastar
          submodules: true
--- a/.github/workflows/sync-labels.yaml
+++ b/.github/workflows/sync-labels.yaml
@@ -7,9 +7,6 @@ on:
  issues:
    types: [labeled, unlabeled]

-env:
-  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
-
 jobs:
  label-sync:
    if: ${{ github.repository == 'scylladb/scylladb' }}
@@ -24,7 +21,7 @@ jobs:
          GITHUB_CONTEXT: ${{ toJson(github) }}
        run: echo "$GITHUB_CONTEXT"
      - name: Checkout repository
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@v4
        with:
          sparse-checkout: |
            .github/scripts/sync_labels.py
--- a/.github/workflows/trigger-scylla-ci.yaml
+++ b/.github/workflows/trigger-scylla-ci.yaml
@@ -9,57 +9,16 @@ on:

 jobs:
  trigger-jenkins:
-    if: (github.event_name == 'issue_comment' && github.event.comment.user.login != 'scylladbbot') || github.event.label.name == 'conflicts'
+    if: (github.event.comment.user.login != 'scylladbbot' && contains(github.event.comment.body, '@scylladbbot') && contains(github.event.comment.body, 'trigger-ci')) || github.event.label.name == 'conflicts'
    runs-on: ubuntu-latest
    steps:
-      - name: Verify Org Membership
-        id: verify_author
-        env:
-          EVENT_NAME: ${{ github.event_name }}
-          PR_AUTHOR: ${{ github.event.pull_request.user.login }}
-          PR_ASSOCIATION: ${{ github.event.pull_request.author_association }}
-          COMMENT_AUTHOR: ${{ github.event.comment.user.login }}
-          COMMENT_ASSOCIATION: ${{ github.event.comment.author_association }}
-        shell: bash
-        run: |
-          if [[ "$EVENT_NAME" == "pull_request_target" ]]; then
-            AUTHOR="$PR_AUTHOR"
-            ASSOCIATION="$PR_ASSOCIATION"
-          else
-            AUTHOR="$COMMENT_AUTHOR"
-            ASSOCIATION="$COMMENT_ASSOCIATION"
-          fi
-          ORG="scylladb"
-          if gh api "/orgs/${ORG}/members/${AUTHOR}" --silent 2>/dev/null; then
-            echo "member=true" >> $GITHUB_OUTPUT
-          else
-            echo "::warning::${AUTHOR} is not a member of ${ORG}; skipping CI trigger."
-            echo "member=false" >> $GITHUB_OUTPUT
-          fi
-
-      - name: Validate Comment Trigger
-        if: github.event_name == 'issue_comment'
-        id: verify_comment
-        env:
-          COMMENT_BODY: ${{ github.event.comment.body }}
-        shell: bash
-        run: |
-          CLEAN_BODY=$(echo "$COMMENT_BODY" | grep -v '^[[:space:]]*>')
-
-          if echo "$CLEAN_BODY" | grep -qi '@scylladbbot' && echo "$CLEAN_BODY" | grep -qi 'trigger-ci'; then
-            echo "trigger=true" >> $GITHUB_OUTPUT
-          else
-            echo "trigger=false" >> $GITHUB_OUTPUT
-          fi
-
      - name: Trigger Scylla-CI-Route Jenkins Job
-        if: steps.verify_author.outputs.member == 'true' && (github.event_name == 'pull_request_target' || steps.verify_comment.outputs.trigger == 'true')
        env:
          JENKINS_USER: ${{ secrets.JENKINS_USERNAME }}
          JENKINS_API_TOKEN: ${{ secrets.JENKINS_TOKEN }}
          JENKINS_URL: "https://jenkins.scylladb.com"
-          PR_NUMBER: "${{ github.event.issue.number || github.event.pull_request.number }}"
-          PR_REPO_NAME: "${{ github.event.repository.full_name }}"
        run: |
+          PR_NUMBER=${{ github.event.issue.number }}
+          PR_REPO_NAME=${{ github.event.repository.full_name }}
          curl -X POST "$JENKINS_URL/job/releng/job/Scylla-CI-Route/buildWithParameters?PR_NUMBER=$PR_NUMBER&PR_REPO_NAME=$PR_REPO_NAME" \
-            --user "$JENKINS_USER:$JENKINS_API_TOKEN" --fail
+          --user "$JENKINS_USER:$JENKINS_API_TOKEN" --fail -i -v
--- a/.github/workflows/trigger_ci.yaml
+++ b/.github/workflows/trigger_ci.yaml
@@ -5,10 +5,7 @@ on:
    types: [opened, reopened, synchronize]
  issue_comment:
    types: [created]
-
-env:
-  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
-
+    
 jobs:
  trigger-ci:
    runs-on: ubuntu-latest
@@ -18,7 +15,7 @@ jobs:
          GITHUB_CONTEXT: ${{ toJson(github) }}
        run: echo "$GITHUB_CONTEXT"
      - name: Checkout PR code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@v3
        with:
          fetch-depth: 0  # Needed to access full history
          ref: ${{ github.event.pull_request.head.ref }}
--- a/.github/workflows/urgent_issue_reminder.yml
+++ b/.github/workflows/urgent_issue_reminder.yml
@@ -4,16 +4,13 @@ on:
  schedule:
    - cron: '10 8 * * *' # Runs daily at 8 AM

-env:
-  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
-
 jobs:
  reminder:
    runs-on: ubuntu-latest

    steps:
    - name: Send reminders
-      uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
+      uses: actions/github-script@v7
      with:
        script: |
          const labelFilters = ['P0', 'P1', 'Field-Tier1','status/release blocker', 'status/regression']; 
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,6 +1,6 @@
 [submodule "seastar"]
 	path = seastar
-	url = ../scylla-seastar
+	url = ../seastar
 	ignore = dirty
 [submodule "swagger-ui"]
 	path = swagger-ui
--- a/2
+++ b/2
@@ -78,7 +78,7 @@ fi

 # Default scylla product/version tags
 PRODUCT=scylla
-VERSION=2026.1.3
+VERSION=2026.1.0-dev

 if test -f version
 then
--- a/alternator/executor.cc
+++ b/alternator/executor.cc
@@ -17,7 +17,6 @@
 #include "auth/service.hh"
 #include "db/config.hh"
 #include "db/view/view_build_status.hh"
-#include "locator/tablets.hh"
 #include "mutation/tombstone.hh"
 #include "locator/abstract_replication_strategy.hh"
 #include "utils/log.hh"
@@ -1876,34 +1875,23 @@ future<executor::request_return_type> executor::create_table_on_shard0(service::
        auto ts = group0_guard.write_timestamp();
        utils::chunked_vector<mutation> schema_mutations;
        auto ksm = create_keyspace_metadata(keyspace_name, _proxy, _gossiper, ts, tags_map, _proxy.features(), tablets_mode);
-        locator::replication_strategy_params params(ksm->strategy_options(), ksm->initial_tablets(), ksm->consistency_option());
-        const auto& topo = _proxy.local_db().get_token_metadata().get_topology();
-        auto rs = locator::abstract_replication_strategy::create_replication_strategy(ksm->strategy_name(), params, topo);
        // Alternator Streams doesn't yet work when the table uses tablets (#23838)
        if (stream_specification && stream_specification->IsObject()) {
            auto stream_enabled = rjson::find(*stream_specification, "StreamEnabled");
            if (stream_enabled && stream_enabled->IsBool() && stream_enabled->GetBool()) {
+                locator::replication_strategy_params params(ksm->strategy_options(), ksm->initial_tablets(), ksm->consistency_option());
+                const auto& topo = _proxy.local_db().get_token_metadata().get_topology();
+                auto rs = locator::abstract_replication_strategy::create_replication_strategy(ksm->strategy_name(), params, topo);
                if (rs->uses_tablets()) {
                    co_return api_error::validation("Streams not yet supported on a table using tablets (issue #23838). "
                    "If you want to use streams, create a table with vnodes by setting the tag 'system:initial_tablets' set to 'none'.");
                }
            }
        }
-        // Creating an index in tablets mode requires the keyspace to be RF-rack-valid.
-        // GSI and LSI indexes are based on materialized views which require RF-rack-validity to avoid consistency issues.
-        if (!view_builders.empty() || _proxy.data_dictionary().get_config().rf_rack_valid_keyspaces()) {
-            try {
-                locator::assert_rf_rack_valid_keyspace(keyspace_name, _proxy.local_db().get_token_metadata_ptr(), *rs);
-            } catch (const std::invalid_argument& ex) {
-                if (!view_builders.empty()) {
-                    co_return api_error::validation(fmt::format("GlobalSecondaryIndexes and LocalSecondaryIndexes on a table "
-                        "using tablets require the number of racks in the cluster to be either 1 or 3"));
-                } else {
-                    co_return api_error::validation(fmt::format("Cannot create table '{}' with tablets: the configuration "
-                        "option 'rf_rack_valid_keyspaces' is enabled, which enforces that tables using tablets can only be created in clusters "
-                        "that have either 1 or 3 racks", table_name));
-                }
-            }
+        // Creating an index in tablets mode requires the rf_rack_valid_keyspaces option to be enabled.
+        // GSI and LSI indexes are based on materialized views which require this option to avoid consistency issues.
+        if (!view_builders.empty() && ksm->uses_tablets() && !_proxy.data_dictionary().get_config().rf_rack_valid_keyspaces()) {
+            co_return api_error::validation("GlobalSecondaryIndexes and LocalSecondaryIndexes with tablets require the rf_rack_valid_keyspaces option to be enabled.");
        }
        try {
            schema_mutations = service::prepare_new_keyspace_announcement(_proxy.local_db(), ksm, ts);
@@ -2126,12 +2114,9 @@ future<executor::request_return_type> executor::update_table(client_state& clien
                            co_return api_error::validation(fmt::format(
                                "LSI {} already exists in table {}, can't use same name for GSI", index_name, table_name));
                        }
-                        try {
-                            locator::assert_rf_rack_valid_keyspace(keyspace_name, p.local().local_db().get_token_metadata_ptr(),
-                                    p.local().local_db().find_keyspace(keyspace_name).get_replication_strategy());
-                        } catch (const std::invalid_argument& ex) {
-                            co_return api_error::validation(fmt::format("GlobalSecondaryIndexes on a table "
-                                "using tablets require the number of racks in the cluster to be either 1 or 3"));
+                        if (p.local().local_db().find_keyspace(keyspace_name).get_replication_strategy().uses_tablets() &&
+                                !p.local().data_dictionary().get_config().rf_rack_valid_keyspaces()) {
+                            co_return api_error::validation("GlobalSecondaryIndexes with tablets require the rf_rack_valid_keyspaces option to be enabled.");
                        }

                        elogger.trace("Adding GSI {}", index_name);
@@ -3464,11 +3449,7 @@ future<executor::request_return_type> executor::batch_write_item(client_state& c
    if (should_add_wcu) {
        rjson::add(ret, "ConsumedCapacity", std::move(consumed_capacity));
    }
-    auto duration = std::chrono::steady_clock::now() - start_time;
-    _stats.api_operations.batch_write_item_latency.mark(duration);
-    for (const auto& w : per_table_wcu) {
-        w.first->api_operations.batch_write_item_latency.mark(duration);
-    }
+    _stats.api_operations.batch_write_item_latency.mark(std::chrono::steady_clock::now() - start_time);
    co_return rjson::print(std::move(ret));
 }

@@ -4979,12 +4960,7 @@ future<executor::request_return_type> executor::batch_get_item(client_state& cli
    if (!some_succeeded && eptr) {
        co_await coroutine::return_exception_ptr(std::move(eptr));
    }
-    auto duration = std::chrono::steady_clock::now() - start_time;
-    _stats.api_operations.batch_get_item_latency.mark(duration);
-    for (const table_requests& rs : requests) {
-        lw_shared_ptr<stats> per_table_stats = get_stats_from_schema(_proxy, *rs.schema);
-        per_table_stats->api_operations.batch_get_item_latency.mark(duration);
-    }
+    _stats.api_operations.batch_get_item_latency.mark(std::chrono::steady_clock::now() - start_time);
    if (is_big(response)) {
        co_return make_streamed(std::move(response));
    } else {
--- a/alternator/ttl.cc
+++ b/alternator/ttl.cc
@@ -767,7 +767,7 @@ static future<bool> scan_table(
                // by tasking another node to take over scanning of the dead node's primary
                // ranges. What we do here is that this node will also check expiration
                // on its *secondary* ranges - but only those whose primary owner is down.
-                auto tablet_secondary_replica = tablet_map.get_secondary_replica(*tablet, erm->get_topology()); // throws if no secondary replica
+                auto tablet_secondary_replica = tablet_map.get_secondary_replica(*tablet); // throws if no secondary replica
                if (tablet_secondary_replica.host == my_host_id && tablet_secondary_replica.shard == this_shard_id()) {
                    if (!gossiper.is_alive(tablet_primary_replica.host)) {
                        co_await scan_tablet(*tablet, proxy, abort_source, page_sem, expiration_stats, scan_ctx, tablet_map);
--- a/api/api-doc/storage_service.json
+++ b/api/api-doc/storage_service.json
@@ -3051,7 +3051,7 @@
                  },
                  {
                     "name":"incremental_mode",
-                     "description":"Set the incremental repair mode. Can be 'disabled', 'incremental', or 'full'. 'incremental': The incremental repair logic is enabled. Unrepaired sstables will be included for repair. Repaired sstables will be skipped. The incremental repair states will be updated after repair. 'full': The incremental repair logic is enabled. Both repaired and unrepaired sstables will be included for repair. The incremental repair states will be updated after repair. 'disabled': The incremental repair logic is disabled completely. The incremental repair states, e.g., repaired_at in sstables and sstables_repaired_at in the system.tablets table, will not be updated after repair. When the option is not provided, it defaults to incremental mode.",
+                     "description":"Set the incremental repair mode. Can be 'disabled', 'incremental', or 'full'. 'incremental': The incremental repair logic is enabled. Unrepaired sstables will be included for repair. Repaired sstables will be skipped. The incremental repair states will be updated after repair. 'full': The incremental repair logic is enabled. Both repaired and unrepaired sstables will be included for repair. The incremental repair states will be updated after repair. 'disabled': The incremental repair logic is disabled completely. The incremental repair states, e.g., repaired_at in sstables and sstables_repaired_at in the system.tablets table, will not be updated after repair. When the option is not provided, it defaults to 'disabled' mode.",
                     "required":false,
                     "allowMultiple":false,
                     "type":"string",
--- a/api/storage_service.cc
+++ b/api/storage_service.cc
@@ -515,15 +515,6 @@ void set_sstables_loader(http_context& ctx, routes& r, sharded<sstables_loader>&
        auto sstables = parsed.GetArray() |
            std::views::transform([] (const auto& s) { return sstring(rjson::to_string_view(s)); }) |
            std::ranges::to<std::vector>();
-        apilog.info("Restore invoked with following parameters: keyspace={}, table={}, endpoint={}, bucket={}, prefix={}, sstables_count={}, scope={}, primary_replica_only={}",
-                    keyspace,
-                    table,
-                    endpoint,
-                    bucket,
-                    prefix,
-                    sstables.size(),
-                    scope,
-                    primary_replica_only);
        auto task_id = co_await sst_loader.local().download_new_sstables(keyspace, table, prefix, std::move(sstables), endpoint, bucket, scope, primary_replica_only);
        co_return json::json_return_type(fmt::to_string(task_id));
    });
@@ -893,9 +884,7 @@ rest_exclude_node(sharded<service::storage_service>& ss, std::unique_ptr<http::r
    }

    apilog.info("exclude_node: hosts={}", hosts);
-    co_await ss.local().run_with_no_api_lock([hosts = std::move(hosts)] (service::storage_service& ss) {
-        return ss.mark_excluded(hosts);
-    });
+    co_await ss.local().mark_excluded(hosts);
    co_return json_void();
 }

@@ -1805,100 +1794,84 @@ rest_bind(FuncType func, BindArgs&... args) {
    return std::bind_front(func, std::ref(args)...);
 }

-// Hold the storage_service async gate for the duration of async REST
-// handlers so stop() drains in-flight requests before teardown.
-// Synchronous handlers don't yield and need no gate.
-static seastar::httpd::future_json_function
-gated(sharded<service::storage_service>& ss, seastar::httpd::future_json_function fn) {
-    return [fn = std::move(fn), &ss](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
-        auto holder = ss.local().hold_async_gate();
-        co_return co_await fn(std::move(req));
-    };
-}
-
-static seastar::httpd::json_request_function
-gated(sharded<service::storage_service>&, seastar::httpd::json_request_function fn) {
-    return fn;
-}
-
 void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_service>& ss, service::raft_group0_client& group0_client) {
-    ss::get_token_endpoint.set(r, gated(ss, rest_bind(rest_get_token_endpoint, ctx, ss)));
-    ss::toppartitions_generic.set(r, gated(ss, rest_bind(rest_toppartitions_generic, ctx)));
-    ss::get_release_version.set(r, gated(ss, rest_bind(rest_get_release_version, ss)));
-    ss::get_scylla_release_version.set(r, gated(ss, rest_bind(rest_get_scylla_release_version, ss)));
-    ss::get_schema_version.set(r, gated(ss, rest_bind(rest_get_schema_version, ss)));
-    ss::get_range_to_endpoint_map.set(r, gated(ss, rest_bind(rest_get_range_to_endpoint_map, ctx, ss)));
-    ss::get_pending_range_to_endpoint_map.set(r, gated(ss, rest_bind(rest_get_pending_range_to_endpoint_map, ctx)));
-    ss::describe_ring.set(r, gated(ss, rest_bind(rest_describe_ring, ctx, ss)));
-    ss::get_current_generation_number.set(r, gated(ss, rest_bind(rest_get_current_generation_number, ss)));
-    ss::get_natural_endpoints.set(r, gated(ss, rest_bind(rest_get_natural_endpoints, ctx, ss)));
-    ss::get_natural_endpoints_v2.set(r, gated(ss, rest_bind(rest_get_natural_endpoints_v2, ctx, ss)));
-    ss::cdc_streams_check_and_repair.set(r, gated(ss, rest_bind(rest_cdc_streams_check_and_repair, ss)));
-    ss::cleanup_all.set(r, gated(ss, rest_bind(rest_cleanup_all, ctx, ss)));
-    ss::reset_cleanup_needed.set(r, gated(ss, rest_bind(rest_reset_cleanup_needed, ctx, ss)));
-    ss::force_flush.set(r, gated(ss, rest_bind(rest_force_flush, ctx)));
-    ss::force_keyspace_flush.set(r, gated(ss, rest_bind(rest_force_keyspace_flush, ctx)));
-    ss::decommission.set(r, gated(ss, rest_bind(rest_decommission, ss)));
-    ss::move.set(r, gated(ss, rest_bind(rest_move, ss)));
-    ss::remove_node.set(r, gated(ss, rest_bind(rest_remove_node, ss)));
-    ss::exclude_node.set(r, gated(ss, rest_bind(rest_exclude_node, ss)));
-    ss::get_removal_status.set(r, gated(ss, rest_bind(rest_get_removal_status, ss)));
-    ss::force_remove_completion.set(r, gated(ss, rest_bind(rest_force_remove_completion, ss)));
-    ss::set_logging_level.set(r, gated(ss, rest_bind(rest_set_logging_level)));
-    ss::get_logging_levels.set(r, gated(ss, rest_bind(rest_get_logging_levels)));
-    ss::get_operation_mode.set(r, gated(ss, rest_bind(rest_get_operation_mode, ss)));
-    ss::is_starting.set(r, gated(ss, rest_bind(rest_is_starting, ss)));
-    ss::get_drain_progress.set(r, gated(ss, rest_bind(rest_get_drain_progress, ss)));
-    ss::drain.set(r, gated(ss, rest_bind(rest_drain, ss)));
-    ss::stop_gossiping.set(r, gated(ss, rest_bind(rest_stop_gossiping, ss)));
-    ss::start_gossiping.set(r, gated(ss, rest_bind(rest_start_gossiping, ss)));
-    ss::is_gossip_running.set(r, gated(ss, rest_bind(rest_is_gossip_running, ss)));
-    ss::stop_daemon.set(r, gated(ss, rest_bind(rest_stop_daemon)));
-    ss::is_initialized.set(r, gated(ss, rest_bind(rest_is_initialized, ss)));
-    ss::join_ring.set(r, gated(ss, rest_bind(rest_join_ring)));
-    ss::is_joined.set(r, gated(ss, rest_bind(rest_is_joined, ss)));
-    ss::is_incremental_backups_enabled.set(r, gated(ss, rest_bind(rest_is_incremental_backups_enabled, ctx)));
-    ss::set_incremental_backups_enabled.set(r, gated(ss, rest_bind(rest_set_incremental_backups_enabled, ctx)));
-    ss::rebuild.set(r, gated(ss, rest_bind(rest_rebuild, ss)));
-    ss::bulk_load.set(r, gated(ss, rest_bind(rest_bulk_load)));
-    ss::bulk_load_async.set(r, gated(ss, rest_bind(rest_bulk_load_async)));
-    ss::reschedule_failed_deletions.set(r, gated(ss, rest_bind(rest_reschedule_failed_deletions)));
-    ss::sample_key_range.set(r, gated(ss, rest_bind(rest_sample_key_range)));
-    ss::reset_local_schema.set(r, gated(ss, rest_bind(rest_reset_local_schema, ss)));
-    ss::set_trace_probability.set(r, gated(ss, rest_bind(rest_set_trace_probability)));
-    ss::get_trace_probability.set(r, gated(ss, rest_bind(rest_get_trace_probability)));
-    ss::get_slow_query_info.set(r, gated(ss, rest_bind(rest_get_slow_query_info)));
-    ss::set_slow_query.set(r, gated(ss, rest_bind(rest_set_slow_query)));
-    ss::deliver_hints.set(r, gated(ss, rest_bind(rest_deliver_hints)));
-    ss::get_cluster_name.set(r, gated(ss, rest_bind(rest_get_cluster_name, ss)));
-    ss::get_partitioner_name.set(r, gated(ss, rest_bind(rest_get_partitioner_name, ss)));
-    ss::get_tombstone_warn_threshold.set(r, gated(ss, rest_bind(rest_get_tombstone_warn_threshold)));
-    ss::set_tombstone_warn_threshold.set(r, gated(ss, rest_bind(rest_set_tombstone_warn_threshold)));
-    ss::get_tombstone_failure_threshold.set(r, gated(ss, rest_bind(rest_get_tombstone_failure_threshold)));
-    ss::set_tombstone_failure_threshold.set(r, gated(ss, rest_bind(rest_set_tombstone_failure_threshold)));
-    ss::get_batch_size_failure_threshold.set(r, gated(ss, rest_bind(rest_get_batch_size_failure_threshold)));
-    ss::set_batch_size_failure_threshold.set(r, gated(ss, rest_bind(rest_set_batch_size_failure_threshold)));
-    ss::set_hinted_handoff_throttle_in_kb.set(r, gated(ss, rest_bind(rest_set_hinted_handoff_throttle_in_kb)));
-    ss::get_exceptions.set(r, gated(ss, rest_bind(rest_get_exceptions, ss)));
-    ss::get_total_hints_in_progress.set(r, gated(ss, rest_bind(rest_get_total_hints_in_progress)));
-    ss::get_total_hints.set(r, gated(ss, rest_bind(rest_get_total_hints)));
-    ss::get_ownership.set(r, gated(ss, rest_bind(rest_get_ownership, ctx, ss)));
-    ss::get_effective_ownership.set(r, gated(ss, rest_bind(rest_get_effective_ownership, ctx, ss)));
-    ss::retrain_dict.set(r, gated(ss, rest_bind(rest_retrain_dict, ctx, ss, group0_client)));
-    ss::estimate_compression_ratios.set(r, gated(ss, rest_bind(rest_estimate_compression_ratios, ctx, ss)));
-    ss::sstable_info.set(r, gated(ss, rest_bind(rest_sstable_info, ctx)));
-    ss::reload_raft_topology_state.set(r, gated(ss, rest_bind(rest_reload_raft_topology_state, ss, group0_client)));
-    ss::upgrade_to_raft_topology.set(r, gated(ss, rest_bind(rest_upgrade_to_raft_topology, ss)));
-    ss::raft_topology_upgrade_status.set(r, gated(ss, rest_bind(rest_raft_topology_upgrade_status, ss)));
-    ss::raft_topology_get_cmd_status.set(r, gated(ss, rest_bind(rest_raft_topology_get_cmd_status, ss)));
-    ss::move_tablet.set(r, gated(ss, rest_bind(rest_move_tablet, ctx, ss)));
-    ss::add_tablet_replica.set(r, gated(ss, rest_bind(rest_add_tablet_replica, ctx, ss)));
-    ss::del_tablet_replica.set(r, gated(ss, rest_bind(rest_del_tablet_replica, ctx, ss)));
-    ss::repair_tablet.set(r, gated(ss, rest_bind(rest_repair_tablet, ctx, ss)));
-    ss::tablet_balancing_enable.set(r, gated(ss, rest_bind(rest_tablet_balancing_enable, ss)));
-    ss::quiesce_topology.set(r, gated(ss, rest_bind(rest_quiesce_topology, ss)));
-    sp::get_schema_versions.set(r, gated(ss, rest_bind(rest_get_schema_versions, ss)));
-    ss::drop_quarantined_sstables.set(r, gated(ss, rest_bind(rest_drop_quarantined_sstables, ctx, ss)));
+    ss::get_token_endpoint.set(r, rest_bind(rest_get_token_endpoint, ctx, ss));
+    ss::toppartitions_generic.set(r, rest_bind(rest_toppartitions_generic, ctx));
+    ss::get_release_version.set(r, rest_bind(rest_get_release_version, ss));
+    ss::get_scylla_release_version.set(r, rest_bind(rest_get_scylla_release_version, ss));
+    ss::get_schema_version.set(r, rest_bind(rest_get_schema_version, ss));
+    ss::get_range_to_endpoint_map.set(r, rest_bind(rest_get_range_to_endpoint_map, ctx, ss));
+    ss::get_pending_range_to_endpoint_map.set(r, rest_bind(rest_get_pending_range_to_endpoint_map, ctx));
+    ss::describe_ring.set(r, rest_bind(rest_describe_ring, ctx, ss));
+    ss::get_current_generation_number.set(r, rest_bind(rest_get_current_generation_number, ss));
+    ss::get_natural_endpoints.set(r, rest_bind(rest_get_natural_endpoints, ctx, ss));
+    ss::get_natural_endpoints_v2.set(r, rest_bind(rest_get_natural_endpoints_v2, ctx, ss));
+    ss::cdc_streams_check_and_repair.set(r, rest_bind(rest_cdc_streams_check_and_repair, ss));
+    ss::cleanup_all.set(r, rest_bind(rest_cleanup_all, ctx, ss));
+    ss::reset_cleanup_needed.set(r, rest_bind(rest_reset_cleanup_needed, ctx, ss));
+    ss::force_flush.set(r, rest_bind(rest_force_flush, ctx));
+    ss::force_keyspace_flush.set(r, rest_bind(rest_force_keyspace_flush, ctx));
+    ss::decommission.set(r, rest_bind(rest_decommission, ss));
+    ss::move.set(r, rest_bind(rest_move, ss));
+    ss::remove_node.set(r, rest_bind(rest_remove_node, ss));
+    ss::exclude_node.set(r, rest_bind(rest_exclude_node, ss));
+    ss::get_removal_status.set(r, rest_bind(rest_get_removal_status, ss));
+    ss::force_remove_completion.set(r, rest_bind(rest_force_remove_completion, ss));
+    ss::set_logging_level.set(r, rest_bind(rest_set_logging_level));
+    ss::get_logging_levels.set(r, rest_bind(rest_get_logging_levels));
+    ss::get_operation_mode.set(r, rest_bind(rest_get_operation_mode, ss));
+    ss::is_starting.set(r, rest_bind(rest_is_starting, ss));
+    ss::get_drain_progress.set(r, rest_bind(rest_get_drain_progress, ss));
+    ss::drain.set(r, rest_bind(rest_drain, ss));
+    ss::stop_gossiping.set(r, rest_bind(rest_stop_gossiping, ss));
+    ss::start_gossiping.set(r, rest_bind(rest_start_gossiping, ss));
+    ss::is_gossip_running.set(r, rest_bind(rest_is_gossip_running, ss));
+    ss::stop_daemon.set(r, rest_bind(rest_stop_daemon));
+    ss::is_initialized.set(r, rest_bind(rest_is_initialized, ss));
+    ss::join_ring.set(r, rest_bind(rest_join_ring));
+    ss::is_joined.set(r, rest_bind(rest_is_joined, ss));
+    ss::is_incremental_backups_enabled.set(r, rest_bind(rest_is_incremental_backups_enabled, ctx));
+    ss::set_incremental_backups_enabled.set(r, rest_bind(rest_set_incremental_backups_enabled, ctx));
+    ss::rebuild.set(r, rest_bind(rest_rebuild, ss));
+    ss::bulk_load.set(r, rest_bind(rest_bulk_load));
+    ss::bulk_load_async.set(r, rest_bind(rest_bulk_load_async));
+    ss::reschedule_failed_deletions.set(r, rest_bind(rest_reschedule_failed_deletions));
+    ss::sample_key_range.set(r, rest_bind(rest_sample_key_range));
+    ss::reset_local_schema.set(r, rest_bind(rest_reset_local_schema, ss));
+    ss::set_trace_probability.set(r, rest_bind(rest_set_trace_probability));
+    ss::get_trace_probability.set(r, rest_bind(rest_get_trace_probability));
+    ss::get_slow_query_info.set(r, rest_bind(rest_get_slow_query_info));
+    ss::set_slow_query.set(r, rest_bind(rest_set_slow_query));
+    ss::deliver_hints.set(r, rest_bind(rest_deliver_hints));
+    ss::get_cluster_name.set(r, rest_bind(rest_get_cluster_name, ss));
+    ss::get_partitioner_name.set(r, rest_bind(rest_get_partitioner_name, ss));
+    ss::get_tombstone_warn_threshold.set(r, rest_bind(rest_get_tombstone_warn_threshold));
+    ss::set_tombstone_warn_threshold.set(r, rest_bind(rest_set_tombstone_warn_threshold));
+    ss::get_tombstone_failure_threshold.set(r, rest_bind(rest_get_tombstone_failure_threshold));
+    ss::set_tombstone_failure_threshold.set(r, rest_bind(rest_set_tombstone_failure_threshold));
+    ss::get_batch_size_failure_threshold.set(r, rest_bind(rest_get_batch_size_failure_threshold));
+    ss::set_batch_size_failure_threshold.set(r, rest_bind(rest_set_batch_size_failure_threshold));
+    ss::set_hinted_handoff_throttle_in_kb.set(r, rest_bind(rest_set_hinted_handoff_throttle_in_kb));
+    ss::get_exceptions.set(r, rest_bind(rest_get_exceptions, ss));
+    ss::get_total_hints_in_progress.set(r, rest_bind(rest_get_total_hints_in_progress));
+    ss::get_total_hints.set(r, rest_bind(rest_get_total_hints));
+    ss::get_ownership.set(r, rest_bind(rest_get_ownership, ctx, ss));
+    ss::get_effective_ownership.set(r, rest_bind(rest_get_effective_ownership, ctx, ss));
+    ss::retrain_dict.set(r, rest_bind(rest_retrain_dict, ctx, ss, group0_client));
+    ss::estimate_compression_ratios.set(r, rest_bind(rest_estimate_compression_ratios, ctx, ss));
+    ss::sstable_info.set(r, rest_bind(rest_sstable_info, ctx));
+    ss::reload_raft_topology_state.set(r, rest_bind(rest_reload_raft_topology_state, ss, group0_client));
+    ss::upgrade_to_raft_topology.set(r, rest_bind(rest_upgrade_to_raft_topology, ss));
+    ss::raft_topology_upgrade_status.set(r, rest_bind(rest_raft_topology_upgrade_status, ss));
+    ss::raft_topology_get_cmd_status.set(r, rest_bind(rest_raft_topology_get_cmd_status, ss));
+    ss::move_tablet.set(r, rest_bind(rest_move_tablet, ctx, ss));
+    ss::add_tablet_replica.set(r, rest_bind(rest_add_tablet_replica, ctx, ss));
+    ss::del_tablet_replica.set(r, rest_bind(rest_del_tablet_replica, ctx, ss));
+    ss::repair_tablet.set(r, rest_bind(rest_repair_tablet, ctx, ss));
+    ss::tablet_balancing_enable.set(r, rest_bind(rest_tablet_balancing_enable, ss));
+    ss::quiesce_topology.set(r, rest_bind(rest_quiesce_topology, ss));
+    sp::get_schema_versions.set(r, rest_bind(rest_get_schema_versions, ss));
+    ss::drop_quarantined_sstables.set(r, rest_bind(rest_drop_quarantined_sstables, ctx, ss));
 }

 void unset_storage_service(http_context& ctx, routes& r) {
@@ -2043,14 +2016,12 @@ void set_snapshot(http_context& ctx, routes& r, sharded<db::snapshot_ctl>& snap_
        auto tag = req->get_query_param("tag");
        auto column_families = split(req->get_query_param("cf"), ",");
        auto sfopt = req->get_query_param("sf");
-        db::snapshot_options opts = {
-            .skip_flush = strcasecmp(sfopt.c_str(), "true") == 0,
-        };
+        auto sf = db::snapshot_ctl::skip_flush(strcasecmp(sfopt.c_str(), "true") == 0);

        std::vector<sstring> keynames = split(req->get_query_param("kn"), ",");
        try {
            if (column_families.empty()) {
-                co_await snap_ctl.local().take_snapshot(tag, keynames, opts);
+                co_await snap_ctl.local().take_snapshot(tag, keynames, sf);
            } else {
                if (keynames.empty()) {
                    throw httpd::bad_param_exception("The keyspace of column families must be specified");
@@ -2058,7 +2029,7 @@ void set_snapshot(http_context& ctx, routes& r, sharded<db::snapshot_ctl>& snap_
                if (keynames.size() > 1) {
                    throw httpd::bad_param_exception("Only one keyspace allowed when specifying a column family");
                }
-                co_await snap_ctl.local().take_column_family_snapshot(keynames[0], column_families, tag, opts);
+                co_await snap_ctl.local().take_column_family_snapshot(keynames[0], column_families, tag, sf);
            }
            co_return json_void();
        } catch (...) {
@@ -2093,8 +2064,7 @@ void set_snapshot(http_context& ctx, routes& r, sharded<db::snapshot_ctl>& snap_
        auto info = parse_scrub_options(ctx, std::move(req));

        if (!info.snapshot_tag.empty()) {
-            db::snapshot_options opts = {.skip_flush = false};
-            co_await snap_ctl.local().take_column_family_snapshot(info.keyspace, info.column_families, info.snapshot_tag, opts);
+            co_await snap_ctl.local().take_column_family_snapshot(info.keyspace, info.column_families, info.snapshot_tag, db::snapshot_ctl::skip_flush::no);
        }

        compaction::compaction_stats stats;
--- a/api/tasks.cc
+++ b/api/tasks.cc
@@ -146,8 +146,7 @@ void set_tasks_compaction_module(http_context& ctx, routes& r, sharded<service::
        auto info = parse_scrub_options(ctx, std::move(req));

        if (!info.snapshot_tag.empty()) {
-            db::snapshot_options opts = {.skip_flush = false};
-            co_await snap_ctl.local().take_column_family_snapshot(info.keyspace, info.column_families, info.snapshot_tag, opts);
+            co_await snap_ctl.local().take_column_family_snapshot(info.keyspace, info.column_families, info.snapshot_tag, db::snapshot_ctl::skip_flush::no);
        }

        auto& compaction_module = db.local().get_compaction_manager().get_task_manager_module();
--- a/audit/audit_syslog_storage_helper.cc
+++ b/audit/audit_syslog_storage_helper.cc
@@ -53,10 +53,10 @@ static std::string json_escape(std::string_view str) {

 }

-future<> audit_syslog_storage_helper::syslog_send_helper(temporary_buffer<char> msg) {
+future<> audit_syslog_storage_helper::syslog_send_helper(const sstring& msg) {
    try {
        auto lock = co_await get_units(_semaphore, 1, std::chrono::hours(1));
-        co_await _sender.send(_syslog_address, std::span(&msg, 1));
+        co_await _sender.send(_syslog_address, net::packet{msg.data(), msg.size()});
    }
    catch (const std::exception& e) {
        auto error_msg = seastar::format(
@@ -90,7 +90,7 @@ future<> audit_syslog_storage_helper::start(const db::config& cfg) {
        co_return;
    }

-    co_await syslog_send_helper(temporary_buffer<char>::copy_of("Initializing syslog audit backend."));
+    co_await syslog_send_helper("Initializing syslog audit backend.");
 }

 future<> audit_syslog_storage_helper::stop() {
@@ -120,7 +120,7 @@ future<> audit_syslog_storage_helper::write(const audit_info* audit_info,
                                    audit_info->table(),
                                    username);

-    co_await syslog_send_helper(std::move(msg).release());
+    co_await syslog_send_helper(msg);
 }

 future<> audit_syslog_storage_helper::write_login(const sstring& username,
@@ -139,7 +139,7 @@ future<> audit_syslog_storage_helper::write_login(const sstring& username,
                                    client_ip,
                                    username);

-    co_await syslog_send_helper(std::move(msg).release());
+    co_await syslog_send_helper(msg); // pass the string itself: no strlen/copy via c_str(), no truncation at an embedded NUL
 }

 }
--- a/audit/audit_syslog_storage_helper.hh
+++ b/audit/audit_syslog_storage_helper.hh
@@ -26,7 +26,7 @@ class audit_syslog_storage_helper : public storage_helper {
    net::datagram_channel _sender;
    seastar::semaphore _semaphore;

-    future<> syslog_send_helper(seastar::temporary_buffer<char> msg);
+    future<> syslog_send_helper(const sstring& msg);
 public:
    explicit audit_syslog_storage_helper(cql3::query_processor&, service::migration_manager&);
    virtual ~audit_syslog_storage_helper();
--- a/auth/cache.cc
+++ b/auth/cache.cc
@@ -81,14 +81,24 @@ future<lw_shared_ptr<cache::role_record>> cache::fetch_role(const role_name_t& r
        static const sstring q = format("SELECT role, name, value FROM {}.{} WHERE role = ?", db::system_keyspace::NAME, ROLE_ATTRIBUTES_CF);
        auto rs = co_await fetch(q);
        for (const auto& r : *rs) {
-            if (!r.has("value")) {
-                continue;
-            }
            rec->attributes[r.get_as<sstring>("name")] =
                    r.get_as<sstring>("value");
            co_await coroutine::maybe_yield();
        }
    }
+    // permissions
+    {
+        static const sstring q = format("SELECT role, resource, permissions FROM {}.{} WHERE role = ?", db::system_keyspace::NAME, PERMISSIONS_CF);
+        auto rs = co_await fetch(q);
+        for (const auto& r : *rs) {
+            auto resource = r.get_as<sstring>("resource");
+            auto perms_strings = r.get_set<sstring>("permissions");
+            std::unordered_set<sstring> perms_set(perms_strings.begin(), perms_strings.end());
+            auto pset = permissions::from_strings(perms_set);
+            rec->permissions[std::move(resource)] = std::move(pset);
+            co_await coroutine::maybe_yield();
+        }
+    }
    co_return rec;
 }

--- a/auth/cache.hh
+++ b/auth/cache.hh
@@ -39,6 +39,7 @@ public:
        std::unordered_set<role_name_t> members;
        sstring salted_hash;
        std::unordered_map<sstring, sstring> attributes;
+        std::unordered_map<sstring, permission_set> permissions;
        version_tag_t version; // used for seamless cache reloads
    };

--- a/auth/default_authorizer.cc
+++ b/auth/default_authorizer.cc
@@ -174,11 +174,7 @@ default_authorizer::authorize(const role_or_anonymous& maybe_role, const resourc
    if (results->empty()) {
        co_return permissions::NONE;
    }
-    const auto& row = results->one();
-    if (!row.has(PERMISSIONS_NAME)) {
-        co_return permissions::NONE;
-    }
-    co_return permissions::from_strings(row.get_set<sstring>(PERMISSIONS_NAME));
+    co_return permissions::from_strings(results->one().get_set<sstring>(PERMISSIONS_NAME));
 }

 future<>
--- a/auth/ldap_role_manager.cc
+++ b/auth/ldap_role_manager.cc
@@ -32,8 +32,6 @@ namespace {

 logger mylog{"ldap_role_manager"}; // `log` is taken by math.

-constexpr std::string_view user_placeholder = "{USER}";
-
 struct url_desc_deleter {
    void operator()(LDAPURLDesc *p) {
        ldap_free_urldesc(p);
@@ -42,141 +40,9 @@ struct url_desc_deleter {

 using url_desc_ptr = std::unique_ptr<LDAPURLDesc, url_desc_deleter>;

-/// Escapes LDAP filter assertion value per RFC 4515 Section 3.
-/// The characters *, (, ), \, and NUL must be backslash-hex-escaped
-/// to prevent filter injection when interpolating untrusted input.
-sstring escape_filter_value(std::string_view value) {
-    size_t escapable_chars = 0;
-    for (unsigned char ch : value) {
-        switch (ch) {
-        case '*':
-        case '(':
-        case ')':
-        case '\\':
-        case '\0':
-            ++escapable_chars;
-            break;
-        default:
-            break;
-        }
-    }
-
-    if (escapable_chars == 0) {
-        return sstring(value);
-    }
-
-    sstring escaped(value.size() + escapable_chars * 2, 0);
-    size_t pos = 0;
-    for (unsigned char ch : value) {
-        switch (ch) {
-        case '*':
-            escaped[pos++] = '\\';
-            escaped[pos++] = '2';
-            escaped[pos++] = 'a';
-            break;
-        case '(':
-            escaped[pos++] = '\\';
-            escaped[pos++] = '2';
-            escaped[pos++] = '8';
-            break;
-        case ')':
-            escaped[pos++] = '\\';
-            escaped[pos++] = '2';
-            escaped[pos++] = '9';
-            break;
-        case '\\':
-            escaped[pos++] = '\\';
-            escaped[pos++] = '5';
-            escaped[pos++] = 'c';
-            break;
-        case '\0':
-            escaped[pos++] = '\\';
-            escaped[pos++] = '0';
-            escaped[pos++] = '0';
-            break;
-        default:
-            escaped[pos++] = static_cast<char>(ch);
-            break;
-        }
-    }
-
-    return escaped;
-}
-
-/// Percent-encodes characters that are not RFC 3986 "unreserved"
-/// (ALPHA / DIGIT / '-' / '.' / '_' / '~').
-///
-/// Uses explicit ASCII range checks instead of std::isalnum() because
-/// the latter is locale-dependent and could pass non-ASCII characters
-/// through unencoded under certain locale settings.
-///
-/// This is applied AFTER RFC 4515 filter escaping when the value is
-/// substituted into an LDAP URL.  It serves two purposes:
-///  1. Prevents URL-level metacharacters ('?', '#') from breaking
-///     the URL structure parsed by ldap_url_parse.
-///  2. Prevents percent-decoding (which ldap_url_parse performs on
-///     each component) from undoing the filter escaping, e.g. a
-///     literal "%2a" in the username would otherwise decode to '*'.
-sstring percent_encode_for_url(std::string_view value) {
-    static constexpr char hex[] = "0123456789ABCDEF";
-
-    size_t chars_to_encode = 0;
-    for (unsigned char ch : value) {
-        if (!((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || (ch >= '0' && ch <= '9')
-                || ch == '-' || ch == '.' || ch == '_' || ch == '~')) {
-            ++chars_to_encode;
-        }
-    }
-
-    if (chars_to_encode == 0) {
-        return sstring(value);
-    }
-
-    sstring encoded(value.size() + chars_to_encode * 2, 0);
-    size_t pos = 0;
-    for (unsigned char ch : value) {
-        if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || (ch >= '0' && ch <= '9')
-                || ch == '-' || ch == '.' || ch == '_' || ch == '~') {
-            encoded[pos++] = static_cast<char>(ch);
-        } else {
-            encoded[pos++] = '%';
-            encoded[pos++] = hex[ch >> 4];
-            encoded[pos++] = hex[ch & 0x0F];
-        }
-    }
-
-    return encoded;
-}
-
-/// Checks whether \p sentinel appears in any parsed URL component
-/// other than the filter (host, DN, attributes, extensions).
-bool sentinel_outside_filter(const LDAPURLDesc& desc, std::string_view sentinel) {
-    auto contains = [&](const char* field) {
-        return field && std::string_view(field).find(sentinel) != std::string_view::npos;
-    };
-    if (contains(desc.lud_host) || contains(desc.lud_dn)) {
-        return true;
-    }
-    if (desc.lud_attrs) {
-        for (int i = 0; desc.lud_attrs[i]; ++i) {
-            if (contains(desc.lud_attrs[i])) {
-                return true;
-            }
-        }
-    }
-    if (desc.lud_exts) {
-        for (int i = 0; desc.lud_exts[i]; ++i) {
-            if (contains(desc.lud_exts[i])) {
-                return true;
-            }
-        }
-    }
-    return false;
-}
-
-url_desc_ptr parse_url(const sstring& url) {
+url_desc_ptr parse_url(std::string_view url) {
    LDAPURLDesc *desc = nullptr;
-    if (ldap_url_parse(url.c_str(), &desc)) {
+    if (ldap_url_parse(sstring(url).c_str(), &desc)) { // string_view::data() need not be NUL-terminated; hand the C API a terminated copy
        mylog.error("error in ldap_url_parse({})", url);
    }
    return url_desc_ptr(desc);
@@ -249,7 +115,6 @@ const resource_set& ldap_role_manager::protected_resources() const {
 }

 future<> ldap_role_manager::start() {
-    validate_query_template();
    if (!parse_url(get_url("dummy-user"))) { // Just need host and port -- any user should do.
        return make_exception_future(
                std::runtime_error(fmt::format("error getting LDAP server address from template {}", _query_template)));
@@ -334,7 +199,7 @@ future<> ldap_role_manager::revoke(std::string_view, std::string_view, ::service
 }

 future<role_set> ldap_role_manager::query_granted(std::string_view grantee_name, recursive_role_query) {
-    const auto url = get_url(grantee_name);
+    const auto url = get_url(grantee_name); // get_url takes a string_view; .data() would re-measure the name with strlen
    auto desc = parse_url(url);
    if (!desc) {
        return make_exception_future<role_set>(std::runtime_error(format("Error parsing URL {}", url)));
@@ -466,46 +331,7 @@ future<> ldap_role_manager::remove_attribute(std::string_view role_name, std::st
 }

 sstring ldap_role_manager::get_url(std::string_view user) const {
-    // Two-layer encoding protects against injection:
-    // 1. RFC 4515 filter escaping neutralizes filter metacharacters (*, (, ), \, NUL)
-    // 2. URL percent-encoding prevents URL structure injection (?, #) and blocks
-    //    ldap_url_parse's percent-decoding from undoing the filter escaping (%2a -> *)
-    return boost::replace_all_copy(_query_template, user_placeholder,
-            percent_encode_for_url(escape_filter_value(user)));
-}
-
-void ldap_role_manager::validate_query_template() const {
-    if (_query_template.find(user_placeholder) == sstring::npos) {
-        return;
-    }
-
-    // Substitute {USER} with a sentinel and let ldap_url_parse tell us
-    // which URL component it landed in.  The sentinel is purely
-    // alphanumeric so it cannot affect URL parsing.
-    static constexpr std::string_view sentinel = "XLDAPSENTINELX";
-    sstring test_url = boost::replace_all_copy(_query_template, user_placeholder, sentinel);
-    auto desc = parse_url(test_url);
-    if (!desc) {
-        throw url_error(format("LDAP URL template is not a valid URL when {{USER}} is substituted: {}", _query_template));
-    }
-
-    // The sentinel must appear in the filter ...
-    if (!desc->lud_filter
-            || std::string_view(desc->lud_filter).find(sentinel) == std::string_view::npos) {
-        throw url_error(format(
-                "LDAP URL template places {{USER}} outside the filter component. "
-                "RFC 4515 filter escaping only protects the filter; other components "
-                "(e.g. the base DN) require different escaping and are not supported. "
-                "Template: {}", _query_template));
-    }
-    // ... and nowhere else (host, DN, attributes, extensions).
-    if (sentinel_outside_filter(*desc, sentinel)) {
-        throw url_error(format(
-                "LDAP URL template places {{USER}} outside the filter component. "
-                "RFC 4515 filter escaping only protects the filter; other components "
-                "(e.g. the host) require different escaping and are not supported. "
-                "Template: {}", _query_template));
-    }
+    return boost::replace_all_copy(_query_template, "{USER}", user);
 }

 future<std::vector<cql3::description>> ldap_role_manager::describe_role_grants() {
--- a/auth/ldap_role_manager.hh
+++ b/auth/ldap_role_manager.hh
@@ -107,9 +107,6 @@ class ldap_role_manager : public role_manager {
    /// Macro-expands _query_template, returning the result.
    sstring get_url(std::string_view user) const;

-    /// Validates that {USER}, if present, is used only in the LDAP filter component.
-    void validate_query_template() const;
-
    /// Used to auto-create roles returned by ldap.
    future<> create_role(std::string_view role_name);

--- a/auth/service.cc
+++ b/auth/service.cc
@@ -876,6 +876,22 @@ future<> migrate_to_auth_v2(db::system_keyspace& sys_ks, ::service::raft_group0_
                continue; // some tables might not have been created if they were not used
            }

+            // use longer than usual timeout as we scan the whole table
+            // but not infinite or very long as we want to fail reasonably fast
+            const auto t = 5min;
+            const timeout_config tc{t, t, t, t, t, t, t};
+            ::service::client_state cs(::service::client_state::internal_tag{}, tc);
+            ::service::query_state qs(cs, empty_service_permit());
+
+            auto rows = co_await qp.execute_internal(
+                    seastar::format("SELECT * FROM {}.{}", meta::legacy::AUTH_KS, cf_name),
+                    db::consistency_level::ALL,
+                    qs,
+                    {},
+                    cql3::query_processor::cache_internal::no);
+            if (rows->empty()) {
+                continue;
+            }
            std::vector<sstring> col_names;
            for (const auto& col : schema->all_columns()) {
                col_names.push_back(col.name_as_cql_string());
@@ -884,51 +900,30 @@ future<> migrate_to_auth_v2(db::system_keyspace& sys_ks, ::service::raft_group0_
            for (size_t i = 1; i < col_names.size(); ++i) {
                val_binders_str += ", ?";
            }
-
-            std::vector<mutation> collected;
-            // use longer than usual timeout as we scan the whole table
-            // but not infinite or very long as we want to fail reasonably fast
-            const auto t = 5min;
-            const timeout_config tc{t, t, t, t, t, t, t};
-            ::service::client_state cs(::service::client_state::internal_tag{}, tc);
-            ::service::query_state qs(cs, empty_service_permit());
-
-            co_await qp.query_internal(
-                seastar::format("SELECT * FROM {}.{}", meta::legacy::AUTH_KS, cf_name),
-                db::consistency_level::ALL,
-                {},
-                1000,
-                [&qp, &cf_name, &col_names, &val_binders_str, &schema, ts, &collected] (const cql3::untyped_result_set::row& row) -> future<stop_iteration> {
-                    std::vector<data_value_or_unset> values;
-                    for (const auto& col : schema->all_columns()) {
-                        if (row.has(col.name_as_text())) {
-                            values.push_back(
-                                    col.type->deserialize(row.get_blob_unfragmented(col.name_as_text())));
-                        } else {
-                            values.push_back(unset_value{});
-                        }
+            for (const auto& row : *rows) {
+                std::vector<data_value_or_unset> values;
+                for (const auto& col : schema->all_columns()) {
+                    if (row.has(col.name_as_text())) {
+                        values.push_back(
+                                col.type->deserialize(row.get_blob_unfragmented(col.name_as_text())));
+                    } else {
+                        values.push_back(unset_value{});
                    }
-                    auto muts = co_await qp.get_mutations_internal(
-                            seastar::format("INSERT INTO {}.{} ({}) VALUES ({})",
-                                    db::system_keyspace::NAME,
-                                    cf_name,
-                                    fmt::join(col_names, ", "),
-                                    val_binders_str),
-                            internal_distributed_query_state(),
-                            ts,
-                            std::move(values));
-                    if (muts.size() != 1) {
-                        on_internal_error(log,
-                                format("expecting single insert mutation, got {}", muts.size()));
-                    }
-
-                    collected.push_back(std::move(muts[0]));
-                    co_return stop_iteration::no;
-                },
-                std::move(qs));
-
-            for (auto& m : collected) {
-                co_yield std::move(m);
+                }
+                auto muts = co_await qp.get_mutations_internal(
+                        seastar::format("INSERT INTO {}.{} ({}) VALUES ({})",
+                                db::system_keyspace::NAME,
+                                cf_name,
+                                fmt::join(col_names, ", "),
+                                val_binders_str),
+                        internal_distributed_query_state(),
+                        ts,
+                        std::move(values));
+                if (muts.size() != 1) {
+                    on_internal_error(log,
+                            format("expecting single insert mutation, got {}", muts.size()));
+                }
+                co_yield std::move(muts[0]);
            }
        }
        co_yield co_await sys_ks.make_auth_version_mutation(ts,
--- a/compaction/compaction.cc
+++ b/compaction/compaction.cc
@@ -48,7 +48,6 @@
 #include "mutation/mutation_fragment_stream_validator.hh"
 #include "utils/assert.hh"
 #include "utils/error_injection.hh"
-#include "utils/chunked_vector.hh"
 #include "utils/pretty_printers.hh"
 #include "readers/multi_range.hh"
 #include "readers/compacting.hh"
@@ -612,23 +611,23 @@ private:
    }

    // Called in a seastar thread
-    utils::chunked_vector<dht::partition_range>
+    dht::partition_range_vector
    get_ranges_for_invalidation(const std::vector<sstables::shared_sstable>& sstables) {
        // If owned ranges is disengaged, it means no cleanup work was done and
        // so nothing needs to be invalidated.
        if (!_owned_ranges) {
-            return {};
+            return dht::partition_range_vector{};
        }
-        auto owned_ranges = dht::to_partition_ranges_chunked(*_owned_ranges).get();
+        auto owned_ranges = dht::to_partition_ranges(*_owned_ranges, utils::can_yield::yes);

        auto non_owned_ranges = sstables
                | std::views::transform([] (const sstables::shared_sstable& sst) {
            seastar::thread::maybe_yield();
            return dht::partition_range::make({sst->get_first_decorated_key(), true},
                                              {sst->get_last_decorated_key(), true});
-        })      | std::ranges::to<utils::chunked_vector<dht::partition_range>>();
+        })      | std::ranges::to<dht::partition_range_vector>();

-        return dht::subtract_ranges(*_schema, std::move(non_owned_ranges), std::move(owned_ranges)).get();
+        return dht::subtract_ranges(*_schema, non_owned_ranges, std::move(owned_ranges)).get();
    }
 protected:
    compaction(compaction_group_view& table_s, compaction_descriptor descriptor, compaction_data& cdata, compaction_progress_monitor& progress_monitor, use_backlog_tracker use_backlog_tracker)
@@ -719,8 +718,8 @@ protected:

    compaction_completion_desc
    get_compaction_completion_desc(std::vector<sstables::shared_sstable> input_sstables, std::vector<sstables::shared_sstable> output_sstables) {
-        auto ranges = get_ranges_for_invalidation(input_sstables);
-        return compaction_completion_desc{std::move(input_sstables), std::move(output_sstables), std::move(ranges)};
+        auto ranges_for_invalidation = get_ranges_for_invalidation(input_sstables);
+        return compaction_completion_desc{std::move(input_sstables), std::move(output_sstables), std::move(ranges_for_invalidation)};
    }

    // Tombstone expiration is enabled based on the presence of sstable set.
--- a/compaction/compaction_descriptor.hh
+++ b/compaction/compaction_descriptor.hh
@@ -16,7 +16,6 @@
 #include "sstables/sstable_set.hh"
 #include "compaction_fwd.hh"
 #include "mutation_writer/token_group_based_splitting_writer.hh"
-#include "utils/chunked_vector.hh"

 namespace compaction {

@@ -39,7 +38,7 @@ struct compaction_completion_desc {
    // New, fresh SSTables that should be added to SSTable set, replacing the old ones.
    std::vector<sstables::shared_sstable> new_sstables;
    // Set of compacted partition ranges that should be invalidated in the cache.
-    utils::chunked_vector<dht::partition_range> ranges_for_cache_invalidation;
+    dht::partition_range_vector ranges_for_cache_invalidation;
 };

 // creates a new SSTable for a given shard
--- a/compaction/compaction_manager.cc
+++ b/compaction/compaction_manager.cc
@@ -778,7 +778,6 @@ compaction_manager::get_incremental_repair_read_lock(compaction::compaction_grou
        cmlog.debug("Get get_incremental_repair_read_lock for {} started", reason);
    }
    compaction::compaction_state& cs = get_compaction_state(&t);
-    auto gh = cs.gate.hold();
    auto ret = co_await cs.incremental_repair_lock.hold_read_lock();
    if (!reason.empty()) {
        cmlog.debug("Get get_incremental_repair_read_lock for {} done", reason);
@@ -792,7 +791,6 @@ compaction_manager::get_incremental_repair_write_lock(compaction::compaction_gro
        cmlog.debug("Get get_incremental_repair_write_lock for {} started", reason);
    }
    compaction::compaction_state& cs = get_compaction_state(&t);
-    auto gh = cs.gate.hold();
    auto ret = co_await cs.incremental_repair_lock.hold_write_lock();
    if (!reason.empty()) {
        cmlog.debug("Get get_incremental_repair_write_lock for {} done", reason);
@@ -1128,10 +1126,7 @@ void compaction_manager::enable() {

    _compaction_submission_timer.cancel();
    _compaction_submission_timer.arm_periodic(periodic_compaction_submission_interval());
-    if (_waiting_reevaluation) {
-        on_internal_error(cmlog, "postponed compactions reevaluation is already running when enabling compaction manager");
-    }
-    _waiting_reevaluation.emplace(postponed_compactions_reevaluation());
+    _waiting_reevalution = postponed_compactions_reevaluation();
    cmlog.info("Enabled");
 }

@@ -1179,16 +1174,6 @@ void compaction_manager::reevaluate_postponed_compactions() noexcept {
    _postponed_reevaluation.signal();
 }

-future<> compaction_manager::stop_postponed_compactions() noexcept {
-    auto waiting_reevaluation = std::exchange(_waiting_reevaluation, std::nullopt);
-    if (!waiting_reevaluation) {
-        return make_ready_future();
-    }
-    // Trigger a signal to properly exit from postponed_compactions_reevaluation() fiber
-    reevaluate_postponed_compactions();
-    return std::move(*waiting_reevaluation);
-}
-
 void compaction_manager::postpone_compaction_for_table(compaction_group_view* t) {
    _postponed.insert(t);
 }
@@ -1272,7 +1257,8 @@ future<> compaction_manager::drain() {
    _compaction_submission_timer.cancel();
    // Stop ongoing compactions, if the request has not been sent already and wait for them to stop.
    co_await stop_ongoing_compactions("drain");
-    co_await stop_postponed_compactions();
+    // Trigger a signal to properly exit from postponed_compactions_reevaluation() fiber
+    reevaluate_postponed_compactions();
    cmlog.info("Drained");
 }

@@ -1280,15 +1266,9 @@ future<> compaction_manager::start(const db::config& cfg, utils::disk_space_moni
    if (dsm && (this_shard_id() == 0)) {
        _out_of_space_subscription = dsm->subscribe(cfg.critical_disk_utilization_level, [this] (auto threshold_reached) {
            if (threshold_reached) {
-                return container().invoke_on_all([] (compaction_manager& cm) {
-                    cm._in_critical_disk_utilization_mode = true;
-                    return cm.drain();
-                });
+                return container().invoke_on_all([] (compaction_manager& cm) { return cm.drain(); });
            }
-            return container().invoke_on_all([] (compaction_manager& cm) {
-                cm._in_critical_disk_utilization_mode = false;
-                cm.enable();
-            });
+            return container().invoke_on_all([] (compaction_manager& cm) { cm.enable(); });
        });
    }

@@ -1316,7 +1296,8 @@ future<> compaction_manager::really_do_stop() noexcept {
    if (!_tasks.empty()) {
        on_fatal_internal_error(cmlog, format("{} tasks still exist after being stopped", _tasks.size()));
    }
-    co_await stop_postponed_compactions();
+    reevaluate_postponed_compactions();
+    co_await std::move(_waiting_reevalution);
    co_await _sys_ks.close();
    _weight_tracker.clear();
    _compaction_submission_timer.cancel();
@@ -1538,9 +1519,7 @@ future<> compaction_manager::maybe_wait_for_sstable_count_reduction(compaction_g
            | std::views::transform(std::mem_fn(&sstables::sstable::run_identifier))
            | std::ranges::to<std::unordered_set>());
    };
-    const auto threshold = utils::get_local_injector().inject_parameter<size_t>("set_sstable_count_reduction_threshold")
-        .value_or(size_t(std::max(schema->max_compaction_threshold(), 32)));
-
+    const auto threshold = size_t(std::max(schema->max_compaction_threshold(), 32));
    auto count = co_await num_runs_for_compaction();
    if (count <= threshold) {
        cmlog.trace("No need to wait for sstable count reduction in {}: {} <= {}",
@@ -1555,7 +1534,9 @@ future<> compaction_manager::maybe_wait_for_sstable_count_reduction(compaction_g
    auto& cstate = get_compaction_state(&t);
    try {
        while (can_perform_regular_compaction(t) && co_await num_runs_for_compaction() > threshold) {
-            co_await cstate.compaction_done.wait();
+            co_await cstate.compaction_done.wait([this, &t] {
+                return !can_perform_regular_compaction(t);
+            });
        }
    } catch (const broken_condition_variable&) {
        co_return;
@@ -2308,16 +2289,6 @@ future<compaction_manager::compaction_stats_opt> compaction_manager::perform_spl
    return perform_task_on_all_files<split_compaction_task_executor>("split", info, t, std::move(options), std::move(owned_ranges_ptr), std::move(get_sstables), throw_if_stopping::no);
 }

-std::exception_ptr compaction_manager::make_disabled_exception(compaction::compaction_group_view& cg) {
-    std::exception_ptr ex;
-    if (_in_critical_disk_utilization_mode) {
-        ex = std::make_exception_ptr(std::runtime_error("critical disk utilization"));
-    } else {
-        ex = std::make_exception_ptr(compaction_stopped_exception(cg.schema()->ks_name(), cg.schema()->cf_name(), "compaction disabled"));
-    }
-    return ex;
-}
-
 future<std::vector<sstables::shared_sstable>>
 compaction_manager::maybe_split_new_sstable(sstables::shared_sstable sst, compaction_group_view& t, compaction_type_options::split opt) {
    if (!split_compaction_task_executor::sstable_needs_split(sst, opt)) {
@@ -2327,7 +2298,8 @@ compaction_manager::maybe_split_new_sstable(sstables::shared_sstable sst, compac
    // We don't want to prevent split because compaction is temporarily disabled on a view only for synchronization,
    // which is unneeded against new sstables that aren't part of any set yet, so never use can_proceed(&t) here.
    if (is_disabled()) {
-        co_return coroutine::exception(make_disabled_exception(t));
+        co_return coroutine::exception(std::make_exception_ptr(std::runtime_error(format("Cannot split {} because manager has compaction disabled, " \
+                                                                                         "reason might be out of space prevention", sst->get_filename()))));
    }
    std::vector<sstables::shared_sstable> ret;

@@ -2415,8 +2387,6 @@ future<> compaction_manager::remove(compaction_group_view& t, sstring reason) no
    if (!c_state.gate.is_closed()) {
        auto close_gate = c_state.gate.close();
        co_await stop_ongoing_compactions(reason, &t);
-        // Wait for users of incremental repair lock (can be either repair itself or maintenance compactions).
-        co_await c_state.incremental_repair_lock.write_lock();
        co_await std::move(close_gate);
    }

--- a/compaction/compaction_manager.hh
+++ b/compaction/compaction_manager.hh
@@ -114,8 +114,6 @@ private:
    uint32_t _disabled_state_count = 0;

    bool is_disabled() const { return _state != state::running || _disabled_state_count > 0; }
-    // precondition: is_disabled() is true.
-    std::exception_ptr make_disabled_exception(compaction::compaction_group_view& cg);

    std::optional<future<>> _stop_future;

@@ -127,7 +125,7 @@ private:
    // a sstable from being compacted twice.
    std::unordered_set<sstables::shared_sstable> _compacting_sstables;

-    std::optional<future<>> _waiting_reevaluation;
+    future<> _waiting_reevalution = make_ready_future<>();
    condition_variable _postponed_reevaluation;
    // tables that wait for compaction but had its submission postponed due to ongoing compaction.
    std::unordered_set<compaction::compaction_group_view*> _postponed;
@@ -175,7 +173,6 @@ private:
    tombstone_gc_state _tombstone_gc_state;

    utils::disk_space_monitor::subscription _out_of_space_subscription;
-    bool _in_critical_disk_utilization_mode = false;
 private:
    // Requires task->_compaction_state.gate to be held and task to be registered in _tasks.
    future<compaction_stats_opt> perform_task(shared_ptr<compaction::compaction_task_executor> task, throw_if_stopping do_throw_if_stopping);
@@ -237,7 +234,6 @@ private:

    future<> postponed_compactions_reevaluation();
    void reevaluate_postponed_compactions() noexcept;
-    future<> stop_postponed_compactions() noexcept;
    // Postpone compaction for a table that couldn't be executed due to ongoing
    // similar-sized compaction.
    void postpone_compaction_for_table(compaction::compaction_group_view* t);
--- a/configure.py
+++ b/configure.py
@@ -725,9 +725,29 @@ raft_tests = set([
 vector_search_tests = set([
    'test/vector_search/vector_store_client_test',
    'test/vector_search/load_balancer_test',
-    'test/vector_search/client_test',
-    'test/vector_search/filter_test',
-    'test/vector_search/rescoring_test'
+    'test/vector_search/client_test'
+])
+
+vector_search_validator_bin = 'vector-search-validator/bin/vector-search-validator'
+vector_search_validator_deps = set([
+    'test/vector_search_validator/build-validator',
+    'test/vector_search_validator/Cargo.toml',
+    'test/vector_search_validator/crates/validator/Cargo.toml',
+    'test/vector_search_validator/crates/validator/src/main.rs',
+    'test/vector_search_validator/crates/validator-scylla/Cargo.toml',
+    'test/vector_search_validator/crates/validator-scylla/src/lib.rs',
+    'test/vector_search_validator/crates/validator-scylla/src/cql.rs',
+])
+
+vector_store_bin = 'vector-search-validator/bin/vector-store'
+vector_store_deps = set([
+    'test/vector_search_validator/build-env',
+    'test/vector_search_validator/build-vector-store',
+])
+
+vector_search_validator_bins = set([
+    vector_search_validator_bin,
+    vector_store_bin,
 ])

 wasms = set([
@@ -763,7 +783,7 @@ other = set([
    'iotune',
 ])

-all_artifacts = apps | cpp_apps | tests | other | wasms
+all_artifacts = apps | cpp_apps | tests | other | wasms | vector_search_validator_bins

 arg_parser = argparse.ArgumentParser('Configure scylla', add_help=False, formatter_class=argparse.ArgumentDefaultsHelpFormatter)
 arg_parser.add_argument('--out', dest='buildfile', action='store', default='build.ninja',
@@ -1014,9 +1034,6 @@ scylla_core = (['message/messaging_service.cc',
                'cql3/functions/aggregate_fcts.cc',
                'cql3/functions/castas_fcts.cc',
                'cql3/functions/error_injection_fcts.cc',
-                'cql3/statements/strong_consistency/modification_statement.cc',
-                'cql3/statements/strong_consistency/select_statement.cc',
-                'cql3/statements/strong_consistency/statement_helpers.cc',
                'cql3/functions/vector_similarity_fcts.cc',
                'cql3/statements/cf_prop_defs.cc',
                'cql3/statements/cf_statement.cc',
@@ -1042,8 +1059,8 @@ scylla_core = (['message/messaging_service.cc',
                'cql3/statements/raw/parsed_statement.cc',
                'cql3/statements/property_definitions.cc',
                'cql3/statements/update_statement.cc',
-                'cql3/statements/broadcast_modification_statement.cc',
-                'cql3/statements/broadcast_select_statement.cc',
+                'cql3/statements/strongly_consistent_modification_statement.cc',
+                'cql3/statements/strongly_consistent_select_statement.cc',
                'cql3/statements/delete_statement.cc',
                'cql3/statements/prune_materialized_view_statement.cc',
                'cql3/statements/batch_statement.cc',
@@ -1334,9 +1351,6 @@ scylla_core = (['message/messaging_service.cc',
                'lang/wasm.cc',
                'lang/wasm_alien_thread_runner.cc',
                'lang/wasm_instance_cache.cc',
-                'service/strong_consistency/groups_manager.cc',
-                'service/strong_consistency/coordinator.cc',
-                'service/strong_consistency/state_machine.cc',
                'service/raft/group0_state_id_handler.cc',
                'service/raft/group0_state_machine.cc',
                'service/raft/group0_state_machine_merger.cc',
@@ -1366,7 +1380,6 @@ scylla_core = (['message/messaging_service.cc',
                'vector_search/dns.cc',
                'vector_search/client.cc',
                'vector_search/clients.cc',
-                'vector_search/filter.cc',
                'vector_search/truststore.cc'
                ] + [Antlr3Grammar('cql3/Cql.g')] \
                  + scylla_raft_core
@@ -1476,7 +1489,6 @@ idls = ['idl/gossip_digest.idl.hh',
        'idl/hinted_handoff.idl.hh',
        'idl/storage_proxy.idl.hh',
        'idl/sstables.idl.hh',
-        'idl/strong_consistency/state_machine.idl.hh',
        'idl/group0_state_machine.idl.hh',
        'idl/mapreduce_request.idl.hh',
        'idl/replica_exception.idl.hh',
@@ -1701,7 +1713,6 @@ deps['test/boost/combined_tests'] += [
    'test/boost/tracing_test.cc',
    'test/boost/user_function_test.cc',
    'test/boost/user_types_test.cc',
-    'test/boost/vector_index_test.cc',
    'test/boost/view_build_test.cc',
    'test/boost/view_complex_test.cc',
    'test/boost/view_schema_ckey_test.cc',
@@ -1773,8 +1784,6 @@ deps['test/raft/discovery_test'] =  ['test/raft/discovery_test.cc',
 deps['test/vector_search/vector_store_client_test'] =  ['test/vector_search/vector_store_client_test.cc'] + scylla_tests_dependencies
 deps['test/vector_search/load_balancer_test'] = ['test/vector_search/load_balancer_test.cc'] + scylla_tests_dependencies
 deps['test/vector_search/client_test'] = ['test/vector_search/client_test.cc'] + scylla_tests_dependencies
-deps['test/vector_search/filter_test'] = ['test/vector_search/filter_test.cc'] + scylla_tests_dependencies
-deps['test/vector_search/rescoring_test'] = ['test/vector_search/rescoring_test.cc'] + scylla_tests_dependencies

 boost_tests_prefixes = ["test/boost/", "test/vector_search/", "test/raft/", "test/manual/", "test/ldap/"]

@@ -2561,10 +2570,11 @@ def write_build_file(f,
              description = RUST_LIB $out
            ''').format(mode=mode, antlr3_exec=args.antlr3_exec, fmt_lib=fmt_lib, test_repeat=args.test_repeat, test_timeout=args.test_timeout, rustc_wrapper=rustc_wrapper, **modeval))
        f.write(
-            'build {mode}-build: phony {artifacts} {wasms}\n'.format(
+            'build {mode}-build: phony {artifacts} {wasms} {vector_search_validator_bins}\n'.format(
                mode=mode,
-                artifacts=str.join(' ', ['$builddir/' + mode + '/' + x for x in sorted(build_artifacts - wasms)]),
+                artifacts=str.join(' ', ['$builddir/' + mode + '/' + x for x in sorted(build_artifacts - wasms - vector_search_validator_bins)]),
                wasms = str.join(' ', ['$builddir/' + x for x in sorted(build_artifacts & wasms)]),
+                vector_search_validator_bins=str.join(' ', ['$builddir/' + x for x in sorted(build_artifacts & vector_search_validator_bins)]),
            )
        )
        if profile_recipe := modes[mode].get('profile_recipe'):
@@ -2594,7 +2604,7 @@ def write_build_file(f,
                continue
            profile_dep = modes[mode].get('profile_target', "")

-            if binary in other or binary in wasms:
+            if binary in other or binary in wasms or binary in vector_search_validator_bins:
                continue
            srcs = deps[binary]
            # 'scylla'
@@ -2705,10 +2715,11 @@ def write_build_file(f,
        )

        f.write(
-            'build {mode}-test: test.{mode} {test_executables} $builddir/{mode}/scylla {wasms}\n'.format(
+            'build {mode}-test: test.{mode} {test_executables} $builddir/{mode}/scylla {wasms} {vector_search_validator_bins} \n'.format(
                mode=mode,
                test_executables=' '.join(['$builddir/{}/{}'.format(mode, binary) for binary in sorted(tests)]),
                wasms=' '.join([f'$builddir/{binary}' for binary in sorted(wasms)]),
+                vector_search_validator_bins=' '.join([f'$builddir/{binary}' for binary in sorted(vector_search_validator_bins)]),
            )
        )
        f.write(
@@ -2876,6 +2887,19 @@ def write_build_file(f,
            'build compiler-training: phony {}\n'.format(' '.join(['{mode}-compiler-training'.format(mode=mode) for mode in default_modes]))
    )

+    f.write(textwrap.dedent(f'''\
+        rule build-vector-search-validator
+            command = test/vector_search_validator/build-validator $builddir
+        rule build-vector-store
+            command = test/vector_search_validator/build-vector-store $builddir
+        '''))
+    f.write(
+            'build $builddir/{vector_search_validator_bin}: build-vector-search-validator {}\n'.format(' '.join([dep for dep in sorted(vector_search_validator_deps)]), vector_search_validator_bin=vector_search_validator_bin)
+    )
+    f.write(
+            'build $builddir/{vector_store_bin}: build-vector-store {}\n'.format(' '.join([dep for dep in sorted(vector_store_deps)]), vector_store_bin=vector_store_bin)
+    )
+
    f.write(textwrap.dedent(f'''\
        build dist-unified-tar: phony {' '.join([f'$builddir/{mode}/dist/tar/{scylla_product}-unified-{scylla_version}-{scylla_release}.{arch}.tar.gz' for mode in default_modes])}
        build dist-unified: phony dist-unified-tar
--- a/cql3/CMakeLists.txt
+++ b/cql3/CMakeLists.txt
@@ -47,9 +47,6 @@ target_sources(cql3
    functions/aggregate_fcts.cc
    functions/castas_fcts.cc
    functions/error_injection_fcts.cc
-    statements/strong_consistency/select_statement.cc
-    statements/strong_consistency/modification_statement.cc
-    statements/strong_consistency/statement_helpers.cc
    functions/vector_similarity_fcts.cc
    statements/cf_prop_defs.cc
    statements/cf_statement.cc
@@ -75,8 +72,8 @@ target_sources(cql3
    statements/raw/parsed_statement.cc
    statements/property_definitions.cc
    statements/update_statement.cc
-    statements/broadcast_modification_statement.cc
-    statements/broadcast_select_statement.cc
+    statements/strongly_consistent_modification_statement.cc
+    statements/strongly_consistent_select_statement.cc
    statements/delete_statement.cc
    statements/prune_materialized_view_statement.cc
    statements/batch_statement.cc
--- a/cql3/authorized_prepared_statements_cache.hh
+++ b/cql3/authorized_prepared_statements_cache.hh
@@ -136,9 +136,9 @@ public:
    {}

    future<> insert(auth::authenticated_user user, cql3::prepared_cache_key_type prep_cache_key, value_type v) noexcept {
-        return _cache.insert(key_type(std::move(user), std::move(prep_cache_key)), [v = std::move(v)] (const cache_key_type&) mutable {
+        return _cache.get_ptr(key_type(std::move(user), std::move(prep_cache_key)), [v = std::move(v)] (const cache_key_type&) mutable {
            return make_ready_future<value_type>(std::move(v));
-        });
+        }).discard_result();
    }

    value_ptr find(const auth::authenticated_user& user, const cql3::prepared_cache_key_type& prep_cache_key) {
--- a/cql3/column_specification.cc
+++ b/cql3/column_specification.cc
@@ -10,7 +10,6 @@

 #include "utils/assert.hh"
 #include "cql3/column_specification.hh"
-#include "cql3/column_identifier.hh"

 namespace cql3 {

@@ -32,12 +31,4 @@ bool column_specification::all_in_same_table(const std::vector<lw_shared_ptr<col
    });
 }

-lw_shared_ptr<column_specification> make_column_spec(std::string_view ks_name, std::string_view cf_name, sstring name, data_type type) {
-    return make_lw_shared<column_specification>(
-            ks_name,
-            cf_name,
-            ::make_shared<column_identifier>(std::move(name), true),
-            std::move(type));
-}
-
 }
--- a/cql3/column_specification.hh
+++ b/cql3/column_specification.hh
@@ -42,6 +42,4 @@ public:
    static bool all_in_same_table(const std::vector<lw_shared_ptr<column_specification>>& names);
 };

-lw_shared_ptr<column_specification> make_column_spec(std::string_view ks_name, std::string_view cf_name, sstring name, data_type type);
-
 }
--- a/cql3/functions/vector_similarity_fcts.cc
+++ b/cql3/functions/vector_similarity_fcts.cc
@@ -10,41 +10,9 @@
 #include "types/types.hh"
 #include "types/vector.hh"
 #include "exceptions/exceptions.hh"
-#include <span>
-#include <bit>

 namespace cql3 {
 namespace functions {
-
-namespace detail {
-
-std::vector<float> extract_float_vector(const bytes_opt& param, size_t dimension) {
-    if (!param) {
-        throw exceptions::invalid_request_exception("Cannot extract float vector from null parameter");
-    }
-
-    const size_t expected_size = dimension * sizeof(float);
-    if (param->size() != expected_size) {
-        throw exceptions::invalid_request_exception(
-            fmt::format("Invalid vector size: expected {} bytes for {} floats, got {} bytes",
-                       expected_size, dimension, param->size()));
-    }
-
-    std::vector<float> result;
-    result.reserve(dimension);
-
-    bytes_view view(*param);
-    for (size_t i = 0; i < dimension; ++i) {
-        // read_simple handles network byte order (big-endian) conversion
-        uint32_t raw = read_simple<uint32_t>(view);
-        result.push_back(std::bit_cast<float>(raw));
-    }
-
-    return result;
-}
-
-} // namespace detail
-
 namespace {

 // The computations of similarity scores match the exact formulas of Cassandra's (jVector's) implementation to ensure compatibility.
@@ -54,14 +22,14 @@ namespace {

 // You should only use this function if you need to preserve the original vectors and cannot normalize
 // them in advance.
-float compute_cosine_similarity(std::span<const float> v1, std::span<const float> v2) {
+float compute_cosine_similarity(const std::vector<data_value>& v1, const std::vector<data_value>& v2) {
    double dot_product = 0.0;
    double squared_norm_a = 0.0;
    double squared_norm_b = 0.0;

    for (size_t i = 0; i < v1.size(); ++i) {
-        double a = v1[i];
-        double b = v2[i];
+        double a = value_cast<float>(v1[i]);
+        double b = value_cast<float>(v2[i]);

        dot_product += a * b;
        squared_norm_a += a * a;
@@ -69,7 +37,7 @@ float compute_cosine_similarity(std::span<const float> v1, std::span<const float
    }

    if (squared_norm_a == 0 || squared_norm_b == 0) {
-        return std::numeric_limits<float>::quiet_NaN();
+        throw exceptions::invalid_request_exception("Function system.similarity_cosine doesn't support all-zero vectors");
    }

    // The cosine similarity is in the range [-1, 1].
@@ -78,12 +46,12 @@ float compute_cosine_similarity(std::span<const float> v1, std::span<const float
    return (1 + (dot_product / (std::sqrt(squared_norm_a * squared_norm_b)))) / 2;
 }

-float compute_euclidean_similarity(std::span<const float> v1, std::span<const float> v2) {
+float compute_euclidean_similarity(const std::vector<data_value>& v1, const std::vector<data_value>& v2) {
    double sum = 0.0;

    for (size_t i = 0; i < v1.size(); ++i) {
-        double a = v1[i];
-        double b = v2[i];
+        double a = value_cast<float>(v1[i]);
+        double b = value_cast<float>(v2[i]);

        double diff = a - b;
        sum += diff * diff;
@@ -97,12 +65,12 @@ float compute_euclidean_similarity(std::span<const float> v1, std::span<const fl

 // Assumes that both vectors are L2-normalized.
 // This similarity is intended as an optimized way to perform cosine similarity calculation.
-float compute_dot_product_similarity(std::span<const float> v1, std::span<const float> v2) {
+float compute_dot_product_similarity(const std::vector<data_value>& v1, const std::vector<data_value>& v2) {
    double dot_product = 0.0;

    for (size_t i = 0; i < v1.size(); ++i) {
-        double a = v1[i];
-        double b = v2[i];
+        double a = value_cast<float>(v1[i]);
+        double b = value_cast<float>(v2[i]);
        dot_product += a * b;
    }

@@ -168,15 +136,13 @@ bytes_opt vector_similarity_fct::execute(std::span<const bytes_opt> parameters)
        return std::nullopt;
    }

-    // Extract dimension from the vector type
-    const auto& type = static_cast<const vector_type_impl&>(*arg_types()[0]);
-    size_t dimension = type.get_dimension();
+    const auto& type = arg_types()[0];
+    data_value v1 = type->deserialize(*parameters[0]);
+    data_value v2 = type->deserialize(*parameters[1]);
+    const auto& v1_elements = value_cast<std::vector<data_value>>(v1);
+    const auto& v2_elements = value_cast<std::vector<data_value>>(v2);

-    // Optimized path: extract floats directly from bytes, bypassing data_value overhead
-    std::vector<float> v1 = detail::extract_float_vector(parameters[0], dimension);
-    std::vector<float> v2 = detail::extract_float_vector(parameters[1], dimension);
-
-    float result = SIMILARITY_FUNCTIONS.at(_name)(v1, v2);
+    float result = SIMILARITY_FUNCTIONS.at(_name)(v1_elements, v2_elements);
    return float_type->decompose(result);
 }

--- a/cql3/functions/vector_similarity_fcts.hh
+++ b/cql3/functions/vector_similarity_fcts.hh
@@ -11,7 +11,6 @@
 #include "native_scalar_function.hh"
 #include "cql3/assignment_testable.hh"
 #include "cql3/functions/function_name.hh"
-#include <span>

 namespace cql3 {
 namespace functions {
@@ -20,7 +19,7 @@ static const function_name SIMILARITY_COSINE_FUNCTION_NAME = function_name::nati
 static const function_name SIMILARITY_EUCLIDEAN_FUNCTION_NAME = function_name::native_function("similarity_euclidean");
 static const function_name SIMILARITY_DOT_PRODUCT_FUNCTION_NAME = function_name::native_function("similarity_dot_product");

-using similarity_function_t = float (*)(std::span<const float>, std::span<const float>);
+using similarity_function_t = float (*)(const std::vector<data_value>&, const std::vector<data_value>&);
 extern thread_local const std::unordered_map<function_name, similarity_function_t> SIMILARITY_FUNCTIONS;

 std::vector<data_type> retrieve_vector_arg_types(const function_name& name, const std::vector<shared_ptr<assignment_testable>>& provided_args);
@@ -34,14 +33,5 @@ public:
    virtual bytes_opt execute(std::span<const bytes_opt> parameters) override;
 };

-namespace detail {
-
-// Extract float vector directly from serialized bytes, bypassing data_value overhead.
-// This is an internal API exposed for testing purposes.
-// Vector<float, N> wire format: N floats as big-endian uint32_t values, 4 bytes each.
-std::vector<float> extract_float_vector(const bytes_opt& param, size_t dimension);
-
-} // namespace detail
-
 } // namespace functions
 } // namespace cql3
--- a/cql3/prepared_statements_cache.hh
+++ b/cql3/prepared_statements_cache.hh
@@ -105,7 +105,6 @@ public:
    static const std::chrono::minutes entry_expiry;

    using key_type = prepared_cache_key_type;
-    using pinned_value_type = cache_value_ptr;
    using value_type = checked_weak_ptr;
    using statement_is_too_big = typename cache_type::entry_is_too_big;

@@ -117,14 +116,9 @@ public:
        : _cache(size, entry_expiry, logger)
    {}

-    template <typename LoadFunc>
-    future<pinned_value_type> get_pinned(const key_type& key, LoadFunc&& load) {
-        return _cache.get_ptr(key.key(), [load = std::forward<LoadFunc>(load)] (const cache_key_type&) { return load(); });
-    }
-
    template <typename LoadFunc>
    future<value_type> get(const key_type& key, LoadFunc&& load) {
-        return get_pinned(key, std::forward<LoadFunc>(load)).then([] (cache_value_ptr v_ptr) {
+        return _cache.get_ptr(key.key(), [load = std::forward<LoadFunc>(load)] (const cache_key_type&) { return load(); }).then([] (cache_value_ptr v_ptr) {
            return make_ready_future<value_type>((*v_ptr)->checked_weak_from_this());
        });
    }
--- a/cql3/query_processor.cc
+++ b/cql3/query_processor.cc
@@ -48,10 +48,8 @@ const std::chrono::minutes prepared_statements_cache::entry_expiry = std::chrono

 struct query_processor::remote {
    remote(service::migration_manager& mm, service::mapreduce_service& fwd,
-           service::storage_service& ss, service::raft_group0_client& group0_client,
-           service::strong_consistency::coordinator& _sc_coordinator)
+           service::storage_service& ss, service::raft_group0_client& group0_client)
            : mm(mm), mapreducer(fwd), ss(ss), group0_client(group0_client)
-            , sc_coordinator(_sc_coordinator)
            , gate("query_processor::remote")
    {}

@@ -59,7 +57,6 @@ struct query_processor::remote {
    service::mapreduce_service& mapreducer;
    service::storage_service& ss;
    service::raft_group0_client& group0_client;
-    service::strong_consistency::coordinator& sc_coordinator;

    seastar::named_gate gate;
 };
@@ -517,16 +514,9 @@ query_processor::~query_processor() {
    }
 }

-std::pair<std::reference_wrapper<service::strong_consistency::coordinator>, gate::holder>
-query_processor::acquire_strongly_consistent_coordinator() {
-    auto [remote_, holder] = remote();
-    return {remote_.get().sc_coordinator, std::move(holder)};
-}
-
 void query_processor::start_remote(service::migration_manager& mm, service::mapreduce_service& mapreducer,
-                                   service::storage_service& ss, service::raft_group0_client& group0_client,
-                                   service::strong_consistency::coordinator& sc_coordinator) {
-    _remote = std::make_unique<struct remote>(mm, mapreducer, ss, group0_client, sc_coordinator);
+                                   service::storage_service& ss, service::raft_group0_client& group0_client) {
+    _remote = std::make_unique<struct remote>(mm, mapreducer, ss, group0_client);
 }

 future<> query_processor::stop_remote() {
@@ -697,7 +687,7 @@ future<::shared_ptr<cql_transport::messages::result_message::prepared>>
 query_processor::prepare(sstring query_string, const service::client_state& client_state, cql3::dialect d) {
    try {
        auto key = compute_id(query_string, client_state.get_raw_keyspace(), d);
-        auto prep_entry = co_await _prepared_cache.get_pinned(key, [this, &query_string, &client_state, d] {
+        auto prep_ptr = co_await _prepared_cache.get(key, [this, &query_string, &client_state, d] {
                auto prepared = get_statement(query_string, client_state, d);
                prepared->calculate_metadata_id();
                auto bound_terms = prepared->statement->get_bound_terms();
@@ -711,13 +701,13 @@ query_processor::prepare(sstring query_string, const service::client_state& clie
                return make_ready_future<std::unique_ptr<statements::prepared_statement>>(std::move(prepared));
            });

-        co_await utils::get_local_injector().inject(
-                "query_processor_prepare_wait_after_cache_get",
-                utils::wait_for_message(std::chrono::seconds(60)));
-  
-        auto msg = ::make_shared<result_message::prepared::cql>(prepared_cache_key_type::cql_id(key), std::move(prep_entry),
+        const auto& warnings = prep_ptr->warnings; // bound to the pointee's member, not to prep_ptr itself, which is moved from below
+        auto msg = ::make_shared<result_message::prepared::cql>(prepared_cache_key_type::cql_id(key), std::move(prep_ptr),
                    client_state.is_protocol_extension_set(cql_transport::cql_protocol_extension::LWT_ADD_METADATA_MARK));
-        co_return std::move(msg);
+        for (const auto& w : warnings) {
+            msg->add_warning(w);
+        }
+        co_return ::shared_ptr<cql_transport::messages::result_message::prepared>(std::move(msg)); // msg is non-const, so this is a real move
    } catch(typename prepared_statements_cache::statement_is_too_big&) {
        throw prepared_statement_is_too_big(query_string);
    }
@@ -870,7 +860,6 @@ struct internal_query_state {
    sstring query_string;
    std::unique_ptr<query_options> opts;
    statements::prepared_statement::checked_weak_ptr p;
-    std::optional<service::query_state> qs;
    bool more_results = true;
 };

@@ -878,14 +867,10 @@ internal_query_state query_processor::create_paged_state(
        const sstring& query_string,
        db::consistency_level cl,
        const data_value_list& values,
-        int32_t page_size,
-        std::optional<service::query_state> qs) {
+        int32_t page_size) {
    auto p = prepare_internal(query_string);
    auto opts = make_internal_options(p, values, cl, page_size);
-    if (!qs) {
-        qs.emplace(query_state_for_internal_call());
-    }
-    return internal_query_state{query_string, std::make_unique<cql3::query_options>(std::move(opts)), std::move(p), std::move(qs), true};
+    return internal_query_state{query_string, std::make_unique<cql3::query_options>(std::move(opts)), std::move(p), true};
 }

 bool query_processor::has_more_results(cql3::internal_query_state& state) const {
@@ -908,8 +893,9 @@ future<> query_processor::for_each_cql_result(
 future<::shared_ptr<untyped_result_set>>
 query_processor::execute_paged_internal(internal_query_state& state) {
    state.p->statement->validate(*this, service::client_state::for_internal_calls());
+    auto qs = query_state_for_internal_call();
    ::shared_ptr<cql_transport::messages::result_message> msg =
-      co_await state.p->statement->execute(*this, *state.qs, *state.opts, std::nullopt);
+      co_await state.p->statement->execute(*this, qs, *state.opts, std::nullopt);

    class visitor : public result_message::visitor_base {
        internal_query_state& _state;
@@ -1029,11 +1015,6 @@ query_processor::execute_batch_without_checking_exception_message(
        query_options& options,
        std::unordered_map<prepared_cache_key_type, authorized_prepared_statements_cache::value_type> pending_authorization_entries) {
    auto access_future = co_await coroutine::as_future(batch->check_access(*this, query_state.get_client_state()));
-    bool failed = access_future.failed();
-    co_await audit::inspect(batch, query_state, options, failed);
-    if (failed) {
-        std::rethrow_exception(access_future.get_exception());
-    }
    co_await coroutine::parallel_for_each(pending_authorization_entries, [this, &query_state] (auto& e) -> future<> {
            try {
                co_await _authorized_prepared_cache.insert(*query_state.get_client_state().user(), e.first, std::move(e.second));
@@ -1041,6 +1022,11 @@ query_processor::execute_batch_without_checking_exception_message(
                log.error("failed to cache the entry: {}", std::current_exception());
            }
        });
+    const bool failed = access_future.failed(); // sampled once; drives both the audit record and the rethrow decision
+    co_await audit::inspect(batch, query_state, options, failed);
+    if (failed) {
+        std::rethrow_exception(access_future.get_exception());
+    }
    batch->validate();
    batch->validate(*this, query_state.get_client_state());
    _stats.queries_by_cl[size_t(options.get_consistency())] += batch->get_statements().size();
@@ -1216,9 +1202,8 @@ future<> query_processor::query_internal(
        db::consistency_level cl,
        const data_value_list& values,
        int32_t page_size,
-        noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)> f,
-        std::optional<service::query_state> qs) {
-    auto query_state = create_paged_state(query_string, cl, values, page_size, std::move(qs));
+        noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)> f) {
+    auto query_state = create_paged_state(query_string, cl, values, page_size);
    co_return co_await for_each_cql_result(query_state, std::move(f));
 }

--- a/cql3/query_processor.hh
+++ b/cql3/query_processor.hh
@@ -44,10 +44,6 @@ class query_state;
 class mapreduce_service;
 class raft_group0_client;

-namespace strong_consistency {
-class coordinator;
-}
-
 namespace broadcast_tables {
 struct query;
 }
@@ -159,8 +155,7 @@ public:
    ~query_processor();

    void start_remote(service::migration_manager&, service::mapreduce_service&,
-                      service::storage_service& ss, service::raft_group0_client&,
-                      service::strong_consistency::coordinator&);
+                      service::storage_service& ss, service::raft_group0_client&);
    future<> stop_remote();

    data_dictionary::database db() {
@@ -179,9 +174,6 @@ public:
        return _proxy;
    }

-    std::pair<std::reference_wrapper<service::strong_consistency::coordinator>, gate::holder>
-    acquire_strongly_consistent_coordinator();
-
    cql_stats& get_cql_stats() {
        return _cql_stats;
    }
@@ -330,7 +322,6 @@ public:
     * page_size - maximum page size
     * f - a function to be run on each row of the query result,
     *     if the function returns stop_iteration::yes the iteration will stop
-     * qs - optional query state (default: std::nullopt)
     *
     * \note This function is optimized for convenience, not performance.
     */
@@ -339,8 +330,7 @@ public:
            db::consistency_level cl,
            const data_value_list& values,
            int32_t page_size,
-            noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)> f,
-            std::optional<service::query_state> qs = std::nullopt);
+            noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)> f);

    /*
     * \brief iterate over all cql results using paging
@@ -509,8 +499,7 @@ private:
            const sstring& query_string,
            db::consistency_level,
            const data_value_list& values,
-            int32_t page_size,
-            std::optional<service::query_state> qs = std::nullopt);
+            int32_t page_size);

    /*!
     * \brief run a query using paging
--- a/cql3/query_result_printer.hh
+++ b/cql3/query_result_printer.hh
@@ -1,20 +0,0 @@
-/*
- * Copyright 2025-present ScyllaDB
- */
-
-/*
- * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
- */
-
-#pragma once
-
-#include <ostream>
-
-namespace cql3 {
-
-class result;
-
-void print_query_results_text(std::ostream& os, const result& result);
-void print_query_results_json(std::ostream& os, const result& result);
-
-} // namespace cql3
--- a/cql3/restrictions/statement_restrictions.hh
+++ b/cql3/restrictions/statement_restrictions.hh
@@ -201,10 +201,6 @@ public:
        return _clustering_columns_restrictions;
    }

-    const expr::expression& get_nonprimary_key_restrictions() const {
-        return _nonprimary_key_restrictions;
-    }
-
    // Get a set of columns restricted by the IS NOT NULL restriction.
    // IS NOT NULL is a special case that is handled separately from other restrictions.
    const std::unordered_set<const column_definition*> get_not_null_columns() const;
--- a/cql3/result_set.cc
+++ b/cql3/result_set.cc
@@ -9,10 +9,8 @@
 */

 #include <cstdint>
-#include "types/json_utils.hh"
 #include "utils/assert.hh"
 #include "utils/hashers.hh"
-#include "utils/rjson.hh"
 #include "cql3/result_set.hh"

 namespace cql3 {
@@ -48,13 +46,6 @@ void metadata::add_non_serialized_column(lw_shared_ptr<column_specification> nam
    _column_info->_names.emplace_back(std::move(name));
 }

-void metadata::hide_last_column() {
-    if (_column_info->_column_count == 0) {
-        utils::on_internal_error("Trying to hide a column when there are no columns visible.");
-    }
-    _column_info->_column_count--;
-}
-
 void metadata::set_paging_state(lw_shared_ptr<const service::pager::paging_state> paging_state) {
    _flags.set<flag::HAS_MORE_PAGES>();
    _paging_state = std::move(paging_state);
@@ -197,85 +188,4 @@ make_empty_metadata() {
    return empty_metadata_cache;
 }

-void print_query_results_text(std::ostream& os, const cql3::result& result) {
-    const auto& metadata = result.get_metadata();
-    const auto& column_metadata = metadata.get_names();
-
-    struct column_values {
-        size_t max_size{0};
-        sstring header_format;
-        sstring row_format;
-        std::vector<sstring> values;
-
-        void add(sstring value) {
-            max_size = std::max(max_size, value.size());
-            values.push_back(std::move(value));
-        }
-    };
-
-    std::vector<column_values> columns;
-    columns.resize(column_metadata.size());
-
-    for (size_t i = 0; i < column_metadata.size(); ++i) {
-        columns[i].add(column_metadata[i]->name->text());
-    }
-
-    for (const auto& row : result.result_set().rows()) {
-        for (size_t i = 0; i < row.size(); ++i) {
-            if (row[i]) {
-                columns[i].add(column_metadata[i]->type->to_string(linearized(managed_bytes_view(*row[i]))));
-            } else {
-                columns[i].add("");
-            }
-        }
-    }
-
-    std::vector<sstring> separators(columns.size(), sstring());
-    for (size_t i = 0; i < columns.size(); ++i) {
-        auto& col_values = columns[i];
-        col_values.header_format = seastar::format(" {{:<{}}} ", col_values.max_size);
-        col_values.row_format = seastar::format(" {{:>{}}} ", col_values.max_size);
-        for (size_t c = 0; c < col_values.max_size; ++c) {
-            separators[i] += "-";
-        }
-    }
-
-    for (size_t r = 0; r < result.result_set().rows().size() + 1; ++r) {
-        std::vector<sstring> row;
-        row.reserve(columns.size());
-        for (size_t i = 0; i < columns.size(); ++i) {
-            const auto& format = r == 0 ? columns[i].header_format : columns[i].row_format;
-            row.push_back(fmt::format(fmt::runtime(std::string_view(format)), columns[i].values[r]));
-        }
-        fmt::print(os, "{}\n", fmt::join(row, "|"));
-        if (!r) {
-            fmt::print(os, "-{}-\n", fmt::join(separators, "-+-"));
-        }
-    }
-}
-
-void print_query_results_json(std::ostream& os, const cql3::result& result) {
-    const auto& metadata = result.get_metadata();
-    const auto& column_metadata = metadata.get_names();
-
-    rjson::streaming_writer writer(os);
-
-    writer.StartArray();
-    for (const auto& row : result.result_set().rows()) {
-        writer.StartObject();
-        for (size_t i = 0; i < row.size(); ++i) {
-            writer.Key(column_metadata[i]->name->text());
-            if (!row[i] || row[i]->empty()) {
-                writer.Null();
-                continue;
-            }
-            const auto value = to_json_string(*column_metadata[i]->type, *row[i]);
-            const auto type = to_json_type(*column_metadata[i]->type, *row[i]);
-            writer.RawValue(value, type);
-        }
-        writer.EndObject();
-    }
-    writer.EndArray();
-}
-
 }
--- a/cql3/result_set.hh
+++ b/cql3/result_set.hh
@@ -73,7 +73,6 @@ public:
    uint32_t value_count() const;

    void add_non_serialized_column(lw_shared_ptr<column_specification> name);
-    void hide_last_column();

 public:
    void set_paging_state(lw_shared_ptr<const service::pager::paging_state> paging_state);
--- a/cql3/statements/alter_keyspace_statement.cc
+++ b/cql3/statements/alter_keyspace_statement.cc
@@ -225,9 +225,10 @@ cql3::statements::alter_keyspace_statement::prepare_schema_mutations(query_proce
            //    The second hyphen is not really true because currently topological changes can
            //    disturb it (see scylladb/scylladb#23345), but we ignore that.
            locator::assert_rf_rack_valid_keyspace(_name, tmptr, *rs);
-        } catch (const std::invalid_argument& e) {
+        } catch (const std::exception& e) {
            if (replica::database::enforce_rf_rack_validity_for_keyspace(qp.db().get_config(), *ks_md)) {
-                // wrap the exception manually here in a type that can be passed to the user.
+                // There's no guarantee what the type of the exception will be, so we need to
+                // wrap it manually here in a type that can be passed to the user.
                throw exceptions::invalid_request_exception(e.what());
            } else {
                // Even when RF-rack-validity is not enforced for the keyspace, we'd
--- a/cql3/statements/create_index_statement.cc
+++ b/cql3/statements/create_index_statement.cc
@@ -8,7 +8,6 @@
 * SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.0 and Apache-2.0)
 */

-#include <boost/algorithm/string.hpp>
 #include <seastar/core/coroutine.hh>
 #include "create_index_statement.hh"
 #include "db/config.hh"
@@ -38,7 +37,6 @@
 #include "types/concrete_types.hh"
 #include "db/tags/extension.hh"
 #include "tombstone_gc_extension.hh"
-#include "index/secondary_index.hh"

 #include <stdexcept>

@@ -118,15 +116,6 @@ static data_type type_for_computed_column(cql3::statements::index_target::target
    }
 }

-static bool is_vector_capable_class(const sstring& class_name) {
-    return boost::iequals(class_name, "vector_index");
-}
-
-static bool is_vector_index(const index_options_map& options) {
-    auto class_it = options.find(db::index::secondary_index::custom_class_option_name);
-    return class_it != options.end() && is_vector_capable_class(class_it->second);
-}
-
 view_ptr create_index_statement::create_view_for_index(const schema_ptr schema, const index_metadata& im,
        const data_dictionary::database& db) const
 {
@@ -277,7 +266,7 @@ create_index_statement::validate(query_processor& qp, const service::client_stat
    _idx_properties->validate();

    // FIXME: This is ugly and can be improved.
-    const bool is_vector_index = _idx_properties->custom_class && is_vector_capable_class(*_idx_properties->custom_class);
+    const bool is_vector_index = _idx_properties->custom_class && *_idx_properties->custom_class == "vector_index";
    const bool uses_view_properties = _view_properties.properties()->count() > 0
            || _view_properties.use_compact_storage()
            || _view_properties.defined_ordering().size() > 0;
@@ -459,15 +448,11 @@ void create_index_statement::validate_for_local_index(const schema& schema) cons
                auto base_pk_identifiers = *index_pk | std::views::transform([&schema] (const ::shared_ptr<column_identifier::raw>& raw_ident) {
                    return raw_ident->prepare_column_identifier(schema);
                });
-                auto const is_vector_index = _idx_properties->custom_class && is_vector_capable_class(*_idx_properties->custom_class);
                auto remaining_base_pk_columns = schema.partition_key_columns();
                auto next_expected_base_column = remaining_base_pk_columns.begin();
                for (const auto& ident : base_pk_identifiers) {
                    auto it = schema.columns_by_name().find(ident->name());
                    if (it == schema.columns_by_name().end() || !it->second->is_partition_key()) {
-                        if (is_vector_index) {
-                            throw exceptions::invalid_request_exception(format("Local vector index definition must contain partition key's columns only. Redundant column: {}", ident->to_string()));
-                        }
                        throw exceptions::invalid_request_exception(format("Local index definition must contain full partition key only. Redundant column: {}", ident->to_string()));
                    }
                    if (next_expected_base_column == remaining_base_pk_columns.end()) {
@@ -478,7 +463,7 @@ void create_index_statement::validate_for_local_index(const schema& schema) cons
                    }
                    ++next_expected_base_column;
                }
-                if (!is_vector_index && next_expected_base_column != remaining_base_pk_columns.end()) {
+                if (next_expected_base_column != remaining_base_pk_columns.end()) {
                    throw exceptions::invalid_request_exception(format("Local index definition must contain full partition key only. Missing column: {}", next_expected_base_column->name_as_text()));
                }
                if (_raw_targets.size() == 1) {
@@ -712,9 +697,7 @@ index_metadata create_index_statement::make_index_metadata(const std::vector<::s
                                                           const index_options_map& options)
 {
    index_options_map new_options = options;
-    auto target_option = is_vector_index(options)
-        ? secondary_index::vector_index::serialize_targets(targets)
-        : secondary_index::target_parser::serialize_targets(targets);
+    auto target_option = secondary_index::target_parser::serialize_targets(targets);
    new_options.emplace(index_target::target_option_name, target_option);

    const auto& first_target = targets.front()->value;
--- a/cql3/statements/create_keyspace_statement.cc
+++ b/cql3/statements/create_keyspace_statement.cc
@@ -123,9 +123,10 @@ future<std::tuple<::shared_ptr<cql_transport::event::schema_change>, utils::chun
            // We hold a group0_guard, so it's correct to check this here.
            // The topology or schema cannot change while we're performing this query.
            locator::assert_rf_rack_valid_keyspace(_name, tmptr, *rs);
-        } catch (const std::invalid_argument& e) {
+        } catch (const std::exception& e) {
            if (replica::database::enforce_rf_rack_validity_for_keyspace(cfg, *ksm)) {
-                // wrap the exception in a type that can be passed to the user.
+                // There's no guarantee what the type of the exception will be, so we need to
+                // wrap it manually here in a type that can be passed to the user.
                throw exceptions::invalid_request_exception(e.what());
            } else {
                // Even when RF-rack-validity is not enforced for the keyspace, we'd
--- a/cql3/statements/create_table_statement.cc
+++ b/cql3/statements/create_table_statement.cc
@@ -31,6 +31,8 @@
 #include "db/config.hh"
 #include "compaction/time_window_compaction_strategy.hh"

+bool is_internal_keyspace(std::string_view name);
+
 namespace cql3 {

 namespace statements {
@@ -122,6 +124,10 @@ void create_table_statement::apply_properties_to(schema_builder& builder, const
        addColumnMetadataFromAliases(cfmd, Collections.singletonList(valueAlias), defaultValidator, ColumnDefinition.Kind.COMPACT_VALUE);
 #endif

+    if (!_properties->get_compression_options() && !is_internal_keyspace(keyspace())) {
+        builder.set_compressor_params(db.get_config().sstable_compression_user_table_options());
+    }
+
    _properties->apply_to_builder(builder, _properties->make_schema_extensions(db.extensions()), db, keyspace(), true);
 }

--- a/cql3/statements/describe_statement.cc
+++ b/cql3/statements/describe_statement.cc
@@ -23,7 +23,6 @@
 #include "index/vector_index.hh"
 #include "schema/schema.hh"
 #include "service/client_state.hh"
-#include "service/paxos/paxos_state.hh"
 #include "types/types.hh"
 #include "cql3/query_processor.hh"
 #include "cql3/cql_statement.hh"
@@ -330,19 +329,6 @@ future<std::vector<description>> table(const data_dictionary::database& db, cons
                "*/",
                *table_desc.create_statement);

-        table_desc.create_statement = std::move(os).to_managed_string();
-    } else if (service::paxos::paxos_store::try_get_base_table(name)) {
-        // Paxos state table is internally managed by Scylla and it shouldn't be exposed to the user.
-        // The table is allowed to be described as a comment to ease administrative work but it's hidden from all listings.
-        fragmented_ostringstream os{};
-
-        fmt::format_to(os.to_iter(),
-                "/* Do NOT execute this statement! It's only for informational purposes.\n"
-                "   A paxos state table is created automatically when enabling LWT on a base table.\n"
-                "\n{}\n"
-                "*/",
-                *table_desc.create_statement);
-
        table_desc.create_statement = std::move(os).to_managed_string();
    }
    result.push_back(std::move(table_desc));
@@ -378,7 +364,7 @@ future<std::vector<description>> table(const data_dictionary::database& db, cons
 future<std::vector<description>> tables(const data_dictionary::database& db, const lw_shared_ptr<keyspace_metadata>& ks, std::optional<bool> with_internals = std::nullopt) {
    auto& replica_db = db.real_database();
    auto tables = ks->tables() | std::views::filter([&replica_db] (const schema_ptr& s) {
-        return !cdc::is_log_for_some_table(replica_db, s->ks_name(), s->cf_name()) && !service::paxos::paxos_store::try_get_base_table(s->cf_name());
+        return !cdc::is_log_for_some_table(replica_db, s->ks_name(), s->cf_name());
    }) | std::ranges::to<std::vector<schema_ptr>>();
    std::ranges::sort(tables, std::ranges::less(), std::mem_fn(&schema::cf_name));

--- a/cql3/statements/ks_prop_defs.cc
+++ b/cql3/statements/ks_prop_defs.cc
@@ -98,7 +98,6 @@ static locator::replication_strategy_config_options prepare_options(
        const sstring& strategy_class,
        const locator::token_metadata& tm,
        bool rf_rack_valid_keyspaces,
-        bool enforce_rack_list,
        locator::replication_strategy_config_options options,
        const locator::replication_strategy_config_options& old_options,
        bool rack_list_enabled,
@@ -108,7 +107,7 @@ static locator::replication_strategy_config_options prepare_options(
    auto is_nts = locator::abstract_replication_strategy::to_qualified_class_name(strategy_class) == "org.apache.cassandra.locator.NetworkTopologyStrategy";
    auto is_alter = !old_options.empty();
    const auto& all_dcs = tm.get_datacenter_racks_token_owners();
-    auto auto_expand_racks = uses_tablets && rack_list_enabled && (rf_rack_valid_keyspaces || enforce_rack_list);
+    auto auto_expand_racks = uses_tablets && rf_rack_valid_keyspaces && rack_list_enabled;

    logger.debug("prepare_options: {}: is_nts={} auto_expand_racks={} rack_list_enabled={} old_options={} new_options={} all_dcs={}",
                 strategy_class, is_nts, auto_expand_racks, rack_list_enabled, old_options, options, all_dcs);
@@ -418,7 +417,7 @@ lw_shared_ptr<data_dictionary::keyspace_metadata> ks_prop_defs::as_ks_metadata(s
    auto initial_tablets = get_initial_tablets(default_initial_tablets, cfg.enforce_tablets());
    bool uses_tablets = initial_tablets.has_value();
    bool rack_list_enabled = utils::get_local_injector().enter("create_with_numeric") ? false : feat.rack_list_rf;
-    auto options = prepare_options(sc, tm, cfg.rf_rack_valid_keyspaces(), cfg.enforce_rack_list(), get_replication_options(), {}, rack_list_enabled, uses_tablets);
+    auto options = prepare_options(sc, tm, cfg.rf_rack_valid_keyspaces(), get_replication_options(), {}, rack_list_enabled, uses_tablets);
    return data_dictionary::keyspace_metadata::new_keyspace(ks_name, sc,
            std::move(options), initial_tablets, get_consistency_option(), get_boolean(KW_DURABLE_WRITES, true), get_storage_options());
 }
@@ -435,7 +434,7 @@ lw_shared_ptr<data_dictionary::keyspace_metadata> ks_prop_defs::as_ks_metadata_u
    auto sc = get_replication_strategy_class();
    bool rack_list_enabled = utils::get_local_injector().enter("create_with_numeric") ? false : feat.rack_list_rf;
    if (sc) {
-        options = prepare_options(*sc, tm, cfg.rf_rack_valid_keyspaces(), cfg.enforce_rack_list(), get_replication_options(), old_options, rack_list_enabled, uses_tablets);
+        options = prepare_options(*sc, tm, cfg.rf_rack_valid_keyspaces(), get_replication_options(), old_options, rack_list_enabled, uses_tablets);
    } else {
        sc = old->strategy_name();
        options = old_options;
--- a/cql3/statements/list_effective_service_level_statement.cc
+++ b/cql3/statements/list_effective_service_level_statement.cc
@@ -30,14 +30,13 @@ list_effective_service_level_statement::prepare(data_dictionary::database db, cq
    return std::make_unique<prepared_statement>(audit_info(), ::make_shared<list_effective_service_level_statement>(*this));
 }

-shared_ptr<const cql3::metadata> list_effective_service_level_statement::get_result_metadata() const {
-    return ::make_shared<cql3::metadata>(
-            std::vector<lw_shared_ptr<column_specification>>{
-                    make_column_spec("QOS", "effective_service_level", "service_level_option", utf8_type),
-                    make_column_spec("QOS", "effective_service_level", "effective_service_level", utf8_type),
-                    make_column_spec("QOS", "effective_service_level", "value", utf8_type)
-            });
-}
+static auto make_column(sstring name, const shared_ptr<const abstract_type>& type) { // builds one result-column spec under "QOS"."effective_service_level"
+    return make_lw_shared<column_specification>(
+        "QOS",
+        "effective_service_level",
+        ::make_shared<column_identifier>(std::move(name), true), // NOTE(review): `true` is presumably keep_case - confirm against column_identifier
+        type);
+}

 static bytes_opt decompose_timeout (const qos::service_level_options::timeout_type& duration) {
    return std::visit(overloaded_functor{
@@ -70,6 +69,11 @@ static bytes_opt decompose_shares(const qos::service_level_options::shares_type&

 future<::shared_ptr<cql_transport::messages::result_message>>
 list_effective_service_level_statement::execute(query_processor& qp, service::query_state& state, const query_options&, std::optional<service::group0_guard>) const {
+    static thread_local const std::vector<lw_shared_ptr<column_specification>> metadata({
+        make_column("service_level_option", utf8_type),
+        make_column("effective_service_level", utf8_type),
+        make_column("value", utf8_type)
+    });
    auto& role_manager = state.get_client_state().get_auth_service()->underlying_role_manager();

    if (!co_await role_manager.exists(_role_name)) {
@@ -83,7 +87,7 @@ list_effective_service_level_statement::execute(query_processor& qp, service::qu
        throw exceptions::invalid_request_exception(format("Role {} doesn't have assigned any service level", _role_name));
    }

-    auto rs = std::make_unique<result_set>(::make_shared<cql3::metadata>(*get_result_metadata()));
+    auto rs = std::make_unique<result_set>(metadata);
    rs->add_row({
        utf8_type->decompose("workload_type"),
        utf8_type->decompose(slo->effective_names->workload),
@@ -106,4 +110,4 @@ list_effective_service_level_statement::execute(query_processor& qp, service::qu

 }

-}
+}
--- a/cql3/statements/list_effective_service_level_statement.hh
+++ b/cql3/statements/list_effective_service_level_statement.hh
@@ -21,11 +21,9 @@ public:

    virtual std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats) override;

-    virtual seastar::shared_ptr<const metadata> get_result_metadata() const override;
-
    virtual future<::shared_ptr<cql_transport::messages::result_message>>
    execute(query_processor&, service::query_state&, const query_options&, std::optional<service::group0_guard>) const override;
 };

 }
-}
+}
--- a/cql3/statements/list_permissions_statement.cc
+++ b/cql3/statements/list_permissions_statement.cc
@@ -15,18 +15,9 @@
 #include "auth/authorizer.hh"
 #include "auth/common.hh"
 #include "cql3/result_set.hh"
-#include "db/system_keyspace.hh"
+#include "cql3/column_identifier.hh"
 #include "transport/messages/result_message.hh"

-shared_ptr<const cql3::metadata> cql3::statements::list_permissions_statement::get_result_metadata() const {
-    return ::make_shared<cql3::metadata>(
-            std::vector<lw_shared_ptr<cql3::column_specification>>{
-                    make_column_spec(db::system_keyspace::NAME, "permissions", "role", utf8_type),
-                    make_column_spec(db::system_keyspace::NAME, "permissions", "username", utf8_type),
-                    make_column_spec(db::system_keyspace::NAME, "permissions", "resource", utf8_type),
-                    make_column_spec(db::system_keyspace::NAME, "permissions", "permission", utf8_type)});
-}
-
 cql3::statements::list_permissions_statement::list_permissions_statement(
        auth::permission_set permissions,
        std::optional<auth::resource> resource,
@@ -89,6 +80,18 @@ cql3::statements::list_permissions_statement::execute(
        service::query_state& state,
        const query_options& options,
        std::optional<service::group0_guard> guard) const {
+    auto make_column = [auth_ks = auth::get_auth_ks_name(qp)](sstring name) {
+        return make_lw_shared<column_specification>(
+                auth_ks,
+                "permissions",
+                ::make_shared<column_identifier>(std::move(name), true),
+                utf8_type);
+    };
+
+    std::vector<lw_shared_ptr<column_specification>> metadata({
+        make_column("role"), make_column("username"), make_column("resource"), make_column("permission")
+    });
+
    const auto make_resource_filter = [this]()
            -> std::optional<std::pair<auth::resource, auth::recursive_permissions>> {
        if (!_resource) {
@@ -101,7 +104,6 @@ cql3::statements::list_permissions_statement::execute(
    };

    const auto& as = *state.get_client_state().get_auth_service();
-    auto metadata = ::make_shared<cql3::metadata>(*get_result_metadata());

    return do_with(make_resource_filter(), [this, &as, metadata = std::move(metadata)](const auto& resource_filter) mutable {
        return auth::list_filtered_permissions(
--- a/cql3/statements/list_permissions_statement.hh
+++ b/cql3/statements/list_permissions_statement.hh
@@ -34,8 +34,6 @@ public:

    std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats) override;

-    virtual seastar::shared_ptr<const metadata> get_result_metadata() const override;
-
    void validate(query_processor&, const service::client_state&) const override;

    future<> check_access(query_processor& qp, const service::client_state&) const override;
--- a/cql3/statements/list_roles_statement.hh
+++ b/cql3/statements/list_roles_statement.hh
@@ -35,8 +35,6 @@ public:

    std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats) override;

-    virtual seastar::shared_ptr<const metadata> get_result_metadata() const override;
-
    virtual future<> check_access(query_processor& qp, const service::client_state&) const override;

    virtual future<::shared_ptr<cql_transport::messages::result_message>>
--- a/cql3/statements/list_service_level_attachments_statement.cc
+++ b/cql3/statements/list_service_level_attachments_statement.cc
@@ -8,6 +8,7 @@

 #include "seastarx.hh"
 #include "cql3/statements/list_service_level_attachments_statement.hh"
+#include "cql3/column_identifier.hh"
 #include "transport/messages/result_message.hh"
 #include "service/client_state.hh"
 #include "service/query_state.hh"
@@ -16,15 +17,6 @@ namespace cql3 {

 namespace statements {

-shared_ptr<const cql3::metadata> list_service_level_attachments_statement::get_result_metadata() const {
-    static thread_local const std::vector<lw_shared_ptr<column_specification>> metadata({
-        make_column_spec("QOS", "service_levels_attachments", "role", utf8_type),
-        make_column_spec("QOS", "service_levels_attachments", "service_level", utf8_type)
-    });
-
-    return ::make_shared<cql3::metadata>(metadata);
-}
-
 list_service_level_attachments_statement::list_service_level_attachments_statement(sstring role_name) :
    _role_name(role_name), _describe_all(false) {
 }
@@ -48,7 +40,19 @@ list_service_level_attachments_statement::execute(query_processor& qp,
        service::query_state &state,
        const query_options &,
        std::optional<service::group0_guard> guard) const {
-    auto metadata = ::make_shared<cql3::metadata>(*get_result_metadata());
+
+    static auto make_column = [] (sstring name, const shared_ptr<const abstract_type> type) {
+        return make_lw_shared<column_specification>(
+                "QOS",
+                "service_levels_attachments",
+                ::make_shared<column_identifier>(std::move(name), true),
+                type);
+    };
+
+    static thread_local const std::vector<lw_shared_ptr<column_specification>> metadata({
+        make_column("role", utf8_type), make_column("service_level", utf8_type)
+    });
+

    return make_ready_future().then([this, &state] () {
        if (_describe_all) {
@@ -63,7 +67,7 @@ list_service_level_attachments_statement::execute(query_processor& qp,
            });

        }
-    }).then([metadata = std::move(metadata)] (std::unordered_map<sstring, sstring> roles_to_att_val) {
+    }).then([] (std::unordered_map<sstring, sstring> roles_to_att_val) {

        auto rs = std::make_unique<result_set>(metadata);
        for (auto&& role_to_sl : roles_to_att_val) {
--- a/cql3/statements/list_service_level_attachments_statement.hh
+++ b/cql3/statements/list_service_level_attachments_statement.hh
@@ -22,7 +22,6 @@ public:
    list_service_level_attachments_statement(sstring role_name);
    list_service_level_attachments_statement();
    std::unique_ptr<cql3::statements::prepared_statement> prepare(data_dictionary::database db, cql_stats &stats) override;
-    virtual seastar::shared_ptr<const metadata> get_result_metadata() const override;
    virtual future<> check_access(query_processor& qp, const service::client_state&) const override;
    virtual future<::shared_ptr<cql_transport::messages::result_message>>
    execute(query_processor&, service::query_state&, const query_options&, std::optional<service::group0_guard> guard) const override;
--- a/cql3/statements/list_service_level_statement.cc
+++ b/cql3/statements/list_service_level_statement.cc
@@ -8,6 +8,7 @@

 #include "seastarx.hh"
 #include "cql3/statements/list_service_level_statement.hh"
+#include "cql3/column_identifier.hh"
 #include "service/qos/service_level_controller.hh"
 #include "transport/messages/result_message.hh"
 #include "utils/overloaded_functor.hh"
@@ -18,20 +19,6 @@ namespace cql3 {

 namespace statements {

-shared_ptr<const cql3::metadata> list_service_level_statement::get_result_metadata() const {
-    std::vector<lw_shared_ptr<column_specification>> metadata{
-            make_column_spec("QOS", "service_levels", "service_level", utf8_type),
-            make_column_spec("QOS", "service_levels", "timeout", duration_type),
-            make_column_spec("QOS", "service_levels", "workload_type", utf8_type),
-            make_column_spec("QOS", "service_levels", "shares", int32_type),
-    };
-    if (_describe_all) {
-        metadata.push_back(make_column_spec("QOS", "service_levels", "percentage of all service level shares", utf8_type));
-    }
-
-    return ::make_shared<cql3::metadata>(std::move(metadata));
-}
-
 list_service_level_statement::list_service_level_statement(sstring service_level, bool describe_all) :
    _service_level(service_level), _describe_all(describe_all) {
 }
@@ -51,7 +38,23 @@ list_service_level_statement::execute(query_processor& qp,
        service::query_state &state,
        const query_options &,
        std::optional<service::group0_guard> guard) const {
-    auto metadata = ::make_shared<cql3::metadata>(*get_result_metadata());
+
+    static auto make_column = [] (sstring name, const shared_ptr<const abstract_type> type) {
+        return make_lw_shared<column_specification>(
+                "QOS",
+                "service_levels",
+                ::make_shared<column_identifier>(std::move(name), true),
+                type);
+    };
+
+    std::vector<lw_shared_ptr<column_specification>> metadata({make_column("service_level", utf8_type),
+        make_column("timeout", duration_type),
+        make_column("workload_type", utf8_type),
+        make_column("shares", int32_type),
+    });
+    if (_describe_all) {
+        metadata.push_back(make_column("percentage of all service level shares", utf8_type));
+    }

    return make_ready_future().then([this, &state] () {
                                  if (_describe_all) {
--- a/cql3/statements/list_service_level_statement.hh
+++ b/cql3/statements/list_service_level_statement.hh
@@ -21,7 +21,6 @@ class list_service_level_statement final : public service_level_statement {
 public:
    list_service_level_statement(sstring service_level, bool describe_all);
    std::unique_ptr<cql3::statements::prepared_statement> prepare(data_dictionary::database db, cql_stats &stats) override;
-    virtual seastar::shared_ptr<const metadata> get_result_metadata() const override;
    virtual future<> check_access(query_processor& qp, const service::client_state&) const override;
    virtual future<::shared_ptr<cql_transport::messages::result_message>>
    execute(query_processor&, service::query_state&, const query_options&, std::optional<service::group0_guard> guard) const override;
--- a/cql3/statements/list_users_statement.cc
+++ b/cql3/statements/list_users_statement.cc
@@ -12,17 +12,10 @@
 #include "list_users_statement.hh"
 #include "cql3/query_processor.hh"
 #include "cql3/query_options.hh"
+#include "cql3/column_identifier.hh"
 #include "auth/common.hh"
-#include "db/system_keyspace.hh"
 #include "transport/messages/result_message.hh"

-shared_ptr<const cql3::metadata> cql3::statements::list_users_statement::get_result_metadata() const {
-    return ::make_shared<cql3::metadata>(
-        std::vector<lw_shared_ptr<cql3::column_specification>>{
-                cql3::make_column_spec(db::system_keyspace::NAME, "users", "name", utf8_type),
-                cql3::make_column_spec(db::system_keyspace::NAME, "users", "super", boolean_type)});
-}
-
 std::unique_ptr<cql3::statements::prepared_statement> cql3::statements::list_users_statement::prepare(
                data_dictionary::database db, cql_stats& stats) {
    return std::make_unique<prepared_statement>(audit_info(), ::make_shared<list_users_statement>(*this));
@@ -35,7 +28,20 @@ future<> cql3::statements::list_users_statement::check_access(query_processor& q

 future<::shared_ptr<cql_transport::messages::result_message>>
 cql3::statements::list_users_statement::execute(query_processor& qp, service::query_state& state, const query_options& options, std::optional<service::group0_guard> guard) const {
-    auto metadata = ::make_shared<cql3::metadata>(*get_result_metadata());
+    static const sstring virtual_table_name("users");
+
+    const auto make_column_spec = [auth_ks = auth::get_auth_ks_name(qp)](const sstring& name, const ::shared_ptr<const abstract_type>& ty) {
+        return make_lw_shared<column_specification>(
+            auth_ks,
+            virtual_table_name,
+            ::make_shared<column_identifier>(name, true),
+            ty);
+    };
+
+    auto metadata = ::make_shared<cql3::metadata>(
+        std::vector<lw_shared_ptr<column_specification>>{
+                make_column_spec("name", utf8_type),
+                make_column_spec("super", boolean_type)});

    auto make_results = [metadata = std::move(metadata)](const auth::service& as, std::unordered_set<sstring>&& roles) mutable {
        using cql_transport::messages::result_message;
--- a/cql3/statements/list_users_statement.hh
+++ b/cql3/statements/list_users_statement.hh
@@ -23,8 +23,6 @@ public:

    std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats) override;

-    virtual seastar::shared_ptr<const metadata> get_result_metadata() const override;
-
    future<> check_access(query_processor& qp, const service::client_state&) const override;
    future<::shared_ptr<cql_transport::messages::result_message>> execute(query_processor&
                    , service::query_state&
--- a/cql3/statements/modification_statement.cc
+++ b/cql3/statements/modification_statement.cc
@@ -11,7 +11,7 @@
 #include "utils/assert.hh"
 #include "cql3/cql_statement.hh"
 #include "cql3/statements/modification_statement.hh"
-#include "cql3/statements/broadcast_modification_statement.hh"
+#include "cql3/statements/strongly_consistent_modification_statement.hh"
 #include "cql3/statements/raw/modification_statement.hh"
 #include "cql3/statements/prepared_statement.hh"
 #include "cql3/expr/expr-utils.hh"
@@ -29,8 +29,6 @@
 #include "cql3/query_processor.hh"
 #include "service/storage_proxy.hh"
 #include "service/broadcast_tables/experimental/lang.hh"
-#include "cql3/statements/strong_consistency/modification_statement.hh"
-#include "cql3/statements/strong_consistency/statement_helpers.hh"

 #include <boost/lexical_cast.hpp>

@@ -548,7 +546,7 @@ modification_statement::process_where_clause(data_dictionary::database db, expr:
    }
 }

-::shared_ptr<broadcast_modification_statement>
+::shared_ptr<strongly_consistent_modification_statement>
 modification_statement::prepare_for_broadcast_tables() const {
    // FIXME: implement for every type of `modification_statement`.
    throw service::broadcast_tables::unsupported_operation_error{};
@@ -556,27 +554,24 @@ modification_statement::prepare_for_broadcast_tables() const {

 namespace raw {

+::shared_ptr<cql_statement_opt_metadata>
+modification_statement::prepare_statement(data_dictionary::database db, prepare_context& ctx, cql_stats& stats) {
+    // Always build the regular modification statement first.
+    auto prepared = prepare(db, ctx, stats);
+    if (!service::broadcast_tables::is_broadcast_table_statement(keyspace(), column_family())) {
+        return prepared;
+    }
+    // Broadcast-table targets are handed over to prepare_for_broadcast_tables() instead.
+    return prepared->prepare_for_broadcast_tables();
+}
+
 std::unique_ptr<prepared_statement>
 modification_statement::prepare(data_dictionary::database db, cql_stats& stats) {
    schema_ptr schema = validation::validate_column_family(db, keyspace(), column_family());
    auto meta = get_prepare_context();
-
-    auto statement = std::invoke([&] -> shared_ptr<cql_statement> {
-        auto result = prepare(db, meta, stats);
-
-        if (strong_consistency::is_strongly_consistent(db, schema->ks_name())) {
-            return ::make_shared<strong_consistency::modification_statement>(std::move(result));
-        }
-
-        if (service::broadcast_tables::is_broadcast_table_statement(keyspace(), column_family())) {
-            return result->prepare_for_broadcast_tables();
-        }
-        return result;
-    });
-
+    auto statement = prepare_statement(db, meta, stats);
    auto partition_key_bind_indices = meta.get_partition_key_bind_indexes(*schema);
-    return std::make_unique<prepared_statement>(audit_info(), std::move(statement), meta, 
-        std::move(partition_key_bind_indices));
+    return std::make_unique<prepared_statement>(audit_info(), std::move(statement), meta, std::move(partition_key_bind_indices));
 }

 ::shared_ptr<cql3::statements::modification_statement>
--- a/cql3/statements/modification_statement.hh
+++ b/cql3/statements/modification_statement.hh
@@ -30,7 +30,7 @@ class operation;

 namespace statements {

-class broadcast_modification_statement;
+class strongly_consistent_modification_statement;

 namespace raw { class modification_statement; }

@@ -113,15 +113,15 @@ public:

    virtual void add_update_for_key(mutation& m, const query::clustering_range& range, const update_parameters& params, const json_cache_opt& json_cache) const = 0;

-    uint32_t get_bound_terms() const override;
+    virtual uint32_t get_bound_terms() const override;

-    const sstring& keyspace() const;
+    virtual const sstring& keyspace() const;

-    const sstring& column_family() const;
+    virtual const sstring& column_family() const;

-    bool is_counter() const;
+    virtual bool is_counter() const;

-    bool is_view() const;
+    virtual bool is_view() const;

    int64_t get_timestamp(int64_t now, const query_options& options) const;

@@ -129,12 +129,12 @@ public:

    std::optional<gc_clock::duration> get_time_to_live(const query_options& options) const;

-    future<> check_access(query_processor& qp, const service::client_state& state) const override;
+    virtual future<> check_access(query_processor& qp, const service::client_state& state) const override;

    // Validate before execute, using client state and current schema
    void validate(query_processor&, const service::client_state& state) const override;

-    bool depends_on(std::string_view ks_name, std::optional<std::string_view> cf_name) const override;
+    virtual bool depends_on(std::string_view ks_name, std::optional<std::string_view> cf_name) const override;

    void add_operation(::shared_ptr<operation> op);

@@ -256,9 +256,7 @@ public:

    virtual json_cache_opt maybe_prepare_json_cache(const query_options& options) const;

-    virtual ::shared_ptr<broadcast_modification_statement> prepare_for_broadcast_tables() const;
-
-    db::timeout_clock::duration get_timeout(const service::client_state& state, const query_options& options) const;
+    virtual ::shared_ptr<strongly_consistent_modification_statement> prepare_for_broadcast_tables() const;

 protected:
    /**
@@ -266,7 +264,9 @@ protected:
     * processed to check that they are compatible.
     * @throws InvalidRequestException
     */
-    void validate_where_clause_for_conditions() const;
+    virtual void validate_where_clause_for_conditions() const;
+
+    db::timeout_clock::duration get_timeout(const service::client_state& state, const query_options& options) const;

    friend class raw::modification_statement;
 };
--- a/cql3/statements/raw/modification_statement.hh
+++ b/cql3/statements/raw/modification_statement.hh
@@ -40,6 +40,7 @@ protected:

 public:
    virtual std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats) override;
+    ::shared_ptr<cql_statement_opt_metadata> prepare_statement(data_dictionary::database db, prepare_context& ctx, cql_stats& stats);
    ::shared_ptr<cql3::statements::modification_statement> prepare(data_dictionary::database db, prepare_context& ctx, cql_stats& stats) const;
    void add_raw(sstring&& raw) { _raw_cql = std::move(raw); }
    const sstring& get_raw_cql() const { return _raw_cql; }
--- a/cql3/statements/raw/select_statement.hh
+++ b/cql3/statements/raw/select_statement.hh
@@ -131,6 +131,8 @@ private:

    void verify_ordering_is_valid(const prepared_orderings_type&, const schema&, const restrictions::statement_restrictions& restrictions) const;

+    prepared_ann_ordering_type prepare_ann_ordering(const schema& schema, prepare_context& ctx, data_dictionary::database db) const;
+
    // Checks whether this ordering reverses all results.
    // We only allow leaving select results unchanged or reversing them.
    bool is_ordering_reversed(const prepared_orderings_type&) const;
--- a/cql3/statements/role-management-statements.cc
+++ b/cql3/statements/role-management-statements.cc
@@ -27,7 +27,6 @@
 #include "cql3/statements/list_roles_statement.hh"
 #include "cql3/statements/revoke_role_statement.hh"
 #include "cql3/statements/request_validations.hh"
-#include "db/system_keyspace.hh"
 #include "exceptions/exceptions.hh"
 #include "service/storage_proxy.hh"
 #include "transport/messages/result_message.hh"
@@ -348,17 +347,6 @@ std::unique_ptr<prepared_statement> list_roles_statement::prepare(
    return std::make_unique<prepared_statement>(audit_info(), ::make_shared<list_roles_statement>(*this));
 }

-shared_ptr<const cql3::metadata> list_roles_statement::get_result_metadata() const {
-    static const thread_local auto custom_options_type = map_type_impl::get_instance(utf8_type, utf8_type, true);
-
-    return ::make_shared<cql3::metadata>(
-            std::vector<lw_shared_ptr<column_specification>>{
-                    make_column_spec(db::system_keyspace::NAME, "roles", "role", utf8_type),
-                    make_column_spec(db::system_keyspace::NAME, "roles", "super", boolean_type),
-                    make_column_spec(db::system_keyspace::NAME, "roles", "login", boolean_type),
-                    make_column_spec(db::system_keyspace::NAME, "roles", "options", custom_options_type)});
-}
-
 future<> list_roles_statement::check_access(query_processor& qp, const service::client_state& state) const {
    state.ensure_not_anonymous();

@@ -388,8 +376,24 @@ future<> list_roles_statement::check_access(query_processor& qp, const service::

 future<result_message_ptr>
 list_roles_statement::execute(query_processor& qp, service::query_state& state, const query_options&, std::optional<service::group0_guard> guard) const {
+    static const sstring virtual_table_name("roles");
+
+    const auto make_column_spec = [auth_ks = auth::get_auth_ks_name(qp)](const sstring& name, const ::shared_ptr<const abstract_type>& ty) {
+        return make_lw_shared<column_specification>(
+                auth_ks,
+                virtual_table_name,
+                ::make_shared<column_identifier>(name, true),
+                ty);
+    };
+
    static const thread_local auto custom_options_type = map_type_impl::get_instance(utf8_type, utf8_type, true);
-    auto metadata = ::make_shared<cql3::metadata>(*get_result_metadata());
+
+    auto metadata = ::make_shared<cql3::metadata>(
+            std::vector<lw_shared_ptr<column_specification>>{
+                    make_column_spec("role", utf8_type),
+                    make_column_spec("super", boolean_type),
+                    make_column_spec("login", boolean_type),
+                    make_column_spec("options", custom_options_type)});

    auto make_results = [metadata = std::move(metadata)](
            auth::role_manager& rm,
--- a/cql3/statements/select_statement.cc
+++ b/cql3/statements/select_statement.cc
@@ -8,8 +8,6 @@
 * SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.0 and Apache-2.0)
 */

-#include "cql3/statements/strong_consistency/select_statement.hh"
-#include "cql3/statements/strong_consistency/statement_helpers.hh"
 #include "cql3/statements/select_statement.hh"
 #include "cql3/expr/expression.hh"
 #include "cql3/expr/evaluate.hh"
@@ -18,7 +16,7 @@
 #include "cql3/statements/raw/select_statement.hh"
 #include "cql3/query_processor.hh"
 #include "cql3/statements/prune_materialized_view_statement.hh"
-#include "cql3/statements/broadcast_select_statement.hh"
+#include "cql3/statements/strongly_consistent_select_statement.hh"

 #include "exceptions/exceptions.hh"
 #include <seastar/core/future.hh>
@@ -27,14 +25,12 @@
 #include "service/broadcast_tables/experimental/lang.hh"
 #include "service/qos/qos_common.hh"
 #include "transport/messages/result_message.hh"
-#include "cql3/functions/functions.hh"
 #include "cql3/functions/as_json_function.hh"
 #include "cql3/selection/selection.hh"
 #include "cql3/util.hh"
 #include "cql3/restrictions/statement_restrictions.hh"
 #include "index/secondary_index.hh"
 #include "types/vector.hh"
-#include "vector_search/filter.hh"
 #include "validation.hh"
 #include "exceptions/unrecognized_entity_exception.hh"
 #include <optional>
@@ -372,9 +368,8 @@ uint64_t select_statement::get_inner_loop_limit(uint64_t limit, bool is_aggregat
 }

 bool select_statement::needs_post_query_ordering() const {
-    // We need post-query ordering for queries with IN on the partition key and an ORDER BY
-    // and ANN index queries with rescoring.
-    return static_cast<bool>(_ordering_comparator);
+    // We need post-query ordering only for queries with IN on the partition key and an ORDER BY.
+    return _restrictions->key_is_in_relation() && !_parameters->orderings().empty();
 }

 struct select_statement_executor {
@@ -1963,139 +1958,46 @@ mutation_fragments_select_statement::do_execute(query_processor& qp, service::qu
            }));
 }

-struct ann_ordering_info {
-    secondary_index::index _index;
-    raw::select_statement::prepared_ann_ordering_type _prepared_ann_ordering;
-    bool is_rescoring_enabled;
-};
-
-static std::optional<ann_ordering_info> get_ann_ordering_info(
-        data_dictionary::database db,
-        schema_ptr schema,
-        lw_shared_ptr<const raw::select_statement::parameters> parameters,
-        prepare_context& ctx) {
-
-    if (parameters->orderings().empty()) {
-        return std::nullopt;
-    }
-
-    auto [column_id, ordering] = parameters->orderings().front();
-    const auto& ann_vector = std::get_if<raw::select_statement::ann_vector>(&ordering);
-    if (!ann_vector) {
-        return std::nullopt;
-    }
-
-    ::shared_ptr<column_identifier> column = column_id->prepare_column_identifier(*schema);
-    const column_definition* def = schema->get_column_definition(column->name());
-    if (!def) {
-        throw exceptions::invalid_request_exception(
-                fmt::format("Undefined column name {}", column->text()));
-    }
-
-    if (!def->type->is_vector() || static_cast<const vector_type_impl*>(def->type.get())->get_elements_type()->get_kind() != abstract_type::kind::float_kind) {
-        throw exceptions::invalid_request_exception("ANN ordering is only supported on float vector indexes");
-    }
-
-    auto e =  expr::prepare_expression(*ann_vector, db, schema->ks_name(), nullptr, def->column_specification);
-    expr::fill_prepare_context(e, ctx);
-
-    raw::select_statement::prepared_ann_ordering_type prepared_ann_ordering = std::make_pair(std::move(def), std::move(e));
-
+::shared_ptr<cql3::statements::select_statement> vector_indexed_table_select_statement::prepare(data_dictionary::database db, schema_ptr schema,
+        uint32_t bound_terms, lw_shared_ptr<const parameters> parameters, ::shared_ptr<selection::selection> selection,
+        ::shared_ptr<restrictions::statement_restrictions> restrictions, ::shared_ptr<std::vector<size_t>> group_by_cell_indices, bool is_reversed,
+        ordering_comparator_type ordering_comparator, prepared_ann_ordering_type prepared_ann_ordering, std::optional<expr::expression> limit,
+        std::optional<expr::expression> per_partition_limit, cql_stats& stats, std::unique_ptr<attributes> attrs) {
    auto cf = db.find_column_family(schema);
    auto& sim = cf.get_index_manager();
+    auto [index_opt, _] = restrictions->find_idx(sim);

    auto indexes = sim.list_indexes();
    auto it = std::find_if(indexes.begin(), indexes.end(), [&prepared_ann_ordering](const auto& ind) {
-        return secondary_index::vector_index::is_vector_index_on_column(ind.metadata(), prepared_ann_ordering.first->name_as_text());
+        return (ind.metadata().options().contains(db::index::secondary_index::custom_class_option_name) &&
+                       ind.metadata().options().at(db::index::secondary_index::custom_class_option_name) == ANN_CUSTOM_INDEX_OPTION) &&
+               (ind.target_column() == prepared_ann_ordering.first->name_as_text());
    });

    if (it == indexes.end()) {
        throw exceptions::invalid_request_exception("ANN ordering by vector requires the column to be indexed using 'vector_index'");
    }
+    
+    index_opt = *it;

-    return ann_ordering_info{
-        *it,
-        std::move(prepared_ann_ordering),
-        secondary_index::vector_index::is_rescoring_enabled(it->metadata().options())
-    };
-}
-
-static uint32_t add_similarity_function_to_selectors(
-        std::vector<selection::prepared_selector>& prepared_selectors,
-        const ann_ordering_info& ann_ordering_info,
-        data_dictionary::database db,
-        schema_ptr schema) {
-    auto similarity_function_name = secondary_index::vector_index::get_cql_similarity_function_name(ann_ordering_info._index.metadata().options());
-    // Create the function name
-    auto func_name = functions::function_name::native_function(sstring(similarity_function_name));
-
-    // Create the function arguments
-    std::vector<expr::expression> args;
-    args.push_back(expr::column_value(ann_ordering_info._prepared_ann_ordering.first));
-    args.push_back(ann_ordering_info._prepared_ann_ordering.second);
-
-    // Get the function object
-    std::vector<shared_ptr<assignment_testable>> provided_args;
-    provided_args.push_back(expr::as_assignment_testable(args[0], expr::type_of(args[0])));
-    provided_args.push_back(expr::as_assignment_testable(args[1], expr::type_of(args[1])));
-
-    auto func = cql3::functions::instance().get(db, schema->ks_name(), func_name, provided_args, schema->ks_name(), schema->cf_name(), nullptr);
-
-    // Create the function call expression
-    expr::function_call similarity_func_call{
-        .func = func,
-        .args = std::move(args),
-    };
-
-    // Add the similarity function as a prepared selector (last)
-    prepared_selectors.push_back(selection::prepared_selector{
-        .expr = std::move(similarity_func_call),
-        .alias = nullptr,
-    });
-    return prepared_selectors.size() - 1;
-}
-
-static select_statement::ordering_comparator_type get_similarity_ordering_comparator(std::vector<selection::prepared_selector>& prepared_selectors, uint32_t similarity_column_index) {
-    auto type = expr::type_of(prepared_selectors[similarity_column_index].expr);
-    if (type->get_kind() != abstract_type::kind::float_kind) {
-        seastar::on_internal_error(logger, "Similarity function must return float type.");
+    if (!index_opt) {
+        throw std::runtime_error("No index found.");
    }
-    return [similarity_column_index, type] (const raw::select_statement::result_row_type& r1, const raw::select_statement::result_row_type& r2) {
-        auto& c1 = r1[similarity_column_index];
-        auto& c2 = r2[similarity_column_index];
-        auto f1 = c1 ? value_cast<float>(type->deserialize(*c1)) : std::numeric_limits<float>::quiet_NaN();
-        auto f2 = c2 ? value_cast<float>(type->deserialize(*c2)) : std::numeric_limits<float>::quiet_NaN();
-        if (std::isfinite(f1) && std::isfinite(f2)) {
-            return f1 > f2;
-        }
-        return std::isfinite(f1);
-    };
-}
-
-::shared_ptr<cql3::statements::select_statement> vector_indexed_table_select_statement::prepare(data_dictionary::database db, schema_ptr schema,
-        uint32_t bound_terms, lw_shared_ptr<const parameters> parameters, ::shared_ptr<selection::selection> selection,
-        ::shared_ptr<restrictions::statement_restrictions> restrictions, ::shared_ptr<std::vector<size_t>> group_by_cell_indices, bool is_reversed,
-        ordering_comparator_type ordering_comparator, prepared_ann_ordering_type prepared_ann_ordering, std::optional<expr::expression> limit,
-        std::optional<expr::expression> per_partition_limit, cql_stats& stats, const secondary_index::index& index, std::unique_ptr<attributes> attrs) {
-
-    auto prepared_filter = vector_search::prepare_filter(*restrictions, parameters->allow_filtering());

    return ::make_shared<cql3::statements::vector_indexed_table_select_statement>(schema, bound_terms, parameters, std::move(selection), std::move(restrictions),
            std::move(group_by_cell_indices), is_reversed, std::move(ordering_comparator), std::move(prepared_ann_ordering), std::move(limit),
-            std::move(per_partition_limit), stats, index, std::move(prepared_filter), std::move(attrs));
+            std::move(per_partition_limit), stats, *index_opt, std::move(attrs));
 }

 vector_indexed_table_select_statement::vector_indexed_table_select_statement(schema_ptr schema, uint32_t bound_terms, lw_shared_ptr<const parameters> parameters,
        ::shared_ptr<selection::selection> selection, ::shared_ptr<const restrictions::statement_restrictions> restrictions,
        ::shared_ptr<std::vector<size_t>> group_by_cell_indices, bool is_reversed, ordering_comparator_type ordering_comparator,
        prepared_ann_ordering_type prepared_ann_ordering, std::optional<expr::expression> limit,
-        std::optional<expr::expression> per_partition_limit, cql_stats& stats, const secondary_index::index& index,
-        vector_search::prepared_filter prepared_filter, std::unique_ptr<attributes> attrs)
+        std::optional<expr::expression> per_partition_limit, cql_stats& stats, const secondary_index::index& index, std::unique_ptr<attributes> attrs)
    : select_statement{schema, bound_terms, parameters, selection, restrictions, group_by_cell_indices, is_reversed, ordering_comparator, limit,
              per_partition_limit, stats, std::move(attrs)}
    , _index{index}
-    , _prepared_ann_ordering(std::move(prepared_ann_ordering))
-    , _prepared_filter(std::move(prepared_filter)) {
+    , _prepared_ann_ordering(std::move(prepared_ann_ordering)) {

    if (!limit.has_value()) {
        throw exceptions::invalid_request_exception("Vector ANN queries must have a limit specified");
@@ -2130,19 +2032,13 @@ future<shared_ptr<cql_transport::messages::result_message>> vector_indexed_table

        auto timeout = db::timeout_clock::now() + get_timeout(state.get_client_state(), options);
        auto aoe = abort_on_expiry(timeout);
-        auto filter_json = _prepared_filter.to_json(options);
-        uint64_t fetch = static_cast<uint64_t>(std::ceil(limit * secondary_index::vector_index::get_oversampling(_index.metadata().options())));
        auto pkeys = co_await qp.vector_store_client().ann(
-                _schema->ks_name(), _index.metadata().name(), _schema, get_ann_ordering_vector(options), fetch, filter_json, aoe.abort_source());
+                _schema->ks_name(), _index.metadata().name(), _schema, get_ann_ordering_vector(options), limit, aoe.abort_source());
        if (!pkeys.has_value()) {
            co_await coroutine::return_exception(
                    exceptions::invalid_request_exception(std::visit(vector_search::vector_store_client::ann_error_visitor{}, pkeys.error())));
        }

-        if (pkeys->size() > limit && !secondary_index::vector_index::is_rescoring_enabled(_index.metadata().options())) {
-            pkeys->erase(pkeys->begin() + limit, pkeys->end());
-        }
-
        co_return co_await query_base_table(qp, state, options, pkeys.value(), timeout);
    });

@@ -2159,11 +2055,11 @@ void vector_indexed_table_select_statement::update_stats() const {
 }

 lw_shared_ptr<query::read_command> vector_indexed_table_select_statement::prepare_command_for_base_query(
-        query_processor& qp, service::query_state& state, const query_options& options, uint64_t fetch_limit) const {
+        query_processor& qp, service::query_state& state, const query_options& options) const {
    auto slice = make_partition_slice(options);
    return ::make_lw_shared<query::read_command>(_schema->id(), _schema->version(), std::move(slice), qp.proxy().get_max_result_size(slice),
            query::tombstone_limit(qp.proxy().get_tombstone_limit()),
-            query::row_limit(get_inner_loop_limit(fetch_limit, _selection->is_aggregate())), query::partition_limit(query::max_partitions),
+            query::row_limit(get_inner_loop_limit(get_limit(options, _limit), _selection->is_aggregate())), query::partition_limit(query::max_partitions),
            _query_start_time_point, tracing::make_trace_info(state.get_trace_state()), query_id::create_null_id(), query::is_first_page::no,
            options.get_timestamp(state));
 }
@@ -2181,7 +2077,7 @@ std::vector<float> vector_indexed_table_select_statement::get_ann_ordering_vecto
 future<::shared_ptr<cql_transport::messages::result_message>> vector_indexed_table_select_statement::query_base_table(query_processor& qp,
        service::query_state& state, const query_options& options, const std::vector<vector_search::primary_key>& pkeys,
        lowres_clock::time_point timeout) const {
-    auto command = prepare_command_for_base_query(qp, state, options, pkeys.size());
+    auto command = prepare_command_for_base_query(qp, state, options);

    // For tables without clustering columns, we can optimize by querying
    // partition ranges instead of individual primary keys, since the
@@ -2220,7 +2116,6 @@ future<::shared_ptr<cql_transport::messages::result_message>> vector_indexed_tab
            query::result_merger{command->get_row_limit(), query::max_partitions});

    co_return co_await wrap_result_to_error_message([this, &command, &options](auto result) {
-        command->set_row_limit(get_limit(options, _limit));
        return process_results(std::move(result), command, options, _query_start_time_point);
    })(std::move(result));
 }
@@ -2234,7 +2129,6 @@ future<::shared_ptr<cql_transport::messages::result_message>> vector_indexed_tab
                    {timeout, state.get_permit(), state.get_client_state(), state.get_trace_state(), {}, {}, options.get_specific_options().node_local_only},
                    std::nullopt)
            .then(wrap_result_to_error_message([this, &options, command](service::storage_proxy::coordinator_query_result qr) {
-                command->set_row_limit(get_limit(options, _limit));
                return this->process_results(std::move(qr.query_result), command, options, _query_start_time_point);
            }));
 }
@@ -2329,41 +2223,32 @@ std::unique_ptr<prepared_statement> select_statement::prepare(data_dictionary::d

    prepared_selectors = maybe_jsonize_select_clause(std::move(prepared_selectors), db, schema);

-    std::optional<ann_ordering_info> ann_ordering_info_opt = get_ann_ordering_info(db, schema, _parameters, ctx);
-    bool is_ann_query = ann_ordering_info_opt.has_value();
+    auto aggregation_depth = 0u;

-    if (prepared_selectors.empty() && (!_group_by_columns.empty() || (is_ann_query && ann_ordering_info_opt->is_rescoring_enabled))) {
-        // We have a "SELECT * GROUP BY" or "SELECT * ORDER BY ANN" with rescoring enabled. If we leave prepared_selectors
-        // empty, below we choose selection::wildcard() for SELECT *, and either:
-        //  - forget to do the "levellize" trick needed for the GROUP BY. See #16531.
-        //  - forget to add the similarity function needed for ORDER BY ANN with rescoring. See below.
-        // So we need to set prepared_selectors. 
-        auto all_columns = selection::selection::wildcard_columns(schema);
-        std::vector<::shared_ptr<selection::raw_selector>> select_all;
-        select_all.reserve(all_columns.size());
-        for (const column_definition *cdef : all_columns) {
-            auto name = ::make_shared<cql3::column_identifier::raw>(cdef->name_as_text(), true);
-            select_all.push_back(::make_shared<selection::raw_selector>(
-                expr::unresolved_identifier(std::move(name)), nullptr));
+    // Force aggregation if GROUP BY is used. This will wrap every column x as first(x).
+    if (!_group_by_columns.empty()) {
+        aggregation_depth = std::max(aggregation_depth, 1u);
+        if (prepared_selectors.empty()) {
+            // We have a "SELECT * GROUP BY". If we leave prepared_selectors
+            // empty, below we choose selection::wildcard() for SELECT *, and
+            // forget to do the "levellize" trick needed for the GROUP BY.
+            // So we need to set prepared_selectors. See #16531.
+            auto all_columns = selection::selection::wildcard_columns(schema);
+            std::vector<::shared_ptr<selection::raw_selector>> select_all;
+            select_all.reserve(all_columns.size());
+            for (const column_definition *cdef : all_columns) {
+                auto name = ::make_shared<cql3::column_identifier::raw>(cdef->name_as_text(), true);
+                select_all.push_back(::make_shared<selection::raw_selector>(
+                    expr::unresolved_identifier(std::move(name)), nullptr));
+            }
+            prepared_selectors = selection::raw_selector::to_prepared_selectors(select_all, *schema, db, keyspace());
        }
-        prepared_selectors = selection::raw_selector::to_prepared_selectors(select_all, *schema, db, keyspace());
    }

    for (auto& ps : prepared_selectors) {
        expr::fill_prepare_context(ps.expr, ctx);
    }

-    // Force aggregation if GROUP BY is used. This will wrap every column x as first(x).
-    auto aggregation_depth = _group_by_columns.empty() ? 0u : 1u;
-
-    select_statement::ordering_comparator_type ordering_comparator;
-    bool hide_last_column = false;
-    if (is_ann_query && ann_ordering_info_opt->is_rescoring_enabled) {
-        uint32_t similarity_column_index = add_similarity_function_to_selectors(prepared_selectors, *ann_ordering_info_opt, db, schema);
-        hide_last_column = true;
-        ordering_comparator = get_similarity_ordering_comparator(prepared_selectors, similarity_column_index);
-    }
-
    for (auto& ps : prepared_selectors) {
        aggregation_depth = std::max(aggregation_depth, expr::aggregation_depth(ps.expr));
    }
@@ -2381,11 +2266,6 @@ std::unique_ptr<prepared_statement> select_statement::prepare(data_dictionary::d
                     ? selection::selection::wildcard(schema)
                     : selection::selection::from_selectors(db, schema, keyspace(), levellized_prepared_selectors);

-    if (is_ann_query && hide_last_column) {
-        // Hide the similarity selector from the client by reducing column_count
-        selection->get_result_metadata()->hide_last_column();
-    }
-
    // Cassandra 5.0.2 disallows PER PARTITION LIMIT with aggregate queries
    // but only if GROUP BY is not used.
    // See #9879 for more details.
@@ -2393,6 +2273,8 @@ std::unique_ptr<prepared_statement> select_statement::prepare(data_dictionary::d
        throw exceptions::invalid_request_exception("PER PARTITION LIMIT is not allowed with aggregate queries.");
    }

+    bool is_ann_query = !_parameters->orderings().empty() && std::holds_alternative<select_statement::ann_vector>(_parameters->orderings().front().second);
+
    auto restrictions = prepare_restrictions(db, schema, ctx, selection, for_view, _parameters->allow_filtering() || is_ann_query,
            restrictions::check_indexes(!_parameters->is_mutation_fragments()));

@@ -2400,14 +2282,19 @@ std::unique_ptr<prepared_statement> select_statement::prepare(data_dictionary::d
        validate_distinct_selection(*schema, *selection, *restrictions);
    }

+    select_statement::ordering_comparator_type ordering_comparator;
    bool is_reversed_ = false;

+    std::optional<prepared_ann_ordering_type> prepared_ann_ordering;
+
    auto orderings = _parameters->orderings();

-    if (!orderings.empty() && !is_ann_query) {
+    if (!orderings.empty()) {
        std::visit([&](auto&& ordering) {
            using T = std::decay_t<decltype(ordering)>;
-            if constexpr (!std::is_same_v<T, select_statement::ann_vector>) {
+            if constexpr (std::is_same_v<T, select_statement::ann_vector>) {
+                prepared_ann_ordering = prepare_ann_ordering(*schema, ctx, db);
+            } else {
                SCYLLA_ASSERT(!for_view);
                verify_ordering_is_allowed(*_parameters, *restrictions);
                prepared_orderings_type prepared_orderings = prepare_orderings(*schema);
@@ -2420,7 +2307,7 @@ std::unique_ptr<prepared_statement> select_statement::prepare(data_dictionary::d
    }

    std::vector<sstring> warnings;
-    if (!is_ann_query) {
+    if (!prepared_ann_ordering.has_value()) {
        check_needs_filtering(*restrictions, db.get_config().strict_allow_filtering(), warnings);
        ensure_filtering_columns_retrieval(db, *selection, *restrictions);
    }
@@ -2474,21 +2361,7 @@ std::unique_ptr<prepared_statement> select_statement::prepare(data_dictionary::d
                && restrictions->partition_key_restrictions_size() == schema->partition_key_size());
    };

-    if (strong_consistency::is_strongly_consistent(db, schema->ks_name())) {
-        stmt = ::make_shared<strong_consistency::select_statement>(
-                schema,
-                ctx.bound_variables_size(),
-                _parameters,
-                std::move(selection),
-                std::move(restrictions),
-                std::move(group_by_cell_indices),
-                is_reversed_,
-                std::move(ordering_comparator),
-                prepare_limit(db, ctx, _limit),
-                prepare_limit(db, ctx, _per_partition_limit),
-                stats,
-                std::move(prepared_attrs));
-    } else if (_parameters->is_prune_materialized_view()) {
+    if (_parameters->is_prune_materialized_view()) {
        stmt = ::make_shared<cql3::statements::prune_materialized_view_statement>(
                schema,
                ctx.bound_variables_size(),
@@ -2517,10 +2390,10 @@ std::unique_ptr<prepared_statement> select_statement::prepare(data_dictionary::d
                prepare_limit(db, ctx, _per_partition_limit),
                stats,
                std::move(prepared_attrs));
-    } else if (is_ann_query) {
+    } else if (prepared_ann_ordering) {
        stmt = vector_indexed_table_select_statement::prepare(db, schema, ctx.bound_variables_size(), _parameters, std::move(selection), std::move(restrictions),
-                std::move(group_by_cell_indices), is_reversed_, std::move(ordering_comparator), std::move(ann_ordering_info_opt->_prepared_ann_ordering),
-                prepare_limit(db, ctx, _limit), prepare_limit(db, ctx, _per_partition_limit), stats, ann_ordering_info_opt->_index, std::move(prepared_attrs));
+                std::move(group_by_cell_indices), is_reversed_, std::move(ordering_comparator), std::move(*prepared_ann_ordering),
+                prepare_limit(db, ctx, _limit), prepare_limit(db, ctx, _per_partition_limit), stats, std::move(prepared_attrs));
    } else if (restrictions->uses_secondary_indexing()) {
        stmt = view_indexed_table_select_statement::prepare(
                db,
@@ -2552,7 +2425,7 @@ std::unique_ptr<prepared_statement> select_statement::prepare(data_dictionary::d
            std::move(prepared_attrs)
        );
    } else if (service::broadcast_tables::is_broadcast_table_statement(keyspace(), column_family())) {
-        stmt = ::make_shared<cql3::statements::broadcast_select_statement>(
+        stmt = ::make_shared<cql3::statements::strongly_consistent_select_statement>(
                schema,
                ctx.bound_variables_size(),
                _parameters,
@@ -2742,6 +2615,28 @@ void select_statement::verify_ordering_is_valid(const prepared_orderings_type& o
    }
 }

+// Prepares the ANN ordering from the first entry of _parameters->orderings(): resolves the ordered column in `schema` and prepares the query vector against that column's specification.
+// Returns the (column definition, prepared expression) pair; throws invalid_request_exception if the column is undefined or is not a vector whose elements are of float_kind.
+select_statement::prepared_ann_ordering_type select_statement::prepare_ann_ordering(const schema& schema, prepare_context& ctx, data_dictionary::database db) const {
+    auto [column_id, ordering] = _parameters->orderings().front(); // local copy; `ann_vector` below points into `ordering`
+    auto* const ann_vector = std::get_if<select_statement::ann_vector>(&ordering);
+    SCYLLA_ASSERT(ann_vector);
+    ::shared_ptr<column_identifier> column = column_id->prepare_column_identifier(schema);
+    const column_definition* def = schema.get_column_definition(column->name());
+    if (!def) {
+        throw exceptions::invalid_request_exception(
+                fmt::format("Undefined column name {}", column->text()));
+    }
+    // is_vector() is tested first, so the static_cast to vector_type_impl is only evaluated for vector types.
+    if (!def->type->is_vector() || static_cast<const vector_type_impl*>(def->type.get())->get_elements_type()->get_kind() != abstract_type::kind::float_kind) {
+        throw exceptions::invalid_request_exception("ANN ordering is only supported on float vector indexes");
+    }
+    auto e = expr::prepare_expression(*ann_vector, db, keyspace(), nullptr, def->column_specification);
+    expr::fill_prepare_context(e, ctx);
+
+    return std::make_pair(def, std::move(e));
+}
+
 select_statement::ordering_comparator_type select_statement::get_ordering_comparator(const prepared_orderings_type& orderings,
    selection::selection& selection,
    const restrictions::statement_restrictions& restrictions) {
--- a/cql3/statements/select_statement.hh
+++ b/cql3/statements/select_statement.hh
@@ -22,7 +22,6 @@
 #include "locator/host_id.hh"
 #include "service/cas_shard.hh"
 #include "vector_search/vector_store_client.hh"
-#include "vector_search/filter.hh"

 namespace service {
    class client_state;
@@ -363,7 +362,6 @@ private:
 class vector_indexed_table_select_statement : public select_statement {
    secondary_index::index _index;
    prepared_ann_ordering_type _prepared_ann_ordering;
-    vector_search::prepared_filter _prepared_filter;
    mutable gc_clock::time_point _query_start_time_point;

 public:
@@ -373,13 +371,13 @@ public:
            lw_shared_ptr<const parameters> parameters, ::shared_ptr<selection::selection> selection,
            ::shared_ptr<restrictions::statement_restrictions> restrictions, ::shared_ptr<std::vector<size_t>> group_by_cell_indices, bool is_reversed,
            ordering_comparator_type ordering_comparator, prepared_ann_ordering_type prepared_ann_ordering, std::optional<expr::expression> limit,
-            std::optional<expr::expression> per_partition_limit, cql_stats& stats, const secondary_index::index& index, std::unique_ptr<cql3::attributes> attrs);
+            std::optional<expr::expression> per_partition_limit, cql_stats& stats, std::unique_ptr<cql3::attributes> attrs);

    vector_indexed_table_select_statement(schema_ptr schema, uint32_t bound_terms, lw_shared_ptr<const parameters> parameters,
            ::shared_ptr<selection::selection> selection, ::shared_ptr<const restrictions::statement_restrictions> restrictions,
            ::shared_ptr<std::vector<size_t>> group_by_cell_indices, bool is_reversed, ordering_comparator_type ordering_comparator,
            prepared_ann_ordering_type prepared_ann_ordering, std::optional<expr::expression> limit, std::optional<expr::expression> per_partition_limit,
-            cql_stats& stats, const secondary_index::index& index, vector_search::prepared_filter prepared_filter, std::unique_ptr<cql3::attributes> attrs);
+            cql_stats& stats, const secondary_index::index& index, std::unique_ptr<cql3::attributes> attrs);

 private:
    future<::shared_ptr<cql_transport::messages::result_message>> do_execute(
@@ -387,7 +385,7 @@ private:

    void update_stats() const;

-    lw_shared_ptr<query::read_command> prepare_command_for_base_query(query_processor& qp, service::query_state& state, const query_options& options, uint64_t fetch_limit) const;
+    lw_shared_ptr<query::read_command> prepare_command_for_base_query(query_processor& qp, service::query_state& state, const query_options& options) const;

    std::vector<float> get_ann_ordering_vector(const query_options& options) const;

--- a/cql3/statements/strong_consistency/modification_statement.cc
+++ b/cql3/statements/strong_consistency/modification_statement.cc
@@ -1,82 +0,0 @@
-/*
- * Copyright (C) 2025-present ScyllaDB
- */
-
-/*
- * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
- */
-
-#include "modification_statement.hh"
-
-#include "transport/messages/result_message.hh"
-#include "cql3/query_processor.hh"
-#include "service/strong_consistency/coordinator.hh"
-#include "cql3/statements/strong_consistency/statement_helpers.hh"
-
-namespace cql3::statements::strong_consistency {
-static logging::logger logger("sc_modification_statement");
-
-modification_statement::modification_statement(shared_ptr<base_statement> statement)
-    : cql_statement_opt_metadata(&timeout_config::write_timeout)
-    , _statement(std::move(statement))
-{
-}
-
-using result_message = cql_transport::messages::result_message;
-
-future<shared_ptr<result_message>> modification_statement::execute(query_processor& qp, service::query_state& qs, 
-    const query_options& options, std::optional<service::group0_guard> guard) const
-{
-    return execute_without_checking_exception_message(qp, qs, options, std::move(guard))
-            .then(cql_transport::messages::propagate_exception_as_future<shared_ptr<result_message>>);
-}
-
-future<shared_ptr<result_message>> modification_statement::execute_without_checking_exception_message(
-        query_processor& qp, service::query_state& qs, const query_options& options,
-        std::optional<service::group0_guard> guard) const
-{
-    auto json_cache = base_statement::json_cache_opt{};
-    const auto keys = _statement->build_partition_keys(options, json_cache);
-    if (keys.size() != 1 || !query::is_single_partition(keys[0])) {
-        throw exceptions::invalid_request_exception("Strongly consistent queries can only target a single partition");
-    }
-    if (_statement->requires_read()) {
-        throw exceptions::invalid_request_exception("Strongly consistent updates don't support data prefetch");
-    }
-
-    auto [coordinator, holder] = qp.acquire_strongly_consistent_coordinator();
-    const auto mutate_result = co_await coordinator.get().mutate(_statement->s,
-        keys[0].start()->value().token(),
-        [&](api::timestamp_type ts) {
-            const auto prefetch_data = update_parameters::prefetch_data(_statement->s);
-            const auto ttl = _statement->get_time_to_live(options);
-            const auto params = update_parameters(_statement->s, options, ts, ttl, prefetch_data);
-            const auto ranges = _statement->create_clustering_ranges(options, json_cache);
-            auto muts = _statement->apply_updates(keys, ranges, params, json_cache);
-            if (muts.size() != 1) {
-                on_internal_error(logger, ::format("statement '{}' has unexpected number of mutations {}",
-                    raw_cql_statement, muts.size()));
-            }
-            return std::move(*muts.begin());
-        });
-
-    using namespace service::strong_consistency;
-    if (const auto* redirect = get_if<need_redirect>(&mutate_result)) {
-        co_return co_await redirect_statement(qp, options, redirect->target);
-    }
-
-    co_return seastar::make_shared<result_message::void_message>();
-}
-
-future<> modification_statement::check_access(query_processor& qp, const service::client_state& state) const {
-    return _statement->check_access(qp, state);
-}
-
-uint32_t modification_statement::get_bound_terms() const {
-    return _statement->get_bound_terms();
-}
-
-bool modification_statement::depends_on(std::string_view ks_name, std::optional<std::string_view> cf_name) const {
-    return _statement->depends_on(ks_name, cf_name);
-}
-}
--- a/cql3/statements/strong_consistency/modification_statement.hh
+++ b/cql3/statements/strong_consistency/modification_statement.hh
@@ -1,39 +0,0 @@
-/*
- * Copyright (C) 2025-present ScyllaDB
- */
-
-/*
- * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
- */
-
-#pragma once
-
-#include "cql3/cql_statement.hh"
-#include "cql3/expr/expression.hh"
-#include "cql3/statements/modification_statement.hh"
-
-namespace cql3::statements::strong_consistency {
-
-class modification_statement : public cql_statement_opt_metadata {
-    using result_message = cql_transport::messages::result_message;
-    using base_statement = cql3::statements::modification_statement;
-
-    shared_ptr<base_statement> _statement;
-public:
-    modification_statement(shared_ptr<base_statement> statement);
-
-    future<shared_ptr<result_message>> execute(query_processor& qp, service::query_state& state,
-        const query_options& options, std::optional<service::group0_guard> guard) const override;
-
-    future<shared_ptr<result_message>> execute_without_checking_exception_message(query_processor& qp,
-        service::query_state& qs, const query_options& options,
-        std::optional<service::group0_guard> guard) const override;
-
-    future<> check_access(query_processor& qp, const service::client_state& state) const override;
-
-    uint32_t get_bound_terms() const override;
-
-    bool depends_on(std::string_view ks_name, std::optional<std::string_view> cf_name) const override;
-};
-
-}
--- a/cql3/statements/strong_consistency/select_statement.cc
+++ b/cql3/statements/strong_consistency/select_statement.cc
@@ -1,56 +0,0 @@
-/*
- * Copyright (C) 2025-present ScyllaDB
- */
-
-/*
- * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
- */
-
-#include "select_statement.hh"
-
-#include "query/query-request.hh"
-#include "cql3/query_processor.hh"
-#include "service/strong_consistency/coordinator.hh"
-#include "cql3/statements/strong_consistency/statement_helpers.hh"
-
-namespace cql3::statements::strong_consistency {
-
-using result_message = cql_transport::messages::result_message;
-
-future<::shared_ptr<result_message>> select_statement::do_execute(query_processor& qp,
-        service::query_state& state, 
-        const query_options& options) const
-{
-    const auto key_ranges = _restrictions->get_partition_key_ranges(options);
-    if (key_ranges.size() != 1 || !query::is_single_partition(key_ranges[0])) {
-        throw exceptions::invalid_request_exception("Strongly consistent queries can only target a single partition");
-    }
-    const auto now = gc_clock::now();
-    auto read_command = make_lw_shared<query::read_command>(
-        _query_schema->id(),
-        _query_schema->version(),
-        make_partition_slice(options),
-        query::max_result_size(query::result_memory_limiter::maximum_result_size),
-        query::tombstone_limit(query::tombstone_limit::max),
-        query::row_limit(get_inner_loop_limit(get_limit(options, _limit), _selection->is_aggregate())),
-        query::partition_limit(query::max_partitions),
-        now,
-        tracing::make_trace_info(state.get_trace_state()),
-        query_id::create_null_id(),
-        query::is_first_page::no,
-        options.get_timestamp(state));
-    const auto timeout = db::timeout_clock::now() + get_timeout(state.get_client_state(), options);
-    auto [coordinator, holder] = qp.acquire_strongly_consistent_coordinator();
-    auto query_result = co_await coordinator.get().query(_query_schema, *read_command,
-        key_ranges, state.get_trace_state(), timeout);
-
-    using namespace service::strong_consistency;
-    if (const auto* redirect = get_if<need_redirect>(&query_result)) {
-        co_return co_await redirect_statement(qp, options, redirect->target);
-    }
-
-    co_return co_await process_results(get<lw_shared_ptr<query::result>>(std::move(query_result)),
-        read_command, options, now);
-}
-
-}
--- a/cql3/statements/strong_consistency/select_statement.hh
+++ b/cql3/statements/strong_consistency/select_statement.hh
@@ -1,26 +0,0 @@
-/*
- * Copyright (C) 2025-present ScyllaDB
- */
-
-/*
- * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
- */
-
-#pragma once
-
-#include "cql3/cql_statement.hh"
-#include "cql3/statements/select_statement.hh"
-
-namespace cql3::statements::strong_consistency {
-
-class select_statement : public cql3::statements::select_statement {
-    using result_message = cql_transport::messages::result_message;
-
-public:
-    using cql3::statements::select_statement::select_statement;
-
-    future<::shared_ptr<cql_transport::messages::result_message>> do_execute(query_processor& qp,
-        service::query_state& state, const query_options& options) const override;
-};
-
-}
--- a/cql3/statements/strong_consistency/statement_helpers.cc
+++ b/cql3/statements/strong_consistency/statement_helpers.cc
@@ -1,37 +0,0 @@
-/*
- * Copyright (C) 2025-present ScyllaDB
- */
-
-/*
- * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
- */
-
-#include "statement_helpers.hh"
-
-#include "transport/messages/result_message_base.hh"
-#include "cql3/query_processor.hh"
-#include "replica/database.hh"
-#include "locator/tablet_replication_strategy.hh"
-
-namespace cql3::statements::strong_consistency {
-future<::shared_ptr<cql_transport::messages::result_message>> redirect_statement(query_processor& qp,
-        const query_options& options,
-        const locator::tablet_replica& target)
-{
-    const auto my_host_id = qp.db().real_database().get_token_metadata().get_topology().my_host_id();
-    if (target.host != my_host_id) {
-        throw exceptions::invalid_request_exception(format(
-            "Strongly consistent writes can be executed only on the leader node, "
-            "leader id {}, current host id {}",
-            target.host, my_host_id));
-    }
-    auto&& func_values_cache = const_cast<cql3::query_options&>(options).take_cached_pk_function_calls();
-    co_return qp.bounce_to_shard(target.shard, std::move(func_values_cache));
-}
-
-bool is_strongly_consistent(data_dictionary::database db, std::string_view ks_name) {
-    const auto* tablet_aware_rs = db.find_keyspace(ks_name).get_replication_strategy().maybe_as_tablet_aware();
-    return tablet_aware_rs && tablet_aware_rs->get_consistency() != data_dictionary::consistency_config_option::eventual;
-}
-
-}
--- a/cql3/statements/strong_consistency/statement_helpers.hh
+++ b/cql3/statements/strong_consistency/statement_helpers.hh
@@ -1,23 +0,0 @@
-/*
- * Copyright (C) 2025-present ScyllaDB
- */
-
-/*
- * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
- */
-
-#pragma once
-
-#include "cql3/cql_statement.hh"
-#include "locator/tablets.hh"
-
-namespace cql3::statements::strong_consistency {
-
-future<::shared_ptr<cql_transport::messages::result_message>> redirect_statement(
-    query_processor& qp,
-    const query_options& options,
-    const locator::tablet_replica& target);
-
-bool is_strongly_consistent(data_dictionary::database db, std::string_view ks_name);
-
-}
--- a/cql3/statements/strongly_consistent_modification_statement.cc
+++ b/cql3/statements/strongly_consistent_modification_statement.cc
@@ -9,7 +9,7 @@
 */


-#include "cql3/statements/broadcast_modification_statement.hh"
+#include "cql3/statements/strongly_consistent_modification_statement.hh"

 #include <optional>

@@ -28,11 +28,11 @@

 namespace cql3 {

-static logging::logger logger("broadcast_modification_statement");
+static logging::logger logger("strongly_consistent_modification_statement");

 namespace statements {

-broadcast_modification_statement::broadcast_modification_statement(
+strongly_consistent_modification_statement::strongly_consistent_modification_statement(
    uint32_t bound_terms,
    schema_ptr schema,
    broadcast_tables::prepared_update query)
@@ -43,7 +43,7 @@ broadcast_modification_statement::broadcast_modification_statement(
 { }

 future<::shared_ptr<cql_transport::messages::result_message>>
-broadcast_modification_statement::execute(query_processor& qp, service::query_state& qs, const query_options& options, std::optional<service::group0_guard> guard) const {
+strongly_consistent_modification_statement::execute(query_processor& qp, service::query_state& qs, const query_options& options, std::optional<service::group0_guard> guard) const {
    return execute_without_checking_exception_message(qp, qs, options, std::move(guard))
            .then(cql_transport::messages::propagate_exception_as_future<shared_ptr<cql_transport::messages::result_message>>);
 }
@@ -63,7 +63,7 @@ evaluate_prepared(
 }

 future<::shared_ptr<cql_transport::messages::result_message>>
-broadcast_modification_statement::execute_without_checking_exception_message(query_processor& qp, service::query_state& qs, const query_options& options, std::optional<service::group0_guard> guard) const {
+strongly_consistent_modification_statement::execute_without_checking_exception_message(query_processor& qp, service::query_state& qs, const query_options& options, std::optional<service::group0_guard> guard) const {
    if (this_shard_id() != 0) {
        co_return ::make_shared<cql_transport::messages::result_message::bounce_to_shard>(0, cql3::computed_function_values{});
    }
@@ -103,11 +103,11 @@ broadcast_modification_statement::execute_without_checking_exception_message(que
    ), result);
 }

-uint32_t broadcast_modification_statement::get_bound_terms() const {
+uint32_t strongly_consistent_modification_statement::get_bound_terms() const {
    return _bound_terms;
 }

-future<> broadcast_modification_statement::check_access(query_processor& qp, const service::client_state& state) const {
+future<> strongly_consistent_modification_statement::check_access(query_processor& qp, const service::client_state& state) const {
    auto f = state.has_column_family_access(_schema->ks_name(), _schema->cf_name(), auth::permission::MODIFY);
    if (_query.value_condition.has_value()) {
        f = f.then([this, &state] {
@@ -117,7 +117,7 @@ future<> broadcast_modification_statement::check_access(query_processor& qp, con
    return f;
 }

-bool broadcast_modification_statement::depends_on(std::string_view ks_name, std::optional<std::string_view> cf_name) const {
+bool strongly_consistent_modification_statement::depends_on(std::string_view ks_name, std::optional<std::string_view> cf_name) const {
    return _schema->ks_name() == ks_name && (!cf_name || _schema->cf_name() == *cf_name);
 }

--- a/cql3/statements/strongly_consistent_modification_statement.hh
+++ b/cql3/statements/strongly_consistent_modification_statement.hh
@@ -27,13 +27,13 @@ struct prepared_update {

 }

-class broadcast_modification_statement : public cql_statement_opt_metadata {
+class strongly_consistent_modification_statement : public cql_statement_opt_metadata {
    const uint32_t _bound_terms;
    const schema_ptr _schema;
    const broadcast_tables::prepared_update _query;

 public:
-    broadcast_modification_statement(uint32_t bound_terms, schema_ptr schema, broadcast_tables::prepared_update query);
+    strongly_consistent_modification_statement(uint32_t bound_terms, schema_ptr schema, broadcast_tables::prepared_update query);

    virtual future<::shared_ptr<cql_transport::messages::result_message>>
    execute(query_processor& qp, service::query_state& qs, const query_options& options, std::optional<service::group0_guard> guard) const override;
--- a/cql3/statements/strongly_consistent_select_statement.cc
+++ b/cql3/statements/strongly_consistent_select_statement.cc
@@ -9,7 +9,7 @@
 */


-#include "cql3/statements/broadcast_select_statement.hh"
+#include "cql3/statements/strongly_consistent_select_statement.hh"

 #include <seastar/core/future.hh>
 #include <seastar/core/on_internal_error.hh>
@@ -24,7 +24,7 @@ namespace cql3 {

 namespace statements {

-static logging::logger logger("broadcast_select_statement");
+static logging::logger logger("strongly_consistent_select_statement");

 static
 expr::expression get_key(const cql3::expr::expression& partition_key_restrictions) {
@@ -58,7 +58,7 @@ bool is_selecting_only_value(const cql3::selection::selection& selection) {
           selection.get_columns()[0]->name() == "value";
 }

-broadcast_select_statement::broadcast_select_statement(schema_ptr schema, uint32_t bound_terms,
+strongly_consistent_select_statement::strongly_consistent_select_statement(schema_ptr schema, uint32_t bound_terms,
                                                                           lw_shared_ptr<const parameters> parameters,
                                                                           ::shared_ptr<selection::selection> selection,
                                                                           ::shared_ptr<const restrictions::statement_restrictions> restrictions,
@@ -73,7 +73,7 @@ broadcast_select_statement::broadcast_select_statement(schema_ptr schema, uint32
      _query{prepare_query()}
 { }

-broadcast_tables::prepared_select broadcast_select_statement::prepare_query() const {
+broadcast_tables::prepared_select strongly_consistent_select_statement::prepare_query() const {
    if (!is_selecting_only_value(*_selection)) {
        throw service::broadcast_tables::unsupported_operation_error("only 'value' selector is allowed");
    }
@@ -94,7 +94,7 @@ evaluate_prepared(
 }

 future<::shared_ptr<cql_transport::messages::result_message>>
-broadcast_select_statement::execute_without_checking_exception_message(query_processor& qp, service::query_state& qs, const query_options& options, std::optional<service::group0_guard> guard) const {
+strongly_consistent_select_statement::execute_without_checking_exception_message(query_processor& qp, service::query_state& qs, const query_options& options, std::optional<service::group0_guard> guard) const {
    if (this_shard_id() != 0) {
        co_return ::make_shared<cql_transport::messages::result_message::bounce_to_shard>(0, cql3::computed_function_values{});
    }
--- a/cql3/statements/strongly_consistent_select_statement.hh
+++ b/cql3/statements/strongly_consistent_select_statement.hh
@@ -25,12 +25,12 @@ struct prepared_select {

 }

-class broadcast_select_statement : public select_statement {
+class strongly_consistent_select_statement : public select_statement {
    const broadcast_tables::prepared_select _query;

    broadcast_tables::prepared_select prepare_query() const;
 public:
-    broadcast_select_statement(schema_ptr schema,
+    strongly_consistent_select_statement(schema_ptr schema,
                     uint32_t bound_terms,
                     lw_shared_ptr<const parameters> parameters,
                     ::shared_ptr<selection::selection> selection,
--- a/cql3/statements/update_statement.cc
+++ b/cql3/statements/update_statement.cc
@@ -13,7 +13,7 @@
 #include "cql3/expr/expression.hh"
 #include "cql3/expr/evaluate.hh"
 #include "cql3/expr/expr-utils.hh"
-#include "cql3/statements/broadcast_modification_statement.hh"
+#include "cql3/statements/strongly_consistent_modification_statement.hh"
 #include "service/broadcast_tables/experimental/lang.hh"
 #include "raw/update_statement.hh"

@@ -333,7 +333,7 @@ std::optional<expr::expression> get_value_condition(const expr::expression& the_
    return binop->rhs;
 }

-::shared_ptr<broadcast_modification_statement>
+::shared_ptr<strongly_consistent_modification_statement>
 update_statement::prepare_for_broadcast_tables() const {
    if (attrs) {
        if (attrs->is_time_to_live_set()) {
@@ -359,7 +359,7 @@ update_statement::prepare_for_broadcast_tables() const {
        .value_condition = get_value_condition(_condition),
    };

-    return ::make_shared<broadcast_modification_statement>(
+    return ::make_shared<strongly_consistent_modification_statement>(
        get_bound_terms(),
        s,
        query
--- a/cql3/statements/update_statement.hh
+++ b/cql3/statements/update_statement.hh
@@ -45,7 +45,7 @@ private:
    virtual void execute_operations_for_key(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params, const json_cache_opt& json_cache) const;

 public:
-    virtual ::shared_ptr<broadcast_modification_statement> prepare_for_broadcast_tables() const override;
+    virtual ::shared_ptr<strongly_consistent_modification_statement> prepare_for_broadcast_tables() const override;
 };

 /*
--- a/db/batchlog_manager.cc
+++ b/db/batchlog_manager.cc
@@ -55,21 +55,8 @@ int32_t batchlog_shard_of(db_clock::time_point written_at) {
    return hash & ((1ULL << batchlog_shard_bits) - 1);
 }

-bool is_batchlog_v1(const schema& schema) {
-    return schema.cf_name() == system_keyspace::BATCHLOG;
-}
-
 std::pair<partition_key, clustering_key>
 get_batchlog_key(const schema& schema, int32_t version, db::batchlog_stage stage, int32_t batchlog_shard, db_clock::time_point written_at, std::optional<utils::UUID> id) {
-    if (is_batchlog_v1(schema)) {
-        if (!id) {
-            on_internal_error(blogger, "get_batchlog_key(): key for batchlog v1 requires batchlog id");
-        }
-        auto pkey = partition_key::from_single_value(schema, {serialized(*id)});
-        auto ckey = clustering_key::make_empty();
-        return std::pair(std::move(pkey), std::move(ckey));
-    }
-
    auto pkey = partition_key::from_exploded(schema, {serialized(version), serialized(int8_t(stage)), serialized(batchlog_shard)});

    std::vector<bytes> ckey_components;
@@ -98,14 +85,6 @@ mutation get_batchlog_mutation_for(schema_ptr schema, managed_bytes data, int32_
    auto cdef_data = schema->get_column_definition(to_bytes("data"));
    m.set_cell(ckey, *cdef_data, atomic_cell::make_live(*cdef_data->type, timestamp, std::move(data)));

-    if (is_batchlog_v1(*schema)) {
-        auto cdef_version = schema->get_column_definition(to_bytes("version"));
-        m.set_cell(ckey, *cdef_version, atomic_cell::make_live(*cdef_version->type, timestamp, serialized(version)));
-
-        auto cdef_written_at = schema->get_column_definition(to_bytes("written_at"));
-        m.set_cell(ckey, *cdef_written_at, atomic_cell::make_live(*cdef_written_at->type, timestamp, serialized(now)));
-    }
-
    return m;
 }

@@ -143,10 +122,9 @@ mutation get_batchlog_delete_mutation(schema_ptr schema, int32_t version, db_clo
 const std::chrono::seconds db::batchlog_manager::replay_interval;
 const uint32_t db::batchlog_manager::page_size;

-db::batchlog_manager::batchlog_manager(cql3::query_processor& qp, db::system_keyspace& sys_ks, gms::feature_service& fs, batchlog_manager_config config)
+db::batchlog_manager::batchlog_manager(cql3::query_processor& qp, db::system_keyspace& sys_ks, batchlog_manager_config config)
        : _qp(qp)
        , _sys_ks(sys_ks)
-        , _fs(fs)
        , _replay_timeout(config.replay_timeout)
        , _replay_rate(config.replay_rate)
        , _delay(config.delay)
@@ -322,206 +300,149 @@ future<> db::batchlog_manager::maybe_migrate_v1_to_v2() {
    });
 }

-namespace {
-
-using clock_type = db_clock::rep;
-
-struct replay_stats {
-    std::optional<db_clock::time_point> min_too_fresh;
-    bool need_cleanup = false;
-};
-
-} // anonymous namespace
-
-static future<db::all_batches_replayed> process_batch(
-        cql3::query_processor& qp,
-        db::batchlog_manager::stats& stats,
-        db::batchlog_manager::post_replay_cleanup cleanup,
-        utils::rate_limiter& limiter,
-        schema_ptr schema,
-        std::unordered_map<int32_t, replay_stats>& replay_stats_per_shard,
-        const db_clock::time_point now,
-        db_clock::duration replay_timeout,
-        std::chrono::seconds write_timeout,
-        const cql3::untyped_result_set::row& row) {
-    const bool is_v1 = db::is_batchlog_v1(*schema);
-    const auto stage = is_v1 ? db::batchlog_stage::initial : static_cast<db::batchlog_stage>(row.get_as<int8_t>("stage"));
-    const auto batch_shard = is_v1 ? 0 : row.get_as<int32_t>("shard");
-    auto written_at = row.get_as<db_clock::time_point>("written_at");
-    auto id = row.get_as<utils::UUID>("id");
-    // enough time for the actual write + batchlog entry mutation delivery (two separate requests).
-    auto timeout = replay_timeout;
-
-    if (utils::get_local_injector().is_enabled("skip_batch_replay")) {
-        blogger.debug("Skipping batch replay due to skip_batch_replay injection");
-        co_return db::all_batches_replayed::no;
-    }
-
-    auto data = row.get_blob_unfragmented("data");
-
-    blogger.debug("Replaying batch {} from stage {} and batch shard {}", id, int32_t(stage), batch_shard);
-
-    utils::chunked_vector<mutation> mutations;
-    bool send_failed = false;
-
-    auto& shard_written_at = replay_stats_per_shard.try_emplace(batch_shard, replay_stats{}).first->second;
-
-    try {
-        utils::chunked_vector<std::pair<canonical_mutation, schema_ptr>> fms;
-        auto in = ser::as_input_stream(data);
-        while (in.size()) {
-            auto fm = ser::deserialize(in, std::type_identity<canonical_mutation>());
-            const auto tbl = qp.db().try_find_table(fm.column_family_id());
-            if (!tbl) {
-                continue;
-            }
-            if (written_at <= tbl->get_truncation_time()) {
-                continue;
-            }
-            schema_ptr s = tbl->schema();
-            if (s->tombstone_gc_options().mode() == tombstone_gc_mode::repair) {
-                timeout = std::min(timeout, std::chrono::duration_cast<db_clock::duration>(s->tombstone_gc_options().propagation_delay_in_seconds()));
-            }
-            fms.emplace_back(std::move(fm), std::move(s));
-        }
-
-        if (now < written_at + timeout) {
-            blogger.debug("Skipping replay of {}, too fresh", id);
-
-            shard_written_at.min_too_fresh = std::min(shard_written_at.min_too_fresh.value_or(written_at), written_at);
-
-            co_return db::all_batches_replayed::no;
-        }
-
-        auto size = data.size();
-
-        for (const auto& [fm, s] : fms) {
-            mutations.emplace_back(fm.to_mutation(s));
-            co_await coroutine::maybe_yield();
-        }
-
-        if (!mutations.empty()) {
-            const auto ttl = [written_at]() -> clock_type {
-                /*
-                * Calculate ttl for the mutations' hints (and reduce ttl by the time the mutations spent in the batchlog).
-                * This ensures that deletes aren't "undone" by an old batch replay.
-                */
-                auto unadjusted_ttl = std::numeric_limits<gc_clock::rep>::max();
-                warn(unimplemented::cause::HINT);
-#if 0
-                for (auto& m : *mutations) {
-                    unadjustedTTL = Math.min(unadjustedTTL, HintedHandOffManager.calculateHintTTL(mutation));
-                }
-#endif
-                return unadjusted_ttl - std::chrono::duration_cast<gc_clock::duration>(db_clock::now() - written_at).count();
-            }();
-
-            if (ttl > 0) {
-                // Origin does the send manually, however I can't see a super great reason to do so.
-                // Our normal write path does not add much redundancy to the dispatch, and rate is handled after send
-                // in both cases.
-                // FIXME: verify that the above is reasonably true.
-                co_await limiter.reserve(size);
-                stats.write_attempts += mutations.size();
-                auto timeout = db::timeout_clock::now() + write_timeout;
-                if (cleanup) {
-                    co_await qp.proxy().send_batchlog_replay_to_all_replicas(mutations, timeout);
-                } else {
-                    co_await qp.proxy().send_batchlog_replay_to_all_replicas(std::move(mutations), timeout);
-                }
-            }
-        }
-    } catch (data_dictionary::no_such_keyspace& ex) {
-        // should probably ignore and drop the batch
-    } catch (const data_dictionary::no_such_column_family&) {
-        // As above -- we should drop the batch if the table doesn't exist anymore.
-    } catch (...) {
-        blogger.warn("Replay failed (will retry): {}", std::current_exception());
-        // timeout, overload etc.
-        // Do _not_ remove the batch, assuning we got a node write error.
-        // Since we don't have hints (which origin is satisfied with),
-        // we have to resort to keeping this batch to next lap.
-        if (is_v1 || !cleanup || stage == db::batchlog_stage::failed_replay) {
-            co_return db::all_batches_replayed::no;
-        }
-        send_failed = true;
-    }
-
-    auto& sp = qp.proxy();
-
-    if (send_failed) {
-        blogger.debug("Moving batch {} to stage failed_replay", id);
-        auto m = get_batchlog_mutation_for(schema, mutations, netw::messaging_service::current_version, db::batchlog_stage::failed_replay, written_at, id);
-        co_await sp.mutate_locally(m, tracing::trace_state_ptr(), db::commitlog::force_sync::no);
-    }
-
-    // delete batch
-    auto m = get_batchlog_delete_mutation(schema, netw::messaging_service::current_version, stage, written_at, id);
-    co_await qp.proxy().mutate_locally(m, tracing::trace_state_ptr(), db::commitlog::force_sync::no);
-
-    shard_written_at.need_cleanup = true;
-
-    co_return db::all_batches_replayed(!send_failed);
-}
-
-future<db::all_batches_replayed> db::batchlog_manager::replay_all_failed_batches_v1(post_replay_cleanup) {
-    db::all_batches_replayed all_replayed = all_batches_replayed::yes;
-    // rate limit is in bytes per second. Uses Double.MAX_VALUE if disabled (set to 0 in cassandra.yaml).
-    // max rate is scaled by the number of nodes in the cluster (same as for HHOM - see CASSANDRA-5272).
-    auto throttle = _replay_rate / _qp.proxy().get_token_metadata_ptr()->count_normal_token_owners();
-    utils::rate_limiter limiter(throttle);
-
-    auto schema = _qp.db().find_schema(system_keyspace::NAME, system_keyspace::BATCHLOG);
-
-    std::unordered_map<int32_t, replay_stats> replay_stats_per_shard;
-
-    // Use a stable `now` across all batches, so skip/replay decisions are the
-    // same across a while prefix of written_at (across all ids).
-    const auto now = db_clock::now();
-
-    auto batch = [this, &limiter, schema, &all_replayed, &replay_stats_per_shard, now] (const cql3::untyped_result_set::row& row) mutable -> future<stop_iteration> {
-        all_replayed = all_replayed && co_await process_batch(_qp, _stats, post_replay_cleanup::no, limiter, schema, replay_stats_per_shard, now, _replay_timeout, write_timeout, row);
-        co_return stop_iteration::no;
-    };
-
-    co_await with_gate(_gate, [this, &all_replayed, batch = std::move(batch)] () mutable -> future<> {
-        blogger.debug("Started replayAllFailedBatches");
-        co_await utils::get_local_injector().inject("add_delay_to_batch_replay", std::chrono::milliseconds(1000));
-
-        auto schema = _qp.db().find_schema(system_keyspace::NAME, system_keyspace::BATCHLOG);
-
-        co_await _qp.query_internal(
-                format("SELECT * FROM {}.{} BYPASS CACHE", system_keyspace::NAME, system_keyspace::BATCHLOG),
-                db::consistency_level::ONE,
-                {},
-                page_size,
-                batch);
-
-        blogger.debug("Finished replayAllFailedBatches with all_replayed: {}", all_replayed);
-    });
-
-    co_return all_replayed;
-}
-
-future<db::all_batches_replayed> db::batchlog_manager::replay_all_failed_batches_v2(post_replay_cleanup cleanup) {
+future<db::all_batches_replayed> db::batchlog_manager::replay_all_failed_batches(post_replay_cleanup cleanup) {
    co_await maybe_migrate_v1_to_v2();

+    typedef db_clock::rep clock_type;
+
    db::all_batches_replayed all_replayed = all_batches_replayed::yes;
    // rate limit is in bytes per second. Uses Double.MAX_VALUE if disabled (set to 0 in cassandra.yaml).
    // max rate is scaled by the number of nodes in the cluster (same as for HHOM - see CASSANDRA-5272).
    auto throttle = _replay_rate / _qp.proxy().get_token_metadata_ptr()->count_normal_token_owners();
-    utils::rate_limiter limiter(throttle);
+    auto limiter = make_lw_shared<utils::rate_limiter>(throttle);

    auto schema = _qp.db().find_schema(system_keyspace::NAME, system_keyspace::BATCHLOG_V2);

+    struct replay_stats {
+        std::optional<db_clock::time_point> min_too_fresh;
+        bool need_cleanup = false;
+    };
+
    std::unordered_map<int32_t, replay_stats> replay_stats_per_shard;

    // Use a stable `now` across all batches, so skip/replay decisions are the
    // same across a while prefix of written_at (across all ids).
    const auto now = db_clock::now();

-    auto batch = [this, cleanup, &limiter, schema, &all_replayed, &replay_stats_per_shard, now] (const cql3::untyped_result_set::row& row) mutable -> future<stop_iteration> {
-        all_replayed = all_replayed && co_await process_batch(_qp, _stats, cleanup, limiter, schema, replay_stats_per_shard, now, _replay_timeout, write_timeout, row);
+    auto batch = [this, cleanup, limiter, schema, &all_replayed, &replay_stats_per_shard, now] (const cql3::untyped_result_set::row& row) -> future<stop_iteration> {
+        const auto stage = static_cast<batchlog_stage>(row.get_as<int8_t>("stage"));
+        const auto batch_shard = row.get_as<int32_t>("shard");
+        auto written_at = row.get_as<db_clock::time_point>("written_at");
+        auto id = row.get_as<utils::UUID>("id");
+        // enough time for the actual write + batchlog entry mutation delivery (two separate requests).
+        auto timeout = _replay_timeout;
+
+        if (utils::get_local_injector().is_enabled("skip_batch_replay")) {
+            blogger.debug("Skipping batch replay due to skip_batch_replay injection");
+            all_replayed = all_batches_replayed::no;
+            co_return stop_iteration::no;
+        }
+
+        auto data = row.get_blob_unfragmented("data");
+
+        blogger.debug("Replaying batch {} from stage {} and batch shard {}", id, int32_t(stage), batch_shard);
+
+        utils::chunked_vector<mutation> mutations;
+        bool send_failed = false;
+
+        auto& shard_written_at = replay_stats_per_shard.try_emplace(batch_shard, replay_stats{}).first->second;
+
+        try {
+            utils::chunked_vector<std::pair<canonical_mutation, schema_ptr>> fms;
+            auto in = ser::as_input_stream(data);
+            while (in.size()) {
+                auto fm = ser::deserialize(in, std::type_identity<canonical_mutation>());
+                const auto tbl = _qp.db().try_find_table(fm.column_family_id());
+                if (!tbl) {
+                    continue;
+                }
+                if (written_at <= tbl->get_truncation_time()) {
+                    continue;
+                }
+                schema_ptr s = tbl->schema();
+                if (s->tombstone_gc_options().mode() == tombstone_gc_mode::repair) {
+                    timeout = std::min(timeout, std::chrono::duration_cast<db_clock::duration>(s->tombstone_gc_options().propagation_delay_in_seconds()));
+                }
+                fms.emplace_back(std::move(fm), std::move(s));
+            }
+
+            if (now < written_at + timeout) {
+                blogger.debug("Skipping replay of {}, too fresh", id);
+
+                shard_written_at.min_too_fresh = std::min(shard_written_at.min_too_fresh.value_or(written_at), written_at);
+
+                co_return stop_iteration::no;
+            }
+
+            auto size = data.size();
+
+            for (const auto& [fm, s] : fms) {
+                mutations.emplace_back(fm.to_mutation(s));
+                co_await coroutine::maybe_yield();
+            }
+
+            if (!mutations.empty()) {
+                const auto ttl = [written_at]() -> clock_type {
+                    /*
+                    * Calculate ttl for the mutations' hints (and reduce ttl by the time the mutations spent in the batchlog).
+                    * This ensures that deletes aren't "undone" by an old batch replay.
+                    */
+                    auto unadjusted_ttl = std::numeric_limits<gc_clock::rep>::max();
+                    warn(unimplemented::cause::HINT);
+#if 0
+                    for (auto& m : *mutations) {
+                        unadjustedTTL = Math.min(unadjustedTTL, HintedHandOffManager.calculateHintTTL(mutation));
+                    }
+#endif
+                    return unadjusted_ttl - std::chrono::duration_cast<gc_clock::duration>(db_clock::now() - written_at).count();
+                }();
+
+                if (ttl > 0) {
+                    // Origin does the send manually, however I can't see a super great reason to do so.
+                    // Our normal write path does not add much redundancy to the dispatch, and rate is handled after send
+                    // in both cases.
+                    // FIXME: verify that the above is reasonably true.
+                    co_await limiter->reserve(size);
+                    _stats.write_attempts += mutations.size();
+                    auto timeout = db::timeout_clock::now() + write_timeout;
+                    if (cleanup) {
+                        co_await _qp.proxy().send_batchlog_replay_to_all_replicas(mutations, timeout);
+                    } else {
+                        co_await _qp.proxy().send_batchlog_replay_to_all_replicas(std::move(mutations), timeout);
+                    }
+                }
+            }
+        } catch (data_dictionary::no_such_keyspace& ex) {
+            // should probably ignore and drop the batch
+        } catch (const data_dictionary::no_such_column_family&) {
+            // As above -- we should drop the batch if the table doesn't exist anymore.
+        } catch (...) {
+            blogger.warn("Replay failed (will retry): {}", std::current_exception());
+            all_replayed = all_batches_replayed::no;
+            // timeout, overload etc.
+            // Do _not_ remove the batch, assuming we got a node write error.
+            // Since we don't have hints (which origin is satisfied with),
+            // we have to resort to keeping this batch to next lap.
+            if (!cleanup || stage == batchlog_stage::failed_replay) {
+                co_return stop_iteration::no;
+            }
+            send_failed = true;
+        }
+
+        auto& sp = _qp.proxy();
+
+        if (send_failed) {
+            blogger.debug("Moving batch {} to stage failed_replay", id);
+            auto m = get_batchlog_mutation_for(schema, mutations, netw::messaging_service::current_version, batchlog_stage::failed_replay, written_at, id);
+            co_await sp.mutate_locally(m, tracing::trace_state_ptr(), db::commitlog::force_sync::no);
+        }
+
+        // delete batch
+        auto m = get_batchlog_delete_mutation(schema, netw::messaging_service::current_version, stage, written_at, id);
+        co_await _qp.proxy().mutate_locally(m, tracing::trace_state_ptr(), db::commitlog::force_sync::no);
+
+        shard_written_at.need_cleanup = true;
+
        co_return stop_iteration::no;
    };

@@ -580,10 +501,3 @@ future<db::all_batches_replayed> db::batchlog_manager::replay_all_failed_batches

    co_return all_replayed;
 }
-
-future<db::all_batches_replayed> db::batchlog_manager::replay_all_failed_batches(post_replay_cleanup cleanup) {
-    if (_fs.batchlog_v2) {
-        return replay_all_failed_batches_v2(cleanup);
-    }
-    return replay_all_failed_batches_v1(cleanup);
-}
--- a/db/batchlog_manager.hh
+++ b/db/batchlog_manager.hh
@@ -27,12 +27,6 @@ class query_processor;

 } // namespace cql3

-namespace gms {
-
-class feature_service;
-
-} // namespace gms
-
 namespace db {

 class system_keyspace;
@@ -55,11 +49,6 @@ class batchlog_manager : public peering_sharded_service<batchlog_manager> {
 public:
    using post_replay_cleanup = bool_class<class post_replay_cleanup_tag>;

-    struct stats {
-        uint64_t write_attempts = 0;
-    };
-
-
 private:
    static constexpr std::chrono::seconds replay_interval = std::chrono::seconds(60);
    static constexpr uint32_t page_size = 128; // same as HHOM, for now, w/out using any heuristics. TODO: set based on avg batch size.
@@ -67,13 +56,14 @@ private:

    using clock_type = lowres_clock;

-    stats _stats;
+    struct stats {
+        uint64_t write_attempts = 0;
+    } _stats;

    seastar::metrics::metric_groups _metrics;

    cql3::query_processor& _qp;
    db::system_keyspace& _sys_ks;
-    gms::feature_service& _fs;
    db_clock::duration _replay_timeout;
    uint64_t _replay_rate;
    std::chrono::milliseconds _delay;
@@ -94,14 +84,12 @@ private:

    future<> maybe_migrate_v1_to_v2();

-    future<all_batches_replayed> replay_all_failed_batches_v1(post_replay_cleanup cleanup);
-    future<all_batches_replayed> replay_all_failed_batches_v2(post_replay_cleanup cleanup);
    future<all_batches_replayed> replay_all_failed_batches(post_replay_cleanup cleanup);
 public:
    // Takes a QP, not a distributes. Because this object is supposed
    // to be per shard and does no dispatching beyond delegating the the
    // shard qp (which is what you feed here).
-    batchlog_manager(cql3::query_processor&, db::system_keyspace& sys_ks, gms::feature_service& fs, batchlog_manager_config config);
+    batchlog_manager(cql3::query_processor&, db::system_keyspace& sys_ks, batchlog_manager_config config);

    // abort the replay loop and return its future.
    future<> drain();
@@ -114,7 +102,7 @@ public:
        return _last_replay;
    }

-    const stats& get_stats() const {
+    const stats& stats() const {
        return _stats;
    }
 private:
--- a/db/cache_mutation_reader.hh
+++ b/db/cache_mutation_reader.hh
@@ -323,9 +323,6 @@ void cache_mutation_reader::touch_partition() {

 inline
 future<> cache_mutation_reader::fill_buffer() {
-    if (const auto& ex = get_abort_exception(); ex) {
-        return make_exception_future<>(ex);
-    }
    if (_state == state::before_static_row) {
        touch_partition();
        auto after_static_row = [this] {
--- a/db/commitlog/commitlog.cc
+++ b/db/commitlog/commitlog.cc
@@ -1986,13 +1986,13 @@ future<> db::commitlog::segment_manager::replenish_reserve() {
            }
            continue;
        } catch (shutdown_marker&) {
+            _reserve_segments.abort(std::current_exception());
            break;
        } catch (...) {
            clogger.warn("Exception in segment reservation: {}", std::current_exception());
        }
        co_await sleep(100ms);
    }
-    _reserve_segments.abort(std::make_exception_ptr(shutdown_marker()));
 }

 future<std::vector<db::commitlog::descriptor>>
--- a/db/config.cc
+++ b/db/config.cc
@@ -1291,7 +1291,7 @@ db::config::config(std::shared_ptr<db::extensions> exts)
    , ignore_dead_nodes_for_replace(this, "ignore_dead_nodes_for_replace", value_status::Used, "", "List dead nodes to ignore for replace operation using a comma-separated list of host IDs. E.g., scylla --ignore-dead-nodes-for-replace 8d5ed9f4-7764-4dbd-bad8-43fddce94b7c,125ed9f4-7777-1dbn-mac8-43fddce9123e")
    , override_decommission(this, "override_decommission", value_status::Deprecated, false, "Set true to force a decommissioned node to join the cluster (cannot be set if consistent-cluster-management is enabled).")
    , enable_repair_based_node_ops(this, "enable_repair_based_node_ops", liveness::LiveUpdate, value_status::Used, true, "Set true to use enable repair based node operations instead of streaming based.")
-    , allowed_repair_based_node_ops(this, "allowed_repair_based_node_ops", liveness::LiveUpdate, value_status::Used, "replace,removenode,rebuild", "A comma separated list of node operations which are allowed to enable repair based node operations. The operations can be bootstrap, replace, removenode, decommission and rebuild.")
+    , allowed_repair_based_node_ops(this, "allowed_repair_based_node_ops", liveness::LiveUpdate, value_status::Used, "replace,removenode,rebuild,bootstrap,decommission", "A comma separated list of node operations which are allowed to enable repair based node operations. The operations can be bootstrap, replace, removenode, decommission and rebuild.")
    , enable_compacting_data_for_streaming_and_repair(this, "enable_compacting_data_for_streaming_and_repair", liveness::LiveUpdate, value_status::Used, true, "Enable the compacting reader, which compacts the data for streaming and repair (load'n'stream included) before sending it to, or synchronizing it with peers. Can reduce the amount of data to be processed by removing dead data, but adds CPU overhead.")
    , enable_tombstone_gc_for_streaming_and_repair(this, "enable_tombstone_gc_for_streaming_and_repair", liveness::LiveUpdate, value_status::Used, false,
            "If the compacting reader is enabled for streaming and repair (see enable_compacting_data_for_streaming_and_repair), allow it to garbage-collect tombstones."
@@ -1318,7 +1318,7 @@ db::config::config(std::shared_ptr<db::extensions> exts)
    , prometheus_port(this, "prometheus_port", value_status::Used, 9180, "Prometheus port, set to zero to disable.")
    , prometheus_address(this, "prometheus_address", value_status::Used, {/* listen_address */}, "Prometheus listening address, defaulting to listen_address if not explicitly set.")
    , prometheus_prefix(this, "prometheus_prefix", value_status::Used, "scylla", "Set the prefix of the exported Prometheus metrics. Changing this will break Scylla's dashboard compatibility, do not change unless you know what you are doing.")
-    , prometheus_allow_protobuf(this, "prometheus_allow_protobuf", value_status::Used, false, "If set allows the experimental Prometheus protobuf with native histogram")
+    , prometheus_allow_protobuf(this, "prometheus_allow_protobuf", value_status::Used, true, "Enable Prometheus protobuf with native histogram. Set to false to force text exposition format.")
    , abort_on_lsa_bad_alloc(this, "abort_on_lsa_bad_alloc", value_status::Used, false, "Abort when allocation in LSA region fails.")
    , murmur3_partitioner_ignore_msb_bits(this, "murmur3_partitioner_ignore_msb_bits", value_status::Used, default_murmur3_partitioner_ignore_msb_bits, "Number of most significant token bits to ignore in murmur3 partitioner; increase for very large clusters.")
    , unspooled_dirty_soft_limit(this, "unspooled_dirty_soft_limit", value_status::Used, 0.6, "Soft limit of unspooled dirty memory expressed as a portion of the hard limit.")
@@ -1341,7 +1341,7 @@ db::config::config(std::shared_ptr<db::extensions> exts)
    , sstable_compression_user_table_options(this, "sstable_compression_user_table_options", value_status::Used, compression_parameters{compression_parameters::algorithm::lz4_with_dicts},
        "Server-global user table compression options. If enabled, all user tables"
        "will be compressed using the provided options, unless overridden"
-        "by compression options in the table schema. User tables are all tables in non-system keyspaces. The available options are:\n"
+        "by compression options in the table schema. The available options are:\n"
        "* sstable_compression: The compression algorithm to use. Supported values: LZ4Compressor, LZ4WithDictsCompressor (default), SnappyCompressor, DeflateCompressor, ZstdCompressor, ZstdWithDictsCompressor, '' (empty string; disables compression).\n"
        "* chunk_length_in_kb: (Default: 4) The size of chunks to compress in kilobytes. Allowed values are powers of two between 1 and 128.\n"
        "* crc_check_chance: (Default: 1.0) Not implemented (option value is ignored).\n"
@@ -1584,14 +1584,7 @@ db::config::config(std::shared_ptr<db::extensions> exts)
    , enable_create_table_with_compact_storage(this, "enable_create_table_with_compact_storage", liveness::LiveUpdate, value_status::Used, false, "Enable the deprecated feature of CREATE TABLE WITH COMPACT STORAGE.  This feature will eventually be removed in a future version.")
    , rf_rack_valid_keyspaces(this, "rf_rack_valid_keyspaces", liveness::MustRestart, value_status::Used, false,
        "Enforce RF-rack-valid keyspaces. Additionally, if there are existing RF-rack-invalid "
-        "keyspaces, attempting to start a node with this option ON will fail. "
-        "DEPRECATED. Use enforce_rack_list instead.")
-    , enforce_rack_list(this, "enforce_rack_list", liveness::MustRestart, value_status::Used, false,
-            "Enforce rack list for tablet keyspaces. "
-            "When the option is on, CREATE STATEMENT expands numeric rfs to rack lists "
-            "and ALTER STATEMENT is allowed only when rack lists are used in all DCs."
-            "Additionally, if there are existing tablet keyspaces with numeric rf in any DC "
-            "attempting to start a node with this option ON will fail.")
+        "keyspaces, attempting to start a node with this option ON will fail.")
    // FIXME: make frequency per table in order to reduce work in each iteration.
    // Bigger tables will take longer to be resized. Similar-sized tables can be batched into the same iteration.
    , tablet_load_stats_refresh_interval_in_seconds(this, "tablet_load_stats_refresh_interval_in_seconds", liveness::LiveUpdate, value_status::Used, 60,
@@ -1792,21 +1785,6 @@ const db::extensions& db::config::extensions() const {
    return *_extensions;
 }

-compression_parameters db::config::get_sstable_compression_user_table_options(bool dicts_feature_enabled) const {
-    if (sstable_compression_user_table_options.is_set()
-            || dicts_feature_enabled
-            || !sstable_compression_user_table_options().uses_dictionary_compressor()) {
-        return sstable_compression_user_table_options();
-    } else {
-        // Fall back to non-dict if dictionary compression is not enabled cluster-wide.
-        auto options = sstable_compression_user_table_options();
-        auto params = options.get_options();
-        auto algo = compression_parameters::non_dict_equivalent(options.get_algorithm());
-        params[compression_parameters::SSTABLE_COMPRESSION] = sstring(compression_parameters::algorithm_to_name(algo));
-        return compression_parameters{params};
-    }
-}
-
 std::map<sstring, db::experimental_features_t::feature> db::experimental_features_t::map() {
    // We decided against using the construct-on-first-use idiom here:
    // https://github.com/scylladb/scylla/pull/5369#discussion_r353614807
--- a/db/config.hh
+++ b/db/config.hh
@@ -419,13 +419,7 @@ public:
    named_value<bool> enable_sstables_mc_format;
    named_value<bool> enable_sstables_md_format;
    named_value<sstring> sstable_format;
-
-    // NOTE: Do not use this option directly.
-    // Use get_sstable_compression_user_table_options() instead.
    named_value<compression_parameters> sstable_compression_user_table_options;
-
-    compression_parameters get_sstable_compression_user_table_options(bool dicts_feature_enabled) const;
-
    named_value<bool> sstable_compression_dictionaries_allow_in_ddl;
    named_value<bool> sstable_compression_dictionaries_enable_writing;
    named_value<float> sstable_compression_dictionaries_memory_budget_fraction;
@@ -605,7 +599,6 @@ public:
    named_value<bool> enable_create_table_with_compact_storage;

    named_value<bool> rf_rack_valid_keyspaces;
-    named_value<bool> enforce_rack_list;

    named_value<uint32_t> tablet_load_stats_refresh_interval_in_seconds;
    named_value<bool> force_capacity_based_balancing;
--- a/db/consistency_level.cc
+++ b/db/consistency_level.cc
@@ -31,23 +31,19 @@ size_t quorum_for(const locator::effective_replication_map& erm) {
    return replication_factor ? (replication_factor / 2) + 1 : 0;
 }

-static size_t get_replication_factor_for_dc(const locator::effective_replication_map& erm, const sstring& dc) {
+size_t local_quorum_for(const locator::effective_replication_map& erm, const sstring& dc) {
    using namespace locator;

    const auto& rs = erm.get_replication_strategy();

    if (rs.get_type() == replication_strategy_type::network_topology) {
-        const network_topology_strategy* nts =
+        const network_topology_strategy* nrs =
            static_cast<const network_topology_strategy*>(&rs);
-        return nts->get_replication_factor(dc);
+        size_t replication_factor = nrs->get_replication_factor(dc);
+        return replication_factor ? (replication_factor / 2) + 1 : 0;
    }

-    return erm.get_replication_factor();
-}
-
-size_t local_quorum_for(const locator::effective_replication_map& erm, const sstring& dc) {
-    auto rf = get_replication_factor_for_dc(erm, dc);
-    return rf ? (rf / 2) + 1 : 0;
+    return quorum_for(erm);
 }

 size_t block_for_local_serial(const locator::effective_replication_map& erm) {
@@ -192,30 +188,18 @@ void assure_sufficient_live_nodes(
        return pending <= live ? live - pending : 0;
    };

-    auto make_rf_zero_error_msg = [cl] (const sstring& local_dc) {
-        return format("Cannot achieve consistency level {} in datacenter '{}' with replication factor 0. "
-                      "Ensure the keyspace is replicated to this datacenter or use a non-local consistency level.", cl, local_dc);
-    };
-
    const auto& topo = erm.get_topology();
-    const sstring& local_dc = topo.get_datacenter();

    switch (cl) {
    case consistency_level::ANY:
        // local hint is acceptable, and local node is always live
        break;
    case consistency_level::LOCAL_ONE:
-        if (size_t local_rf = get_replication_factor_for_dc(erm, local_dc); local_rf == 0) {
-            throw exceptions::unavailable_exception(make_rf_zero_error_msg(local_dc), cl, 1, 0);
-        }
        if (topo.count_local_endpoints(live_endpoints) < topo.count_local_endpoints(pending_endpoints) + 1) {
            throw exceptions::unavailable_exception(cl, 1, 0);
        }
        break;
    case consistency_level::LOCAL_QUORUM: {
-        if (size_t local_rf = get_replication_factor_for_dc(erm, local_dc); local_rf == 0) {
-            throw exceptions::unavailable_exception(make_rf_zero_error_msg(local_dc), cl, need, 0);
-        }
        size_t local_live = topo.count_local_endpoints(live_endpoints);
        size_t pending = topo.count_local_endpoints(pending_endpoints);
        if (local_live < need + pending) {
--- a/db/partition_snapshot_row_cursor.hh
+++ b/db/partition_snapshot_row_cursor.hh
@@ -461,17 +461,7 @@ public:
                    }
                }
            } else {
-                if (_reversed) [[unlikely]] {
-                    if (!rows.empty()) {
-                        it = std::prev(rows.end());
-                        cont = is_continuous::yes;
-                        rt = {};
-                    } else {
-                        _background_continuity = true;
-                    }
-                } else {
-                    _background_continuity = true;
-                }
+                _background_continuity = true; // Default continuity
            }

            if (!it) {
--- a/db/row_cache.cc
+++ b/db/row_cache.cc
@@ -29,7 +29,6 @@
 #include "utils/assert.hh"
 #include "utils/updateable_value.hh"
 #include "utils/labels.hh"
-#include "utils/chunked_vector.hh"

 namespace cache {

@@ -851,7 +850,7 @@ mutation_reader row_cache::make_nonpopulating_reader(schema_ptr schema, reader_p
                    std::move(permit),
                    e.key(),
                    query::clustering_key_filter_ranges(slice.row_ranges(*schema, e.key().key())),
-                    e.partition().read(_tracker.region(), _tracker.memtable_cleaner(), &_tracker, phase_of(pos)),
+                    e.partition().read(_tracker.region(), _tracker.memtable_cleaner(), nullptr, phase_of(pos)),
                    false,
                    _tracker.region(),
                    _read_section,
@@ -1216,10 +1215,10 @@ future<> row_cache::invalidate(external_updater eu, const dht::decorated_key& dk
 }

 future<> row_cache::invalidate(external_updater eu, const dht::partition_range& range, cache_invalidation_filter filter) {
-    return invalidate(std::move(eu), utils::chunked_vector<dht::partition_range>({range}), std::move(filter));
+    return invalidate(std::move(eu), dht::partition_range_vector({range}), std::move(filter));
 }

-future<> row_cache::invalidate(external_updater eu, utils::chunked_vector<dht::partition_range>&& ranges, cache_invalidation_filter filter) {
+future<> row_cache::invalidate(external_updater eu, dht::partition_range_vector&& ranges, cache_invalidation_filter filter) {
    return do_update(std::move(eu), [this, ranges = std::move(ranges), filter = std::move(filter)] mutable {
        return seastar::async([this, ranges = std::move(ranges), filter = std::move(filter)] {
            auto on_failure = defer([this] () noexcept {
--- a/Show More
+++ b/Show More