The secondary index mechanism is currently used to determine the target column.
This mechanism works incorrectly for vector indexes with filtering because
it returns the last specified column as the target (vectors) column.
However, the syntax for a vector index requires the first column to be the target:
```
CREATE CUSTOM INDEX ON t(vectors, users) USING 'vector_index';
```
This discrepancy eventually leads to the following exception when performing an
ANN search on a vector index with filtering columns:
```
ANN ordering by vector requires the column to be indexed using 'vector_index'
```
This commit fixes the issue by introducing dedicated logic for vector indexes
to correctly identify the target (vectors) column.
Fixes: SCYLLADB-635
Closes scylladb/scylladb#28740
Split input sstable(s) into multiple output sstables based on the provided
token boundaries. The input sstable(s) are divided according to the specified
split tokens, creating one output sstable per token range.
Fixes: SCYLLADB-10
Closes scylladb/scylladb#28741
The query (and, in certain modes, the write) operations use the virtual table facility inside `cql_test_env`. The schema of the sstable is created as a table in `cql_test_env`. This involves registering all UDTs with the keyspace, so they are available for lookups.
This was done with a flat loop over all column types, but this is not enough. UDTs might be nested in other types, like collections. One has to do a traversal of the type tree and register every UDT on the way.
This PR changes the flat loop to a recursive traversal of the type tree. The query operation now works with UDTs, no matter how deeply nested they are.
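A minimal sketch of the recursive approach (the helper names here are hypothetical, not the actual tool's API):
```cpp
// Hypothetical sketch: walk the type tree and register every UDT found,
// recursing into collection/tuple element types along the way.
void register_udts_recursively(data_type type, cql_test_env& env) {
    if (type->is_user_type()) {
        register_udt(env, type);  // hypothetical registration hook
    }
    for (const data_type& sub : subtypes_of(type)) {  // hypothetical accessor
        register_udts_recursively(sub, env);
    }
}
```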
Backport: Implements missing functionality of a tool, no backport.
Closes scylladb/scylladb#28798
* github.com:scylladb/scylladb:
tools/scylla-sstable: create_table_in_cql_env(): register UDTs recursively
tools/scylla-sstable: generalize dump_if_user_type
tools/scylla-sstable: move dump_if_user_type() definition
3f7ee3ce5d introduced system.batchlog_v2, with a schema designed to speed up batchlog replays and make post-replay cleanups much more effective.
It did not introduce a cluster feature for the new table, because it is a node-local table, so the cluster can switch to the new table gradually, one node at a time.
However, https://github.com/scylladb/scylladb/issues/27886 showed that the switching causes timeouts during upgrades, in mixed clusters. Furthermore, switching to the new table unconditionally on upgraded nodes means that on rollback, the batches saved into the v2 table are lost.
This PR re-introduces v1 (`system.batchlog`) support and guards the use of the v2 table with a cluster feature, so mixed clusters keep using v1 and thus remain rollback-compatible.
The re-introduced v1 support doesn't support post-replay cleanups for simplicity. The cleanup in v1 was never particularly effective anyway and we ended up disabling it for heavy batchlog users, so I don't think the lack of support for cleanup is a problem.
Fixes: https://github.com/scylladb/scylladb/issues/27886
Needs backport to 2026.1, to fix upgrades for clusters using batches
Closes scylladb/scylladb#28736
* github.com:scylladb/scylladb:
test/boost/batchlog_manager_test: add tests for v1 batchlog
test/boost/batchlog_manager_test: make prepare_batches() work with both v1 and v2
test/boost/batchlog_manager_test: fix indentation
test/boost/batchlog_manager_test: extract prepare_batches() method
test/lib/cql_assertions: is_rows(): add dump parameter
tools/scylla-sstable: extract query result printers
tools/scylla-sstable: add std::ostream& arg to query result printers
repair/row_level: repair_flush_hints_batchlog_handler(): add all_replayed to finish log
db/batchlog_manager: re-add v1 support
db/batchlog_manager: return all_replayed from process_batch()
db/batchlog_manager: process_bath() fix indentation
db/batchlog_manager: make batch() a standalone function
db/batchlog_manager: make structs stats public
db/batchlog_manager: allocate limiter on the stack
db/batchlog_manager: add feature_service dependency
gms/feature_service: add batchlog_v2 feature
The test is currently flaky with `reuse_ip = True`. The issue is that the
test retries replace before the first replace is rolled back and the
first replacing node is removed from gossip. The second replacing node
can see the entry of the first replacing node in gossip. This entry has
a newer generation than the entry of the node being replaced, and both
replacing nodes have the same IP as the node being replaced. Therefore,
the second replacing node incorrectly considers this entry as the entry
of the node being replaced. This entry is missing rack and DC, so the
second replace fails with
```
ERROR 2026-02-24 21:19:03,420 [shard 0:main] init - Startup failed:
std::runtime_error (Cannot replace node
8762a9d2-3b30-4e66-83a1-98d16c5dd007/127.61.127.1 with a node on
a different data center or rack.
Current location=UNKNOWN_DC/UNKNOWN_RACK, new location=dc1/rack2)
```
Fixes SCYLLADB-805
Closes scylladb/scylladb#28829
Following becb48b586, it seems we have a regression in the CI trigger logic.
The Verify Org Membership step used gh api /orgs/scylladb/members/$AUTHOR
with GITHUB_TOKEN to check if the user is an org member. However,
GITHUB_TOKEN does not have read:org scope, so the API call fails for all
users — even actual scylladb org members — causing CI triggers to be
silently skipped.
Replace the API call with the author_association field from the GitHub
event payload, which is set by GitHub itself and requires no special
token permissions. This allows any scylladb org member (MEMBER or OWNER)
to trigger CI via comment, regardless of whether they authored the PR.
Closes scylladb/scylladb#28837
Remove the test since it's not relevant anymore: it's not testing what
it's supposed to test, and it's unstable.
The purpose of the test was to reproduce an issue in the legacy view
builder where a view starts to build at token T2 and then all tokens
[T1, end) with T1<T2 migrate to another node while it's still building,
exposing an issue when the view builder wraps around the token ring.
This is not relevant anymore because view building with tablets is now
done via the view building coordinator for tablets, and all views start
to build from the first token with no wraparound.
Besides, the test is unstable due to relying too much on specific
timing, which was useful for investigating and fixing the original issue
but not anymore.
Fixes SCYLLADB-842
Closes scylladb/scylladb#28842
The issue was fixed by commit cc03f5c89d
("cql3: support literals and bind variables in selectors"), so the
xfail marker is no longer needed.
Closes scylladb/scylladb#28776
The tests use a create_dataset helper that duplicates existing code doing the same thing. This PR patches the basic tests to use the standard facilities.
The PR also simplifies the 3-level nested loops used to combine several sets of restoration parameters by using itertools.product.
Continuation of #28600.
Cleaning tests, not backporting
Closes scylladb/scylladb#28608
* github.com:scylladb/scylladb:
test/object_store: Use itertools.product() for deeply nested loops
test/object_store: Replace dataset creation usage with standard methods
test/object_store: Shift indentation right for test_restore_with_streaming_scopes
The perf-simple-query tests were not restricted on CPU count,
so on a 96-CPU machine, they would run on 96 CPUs, and time out
in debug mode.
Also restrict memory usage and add --overprovisioned so that
pinning is disabled. Apply that to all tests.
Closes scylladb/scylladb#28821
The helper is very simple yet generic -- it takes a snapshot of a keyspace on all servers and collects the resulting sstables from workdirs. Re-using it in all test cases saves some lines of code. Also, the method was "sequential"; making it "parallel" reduces the waiting time a bit.
This will help generalize existing backup/restore tests to support the clustered snapshot/backup/restore API (see #28525) later.
Cleaning up tests, not backporting.
Closes scylladb/scylladb#28660
* github.com:scylladb/scylladb:
test/backup: Run keyspace flush and snapshot taking API in parallel
test/backup: Re-use take_snapshot() helper in do_abort_restore()
test/backup: Move take_snapshot() helper up
Split the chained inject_parameter().value_or() expression into two
separate named variables for clarity.
Use condition_variable::when() instead of wait(). when() is the
coroutine-native API, designed specifically for co_await contexts —
it avoids the heap allocation of a promise_waiter that wait() uses,
and instead uses a stack-based awaiter.
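For illustration, a minimal Seastar sketch of the two styles (the surrounding names are illustrative):
```cpp
#include <seastar/core/condition-variable.hh>
#include <seastar/core/future.hh>

seastar::future<> wait_until_ready(seastar::condition_variable& cv, bool& ready) {
    // Old style: wait() heap-allocates a promise_waiter per waiter.
    //   co_await cv.wait([&] { return ready; });
    // New style: when() uses a stack-based awaiter, built for co_await.
    co_await cv.when([&] { return ready; });
}
```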
Closes scylladb/scylladb#28824
The PR removes most of the code that assumes that group0 and raft topology are not enabled. It also makes sure that joining a cluster in no-raft mode, or upgrading to this version a node in a cluster that does not yet use raft topology, will fail.
Refs #15422
No backport needed since this removes functionality.
Closes scylladb/scylladb#28514
* https://github.com/scylladb/scylladb:
group0: fix indentation after previous patch
raft_group0: simplify get_group0_upgrade_state function since no upgrade can happen any more
raft_group0: move service::group0_upgrade_state to use fmt::formatter instead of iostream
raft_group0: remove unused code from raft_group0
node_ops: remove topology over node ops code
topology: fix indentation after the previous patch
topology: drop topology_change_enabled parameter from raft_group0 code
storage_service: remove unused handle_state_* functions
gossiper: drop wait_for_gossip_to_settle and deprecate correspondent option
storage_service: fix indentation after the last patch
storage_service: remove gossiper bootstrapping code
storage_service: drop get_group_server_if_raft_topolgy_enabled
storage_service: drop is_topology_coordinator_enabled and its uses
storage_service: drop run_with_api_lock_in_gossiper_mode_only
topology: remove code that assumes raft_topology_change_enabled() may return false
test: schema_change_test: make test_schema_digest_does_not_change_with_disabled_features tests run in raft mode
test: schema_change_test: drop schema tests relevant for no raft mode only
topology: remove upgrade to raft topology code
group0: remove upgrade to group0 code
group0: refuse to boot if a cluster is still is not in a raft topology mode
storage_service: refuse to join a cluster in legacy mode
This commit adds the upgrade guide for version 2026.1.
According to the new upgrade policy, the user can now upgrade to the major version (2026.1)
from any previous minor version.
So instead of adding a separate guide from 2025.4 to 2026.1, we need a guide from 2025.x to 2026.1.
In addition, this commit:
- Updates the upgrade policy to reflect the above change.
- Removes the upgrade guides for the previous version.
Fixes https://github.com/scylladb/scylladb/issues/28533
Fixes https://github.com/scylladb/scylladb/issues/28532
Closes scylladb/scylladb#28789
assert() and SCYLLA_ASSERT() are evil (Refs #7871) because they can cause the entire Scylla cluster to crash mysteriously instead of cleanly failing the specific request that encountered a serious problem, such as a failed prerequisite.
In this two-patch series, the first patch introduces a new macro, throwing_assert(), a convenient drop-in replacement for SCYLLA_ASSERT() that has all the benefits of on_internal_error() instead of the dangers of SCYLLA_ASSERT().
In the second patch we use the new macro to replace every call to SCYLLA_ASSERT() in Alternator with throwing_assert().
Here is an example from the second patch to demonstrate the power of this approach: The Alternator code uses the attrs_column() function to retrieve the ":attrs" column of a schema. Since every Alternator table always has an ":attrs" column in its schema, we felt safe to SCYLLA_ASSERT() that this column exists. However, imagine that one day because of a bug, one Alternator table is missing this column. Or maybe not a bug - maybe a malicious user on a shared cluster found a way to deliberately delete this column (e.g., with a CQL command!) and this check fails. Before this patch, the entire Scylla node will crash. If the same request is sent to all nodes - the entire cluster will crash. The user might not even know which request caused this crash. In contrast, after this patch, the specific operation - e.g., PutItem - will get an exception. Only this operation, and nothing else, will be aborted, and the user who sent this request will even get an "Internal Server Error" with the assertion-failure message, alerting them that this specific query is causing problems, while other queries might work normally.
There's no need to backport this patch - unless it becomes annoying that other branches don't have the throwing_assert() function and we want it to ease other backports.
Fixes #28308.
Closes scylladb/scylladb#28445
* github.com:scylladb/scylladb:
alternator: replace SCYLLA_ASSERT with throwing_assert
utils: introduce throwing_assert(), a safe replacement for assert
In nonroot installations, the install.sh script was hardcoding the
api_ui_dir and api_doc_dir paths to /opt/scylladb/ in scylla.yaml,
even though the actual files were installed to a different location
(typically ~/scylladb). This caused REST API endpoints like
/api-doc/failure_detector/ to fail with "transfer closed with
outstanding read data remaining" error because Scylla couldn't find
the API documentation files at the configured paths.
Fix this by using the $prefix variable instead of hardcoded
/opt/scylladb/ paths. This ensures that:
- In regular installations: $prefix = /opt/scylladb (no change)
- In nonroot installations: $prefix = ~/scylladb (paths now correct)
Fixes: SCYLLADB-721
Backport: The hardcoded paths in install.sh have been present since
the nonroot installation feature was introduced, making REST API
endpoints non-functional in all nonroot installations across all
live versions of Scylla.
Closes scylladb/scylladb#28805
The concurrency semaphore gates uninitialized connections across all
do_accepts loops, but was initialized to a fixed value regardless of
how many listeners exist. With multiple listeners competing for the
same units, each effectively gets less than the configured concurrency.
Initialize the semaphore to concurrency - 1 and signal 1 per listen()
call, so total capacity is concurrency - 1 + nr_listeners. This
guarantees each listener's accept loop can have at least one unit
available.
This mainly fixes a problem where setting the uninitialized_connections_semaphore_cpu_concurrency
config value to 1 resulted in not being able to process connections, as only 1 out of 2
listeners got the semaphore.
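A hypothetical sketch of the accounting (not the actual generic_server code):
```cpp
#include <cstddef>
#include <seastar/core/semaphore.hh>

// Sketch: one semaphore shared by all accept loops. Starting at
// concurrency - 1 and signaling once per listen() yields a total capacity
// of concurrency - 1 + nr_listeners, so every listener's accept loop can
// always hold at least one unit.
struct server_sketch {
    explicit server_sketch(size_t concurrency) : _conn_sem(concurrency - 1) {}
    void listen() {
        _conn_sem.signal(1);  // each listener contributes one unit
    }
    seastar::semaphore _conn_sem;
};
```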
Fixes https://scylladb.atlassian.net/browse/SCYLLADB-762
Backport: no, it's a minor problem
Closes scylladb/scylladb#28747
* github.com:scylladb/scylladb:
test: add test_uninitialized_conns_semaphore
generic_server: fix waiters count in shed log
generic_server: scale connection concurrency semaphore by listener count
The futurization refactoring in 9d3755f276 ("replica: Futurize
retrieval of sstable sets in compaction_group_view") changed
maybe_wait_for_sstable_count_reduction() from a single predicated
wait:
```
co_await cstate.compaction_done.wait([..] {
return num_runs_for_compaction() <= threshold
|| !can_perform_regular_compaction(t);
});
```
to a while loop with a predicated wait:
```
while (can_perform_regular_compaction(t)
&& co_await num_runs_for_compaction() > threshold) {
co_await cstate.compaction_done.wait([this, &t] {
return !can_perform_regular_compaction(t);
});
}
```
This was necessary because num_runs_for_compaction() became a
coroutine (returns future<size_t>) and can no longer be called
inside a condition_variable predicate (which must be synchronous).
However, the inner wait's predicate — !can_perform_regular_compaction(t)
— only returns true when compaction is disabled or the table is being
removed. During normal operation, every signal() from compaction_done
wakes the waiter, the predicate returns false, and the waiter
immediately goes back to sleep without ever re-checking the outer
while loop's num_runs_for_compaction() condition.
This causes memtable flushes to hang forever in
maybe_wait_for_sstable_count_reduction() whenever the sstable run
count exceeds the threshold, because completed compactions signal
compaction_done but the signal is swallowed by the predicate.
Fix by replacing the predicated wait with a bare wait(), so that
any signal (including from completed compactions) causes the outer
while loop to re-evaluate num_runs_for_compaction().
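A sketch of the fixed loop, simplified from the description above:
```
while (can_perform_regular_compaction(t)
        && co_await num_runs_for_compaction() > threshold) {
    // Bare wait: any signal(), including from completed compactions,
    // wakes the waiter, and the outer loop re-checks the run count.
    co_await cstate.compaction_done.wait();
}
```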
Fixes: https://scylladb.atlassian.net/browse/SCYLLADB-610
Closes scylladb/scylladb#28801
This series implements a new per-row TTL feature for CQL. The per-row TTL feature was requested in issue #13000. It is a feature that does not exist in Cassandra, and was inspired by DynamoDB's TTL feature - and under the hood uses the same implementation that we used in Alternator to implement this DynamoDB feature.
The new per-row TTL feature is completely separate from CQL's existing per-write (and per-cell) TTL, and both will be available to users.
In the per-row TTL feature, one column in the table is designated as the "TTL" column, and its value for a row is the expiration time for that row. The TTL column can be designated at table creation time, e.g.:
```cql
CREATE TABLE tab (
id int PRIMARY KEY,
t text,
expiration timestamp TTL
);
```
Or after the table already exists with:
```cql
ALTER TABLE tab TTL expiration
```
Expiration can also be disabled, with:
```cql
ALTER TABLE tab TTL NULL
```
The new per-row TTL feature has two features that users have been asking for:
1. A user can change the value of just the TTL column - without rewriting the entire row - to change the expiration time of the entire row.
2. When an expired row is finally deleted, a CDC event about this deletion appears in the CDC log (if CDC is enabled), including - if a preimage is enabled - the content of the deleted row.
To achieve the second goal (CDC events), a row is not guaranteed to disappear at exactly its expiration time (as CQL's original TTL feature guarantees). Rather, the row is deleted some time later, depending on `alternator_ttl_period_in_seconds`; until the actual deletion, the row is still readable (and even writable). But we are guaranteed that when the row is finally deleted, the CDC event will come too.
The implementation uses the same background thread used by Alternator to periodically scan for expired items and delete them.
The expiration thread keeps the same metrics as it did for Alternator:
* `scylla_expiration_scan_passes`
* `scylla_expiration_scan_table`
* `scylla_expiration_items_deleted`
* `scylla_expiration_secondary_ranges_scanned`
The series begins with a few small preparation patches, followed by the main part of the feature (which isn't big, since we are just enabling the pre-existing Alternator expiration machinery for CQL) and finally 30 tests (single-node and multi-node tests) and documentation.
This series is a new feature, so traditionally would not be backported. However, I wouldn't be surprised if we will be requested to backport it so that customers will not need to wait for a new major release.
Fixes #13000
Closes scylladb/scylladb#28320
* github.com:scylladb/scylladb:
test/cqlpy: verify that a column can't be both STATIC and PRIMARY KEY
docs/cql: document the new CQL per-row TTL feature
test/cluster: tests for the new CQL per-row TTL feature
test/cqlpy: tests for the new CQL per-row TTL feature
test: set low alternator_ttl_period_in_seconds in CQL tests
cql ttl: fix ALTER TABLE to disable TTL if column is dropped
cql ttl: add setting/unsetting of TTL column to ALTER TABLE
cql ttl: add TTL column support to CREATE TABLE and DESC TABLE
ttl: add CQL support to Alternator's TTL expiration service
alternator ttl: move TTL_TAG_KEY to a header file
alternator ttl: remove unnecessary check of feature flag
cql: add "cql_row_ttl" cluster feature
alternator: fix error message if UpdateTimeToLive is not supported
Switch vector dimension handling to fixed-width `uint32_t` type,
update parsing/validation, and add boundary tests.
The dimension is first parsed as `unsigned long`, which is guaranteed
to be **at least** 32 bits wide, and is then range-checked so it is safe to narrow to `uint32_t`.
Move `MAX_VECTOR_DIMENSION` from `cql3_type::raw_vector` to `cql3_type`
to ensure public visibility for checks outside the class.
Add tests to verify the type boundaries.
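A self-contained sketch of the parse-then-narrow step (the bound parameter and error type are assumptions, not the actual Scylla code):
```cpp
#include <cstdint>
#include <stdexcept>
#include <string>

// Sketch: unsigned long is at least 32 bits wide, so parse into it,
// range-check against the maximum, and only then narrow to uint32_t.
uint32_t parse_vector_dimension(const std::string& s, unsigned long max_dimension) {
    unsigned long parsed = std::stoul(s);
    if (parsed > max_dimension) {
        throw std::invalid_argument("vector dimension out of range");
    }
    return static_cast<uint32_t>(parsed);  // safe after the range check
}
```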
Fixes: https://scylladb.atlassian.net/browse/SCYLLADB-223
Signed-off-by: Yaniv Kaul <yaniv.kaul@scylladb.com>
Co-authored-by: Dawid Pawlik <dawid.pawlik@scylladb.com>
Closes scylladb/scylladb#28762
The test test_build_view_with_large_row creates a materialized view and
expects the view to be built with a timeout of 5 seconds. It was
observed to fail because the timeout is too short on slow machines.
Increase the timeout to 60 seconds to make the test less flaky on slow
machines. Similarly for the other tests in the file that have a timeout
for view build, increase the timeout to 60 seconds to be consistent and
safer.
Fixes SCYLLADB-769
Closes scylladb/scylladb#28817
We have observed a bug that caused Scylla to crash due to metrics double
registration. This bug is really difficult to reproduce and was seen
only once in the wild. We think that it may be caused by a request
in-flight keeping a reference to the stats object, making it not
deregister when the index is dropped, which casues a double registration
when we recreate the index, however we are not 100% sure.
This patch makes it so the metrics always get deregistered when we drop the index, which should fix the double registration bug.
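A hypothetical sketch of the idea (type and hook names assumed):
```cpp
#include <seastar/core/metrics_registration.hh>

// Sketch: tie deregistration to the drop event itself, rather than to the
// destruction of the stats object, which an in-flight request may delay.
struct index_stats_sketch {
    seastar::metrics::metric_groups _metrics;
    void on_index_dropped() {
        _metrics.clear();  // deregisters now, regardless of outstanding references
    }
};
```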
Fixes: #27252
Closes scylladb/scylladb#28655
Audit tests have been slow. They rely on the wait_for function. This function first sleeps for the duration of the time step specified, and then calls the given function. The audit tests need 0.02-0.03 seconds for the given function, but the operation lasts around 1.02-1.03 seconds, since the step is 1 second.
This patch modifies wait_for dtest function so it first executes the given function, and afterwards calls time.sleep(step). This reduces time needed for the given function from 1.03 to 0.03 seconds.
Total audit tests suite speedup is 3x. On the developer machine the time is reduced from 13+ minutes to 4 minutes.
This patch also improves performance of some alternator tests that use the same wait_for dtest function.
The default time step of `wait_for` in the dtest framework has been reduced to make the environment more responsive and test execution faster.
Refs SCYLLADB-573
This is a performance improvement of testing framework. No need to backport.
Closes scylladb/scylladb#28590
* github.com:scylladb/scylladb:
dtest: shorten default sleep step in wait_for
dtest: wait_for speedup
This workflow calls the reusable backport-with-jira workflow from
scylladb/github-automation to enable automatic backport PR creation with
Jira sub-issue integration.
The workflow triggers on:
- Push to master/next-*/branch-* branches (for promotion events)
- PR labeled with backport/X.X pattern (for manual backport requests)
- PR closed/merged on version branches (for chain backport processing)
Features enabled by calling the shared workflow:
- Creates Jira sub-issues under the main issue for each backport version
- Sorts versions descending (highest first: 2025.4 -> 2025.3 -> 2025.2)
- Cherry-picks from previous version branch to avoid repeated conflicts
- On Jira API failure: adds comment to main issue, applies 'jira-sub-issue-creation-failed' label, continues with PR
Closes scylladb/scylladb#28804
While adding the new syntax "TTL" to CREATE TABLE, I noticed that the
parser actually allows a column to be defined as "STATIC PRIMARY KEY".
So I add here a small test to verify that this is not really allowed:
The syntax "c int STATIC PRIMARY KEY" is accepted, but then rejected
by a later check. The syntax "c int PRIMARY KEY STATIC" is rejected
as a syntax error.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Add user-facing documentation for the new CQL per-row TTL feature,
in docs/cql/cql-extensions.md.
Also mention (and link) the new alternative TTL feature in a few
relevant documents about the old (per-write) TTL, about CDC,
and about the CREATE TABLE and ALTER TABLE commands.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
The previous patch added single-node functional tests (in test/cqlpy)
for everything which was possible to test on a single node. In this
patch we add four tests that we couldn't test on a single node, using
the test/cluster test framework:
1. Test that the TTL expiration work - both the scanning threads and
the actual deletion work on all nodes - happens on the "streaming"
scheduling group.
2. Test that even if one of the cluster's nodes is down, still all
the items get expired - another node "takes over" the dead node's
work.
3. Test that rolling upgrade works as designed for the CQL per-row TTL
feature: Before every single node in the cluster is upgraded to
support this feature, a TTL column cannot be enabled on a table.
And as soon as the last node of the cluster is upgraded, the TTL
feature begins to work completely (you don't need to reboot all
the nodes again).
4. Test that expiration works correctly on a multi-DC setup. The test
doesn't check the efficiency of this process - i.e., that today each
DC scans part of the data, reading with LOCAL_QUORUM, and writing
the deletions across the entire cluster. Rather, the test only
verifies the correctness - that expired rows do get deleted -
for the usual case where the data across the DCs is consistent.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
This patch contains 27 functional tests (in the test/cqlpy framework)
for the new CQL per-row TTL feature. The tests cover the TTL column
configuration statements (CREATE TABLE, ALTER TABLE) as well as the
actual item expiration or non-expiration depending on the value of
the expiration-time column - and also CDC events generated on expiration
and the metrics generated by the expiration process.
These tests were written together with the code, as in "test-driven
development", so they aim to cover every corner case considered during
the development, and they reproduce every bug and misstep seen during
the development process. As a result, they hopefully achieve very high
code coverage - but since we don't have a working code-coverage tool,
I can't report any specific code coverage numbers.
These tests check everything which we can check on single-node cluster.
The next patch will add additional multi-node tests for things we can't
check here with a single node - such as the scheduling group used by the
distributed work, the effect of dead nodes on the TTL functionality, and
the process of rolling upgrade.
The tests in this patch do NOT try to stress the background expiration
scanning threads, or to check how they handle topology changes, large
amounts of data or clusters spanning multiple DCs. These tests also don't
test the performance impact of these scanning threads. Because the
expiration scanning thread is identical to the one already used by
Alternator TTL, we assume that many of these aspects were already tested
for Alternator TTL and did not change when the same implementation is
used for the new CQL feature.
All new tests pass on ScyllaDB. Because the per-row TTL feature is
a new ScyllaDB feature that does not exist on Cassandra, all these
tests are skipped on Cassandra.
Because some of these tests involve waiting for expiration, they can't
be very quick. Still, because we set alternator_ttl_period_in_seconds
to 0.5 seconds in the test framework, all 27 tests running sequentially
finish in roughly 6 seconds total, which we consider acceptable.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
In test/alternator/run we set alternator_ttl_period_in_seconds to a very
low number (0.5 seconds) to allow TTL tests to expire items very quickly
and finish quickly.
Until now, we didn't need to do this for CQL tests, because they weren't
using this Alternator-only feature. Now that CQL uses the same expiration
feature with its original configuration parameter, we need to set it in
CQL tests too.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
If "ALTER TABLE tab DROP x" is done to delete column x, and column x was
the designated TTL column, then the per-row TTL feature should be disabled
on this table.
If we don't do this, the expiration scanner will continue to scan the
table trying to read the dropped column - which will be wasteful or
worse.
A test for this case is also included in test/cqlpy/test_ttl_row.py
in a later patch.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
The previous patch added the ability in CREATE TABLE to designate one
of the regular columns as a "TTL column", to be used by the per-row TTL
feature (Refs #13000). In this patch we add to ALTER TABLE the ability
to enable per-row TTL on an existing table with a given column as the
TTL column:
ALTER TABLE tab TTL colname
and also the ability to disable per-row TTL with
ALTER TABLE tab TTL NULL
As in CREATE TABLE, the designated TTL column must be a regular column
(it can't be a primary key column or a static column), and must have
one of the types timestamp, bigint or int.
You can't enable per-row TTL if already enabled, or disable it if
already disabled. To change the TTL column on an existing table,
you must first disable TTL, and then re-enable it with the new column.
A large collection of functional tests (in test/cqlpy), for every detail
of this patch, will come in a later patch in this series.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
This patch enables the per-row TTL feature in CQL (Refs #13000).
This patch allows the user to create a new table with one of its columns
designated as the TTL column with a syntax like:
CREATE TABLE tab (
id int PRIMARY KEY,
t text,
expiration timestamp TTL
);
The column marked "TTL" must have the "timestamp", "bigint" or "int"
types (the choice of these types was explained in the previous patch),
and there can only be one such column. We decided not to allow a column
to be both a primary key column and a TTL column - although it would
have worked (it's supported in Alternator), I considered this non-useful
and confusing, and decided not to allow it in CQL. A TTL column also
can't be a static column.
We save the information of which column is the TTL column in a tag which
is read by the "expiration service" - originally a part of Alternator's
TTL implementation. After the previous patch, the expiration service is
running and knows how to understand CQL tables, so the CQL per-row TTL
feature will start to work.
This patch also implements DESC TABLE, printing the word "TTL" in the
right place of the output.
This patch doesn't yet implement ALTER TABLE that should allow enabling
or disabling the TTL column setting on an existing table - we'll do that
in the next patch.
A large collection of functional tests (in test/cqlpy), for every detail
of this feature will be added in a later patch.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
The Alternator TTL feature uses an "expiration service", a background
thread on each shard which periodically scans for expired items and
deletes them. When writing the expiration service, we already
anticipated that the day will come that we'll want to use it for CQL
too. Well, now that we want to use it for CQL, we only need to make
two changes:
1. Before this patch, the expiration service was only started if
Alternator was enabled. Now we need to start it unconditionally,
as both Alternator and CQL will need to use it.
The performance impact of the new background threads, when not
needed, should be minimal: These threads will wake up every
alternator_ttl_period_in_seconds (by default - once a day) and
just check if any table has per-row TTL enabled, and if not, do
nothing.
2. Before this patch, the expiration-time column had to be of type
"decimal" - a variable-precision floating-point type. This made
sense in Alternator - where all numbers are of this type, but CQL
offers better and more efficient types for this purpose. In this
patch we add support for two additional types for the expiration
time column: The "timestamp" type (which uses millisecond precision,
which our implementation truncates to whole seconds) and for the
"bigint" type storing a number of seconds since the UNIX epoch.
We also support the smaller "int" type for compatibility with
existing data, but it is not recommended because a signed
32-bit integer counting time from 1970 will break in 2038.
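The "timestamp" truncation amounts to something like the following trivial sketch (not the actual code):
```cpp
#include <cstdint>

// Sketch: "timestamp" values carry milliseconds since the UNIX epoch and
// are truncated to whole seconds; "bigint"/"int" already hold seconds.
int64_t expiration_seconds_from_timestamp(int64_t millis_since_epoch) {
    return millis_since_epoch / 1000;
}
```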
After this patch, the expiration service supports CQL tables, but there
is nothing yet that can enable it on CQL tables - i.e., nothing that
sets the appropriate tag on the table to tell the expiration service
which column is the expiration-time column. We'll add new syntax to
do this in the next patch.
At the moment, we leave the expiration service implementation in
its existing location - alternator/ttl.cc. This is despite the fact
that we now start it and use it also for CQL. For better modularity,
we should probably later move the expiration service implementation
to a separate module (directory).
Similarly, the expiration service's period is still configured via
alternator_ttl_period_in_seconds, which is now a misnomer because it
also affects CQL. Later we can rename this configuration parameter,
or alternatively, consider different scan periods for different tables
and table types, and have separate configuration for Alternator TTL
and CQL per-row TTL.
The metrics kept by the expiration service are the same metrics existing
for Alternator TTL, and fortunately do not have the name "alternator" in
their name:
* scylla_expiration_scan_passes
* scylla_expiration_scan_table
* scylla_expiration_items_deleted
* scylla_expiration_secondary_ranges_scanned
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
TTL_TAG_KEY stores the name of the tag in which we store the name of the
table's expiration-time column, for Alternator's TTL feature.
We already need this name in two source files, and soon we'll need it
in more files - as we want to use the same implementation also for
a new per-row TTL feature in CQL. So it's time to move the declaration
of this variable to a new header file - alternator/ttl_tag.hh.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Every node that supports the Alternator TTL feature should start its
background expiration-checking thread, *without* checking if other
nodes support this feature. This patch removes the unnecessary check.
Indeed, until all other nodes enable this feature, the background thread
will have nothing to do. But when finally all nodes have this feature -
we need this thread to already be on - without requiring another reboot
of all nodes to start this thread.
In practice, this change won't change anything on modern installations
because this feature is already three years old and always enabled on
modern clusters. But I don't want to repeat the same mistake for the new
CQL per-row TTL feature, so better fix it in Alternator too.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
This patch adds a new cluster feature "CQL_ROW_TTL", for the new CQL
per-row TTL feature.
With this patch, this node reports supporting this feature, but the CQL
per-row TTL feature can only be used once all the nodes in the cluster
supports the feature. In other words, user requests to enable per-row TTL
on a table should check this feature flag (on the whole cluster) before
proceeding.
This is needed because the implementation of the per-row-TTL expiration
requires the cooperation of all nodes to participate in scanning for
expired items, so the feature can't be trusted until all nodes participate
in it.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Since commit 2dedb5ea75, the Alternator
TTL feature is no longer experimental. It is still a "cluster feature"
meaning it cannot be used on a partially-upgraded cluster until the entire
cluster supports this feature.
The error message we printed when the cluster doesn't support this
feature was outdated, referring to the no-longer-existing experimental
feature. So this patch fixes the error message.
Since this feature is already three years old, nobody is likely to ever
see this error message (it can be seen only by someone upgrading an
even older cluster, during the rolling upgrade), but better not have
wrong error messages in the code, even if it's not seen by users.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Replace all calls to SCYLLA_ASSERT() in Alternator by the better and
safer throwing_assert() introduced in the previous patch.
As a result of this patch, if one of the call sites for these asserts
is buggy and ever fails, only the involved operation will be killed
by an exception, instead of crashing the whole server - and often the
entire cluster (as the same buggy request reaches all nodes and
crashes them all).
Additionally, this patch replaces a few existing uses in Alternator
of on_internal_error() with a non-interesting message by a
more-or-less equivalent, but shorter, throwing_assert(). The idea is
to convert the verbose idiom:
if (!condition) {
on_internal_error(logger, "some error message")
}
With the shorter
throwing_assert(condition)
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
This patch introduces throwing_assert(cond), a better and safer
replacement for assert(cond) or SCYLLA_ASSERT(cond). It aims to
eventually replace all assertions in Scylla and provide a real solution
to issue #7871 ("exorcise assertions from Scylla").
throwing_assert() is based on the existing on_internal_error() and
inherits all its benefits, but brings with it the *convenience* of
assert() and SCYLLA_ASSERT(): No need for a separate if(), new strings,
etc. For example, you can write just one line of throwing_assert():
throwing_assert(p != nullptr);
Instead of much more verbose on_internal_error:
if (p == nullptr) {
utils::on_internal_error("assertion failed: p != nullptr")
}
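A minimal sketch of what such a macro can look like, assuming a logger in scope (the actual implementation may differ):
```cpp
// Sketch only: the real macro may also capture file/line context.
#define throwing_assert(cond) \
    do { \
        if (!(cond)) { \
            utils::on_internal_error(logger, "assertion failed: " #cond); \
        } \
    } while (0)
```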
Like assert() and SCYLLA_ASSERT(), in our tests throwing_assert() dumps
core on failure. But its advantage over the other assertion functions
becomes clear in production:
* assert() is compiled-out in release builds. This means that the
condition is not checked, and the code after the failed condition
continues to run normally, potentially to disastrous consequences.
In contrast, throwing_assert() continues to check the condition even in
release builds, and if the condition is false it throws an exception.
This ensures that the code following the condition doesn't run.
* SCYLLA_ASSERT() in release builds checks the condition and *crashes*
Scylla if the condition is not met.
In contrast, throwing_assert() doesn't crash, but throws an exception.
This means that the specific operation that encountered the error
is aborted, instead of the entire server. It often also means that
the user of this operation will see this error somehow and know
which operation failed - instead of encountering a mysterious
server (or even whole-cluster crash) without any indication which
operation caused it.
Another benefit of throwing_assert() is that it logs the error message
(and also a backtrace!) to Scylla's usual logging mechanisms - not to
stderr like assert and SCYLLA_ASSERT write, where users sometimes can't
see what is written.
Fixes #28308.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
This series closes a gap in how CQL request and response sizes are reported.
Previously, request_size and response_size were tracked as simple counters,
providing only cumulative totals per shard. This made it difficult to understand
the distribution of message sizes and identify potential issues with very large
or very small requests.
After this series, the CQL transport reports detailed histogram metrics showing
the distribution of request and response sizes. These histograms are tracked
per-instance, per-type (per ops), and per-scheduling-group, providing
much better visibility into CQL traffic patterns.
The histograms are collected for QUERY, EXECUTE, and BATCH operations, which are
the primary data path operations where message size distribution is most relevant.
This data can help identify:
- Clients sending unexpectedly large requests
- Operations with oversized result sets
- Scheduling group differences in traffic patterns
To support this, the series extends the approx_exponential_histogram template to
track an accurate sum, and adds a bytes_histogram type alias optimized for byte-range measurements (1KB to 1GB).
The existing per-shard counter metrics are maintained for backward compatibility.
Metrics example:
```
scylla_transport_cql_request_bytes{kind="BATCH",scheduling_group_name="sl:default",shard="0"} 129808
scylla_transport_cql_request_bytes{kind="EXECUTE",scheduling_group_name="sl:default",shard="0"} 227409
scylla_transport_cql_request_bytes{kind="PREPARE",scheduling_group_name="sl:default",shard="0"} 631
scylla_transport_cql_request_bytes{kind="QUERY",scheduling_group_name="sl:default",shard="0"} 2809
scylla_transport_cql_request_bytes{kind="QUERY",scheduling_group_name="sl:driver",shard="0"} 4079
scylla_transport_cql_request_bytes{kind="REGISTER",scheduling_group_name="sl:default",shard="0"} 98
scylla_transport_cql_request_bytes{kind="STARTUP",scheduling_group_name="sl:driver",shard="0"} 432
scylla_transport_cql_request_histogram_bytes_sum{kind="QUERY",scheduling_group_name="sl:driver"} 4079
scylla_transport_cql_request_histogram_bytes_count{kind="QUERY",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="1024.000000",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="2048.000000",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="4096.000000",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="8192.000000",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="16384.000000",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="32768.000000",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="65536.000000",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="131072.000000",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="262144.000000",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="524288.000000",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="1048576.000000",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="2097152.000000",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="4194304.000000",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="8388608.000000",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="16777216.000000",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="33554432.000000",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="67108864.000000",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="134217728.000000",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="268435456.000000",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="536870912.000000",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="1073741824.000000",scheduling_group_name="sl:driver"} 57
```
**The field sees it as an important issue**
Fixes #14850
Closes scylladb/scylladb#28419
* github.com:scylladb/scylladb:
test/boost/estimated_histogram_test.cc: Switch to real Sum
transport/server: to bytes_histogram
approx_exponential_histogram: Add sum() method for accurate value tracking
utils/estimated_histogram.hh: Add bytes_histogram
When running unit tests, there's a visible ~1-second sleep
when gossip exits the failure detector loop.
Improve this by adding a condition variable for exiting the loop
and signaling it when any of the exit conditions are satisfied:
the abort_source is pulled, the gossiper is shut down, or the sleep
is complete. We can't just use the abort_source because gossip can be
shut down independently of the rest of the system.
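A sketch of the pattern, with a hypothetical member name:
```cpp
// Sketch: sleep up to the gossip period, but wake immediately when an
// exit condition signals _loop_exit_cv (abort or shutdown).
try {
    co_await _loop_exit_cv.wait(std::chrono::seconds(1));
} catch (const seastar::condition_variable_timed_out&) {
    // Normal path: the full sleep elapsed without an exit request.
}
```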
To see the improvement, I ran cql_query_test in dev mode:
Before:
$ time ./build/dev/test/boost/combined_tests -t cql_query_test -- --smp 2 > /dev/null 2>&1
real 2m26.904s
user 0m24.307s
sys 0m13.402s
After:
$ time ./build/dev/test/boost/combined_tests -t cql_query_test -- --smp 2 > /dev/null 2>&1
real 0m26.579s
user 0m24.671s
sys 0m13.636s
Two minutes of real-time saved.
Real-life improvement in test.py will be lower, because of the overhead
of launching pytest for each test case.
Closes scylladb/scylladb#28649
When a permit is preemptively aborted, store the corresponding
exception in the permit's member `reader_permit::impl::_ex`.
This makes preemptively-aborted permits consistently report aborted()
and prevents them from being treated as eligible for inactive
registration in `register_inactive_read()`, avoiding assertion
failures on unexpected permit state.
Closes scylladb/scylladb#28591
The scylladb/scylla container image doesn't include the systemctl binary, while it
is used by the perftune.py script shipped within the same image.
Scylla Operator runs this script to tune Scylla nodes/containers,
expecting all its dependencies to be available in the container's PATH.
Without systemctl, the script fails on systems that run irqbalance
(e.g., on EKS nodes) as the script tries to reconfigure irqbalance and
restart it via systemctl afterwards.
Fixes: scylladb/scylla-operator#3080
Closes scylladb/scylladb#28567
Refs: SCYLLADB-193
Adds a "snapshot_table" topology operation and associated data structure/table columns to support dispatching a snapshot operation as a topo coordinator op.
The logic is similar to truncation, and is thus broken out and semi-shared with it.
Also adds optional tablet metadata to the manifest, listing all tablets present in a given snapshot, as well as
tablet sstable ownership, repair status, and token ranges.
As per description in SCYLLADB-193, the alternative snapshot mechanism is in
a separate namespace under 'tablets', which while dubious is the desired destination.
The API is accessed via `nodetool cluster snapshot`, which more or less mirrors `nodetool snapshot`, but using topo op.
TTL is added to message propagation as a separate patch here, since it is not (yet) used from the API (or nodetool); using it requires a syntax for both the API and the command line.
Closes scylladb/scylladb#28525
* github.com:scylladb/scylladb:
topology::snapshot: Add expiry (ttl) to RPC/topo op
test_snapshot_with_tablets: Extend test to check manifest content
table::manifest: Add tablet info to manifest.json
test::test_snapshot_with_tablets: Add small test for topo coordinated snapshot
scylla-nodetool: Add "cluster snapshot" command
api::storage_service: Add tablets/snapshots command for cluster level snapshot
db::snapshot-ctl: Add method to do snapshot using topo coordinator
storage_proxy: Add snapshot_keyspace method
topology_coordinator: Add handler for snapshot_tables
storage_proxy: Add handler for SNAPSHOT_WITH_TABLETS
messaging_service: Add SNAPSHOT_WITH_TABLETS verb
feature_service: Add SNAPSHOT_AS_TOPOLOGY_OPERATION feature
topology_mutation: Add setter for snapshot part of row
system_keyspace::topology_requests_entry: Add snapshot info to table
topology_state_machine: Add snapshot_tables operation
topology_coordinator: Break out logic from handle_truncate_table
storage_proxy: Break out logic from request_truncate_with_tablets
test/object_store: Remove create_ks_and_cf() helper
test/object_store: Replace create_ks_and_cf() usage with standard methods
test/object_store: Shift indentation right for test cases
Migrate the cluster tests directory to be handled by pytest. This is the next step in the process of unifying the tests and migrating to pytest.
With this PR, cluster tests will be executed with the full path to the file instead of the `suite/test` paradigm.
Backport is not needed because it is a framework enhancement.
Fixes: https://scylladb.atlassian.net/browse/SCYLLADB-46
Closes scylladb/scylladb#27618
* github.com:scylladb/scylladb:
test.py: remove setsid from the framework
test.py: rename suite.yaml to test_config.yaml
test.py: add cluster tests to be executed by pytest
test.py: add random seed for topology tests reproducibility
test.py: add explicit default values to pytest options
test.py: replace SCYLLA env var with build_mode fixture
Recently we suffered a regression on how Alternator TTL behaves when a node goes down when tablets are used.
Usually, expiration of data in a particular tablet is handled by the tablet's "primary replica". However, if that node is down, we want another node to perform these expirations until the primary replica goes back online. We created a function `tablet_map::get_secondary_replica()` to select that "other node". We don't care too much what the "secondary replica" means, but we do care that it's different from the primary replica - if it's the same, the expiration of that tablet will never be done.
It turns out that recently, in commits 817fdad and d88036d, the implementation of get_primary_replica() changed without a corresponding change to get_secondary_replica(). After those changes, the two functions are mismatched, and sometimes return the same node for both primary and secondary replica.
Unfortunately, although we had a dtest for the handling of a dead node in Alternator TTL, it failed to reproduce this bug, so this regression was missed - nothing else besides Alternator TTL ever used the get_secondary_replica() function.
So in this series, in addition to fixing the bug, we add two tests that reproduce this bug (they fail before the fix and pass with it):
1. A unit test that checks that get_secondary_replica() always returns a different node from get_primary_replica()
2. A cluster test based on the original dtest, which does reproduce this bug in Alternator TTL, where some of the data was never expired (but it only failed in release builds, for an unknown reason).
Fixes SCYLLADB-777.
Closes scylladb/scylladb#28771
* github.com:scylladb/scylladb:
test: add unit test for tablet_map::get_secondary_replica()
test, alternator: add test for TTL expiration with a node down
locator: fix get_secondary_replica() to match get_primary_replica()
The patch removes the code protected by !raft_topology_change_enabled()
since it is no longer reachable. Drop test_lwt_for_tablets_is_not_supported_without_raft
since non-raft mode is no longer supported.
It is not enough to go over all column types and register the UDTs. UDTs
might be nested in other types, like collections. One has to do a
traversal of the type tree and register every UDT on the way. That is
what this patch does.
This function is used by the query and write operations, which should
now both work with nested UDTs.
Add a test which fails before and passes after this patch.
Audit tests have been slow. They rely on the wait_for function.
This function first sleeps for the duration of the time step
specified, and then calls the given function. The audit tests
need 0.02-0.03 seconds for the given function, but the operation
lasts around 1.02-1.03 seconds, since the step is 1 second.
This patch modifies wait_for dtest function so it first executes
the given function, and afterwards calls time.sleep(step). This
reduces time needed for the given function from 1.03 to 0.03 seconds.
Total audit tests suite speedup is 3x. On the developer machine
the time is reduced from 13+ minutes to 4 minutes.
This patch also improves performance of some alternator tests that
use the same wait_for dtest function.
Refs SCYLLADB-573
The current way of checking Boost's stdout can have a race
condition where pytest tries to read the file before it has really been
flushed. This PR eliminates that possibility.
Closes scylladb/scylladb#28783
PR #28703 was merged into master but not with the latest version of the
changes. This patch is an incremental fix for this.
Currently, the elements of the tablet_sizes_per_shard vector are
incremented in separate shards. This is prone to false sharing of cache
lines, and ping-pong of memory, which leads to reduced performance.
In this patch, in order to avoid cache line collisions while updating
the sum of tablet sizes per shard, we align the counter to 64 bytes.
Fixes: SCYLLADB-678
Closes scylladb/scylladb#28757
This PR awaits each of the tasks, waiting for the MV schema to be added successfully
before starting the server shutdown.
With this change we will no longer get the shutdown races.
Closes scylladb/scylladb#28774
This commit removes the information that Alternator doesn't support tablets.
The limitation is no longer valid.
Fixes SCYLLADB-778
Closes scylladb/scylladb#28781
`test_autoretrain_dict` sporadically fails because the default
compression algorithm was changed after the test was written.
`9ffa62a986815709d0a09c705d2d0caf64776249` was an attempt to fix it by
changing the compression configuration during node startup. However,
the configuration change had an incorrect YAML format and was
ignored by ScyllaDB. This commit fixes it.
Fixes: scylladb/scylladb#28204
Closes scylladb/scylladb#28746
For a while, we have seen coroutine related tests (those that use the
coroutine_task fixture) fail occasionally, because no coroutine frame is
found. Multiple attempts were made to make this problem self-diagnosing
and dump enough information to be able to debug this post-mortem. To no
avail so far. A lot of time was invested into this benign issue:
See the long discussion at https://github.com/scylladb/scylladb/issues/22501.
It is not known if the bug is in gdb, or the gdb script trying to find
the coroutine frame. In any case, both are only used for debugging, so
we can tolerate occasional failures -- we are forced to do so when
working with gdb anyway.
Instead of piling on more effort there, just skip these tests when the
problem occurs. This solves the CI flakiness.
Fixes: #22501
Closes scylladb/scylladb#28745
Add --continue-after-error true to perf-cql-raw and perf-alternator
tests, and --stop-on-error false to perf-simple-query test, so that
tests don't abort on the first error.
The reason for this is that the tests are flaky, with an example failure:
Perf test failed: std::runtime_error (server returned ERROR to EXECUTE)
When CPU is starved on CI we can return timeouts and/or other errors.
The change should make tests more robust at the expense of a smaller test
scope. But those tests were written mostly to test the startup sequence,
as it differs from Scylla's startup.
Fixes https://scylladb.atlassian.net/browse/SCYLLADB-759
Closes scylladb/scylladb#28767
With the previous architecture, scylla servers were handled by test.py, and
if pytest failed, test.py was responsible for stopping scylla processes.
Now, with pytest-only handling, there is no such mechanism; that's why
I'm removing the setsid, so when the parent pytest process closes, it
will automatically close all children, including any process started during
testing. This ensures that no scylla process is left behind in case pytest
was killed.
Cluster tests are now executed by pytest as well.
Run pytest in an executor to avoid blocking the event loop, allowing
resource monitoring to run concurrently
The logic for passing the arguments to pytest changed because almost
all directories are now executed by pytest, and directories that are
not handled are excluded in pytest.ini.
Modify the thread count for debug mode, because with the default
logic CI agents die
Set the TOPOLOGY_RANDOM_FAILURES_TEST_SHUFFLE_SEED environment variable
during pytest configuration to ensure that all xdist workers
discover the same scope of tests. This is a known limitation of the
xdist plugin, where the discovered tests must be consistent across
master and workers.
Add explicit default values to pytest command line options to prevent
issues when running tests with pytest's parallel execution, where
options are not present in the upper conftest and would otherwise not be set at all.
Replace direct usage of the SCYLLA environment variable with the build_mode
pytest fixture and the path_to helper function. This makes tests more
flexible and consistent with the test framework. It also allows running
tests with xdist, where the environment variable can be left in the master
process and not set in the workers.
Use the fixture to get the scylla binary from the suite; this aligns
with how the relocatable Scylla executable is obtained.
Lua doesn't have separate integer and floating point numbers,
so we check if a number can fit in an integer and if so convert
it to an integer.
The conversion routine invokes undefined behavior (and even
acknowledges it!). More recent compilers changed their behavior
when casting infinities, breaking test_user_function_double_return
which tests this conversion.
Fix by tightening the conversion to not invoke undefined behavior.
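As an illustration of this kind of tightening, here is a minimal UB-free sketch (a hypothetical helper, not the actual Lua-binding code); casting a double outside int64_t's range, including infinities and NaN, is undefined behavior in C++, so such values are rejected up front:
```
#include <cmath>
#include <cstdint>
#include <optional>

// Hypothetical sketch of a tightened double -> int64_t conversion.
std::optional<int64_t> to_integer_if_exact(double v) {
    // Infinities and NaN can never be represented as an integer.
    if (!std::isfinite(v)) {
        return std::nullopt;
    }
    // Range check before the cast. 2^63 is exactly representable as a
    // double, while int64_t's max (2^63 - 1) is not, so compare against
    // 2^63 exclusively.
    if (v < -9223372036854775808.0 || v >= 9223372036854775808.0) {
        return std::nullopt;
    }
    auto i = static_cast<int64_t>(v);
    // Accept only values that round-trip, i.e. had no fractional part.
    if (static_cast<double>(i) != v) {
        return std::nullopt;
    }
    return i;
}
```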
Closes scylladb/scylladb#28503
This patchset:
- ensures the loading semaphore is acquired in cross-shard callbacks
- fixes iterator invalidation problem when reloading all cached permissions
Fixes https://scylladb.atlassian.net/browse/SCYLLADB-780
Backport: no, affected code not released yet
Closes scylladb/scylladb#28766
* github.com:scylladb/scylladb:
auth: cache: fix permissions iterator invalidation in reload_all_permissions
auth/cache: acquire _loading_sem in cross-shard callbacks
The hostent::addr_list is deprecated in favor of address_entry::addr
field that contains the very same addresses.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Closes scylladb/scylladb#28565
- add an overload to the rest http client to accept retry strategy instance as an argument
- remove hand rolled error handling from object storage client and replace with common machinery that supports handling and retrying when appropriate
No backport needed since it is only refactoring
Closes scylladb/scylladb#28161
* github.com:scylladb/scylladb:
object_storage: add retryable machinery to object storage
rest_client: add `simple_send` overload
This patch adds a unit test for tablet_map::get_secondary_replica().
It was never officially defined how the "primary" and "secondary"
replicas were chosen, and their implementation changed over time,
but the one invariant that this test verifies is that the secondary
replica and the primary replica must be different nodes.
This test reproduces issue SCYLLADB-777, where we discovered that
get_primary_replica() changed without a corresponding change to
get_secondary_replica(). So before the previous patch, this test failed,
and after the previous patch - it passes.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
We have many single-node functional tests for Alternator TTL in
test/alternator/test_ttl.py. This patch adds a multi-node test in
test/cluster/test_alternator.py. The new test verifies that:
1. Even though Alternator TTL splits the work of scanning and expiring
items between nodes, all the items get correctly expired.
2. When one node is down, all the items still expire because the
"secondary" owner of each token range takes over expiring the
items in this range while the "primary" owner is down.
This new test is actually a port of a test we already had in dtest
(alternator_ttl_tests.py::test_multinode_expiration). This port is
faster and smaller than the original (fewer nodes, fewer rows), but it
still found a regression (SCYLLADB-777) that dtest missed - the new test
failed when running with tablets and in release build mode.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
The function tablet_map::get_secondary_replica() is used by Alternator
TTL to choose a node different from get_primary_replica(). Unfortunately,
recently (commits 817fdad and d88037d) the implementation of the latter
function changed, without changing the former. So this patch changes
the former to match.
The next two patches will have two tests that fail before this patch,
and pass with it:
1. A unit test that checks that get_secondary_replica() returns a
different node than get_primary_replica().
2. An Alternator TTL test that checks that when a node is down,
expirations still happen because the secondary replica takes over
the primary replica's work.
Fixes SCYLLADB-777
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Move the storage test suite from test/storage/ to test/cluster/storage/
to consolidate related cluster-based tests. This removes the standalone
test/storage/suite.yaml as the tests will use the cluster's test configuration.
Initially these tests were in cluster, but they were moved outside in the
first iteration in order to use unshare. Now that they handle volumes in
another way, without unshare, they should be back in cluster.
Closes scylladb/scylladb#28634
They were running in recovery to reuse existing system tables without a
group0 id, but since we want to remove recovery mode we need to
re-generate the tables.
This patch removes the ability of a cluster to upgrade from not having
group0 to having one. This ability is used in the gossiper-based recovery
procedure, which is deprecated and removed in this version. Also remove
tests that use the procedure.
We are going to drop legacy topology mode (gossiper mode) and no longer
allow ScyllaDB to start in this mode. This patch refuses to boot if a
cluster is not in raft topology mode yet. It may happen if a node of a
cluster that is not yet in a raft topology is upgraded to a newer
version. If this happens the node has to be downgraded. Raft topology
has to be enabled on a cluster and then the node can be upgraded again.
We are going to drop legacy topology mode (gossiper mode) and no longer
allow ScyllaDB to start in this mode. This patch disallows a node to
join a cluster that is still in legacy mode. A cluster needs to enable
raft mode first.
ANN vector queries with all-zero vectors are allowed even on vector indexes with the similarity function set to cosine.
With the rescoring option enabled, those queries would fail, as rescoring calls the `similarity_cosine` function underneath, causing an `InvalidRequest` exception because all-zero vectors were not allowed, matching Cassandra's behaviour.
To eliminate the discrepancy, we want the all-zero vector `similarity_cosine` calls to pass but return NaN, as cosine similarity is not mathematically defined for zero vectors. We decided not to use arbitrary values, contrary to USearch, for which the distance (not to be confused with similarity) is defined as cos(0, 0) = 0, cos(0, x) = 1 while supporting the range of values [0, 2].
If we wanted to convert that to similarity, that would mean sim_cos(0, x) = 0.5, which has no mathematical justification for being more similar than, for example, vectors at obtuse angles.
It's safe to assume that all-zero vectors shouldn't have any impact on cosine similarity results, therefore we return NaN and eliminate them from the best results.
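For illustration, a minimal sketch of the chosen semantics (not ScyllaDB's actual `similarity_cosine` implementation):
```
#include <cmath>
#include <cstddef>
#include <limits>
#include <span>

// Sketch: cosine similarity returning NaN for all-zero inputs, so such
// vectors are naturally pushed out of the best results rather than
// rejected with an error.
float similarity_cosine_sketch(std::span<const float> a, std::span<const float> b) {
    float dot = 0, norm_a = 0, norm_b = 0;
    for (size_t i = 0; i < a.size(); ++i) {
        dot += a[i] * b[i];
        norm_a += a[i] * a[i];
        norm_b += b[i] * b[i];
    }
    if (norm_a == 0 || norm_b == 0) {
        // Undefined mathematically; NaN compares false against everything,
        // so these rows never rank as best matches.
        return std::numeric_limits<float>::quiet_NaN();
    }
    return dot / (std::sqrt(norm_a) * std::sqrt(norm_b));
}
```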
Adjusted the tests accordingly to check both Cassandra's and Scylla's behaviour.
Fixes: SCYLLADB-456
Backport to 2026.1 needed, as it fixes the bug for ANN vector queries using rescoring introduced there.
Closes scylladb/scylladb#28609
* github.com:scylladb/scylladb:
test/vector_search: add reproducer for rescoring with zero vectors
vector_search: return NaN for similarity_cosine with all-zero vectors
This patch series moves `test/cluster/dtest/guardrails_test.py`
to `test/cluster/test_guardrails.py`, and migrates it from `cluster/dtest/`
to `cluster/` framework.
There are two motivations for moving the test:
- Execution time reduction (from 12s to 9s in 'dev' in my env)
- Facilitate adding new tests to the `guardrails_test.py` file
No backport, `dtest/guardrails_test.py` is only on master
Closes scylladb/scylladb#28737
* github.com:scylladb/scylladb:
test: move dtest/guardrails_test.py to test_guardrails.py
test: prepare guardrails_test.py to be moved to test/cluster/
The inner loops in reload_all_permissions iterate role's permissions
and _anonymous_permissions maps across yield points. Concurrent
load_permissions calls (which don't hold _loading_sem) can emplace
into those same maps during a yield, potentially triggering a rehash
that invalidates the active iterator.
We want to avoid adding a semaphore acquire in load_permissions
because it's on a common path (get_permissions).
Fix by snapshotting the keys into a vector before iterating with
yields, so no long-lived map iterator is held across suspension
points.
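A sketch of the pattern, with hypothetical key/value types and a hypothetical loader standing in for the real auth cache code:
```
#include <seastar/core/coroutine.hh>
#include <seastar/core/future.hh>
#include <unordered_map>
#include <vector>

using key_t = int;                                  // hypothetical
using perms_t = int;                                // hypothetical
seastar::future<perms_t> load_from_storage(key_t);  // hypothetical

seastar::future<> reload_all_sketch(std::unordered_map<key_t, perms_t>& map) {
    // Snapshot the keys before any yield point; no co_await in this loop.
    std::vector<key_t> keys;
    keys.reserve(map.size());
    for (const auto& [k, v] : map) {
        keys.push_back(k);
    }
    for (const auto& k : keys) {
        // A concurrent emplace may rehash `map` during this co_await, but
        // no iterator into it is held any more, so that is safe.
        auto perms = co_await load_from_storage(k);
        if (auto it = map.find(k); it != map.end()) {
            it->second = std::move(perms); // re-lookup after resuming
        }
    }
}
```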
This series hardens MV shutdown behavior by fixing lifecycle tracking for detached view-builder callbacks and aligning update handling with the same async dispatch style used by create/drop.
Patch 1 refactors on_update_view to use a dedicated coroutine dispatcher (dispatch_update_view), keeping update logic serialized under the existing view-builder lock and consistent with the callback architecture already used for create/drop paths.
Patch 2 adds explicit callback lifetime coordination in view_builder:
- introduce a seastar::gate member
- acquire _ops_gate.hold() when launching detached create/update/drop dispatch futures
- keep the hold alive until each detached future resolves
- close the gate during view_builder::drain() so shutdown waits for in-flight callback work before final teardown
Together, these changes reduce shutdown race exposure in MV event handling while preserving existing behavior for normal operation.
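A condensed sketch of the gating pattern (member and method names are illustrative, not the exact ScyllaDB ones):
```
#include <seastar/core/coroutine.hh>
#include <seastar/core/future.hh>
#include <seastar/core/gate.hh>

class view_builder_sketch {
    seastar::gate _ops_gate;
    seastar::future<> dispatch_update_view(); // hypothetical dispatcher
public:
    void on_update_view_event() {
        // Hold the gate for the lifetime of the detached dispatch future;
        // hold() throws gate_closed_exception once drain() has started.
        auto holder = _ops_gate.hold();
        (void)dispatch_update_view().finally([h = std::move(holder)] {});
    }
    seastar::future<> drain() {
        // Shutdown waits for all in-flight callback work before teardown.
        co_await _ops_gate.close();
    }
};
```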
Testing:
- pytest --test-py-init test/cluster/mv (47 passed, 7 skipped)
backport: not required, the issue started happening in master
fixes: SCYLLADB-687
Closes scylladb/scylladb#28648
* github.com:scylladb/scylladb:
db/view: gate detached view-builder callbacks during shutdown
db:view: refactor on_update_view to use coroutine dispatcher
Takes a set of ks->tables tuples and issues a snapshot for each.
If the feature is enabled, the keyspace is non-local, and it uses
tablets, a topology coordinator call is issued across the cluster.
Keyspaces not fitting the above just go through the "normal" (node-local)
snapshot.
Makes request_truncate_with_tablets use a parameterized helper,
because eventually we will want to use almost identical logic
for other ops, like snapshot.
To create a keyspace there's the new_test_keyspace helper.
The table is created with a single cql.run_async with an explicit schema.
The dataset is populated with a single parallel INSERT as well.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
This is a preparatory patch. The next one will need to replace
    foo()
    bar()
with
    with something() as s:
        foo()
        bar()
Effectively -- only add the `with something()` line. To avoid shifting the
whole file right together with that future change, do it here.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
distribute_role() modifies _roles on non-zero shards via
invoke_on_others() without holding _loading_sem. Similarly, load_all()'s
invoke_on_others() callback calls prune_all() without the semaphore.
When these run concurrently with reload_all_permissions(), which
iterates _roles across yield points, an insertion can trigger
absl::flat_hash_map::resize(), freeing the backing storage while
an iterator still references it.
Fix by acquiring _loading_sem on the target shard in both
distribute_role()'s and load_all()'s invoke_on_others callbacks,
serializing all _roles mutations with coroutines that iterate
the map.
The test_restore_with_streaming_scopes test wants to run some loop body
for (almost) all combinations of scope, primary-replica-only and min
tablet count. For that, three nested loops are used. Using
itertools.product() makes the code shorter, less indented and more
explicit.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Two places are fixed
1. The call to create_dataset() is replaced with three "library"
methods. This makes it explicit which options and schema are used
for that. Eventually, the large and bulky create_dataset will be
removed
2. The part that restores data into a fresh new table calls some CQL by
hand, and partially re-uses variables obtained from the previous call to
create_dataset(). Using the same "library" methods to re-create an
empty table makes this part much simpler
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
This is a preparatory patch. The next one will need to replace
    foo()
    bar()
with
    with something() as s:
        foo()
        bar()
Effectively -- only add the `with something()` line. To avoid shifting the
whole file right together with that future change, do it here.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The take_snapshot() helper runs these API calls sequentially for every
server. Running them with asyncio.gather() slightly reduces the
wait-time, thus improving the total runtime.
Before:
CPU utilization: 2.1%
real 0m33,871s
user 0m22,500s
sys 0m13,207s
After:
CPU utilization: 2.4%
real 0m29,532s
user 0m22,351s
sys 0m12,890s
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The test in question does _exactly_ what this helper does, but in a
longer way. The only difference is that it uses server_id as the key to
a dict with sstable components, but that's easy to tune.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
So that it's not in the middle of tests themselves, but near other
"helper" functions in the .py file
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
remove hand-rolled error handling from the object storage client
and replace it with common machinery that supports exception
handling and retrying when appropriate
The concurrency semaphore gates uninitialized connections across all
do_accepts loops, but was initialized to a fixed value regardless of
how many listeners exist. With multiple listeners competing for the
same units, each effectively gets less than the configured concurrency.
Initialize the semaphore to concurrency - 1 and signal 1 per listen()
call, so total capacity is concurrency - 1 + nr_listeners. This
guarantees each listener's accept loop can have at least one unit
available.
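As a worked example of the arithmetic (the struct below is illustrative, not the server's actual code): with concurrency = 8 and 3 listeners, the total capacity becomes 8 - 1 + 3 = 10 units, so each accept loop can always hold at least one.
```
#include <seastar/core/semaphore.hh>

struct accept_concurrency_sketch {
    seastar::semaphore _sem;
    // Start with concurrency - 1 units...
    explicit accept_concurrency_sketch(size_t concurrency) : _sem(concurrency - 1) {}
    // ...and add one unit per listen() call, for a total capacity of
    // concurrency - 1 + nr_listeners.
    void on_listen() {
        _sem.signal(1);
    }
};
```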
The test uses the create_ks_and_cf helper, duplicating the existing code that does the same. This PR patches the basic tests to use standard facilities. It also prepares the ground for testing keyspace storage options with rf=3.
Cleaning up tests, not backporting
Closes scylladb/scylladb#28600
* https://github.com/scylladb/scylladb:
test/object_store: Remove create_ks_and_cf() helper
test/object_store: Replace create_ks_and_cf() usage with standard methods
test/object_store: Shift indentation right for test cases
Currently, test_secondary_index.py::test_indexing_paging_and_aggregation
is very slow, and the slowest test in the test/cqlpy framework: It takes
around 13 seconds on dev build, and because it is CPU-bound (doesn't sleep),
it is much slower on debug builds. The reason for this slowness is that it
needs to set up and read over 10,000 rows which is the default
select_internal_page_size.
But after the patches in pull request #25368, we can configure
select_internal_page_size, so in this patch we change the test to
temporarily reduce this option to just 50, and then the test can reach
the same code paths with just 142 rows instead of 20120 rows before this
patch.
As a result, the test should now be 140 times faster than it was before.
In practice, because of some fixed overheads (the test creates several
tables and indexes), in dev build mode the test run speedup is "only"
26-fold (to around half a second).
I verified that removing the code added in bb08af7 indeed makes the new
shorter test fail - and this is the only test in test_secondary_index.py
that starts to fail besides test_index_paging_group_by which is also
related (so my revert didn't just break secondary indexing completely).
So the shorter test is still a good regression test.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes scylladb/scylladb#28268
The future toolchain did not build the sanitizers, so debug
executables did not link. Fix by not disabling the sanitizers.
Closes scylladb/scylladb#28733
The test_restore_with_streaming_scopes test, among other things, checks how data streams flow while restoring. Whether or not to check the streams is decided based on the min tablet count value, which is compared with a hardcoded 512. This value of 512 matched the tablet count used by this test until it was "optimized" by #27839, where this number changed to 5 and the streaming checks were effectively turned off.
The good news is that the very same checks are still performed by test_refresh_with_streaming_scopes. But it's better to have a working restoration test anyway.
Minor test fix, not backporting
Closes scylladb/scylladb#28607
* github.com:scylladb/scylladb:
test: Fix the condition for streaming directions validation
test: Split test_backup.py::check_data_is_back() into two
Currently, the test assumes that when
'topology_coordinator_pause_before_processing_backlog: waiting' is
logged, the task for decommission must be there. This was based on the
assumption that topology coordinator is idle and decommission request
wakes it up. But if the server is slow enough, it may still be running
the load balancer in reaction to table creation, and block on that
injection point before decommission request was added.
Fix by waiting for the task to appear rather than the injection.
Fixes SCYLLADB-715
Only 2026.1 is affected.
Closes scylladb/scylladb#28688
* github.com:scylladb/scylladb:
test_tablets_parallel_decommission: Fix flakiness due to delayed task appearance
test: cluster: task_manager_client: Introduce wait_task_appears()
tests: pylib: util: Add exponential backoff to wait_for
There's a bunch of incremental repair tests that want to call the scylla
sstable command. For that they try to find the scylla binary by
scanning the /proc directory (see the local_process_id and get_scylla_path
helpers).
There's a shorter way -- just call manager.get_server_exe().
Same for backup-restore test.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Closes scylladb/scylladb#28676
There are three tests and a function with a pair of boolean parameters
called by those. It's less code if the function becomes a test with
parameters.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Closes scylladb/scylladb#28677
The test_backup_simple creates a ks/cf, takes a snapshot, backs it up,
then checks that the files were uploaded. The test_backup_move does the
same, but also plays with the 'move_files' parameter being true/false.
In fact, the "move" test was a copy of the "simple" one that dropped the
check for the scheduling group being "streaming" (backup with
--move-files can check the same, it's not bad) and the check for the
destination bucket containing the needed files (same here -- checking
that files arrived in the bucket after --move-files is good).
At the end of the day, after the change the backup test is run two times
instead of three, and performs extra checks for the --move-files case.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Closes scylladb/scylladb#28606
https://github.com/scylladb/scylladb/pull/25746 added a new column to `system.clients`: `client_options frozen<map<text, text>>`. This column stores all options sent by the client in the `STARTUP` message.
This PR also added `CLIENT_OPTIONS` to the list of values sent in `SUPPORTED` message, and documented that drivers can send their configuration (as JSON) in `STARTUP` under this key.
Documentation for the new column was not added to the description of `system.clients` table, and documentation about the new `STARTUP` key was added in `protocol-extensions.md`, but in the section about shard awareness extension.
This PR adds missing `system.clients` column description, moves the documentation of `CLIENT_OPTIONS` into its own section, and expands it a bit.
Backport: none, because this fixes internal documentation.
Closes scylladb/scylladb#28126
* github.com:scylladb/scylladb:
protocol-extensions.md: Fix client_options docs
system_keyspace.md: Add client_options column
system_keyspace.md: Fix order in system.clients
Doing it with format("{}", foo) is correct, but to_string is
a bit more lightweight.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Closes scylladb/scylladb#28630
The `try-catch` expression is pretty much useless in its current form. If we return the future, the awaiting will only be performed by the caller, completely circumventing the exception handling.
As a result, instead of handling `raft::request_aborted` with a proper error message, the user will face `seastar::abort_requested_exception` whose message is cryptic at best. It doesn't even point to the root of the problem.
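A sketch of the pitfall and the fix, using a hypothetical stand-in for the raft::server method:
```
#include <seastar/core/coroutine.hh>
#include <seastar/core/future.hh>

seastar::future<> wait_for_state_change_sketch(); // hypothetical stand-in

// Broken: the try block has already finished once the future is returned,
// so only exceptions thrown synchronously -- before the first suspension --
// ever reach the handler.
seastar::future<> wait_broken() {
    try {
        return wait_for_state_change_sketch();
    } catch (const std::exception& e) {
        throw std::runtime_error("never reached for async failures");
    }
}

// Fixed: co_await re-throws the future's exception inside the try block,
// so the handler sees asynchronous failures too.
seastar::future<> wait_fixed() {
    try {
        co_await wait_for_state_change_sketch();
    } catch (const std::exception& e) {
        throw std::runtime_error("now reached, with a proper error message");
    }
}
```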
Fixes SCYLLADB-665
Backport: This is a small improvement and may help when debugging, so let's backport it to all supported versions.
Closes scylladb/scylladb#28624
* https://github.com/scylladb/scylladb:
test: raft: Add test_aborting_wait_for_state_change
raft: Describe exception types for wait_for_state_change and wait_for_leader
raft: Await instead of returning future in wait_for_state_change
This commit moves `guardrails_test.py`, prepared in the previous
commit of this patch series, to `test/cluster/test_guardrails.py`.
It also cleans up `suite.yaml`.
Disable `test/cluster/dtest/guardrails_test.py` in `suite.yaml` and
make it compatible with the `test/cluster/` framework. This will
allow moving this file from `test/cluster/dtest/` to `test/cluster/`
in the next commit of this patch series.
There are two motivations for moving the test:
- Execution time reduction (from 12s to 9s in 'dev' in my env)
- Facilitate adding new tests to the `guardrails_test.py` file
There are 3 metrics (that go into every compaction_history entry):
total_tombstone_purge_attempt
total_tombstone_purge_failure_due_to_overlapping_with_memtable
total_tombstone_purge_failure_due_to_overlapping_with_uncompacting_sstable
When a tombstone is not expired (e.g. doesn't satisfy "gc_before" or the
grace period), it can currently be accounted as a failure due to
overlapping with either a memtable or an uncompacting sstable.
So those last 2 metrics have noise from *unexpired* tombstones.
What we should do is only account for expired tombstones in all
those 3 metrics. We lose the info about the amount of tombstones
processed by compaction; now we'll only know about the expired ones.
But those metrics were primarily added for explaining why expired
tombstones cannot be removed.
We could have alternatively added a new field
purge_failure_due_to_being_unexpired or something, but
it requires adding a new field to compaction_history.
Fixes https://scylladb.atlassian.net/browse/SCYLLADB-737.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Closes scylladb/scylladb#28669
Links were pointing to the `debian` subdirectory. However, the docker build was refactored to use `redhat` in 1abf981a73, see https://github.com/scylladb/scylladb/pull/22910
No backport, just a README link fixes.
Closes scylladb/scylladb#28699
* github.com:scylladb/scylladb:
docs: fix path to the build_docker.sh which was moved from debian to redhat subdirectory
docs: fix link to docker build README.MD
Make the actual table name a parameter and add logic to adapt to the
variant used.
Also add dump_to_log::yes to the is_rows() invocation to help debugging
tests.
When set to true, the query results will be logged by the testlog logger
with debug level. A huge help when debugging failures around cql
assertions: seeing the actual query result is often enough to
immediately understand why the test failed.
system.batchlog will still have to be used while the cluster is
upgrading from an older version, which doesn't know v2 yet.
Re-add support for replaying v1 batchlogs. The switch to v2 will happen
after the BATCHLOG_V2 cluster feature is enabled.
The only external user -- storage_proxy -- only needs a minor
adjustment: switch between the table names. The rest is handled
transparently by the db/batchlog.hh interface and the batchlog_manager.
process_batch() currently returns stop_iteration::no from all control
paths. This is not useful. Return the all_replayed output param instead.
This requires making the batch() lambda a coroutine, but considering the
amount of work process_batch() does (send multiple writes), this should
be inconsequential.
The patchset fixes the abort_source implementation for perf-alternator and perf-cql-raw. It moves
the run_standalone function to common code in perf.hh with the necessary templating.
We also add extensive testing so that it's more difficult to break the tooling in the future.
Fixes SCYLLADB-560
Backport: no, internal tooling improvement
Closes scylladb/scylladb#28541
* github.com:scylladb/scylladb:
test: cluster: add tests for perf tools
test: perf: fix port race condition on startup in connect workload
test: perf: prepare benchmarks to bind to custom host
test: perf: make perf-alterantor remote port configurable
test: perf: fix ASAN leak warnings in perf-alternator
Reapply "main: test: add future and abort_source to after_init_func"
Some assertions in the Raft-based topology are likely to cause crashes of
multiple nodes due to the consistent nature of the Raft-based code. If the
failing assertion is executed in the code run by each follower (e.g., the code
reloading the in-memory topology state machine), then all nodes can crash. If
the failing assertion is executed only by the leader (e.g., the topology
coordinator fiber), then multiple consecutive group0 leaders will chain-crash
until there is no group0 majority.
Crashing multiple nodes is much more severe than necessary. It's enough to
prevent the topology state machine from making more progress. This will
naturally happen after throwing a runtime error. The problematic fiber will be
killed or will keep failing in a loop. Note that it should be safe to block
the topology state machine, but not the whole group0, as the topology state
machine is mostly isolated from the rest of group0.
We replace some occurrences of `on_fatal_internal_error` and `SCYLLA_ASSERT`
with `on_internal_error`. These are not all occurrences, as some fatal
assertions make sense, for example, in the bootstrap procedure.
We also raise an internal error to prevent a segmentation fault in a few places.
Fixes #27987
Backporting this PR is not required, but we can consider it at least for 2026.1
because:
- it is LTS,
- the changes are low-risk,
- there shouldn't be many conflicts.
Closes scylladb/scylladb#28558
* github.com:scylladb/scylladb:
raft topology: prevent accessing nullptr returned by topology::find
raft topology: make some assertions non-crashing
In https://github.com/scylladb/scylladb/pull/27262 table audit has been
re-enabled by default in `scylla.yaml`, logging certain categories to a table,
which should make new Scylla deployments have audit enabled.
Now, in the next release, we also want to enable audit in `db/config.cc`,
which should enable audit for all deployments that don't explicitly configure
audit otherwise in `scylla.yaml` (or via the cmd line).
BTW. Because this commit aligns audit's default config values in `db/config.cc`
to those of `scylla.yaml`, `docs/reference/configuration-parameters.rst`, which
is based on `db/config.cc`, will start showing that table audit is the default.
Refs: https://github.com/scylladb/scylladb/issues/28355
Refs: https://scylladb.atlassian.net/browse/SCYLLADB-222
No backport: table audit has been enabled in 2026.1 in `scylla.yaml`,
and should be always on starting from the next release,
which is the release we're currently merging to (2026.2).
Closes scylladb/scylladb#28376
* github.com:scylladb/scylladb:
docs: decommission: note audit ks may require ALTERing
docs: mention table audit enabled by default
audit: disable DDL by default
db/config: enable table audit by default
test/cluster: fix `test_table_desc_read_barrier` assertion
test/cluster: adjust audit in tests involving decommissioning its ks
audit_test: fix incorrect config in `test_audit_type_none`
Compaction and statement groups are carried over on those configs, but
are in fact unused. Drop both.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Closes scylladb/scylladb#28540
There are four tests that check how restore with the primary-replica-only option works in various scopes and topologies. Cases that check same-racks and same-datacenters are very similar, and so are those that check different-racks and different-datacenters. Parametrizing and merging them saves lots of code (+30 lines, -116 lines).
It's probably worth merging the resulting same-domain tests with the different-domain ones, because the similarity is still large, but the result becomes too if-y, so it's not done here. Maybe later.
Improving tests, not backporting
Closes scylladb/scylladb#28569
* https://github.com/scylladb/scylladb:
test: Merge test_restore_primary_replica_different_... tests
test: Merge test_restore_primary_replica_same_... tests
test: Don't specify expected_replicas in test_restore_primary_replica_different_dc_scope_all
test: Remove local r_servers variable from test_restore_primary_replica_different_dc_scope_all
Fix the build of the test and the upload operation flow
No need to backport since it is only a test we barely use
Closes scylladb/scylladb#28595
* github.com:scylladb/scylladb:
s3_perf: fix upload operation flow
s3_perf: fix the CMake build
Tablet migration keeps sstable snapshot during streaming, which may
cause temporary increase in disk utilization if compaction is running
concurrently. SSTables compacted away are kept on disk until streaming
is done with them. The more tablets we allow to migrate concurrently,
the higher disk space can rise. When the target tablet size is
configured correctly, every tablet should own about 1% of disk
space. So concurrency of 4 shouldn't put us at risk. But target tablet
size is not chosen dynamically yet, and it may not be aligned with
disk capacity.
Also, tablet sizes can temporarily grow above the target, up to 2x
before the split starts, and some more because splits take a while to
complete.
To reduce the impact from this, reduce concurrency of
migration. Concurrency of 2 should still be enough to saturate
resources on the leaving shard.
Also, reducing concurrency means that load balancing is more
responsive to preemption. There will be less bandwidth sharing, so
scheduled migrations complete faster. This is important for scale-out,
where we bootstrap a node and want to start migrations to that new
node as soon as possible.
Refs scylladb/siren#15317
Closes scylladb/scylladb#28563
* github.com:scylladb/scylladb:
tablets, config: Reduce migration concurrency to 2
tablets: load_balancer: Always accept migration if the load is 0
config, tablets: Make tablet migration concurrency configurable
The methods of `raft::server` are abortable and if the passed
`abort_source` is triggered, they throw `raft::request_aborted`.
We document that.
Although `raft::server` is an interface, this is consistent with
the descriptions of its other methods.
The `try-catch` expression is pretty much useless in its current form.
If we return the future, the awaiting will only be performed by the
caller, completely circumventing the exception handling.
As a result, instead of handling `raft::request_aborted` with a proper
error message, the user will face `seastar::abort_requested_exception`
whose message is cryptic at best. It doesn't even point to the root
of the problem.
Fixes SCYLLADB-665
Due to the lack of checks present in process_execute_internal from
transport/server.cc, the needs_authorization bool was always set to true,
doing some extra work (check_access()) for each request.
In this patch we mirror that logic in the test env which perf-simple-query
uses. This can also potentially improve the runtime of unittests (marginally).
Note that the bug is only in the perf tool, not scylla itself; the fix
decreases insns/op by around 10%:
Before: 41065 insns/op
After: 37452 insns/op
Command: ./build/release/scylla perf-simple-query --duration 5 --smp 1
Fixes https://github.com/scylladb/scylladb/issues/27941
Closes scylladb/scylladb#28704
Using an outdated image can cause problems when `microdnf update`
runs, if the distribution doesn't maintain good update hygiene.
Although I suspect that when update failures happen, they're really
caused by the propagation delay of packages to mirrors.
Fix by using --pull=always to get a fresh image.
Ref https://scylladb.atlassian.net/browse/SCYLLADB-714
Closes scylladb/scylladb#28680
In storage_service::load_stats_for_tablet_based_tables(), we are passing
a reference to sum_tablet_sizes to the lambda which increments this value
on each shard via map_reduce0(). This means we could have a race
condition because this is executed on separate threads/CPUs.
This patch fixes the problem by collecting the sums by shard into a
vector, then summing those up.
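A sketch of the fixed pattern (the service type and method are hypothetical): each shard returns its local sum into a vector, and the addition happens in one place, with no cross-shard writes to a shared variable.
```
#include <seastar/core/coroutine.hh>
#include <seastar/core/future.hh>
#include <seastar/core/sharded.hh>
#include <cstdint>
#include <numeric>
#include <vector>

struct stats_service {                      // hypothetical sharded service
    uint64_t local_tablet_sizes() const;
};

seastar::future<uint64_t> sum_tablet_sizes(seastar::sharded<stats_service>& svc) {
    // Collect one partial sum per shard into a vector...
    std::vector<uint64_t> per_shard = co_await svc.map(
        [] (const stats_service& s) { return s.local_tablet_sizes(); });
    // ...then add them up on the calling shard only.
    co_return std::accumulate(per_shard.begin(), per_shard.end(), uint64_t(0));
}
```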
Refs: SCYLLADB-678
Closesscylladb/scylladb#28703
interval_data's move constructor is conditionally noexcept. It
contains a throw statement for the case where the underlying type's
move constructor can throw; that throw statement is never executed
if we're in the noexcept branch. Clang 23, however, doesn't understand
that, and warns about throwing in a noexcept function.
Fix that by rewriting the logic using seastar::defer(). In the
noexcept case, the optimizer should eliminate it as dead code.
Closes scylladb/scylladb#28710
Correct the upload operation logic. The previous flow incorrectly
checked for the test file on S3 even when performing operations that do
not download the file, such as uploads.
Remove bootstrap and decommission from allowed_repair_based_node_ops.
Using RBNO over streaming for these operations has no benefits, as they
are not exposed to the out-of-date replica problem that replace,
removenode and rebuild are.
On top of that, RBNO is known to have problems with empty user tables.
Using streaming for bootstrap and decommission is safe and faster
than RBNO in all conditions, especially when the table is small.
One test needs adjustment as it relies on RBNO being used for all node
ops.
Fixes: SCYLLADB-105
Closes scylladb/scylladb#28080
It checks that all workloads can be properly
executed, with successful startup and teardown.
Testing alternator in remote mode is especially important
because it's invoked like this during pgo training in pgo.py.
Test runtime:
Release - 24s
Debug - 1m 15s
Test time consists mostly of Scylla startup in various modes.
Other workloads at startup call prepopulate(), which connects
with a retry loop, therefore it waits until the cql port is open.
This commit adds a single place where we will wait for the port
for all workloads.
The timeout is set to 5 minutes so that even the slowest machines
are able to start.
There are a handful of places in the code related to dictionary
compression which call get_units to acquire semaphore units, but the
returned future is not awaited, seemingly by mistake. The result of
get_units is assigned to a variable - which is reasonable at a glance
because the semaphore units need to be assigned to a variable in order
to control their scope - but at the same time if co_await is mistakenly
omitted, like here, doing so will silence the nodiscard check of
seastar::future and, effectively, the get_units call will be nearly
useless. Unfortunately, this is an easy mistake to make.
Fix the places in the code that acquire semaphore units via get_units
but never await the future returned by it. I found them by manual code
inspection, so I hope that I didn't miss any.
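The shape of the mistake and the fix, as a sketch (`do_work` is a hypothetical guarded operation):
```
#include <seastar/core/coroutine.hh>
#include <seastar/core/future.hh>
#include <seastar/core/semaphore.hh>

seastar::future<> do_work(); // hypothetical guarded operation

seastar::future<> guarded(seastar::semaphore& sem) {
    // Broken: without co_await, `units` is a future<semaphore_units<>>,
    // nothing is actually held, and assigning it to a variable silences
    // the nodiscard check on seastar::future:
    //   auto units = seastar::get_units(sem, 1);

    // Correct: await the future; the units are released when `units`
    // goes out of scope at the end of the coroutine.
    auto units = co_await seastar::get_units(sem, 1);
    co_await do_work();
}
```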
Closes scylladb/scylladb#28581
With audit feature enabled, it's not immediately obvious that its
pseudo-system keyspace `audit` may require adjusting its RF across DCs
before decommissioning a node, and this should be documented.
The DDL audit category doesn't make sense if it's enabled by default on its
own, as no DDL statements are going to be audited if audit_keyspaces/audit_tables
setting is empty. This may be counter-intuitive to our users, who may
expect to actually see these statements logged if we're enabling this by
default. Also, it doesn't make sense to enable a setting by default if
it has no effect.
Additionally, all possible audit categories were listed for the user's
convenience.
In https://github.com/scylladb/scylladb/pull/27262 table audit has been
re-enabled by default in `scylla.yaml`, logging certain categories to a table,
which should make new Scylla deployments have audit enabled.
Now, in the next release, we also want to enable audit in `db/config.cc`,
which should enable audit for all deployments that don't explicitly configure
audit otherwise in `scylla.yaml` (or via the cmd line).
BTW. Because this commit aligns audit's default config values in `db/config.cc`
to those of `scylla.yaml`, `docs/reference/configuration-parameters.rst`, which
is based on `db/config.cc`, will start showing that table audit is the default.
Refs: https://github.com/scylladb/scylladb/issues/28355
Refs: https://scylladb.atlassian.net/browse/SCYLLADB-222
The test assertion `desc_schema[0] == desc_schema[1]` does a direct
list comparison, which is order-sensitive. Before enabling audit by default,
both nodes would return only the test keyspace/table, so the order
didn't matter. With audit enabled, there will be multiple keyspaces,
and they can be returned in a different order by different nodes.
When table audit is enabled, Scylla creates the "audit" ks with
NetworkTopologyStrategy and RF=3. During node decommission, streaming can fail
for the audit ks with "zero replica after the removal" when all nodes from a DC
are removed, so we have to ALTER the audit ks to either zero the number of its
replicas, allowing a clean decommission, or have them in the 2nd DC.
BTW. https://github.com/scylladb/scylladb/issues/27395 is the same change, but
in dtests repository.
Passing Python `None` to setup is incorrect, because config updates are sent
as a dict and `None` is treated as "unset" - meaning: use Scylla's default.
Use the explicit string "none" to guarantee that audit is disabled.
There is no point running repair for tables with RF=1. Row-level
repair will skip it, but the auto repair scheduler will keep scheduling
such repairs since repair_time cannot be updated.
Skip such repairs at the scheduler level for auto repair.
If the request is issued by a user, we still have to schedule such a
repair, otherwise the user request would never finish.
Fixes SCYLLADB-561
Closes scylladb/scylladb#28640
This commit introduces four changes:
- In the `table` example, singular forms (node, partition) are changed to
plural forms (nodes, partitions). Currently, the default `table`
audit configuration is RF=3 and writes use CL=ONE. Therefore,
a `table` audit log write failure should not be caused by a single
node unavailability, and plural forms are more adequate.
- In the `table` example, unreachability due to network issues is
mentioned because with RF=3, audit failure due to network problems
is more likely to happen than a simultaneous failure of three
nodes (such network failures happened in SCYLLADB-706).
- In the `syslog` example, a slash `/` is changed to `or`, so `table`
and `syslog` examples have similar structure.
- As the `syslog` line is already being changed, I also change `unix`
to `Unix`, as the capitalized form is the correct one.
Refs SCYLLADB-706
Closes scylladb/scylladb#28702
The connection's `cpu_concurrency_t` struct tracks the state of a connection
to manage the admission of new requests and prevent CPU overload during
connection storms. When a connection holds units (allowed only 0 or 1), it is
considered to be in the "CPU state" and contributes to the concurrency limits
used when accepting new connections.
The bug stems from the fact that `counted_data_source_impl::get` and
`counted_data_sink_impl::put` calls can interleave during execution. This
occurs because of `should_parallelize` and `_ready_to_respond`, the latter being
a future chain that can run in the background while requests are being read.
Consequently, while reading request (N), the system may concurrently be
writing the response for request (N-1) on the same connection.
This interleaving allows `return_all()` to be called twice before the
subsequent `consume_units()` is invoked. While the second `return_all()` call
correctly returns 0 units, the matching `consume_units()` call would
mistakenly take an extra unit from the semaphore. Over time, a connection
blocked on a read operation could end up holding an unreturned semaphore
unit. If this pattern repeats across multiple connections, the semaphore
units are eventually depleted, preventing the server from accepting any
new connections.
The fix ensures that we always consume the exact number of units that were
previously returned. With this change, interleaved operations behave as
follows:
get() return_all — returns 1 unit
put() return_all — returns 0 units
get() consume_units — takes back 1 unit
put() consume_units — takes back 0 units
Logically, the networking phase ends when the first network operation
concludes. But more importantly, when a network operation
starts, we no longer hold any units.
Other solutions are possible but the chosen one seems to be the
simplest and safest to backport.
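A sketch of the invariant the fix establishes (names follow the description above; the struct is illustrative, not the actual connection code):
```
#include <seastar/core/future.hh>
#include <seastar/core/semaphore.hh>

struct cpu_concurrency_sketch {
    seastar::semaphore& _sem;
    size_t _held = 1; // a connection holds 0 or 1 units

    size_t return_all() {
        size_t n = _held; // may already be 0 when calls interleave
        _sem.signal(n);
        _held = 0;
        return n;
    }
    // Take back exactly what this caller returned; a hardcoded `1` here is
    // the bug that slowly leaked semaphore units on interleaved get()/put().
    seastar::future<> consume_units(size_t n) {
        return _sem.wait(n).then([this, n] { _held += n; });
    }
};
```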
Fixes SCYLLADB-485
Backport: all supported affected versions, bug introduced with initial feature implementation in ed3e4f33fd
Closes scylladb/scylladb#28530
* github.com:scylladb/scylladb:
test: auth_cluster: add test for hanged AUTHENTICATING connections
transport: fix connection code to consume only initially taken semaphore units
This patchset replaces the permissions cache based on loading_cache with a new unified (permissions and roles), full, coherent auth cache.
The reason for the change is that we want to improve behaviour under stress and simplify operation manuals. The new cache doesn't require any tweaking, and it behaves particularly better in scenarios with lots of schema entities (e.g. tables) combined with unprepared queries. The old cache can generate a few thousand extra internal tps due to cache refresh.
A benchmark of unprepared statements (just to populate the cache) with 1000 tables shows a 3k tps reduction in internal reads and a 9.1% reduction in median instructions per op. That many tables were used to show the resource impact; the cache could be filled with other resource types to show the same improvement.
Backport: no, it's a new feature.
Fixes https://github.com/scylladb/scylladb/issues/7397
Fixes https://github.com/scylladb/scylladb/issues/3693
Fixes https://github.com/scylladb/scylladb/issues/2589
Fixes https://scylladb.atlassian.net/browse/SCYLLADB-147
Closes scylladb/scylladb#28078
* github.com:scylladb/scylladb:
test: boost: add auth cache tests
auth: add cache size metrics
docs: conf: update permissions cache documentation
auth: remove old permissions cache
auth: use unified cache for permissions
auth: ldap: add permissions reload to unified cache
auth: add permissions cache to auth/cache
auth: add service::revoke_all as main entry point
auth: explicitly life-extend resource in auth_migration_listener
The hostent::addr_list is deprecated in favor of address_entry::addr
field that contains the very same addresses.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Closes scylladb/scylladb#28566
All users of it had been updated to get the streaming group elsewhere,
so this getter is no longer needed.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Closes scylladb/scylladb#28527
sccache combines the functions of ccache and distcc, and
promises to support C++20 modules in the future. Switch
to sccache in anticipation of modules support.
The documentation is adjusted, since the cache will be
persistent with sccache without further work.
Closes scylladb/scylladb#28524
There are some places that get a `map<foo, bar>` and return it to the caller as `"key": string(foo), "value": string(bar)` json. For that there's a `map_to_key_value()` helper in api.hh that re-formats the map into a vector of json elements and returns it, letting seastar json-ize that vector.
Recently, a stream_range_as_array() helper appeared in seastar that helps stream any range without converting it into an intermediate collection. Some of the hottest users of `map_to_key_value()` had already been converted; this PR converts a few remainders and removes the helper in question to encourage further usage of stream_range_as_array().
Code cleanup, not backporting
Closes scylladb/scylladb#28491
* github.com:scylladb/scylladb:
api: Remove map_to_key_value() helpers
api: Streamify view_build_statuses handler
api: Streamify few more storage_service/ handlers
api: Add map_to_json() helper
api: Coroutinize view_build_statuses handler
The "--primary-replica-only" ("-pro") flag was previously ignored by
the `restore` operation. This patch ensures the argument is parsed and
applied correctly.
Closes scylladb/scylladb#28490
Detached migration callbacks (on_create_view, on_update_view, on_drop_view)
can race with view_builder::drain() teardown.
Add a lifetime gate to view_builder and wire callback launches through
_ops_gate.hold() so each detached dispatch future is tracked until it
completes (finally keeps the hold alive). During shutdown, drain()
now waits for all tracked callback work with _ops_gate.close().
This ensures drain does not proceed past callback lifetime while shutdown is in
progress, and ignores only gate_closed_exception at callback entry as the
expected shutdown path.
Fixes parsing of comma-separated seed lists in "init.cc" and "cql_test_env.cc" to use the standard `split_comma_separated_list` utility, avoiding manual `npos` arithmetic. The previous code relied on `npos` being `uint32_t(-1)`, which would not overflow in the `uint64_t` target, so the loop exited as expected. With Seastar's upcoming change to make `npos` `size_t(-1)`, it would wrap around to zero and cause an infinite loop.
Switch to the `split_comma_separated_list` standardized way of tokenization that is also used in other places in the code. Empty tokens are handled as before. This prevents startup hangs and test failures when Seastar is updated.
The other commit also removes the unnecessary creation of temporary `gms::inet_address()` objects when calling `std::set<gms::inet_address>::emplace()`.
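An illustrative reconstruction of the npos hazard described above (not the actual init.cc code; it assumes a string type whose `npos` was historically `uint32_t(-1)`, like seastar's sstring):
```
#include <cstdint>
#include <string>

void split_broken(const std::string& s) {
    uint64_t pos = 0;
    do {
        uint64_t comma = s.find(',', pos);
        // ... process s.substr(pos, comma - pos) ...
        pos = comma + 1;
        // With npos == uint32_t(-1): pos becomes 2^32, which is larger
        // than any realistic s.size(), so the loop exits.
        // With npos == size_t(-1): pos wraps around to 0 and the loop
        // restarts from the beginning, forever.
    } while (pos < s.size());
}
```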
Refs: https://github.com/scylladb/seastar/pull/3236
No backport: The problem will only appear in master after Seastar is upgraded. The old code works with Seastar before https://github.com/scylladb/seastar/pull/3236 (although by accident, because of different integer bit sizes).
Closes scylladb/scylladb#28573
* github.com:scylladb/scylladb:
init: fix infinite loop on npos wrap with updated Seastar
init: remove unnecessary object creation in emplace calls
test_node_ops_tasks.py::test_get_children fails due to a timeout of the
tasks_vt_get_children injection in debug mode. Compared to a successful
run, no clear root cause stands out.
Extend the message timeout of tasks_vt_get_children from 10s to 60s.
Fixes: #28295.
Closes scylladb/scylladb#28599
What changed
Updated .github/workflows/call_sync_milestone_to_jira.yml to include SMI in jira_project_keys
Why (Requirements Summary)
Adding SMI to create releases in the SMI Jira project based on new milestones from scylladb.git.
This will create a new release in the SMI Jira project when a milestone is added to scylladb.git.
Fixes: PM-190
Closes scylladb/scylladb#28585
Right now the slowest test in the test/cqlpy directory is
cassandra_tests/validation/entities/collections_test.py::
testMapWithLargePartition
This test (translated from Cassandra's unit test), just wants to verify
that we can write and flush a partition with a single large map - with
200 items totalling around 2MB in size.
200 items totalling 2MB is large, but not huge, and is not the reason
why this test was so slow (around 9 seconds). It turns out that most
of the test time was spent in Python code, preparing a 2MB random string
in the slowest possible way. But there is no need for this string to be
random at all - we only care about the large size of the value, not the
specific characters in it!
Making the characters written in this text constant instead of random
made it 20 times faster - it now takes less than half a second.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes scylladb/scylladb#28271
File streaming only releases the file descriptors of a tablet being
streamed at the very end of streaming. This means that if the streaming
tablet has a compaction on the largest tier finish after streaming
started, there will always be ~2x space amplification for that
single tablet. Since there can be up to 4 tablets being migrated
away, it can add up to a significant amount, as nodes are pushed
to a substantial usage of available space (~90%).
We want to optimize this by dropping the reference to an sstable after
it was fully streamed. This way, we reduce the chances of hitting
2x space amplification for a given tablet.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Closes scylladb/scylladb#28505
Fedora 45 tightened the default installation checks [1]. As a result
the cassandra-stress rpm we provide no longer installs.
Install it with --no-gpgchecks as a workaround. It's our own package
so we trust it. Later we'll sign it properly.
We install its dependencies via the normal methods so they're still
checked.
[1] https://fedoraproject.org/wiki/Changes/Enforcing_signature_checking_by_default
Closes scylladb/scylladb#28687
Today the S3 client has a well-established and well-tested (hopefully) http request retry strategy; in the rest of the clients it looks like we are trying to achieve the same by writing the same code over and over again, and of course missing corner cases that have already been addressed in the S3 client.
This PR aims to extract code that can assist other clients in detecting the retryability of an error originating from the http client, to reuse the built-in seastar http client retryability, and to minimize the boilerplate of http client exception handling
No backport needed since it is only refactoring of the existing code
Closes scylladb/scylladb#28250
* github.com:scylladb/scylladb:
exceptions: add helper to build a chain of error handlers
http: extract error classification code
aws_error: extract `retryable` from aws_error
- Correct the `calc_part_size` function since it could return more than 10k parts (see the sketch below)
- Add tests
- Add more checks in `calc_part_size` to comply with S3 limits
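A sketch of a part-size computation that respects the S3 limits (constants per the public S3 documentation; the function shape is illustrative, not the actual `calc_part_size`):
```
#include <algorithm>
#include <cstdint>

constexpr uint64_t min_part_size = 5ull << 20;   // 5 MiB S3 minimum
constexpr uint64_t max_part_size = 5ull << 30;   // 5 GiB S3 maximum
constexpr uint64_t max_parts = 10'000;           // S3 part count limit

uint64_t calc_part_size_sketch(uint64_t object_size) {
    // Round up: object_size / max_parts truncated can yield a part size
    // that needs 10,001 parts to cover the object.
    uint64_t part = (object_size + max_parts - 1) / max_parts;
    return std::clamp(part, min_part_size, max_part_size);
}
```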
Fixes: https://scylladb.atlassian.net/browse/SCYLLADB-640
Must be ported back to 2025.3/4 and 2026.1 since we may encounter this bug in production clusters
Closes scylladb/scylladb#28592
* github.com:scylladb/scylladb:
s3_client: add more constrains to the calc_part_size
s3_client: add tests for calc_part_size
s3_client: correct multipart part-size logic to respect 10k limit
Currently, the test assumes that when
'topology_coordinator_pause_before_processing_backlog: waiting' is
logged, the task for decommission must be there. This was based on the
assumption that topology coordinator is idle and decommission request
wakes it up. But if the server is slow enough, it may still be running
the load balancer in reaction to table creation, and block on that
injection point before decommission request was added.
Fix by waiting for the task to appear rather than the injection.
Fixes SCYLLADB-715
Improves performance of deserialization of vector data for calculating similarity functions.
Instead of deserializing vector data into a std::vector<data_value>, we deserialize directly into a std::vector<float>
and then pass it to similarity functions as a std::span<const float>.
This avoids the overhead of data_value allocations and conversions.
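A sketch of the direct deserialization (assuming the serialized form is N big-endian IEEE-754 floats back to back; the helper name is hypothetical):
```
#include <bit>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <span>
#include <vector>

std::vector<float> deserialize_float_vector(std::span<const std::byte> bytes) {
    std::vector<float> out(bytes.size() / sizeof(float));
    for (size_t i = 0; i < out.size(); ++i) {
        uint32_t be;
        std::memcpy(&be, bytes.data() + i * sizeof(float), sizeof(be));
        // Network byte order -> host order (std::byteswap is C++23), then
        // reinterpret the bits as a float. No data_value in sight.
        if constexpr (std::endian::native == std::endian::little) {
            be = std::byteswap(be);
        }
        out[i] = std::bit_cast<float>(be);
    }
    return out;
}
```
The result can then be handed to the similarity functions as a std::span<const float> with no per-element allocation.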
Example QPS of `SELECT id, similarity_cosine({vector<float, 1536>}, {vector<float, 1536>}) ...`:
client concurrency 1: before: ~135 QPS, after: ~1005 QPS
client concurrency 20: before: ~280 QPS, after: ~2097 QPS
Measured using https://github.com/zilliztech/VectorDBBench (modified to call above query without ANN search)
Fixes https://scylladb.atlassian.net/browse/SCYLLADB-471
Closes scylladb/scylladb#28615
Fixes #28678
If the replenish loop exits the sleep condition with an empty queue
when "_shutdown" is already set, a waiter might get stuck, unsignalled,
waiting for segments, even though we are exiting.
Simply move the queue abort so it is always done on loop exit.
Closes scylladb/scylladb#28679
Fixes parsing of comma-separated seed lists in "init.cc" and
"cql_test_env.cc" to use the standard `split_comma_separated_list`
utility, avoiding manual `npos` arithmetic. The previous code relied on
`npos` being `uint32_t(-1)`, which would not overflow in the `uint64_t`
target, so the loop exited as expected. With Seastar's upcoming change
to make `npos` `size_t(-1)`, it would wrap around to zero and cause
an infinite loop.
Switch to the `split_comma_separated_list` standardized way of tokenization
that is also used in other places in the code. Empty tokens are handled
as before. This prevents startup hangs and test failures when Seastar is
updated.
Refs: scylladb/seastar#3236
The cache is already covered by general auth
dtests, but some cases are more tricky and easier
to express directly as calls to the cache class.
For such tests a boost test file was added.
The LDAP server may change role-chain assignments without notifying
Scylla. As a result, effective permissions can change, so some form of
polling is required.
Currently, this is handled via cache expiration. However, the unified
cache is designed to be consistent and does not support expiration.
To provide an equivalent mechanism for LDAP, we will periodically
reload the permissions portion of the new cache at intervals matching
the previously configured expiration time.
We want to get rid of the loading cache because its periodic
refresh logic generates a lot of internal load when there
are many entries. Also, our operation procedures involve tweaking
the config, while the new unified cache is supposed to work out
of the box.
In the following commit we'll need to add some
cache related logic (removing resource permissions).
This logic doesn't depend on authorizer so it should
be managed by the service itself.
The connection's cpu_concurrency_t struct tracks the state of a connection
to manage the admission of new requests and prevent CPU overload during
connection storms. When a connection holds units (allowed only 0 or 1), it is
considered to be in the "CPU state" and contributes to the concurrency limits
used when accepting new connections.
The bug stems from the fact that `counted_data_source_impl::get` and
`counted_data_sink_impl::put` calls can interleave during execution. This
occurs because of `should_parallelize` and `_ready_to_respond`, the latter being
a future chain that can run in the background while requests are being read.
Consequently, while reading request (N), the system may concurrently be
writing the response for request (N-1) on the same connection.
This interleaving allows `return_all()` to be called twice before the
subsequent `consume_units()` is invoked. While the second `return_all()` call
correctly returns 0 units, the matching `consume_units()` call would
mistakenly take an extra unit from the semaphore. Over time, a connection
blocked on a read operation could end up holding an unreturned semaphore
unit. If this pattern repeats across multiple connections, the semaphore
units are eventually depleted, preventing the server from accepting any
new connections.
The fix ensures that we always consume the exact number of units that were
previously returned. With this change, interleaved operations behave as
follows:
get() return_all — returns 1 unit
put() return_all — returns 0 units
get() consume_units — takes back 1 unit
put() consume_units — takes back 0 units
Logically, the networking phase ends when the first network operation
concludes. But more importantly, when a network operation
starts, we no longer hold any units.
Other solutions are possible but the chosen one seems to be the
simplest and safest to backport.
Fixes SCYLLADB-485
Added .github/workflows/close_issue_for_scylla_employee.yml workflow file to automatically close issues opened by ScyllaDB associates
We want to allow external users to open issues in the scylladb repo, but we would like ScyllaDB associates to open issues in Jira instead. If a ScyllaDB associate opens an issue in the scylladb.git repo by mistake, the issue will be closed automatically with an appropriate comment explaining that the issue should be opened in Jira.
This is a new github action, and does not require any code backport.
Fixes: PM-64
Closes scylladb/scylladb#28212
What changed
Added new workflow file .github/workflows/call_jira_sync_pr_milestone.yml
Why (Requirements Summary)
Adds a GitHub Action that will be triggered when a milestone is set or removed from a PR
When milestone is added (milestoned event), calls main_jira_sync_pr_milestone_set.yml from github-automation.git, which will add the version to the 'Fix Versions' field in the relevant linked Jira issue
When milestone is removed (demilestoned event), calls main_jira_sync_pr_milestone_removed.yml from github-automation.git, which will remove the version from the 'Fix Versions' field in the relevant linked Jira issue
Testing was performed in staging.git and the STAG Jira project.
Fixes: PM-177
Closes scylladb/scylladb#28575
Fixes #28398
Fixes #28399
When used as path elements in Google Storage paths, the object names need to be URL encoded. This was missed due to
a.) tests not really using prefixes including non-URL-valid chars (e.g. '/'),
and
b.) the mock server used for most testing not enforcing this particular aspect.
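For illustration, percent-encoding an object name as a single path element looks like this in Python (a sketch; the URL shape follows the GCS JSON API, not the actual utils/gcp code):
```
from urllib.parse import quote

def gcs_object_url(bucket: str, object_name: str) -> str:
    # safe="" also percent-encodes '/', so a name like "backups/2026/x"
    # stays one path element instead of becoming extra path components.
    return (f"https://storage.googleapis.com/storage/v1/b/{bucket}"
            f"/o/{quote(object_name, safe='')}")

print(gcs_object_url("my-bucket", "backups/2026/snap 01"))
# .../o/backups%2F2026%2Fsnap%2001
```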
Modified unit tests to use prefixing for all names, so when running against real GS, any errors like this will show.
"Real" GCS also behaves a bit differently from the mock when listing with a pager: the former will not give a pager token for the last page, only the penultimate one. Adds handling for this.
Needs backport to the releases that have the feature (though they might not really use it), as it is technically possible to use Google Storage for backup and whatnot there, and it should work as expected.
Closes scylladb/scylladb#28400
* github.com:scylladb/scylladb:
utils/gcp/object_storage: URL-encode object names in URL:s
utils::gcp::object_storage: Fix list object pager end condition detection
The current manager flow has a flaw: it triggers pytest.fail when it finds
errors on teardown, regardless of whether the test has already failed.
This creates an additional record in the JUnit report with the same name,
and Jenkins will not be able to show the logs correctly. To avoid this,
this PR changes the logic slightly.
Now the manager will check whether the test failed or not, to avoid two
failures for the same test in the report.
If the test passed, the manager will check the cluster status and fail if
something is wrong with it. There is no need to check the cluster status
in case of a test failure.
If the test passed and the cluster status is OK, but there are unexpected
errors in the logs, the test will fail as well. This check gathers all
information about the errors and potential stacktraces, and only fails
the test if it hasn't already failed, to avoid a double entry in the report.
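A rough Python sketch of the new teardown ordering (the cluster helpers are hypothetical):
```
import pytest

def check_cluster_after_test(test_failed: bool, cluster) -> None:
    if test_failed:
        return                     # avoid a second failure record in JUnit
    if not cluster.is_healthy():   # assumed helper
        pytest.fail("cluster unhealthy after a passing test")
    errors = cluster.collect_log_errors()  # assumed helper, with stacktraces
    if errors:
        pytest.fail("unexpected errors in logs:\n" + "\n".join(errors))
```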
Closes scylladb/scylladb#28633
The test was marked with xfail in #28383, as it needed to be updated to
work with the Raft-based topology. We are doing that in this patch.
With the Raft-based topology, there is no reason to check that nodes with
different group0 IDs cannot merge their topology/token_metadata. That is
clearly impossible, as doing any topology change requires being in the
same group0. So, the original regression test doesn't make sense.
We can still test that nodes with different group0 IDs cannot gossip with
each other, so we keep the test. It's very fast anyway.
No backport, test update.
Closes scylladb/scylladb#28571
* github.com:scylladb/scylladb:
test: run test_different_group0_ids in all modes
test: make test_different_group0_ids work with the Raft-based topology
Currently, the load balancing simulator computes node, shard and tablet load based on tablet count.
This patch changes the load balancing simulator to be tablet size aware. It generates random tablet sizes with a normal distribution, and a mean value of `default_target_tablet_size`, and reports the computed load for nodes and tables based on tablet size sum, instead of tablet count.
This is the last PR in the Size Based Load Balancing series:
- First part for tablet size collection via load_stats: scylladb/scylladb#26035
- Second part reconcile load_stats: scylladb/scylladb#26152
- The third part for load_sketch changes: scylladb/scylladb#26153
- The fourth part which performs tablet load balancing based on tablet size: scylladb/scylladb#26254
- The fifth part changes the load balancing simulator: scylladb/scylladb#26438
This is a new feature and backport is not needed.
Closes scylladb/scylladb#26438
* github.com:scylladb/scylladb:
test, simulator: compute load based on tablet size instead of count
test, simulator: generate tablet sizes and update load_stats
test, simulator: postpone creation of load_stats_ptr
In b03d520aff ("cql3: introduce similarity functions syntax") we
added vector similarity functions to the grammar. The grammar had to
be modified because we wanted to support literals as vector similarity
function arguments, and the general function syntax in selectors
did not allow that.
In cc03f5c89d ("cql3: support literals and bind variables in
selectors") we extended the selector function call grammar to allow
literals as function arguments.
Here, we remove the special case for vector similarity functions as
the general case in function calls covers all the possibilities the
special case does.
As a side effect, the vector similarity function names are no longer
reserved.
Note: the grammar change fixes an inconsistency with how the vector
similarity functions were evaluated: typically, when a USE statement
is in effect, an unqualified function is first matched against functions
in the keyspace, and only if there is no match is the system keyspace
checked. But with the previous implementation vector similarity functions
ignored the USE keyspace and always matched only the system keyspace.
This small inconsistency doesn't matter in practice because user defined
functions are still experimental, and no one would name a UDF to conflict
with a system function, but it is still good to fix it.
Closes scylladb/scylladb#28481
Currently, if a test fails, pytest outputs only some basic information
about the failure. With this change, it will output the last 300 lines of
the boost/seastar test output.
Also add capturing the output of the failed tests to the JUnit report, so
it will be present in the report on Jenkins.
Fixes: https://scylladb.atlassian.net/browse/SCYLLADB-449
Closes scylladb/scylladb#28535
In ebda2fd4db ("test: cql_test_env: increase file descriptor limit"),
we raised the open file limit for cql_test_env. Here, we raise it for sstables::test_env
as well, to fix a couple of twcs resharding tests failing outside dbuild. These tests
open 256 sstables, and with 2 files/sstable + resharding work it is understandable
that they overflow the 1024 limit.
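The adjustment amounts to raising the soft RLIMIT_NOFILE toward the hard limit; a Python sketch of the idea (the real helper is the C++ adjust_rlimit() mentioned below):
```
import resource

def adjust_rlimit_nofile(wanted: int = 4096) -> None:
    # Raise the soft open-file limit up to the hard limit; on a modern
    # systemd installation the hard limit is large enough (>500,000).
    soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
    if soft < wanted:
        resource.setrlimit(resource.RLIMIT_NOFILE, (min(wanted, hard), hard))
```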
No backport: this is a quality of life improvement for developers running outside dbuild, but they can use dbuild for branches.
Closes scylladb/scylladb#28646
* github.com:scylladb/scylladb:
test: sstables::test_env: adjust file open limit
test: extract cql_test_env's adjust_rlimit() for reuse
* seastar f55dc7eb...d2953d2a (13):
> io_tester: Revive IO bandwidth configuration
> Merge 'io_tester: add vectorized I/O support' from Travis Downs
doc: add vectorized I/O options to io-tester.md
io_tester: add vectorized I/O support
> Merge 'Remove global scheduling group ID bitmap' from Pavel Emelyanov
reactor: Drop sched group IDs bitmap
reactor: Allocate scheduling group on shard-0 first
reactor: Detach init_scheduling_group_specific_data()
reactor: Coroutinize create_scheduling_group()
> set_iterator: increase compatibility with C++ ranges
> test: fix race condition in test_connection_statistics
> Add Claude Code project instructions
> reactor: Unfriend pollable_fd via pollable_fd_state::make()
> Merge 'rpc_tester: introduce rpc_streaming job based on streaming API' from Jakub Czyszczoń
apps: rpc_tester: Add STREAM_UNIDIRECTIONAL job (introduces unidirectional streaming to the rpc_streaming job)
apps: rpc_tester: Add STREAM_BIDIRECTIONAL job (extends rpc_tester with an rpc_streaming job that uses rpc::sink<> and rpc::source<> to stream data between the client and the server)
> treewide: remove remnants of SEASTAR_MODULE
> test: Tune abort-accept test to use more readable async()
> build: support sccache as a compiler cache (#3205)
> posix-stack: Reuse parent class _reuseport from child
> Merge 'reactor_backend: Fix another busy spin bug in the epoll backend' from Stephan Dollberg
tests: Add unit test for epoll busy spin bug
reactor_backend: Fix another busy spin bug in epoll
Closes scylladb/scylladb#28513
Previous implementation of Scylla lifecycle brought flakiness to the test.
This change leaves lifecycle management up to PythonTest.run_ctx,
which implements more stability logic for setup/teardown.
Replace pexpect-driven GDB interaction with GDB batch mode:
- Avoids DeprecationWarning: "This process is multi-threaded, use of forkpty()
may lead to deadlocks in the child.", which ultimately caused CI deadlocks.
- Removes timeout-driven flakiness on slow systems - no interactive waits/timeouts.
- Produces cleaner, more direct assertions around command execution and output.
- Trade-off: batch mode adds ~10s per command per test,
but with --dist=worksteal this is ~10% overall runtime increase across the suite.
Closes scylladb/scylladb#28484
After PR https://github.com/scylladb/scylladb/pull/28396 reduced
the test volumes to 20MiB to speed up test_out_of_space_prevention.py,
keeping the original 0.8 critical disk utilization threshold can make
the tests flaky: transient disk usage (e.g. commitlog segment churn)
can push the node into ENOSPC during the run.
These tests do not write much data, so reduce the critical disk
utilization threshold to 0.5. With 20MiB volumes this leaves ~10MiB
of headroom for temporary growth during the test.
Fixes: https://github.com/scylladb/scylladb/issues/28463
Closes scylladb/scylladb#28593
test_maintenance_socket is flaky with the new way of running it. It looks
like the driver tries to reconnect with an old maintenance socket from the
previous driver and fails. This PR adds a whitelist for connections, which
stabilizes the test.
test_no_removed_node_event_on_ip_change was flaky on CI, while the issue
never reproduced locally. The assumption is that under load there is a race
condition: the logs are checked before the message has arrived. A small
retry loop was added to avoid this situation.
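The retry idea, sketched in Python (names and parameters are illustrative):
```
import time

def wait_for_log_entry(read_log, pattern: str,
                       attempts: int = 10, delay: float = 1.0) -> None:
    # Under load the event may appear in the log slightly after the
    # triggering call returns, so poll a few times instead of checking once.
    for _ in range(attempts):
        if pattern in read_log():
            return
        time.sleep(delay)
    raise AssertionError(f"log entry {pattern!r} never arrived")
```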
Closes scylladb/scylladb#28635
The test can currently fail like this:
```
> await cql.run_async(f"ALTER TABLE {ks}.test WITH tablets = {{'min_tablet_count': 1}}")
E cassandra.cluster.NoHostAvailable: ('Unable to complete the operation against any hosts', {<Host: 127.158.27.9:9042 datacenter1>: <Error from server: code=0000 [Server error] message="Failed to apply group 0 change due to concurrent modification">})
```
The following happens:
- node A is restarted and becomes the group0 leader,
- the driver sends the ALTER TABLE request to node B,
- the request hits group 0 concurrent modification error 10 times and fails
because node A performs tablet migrations at the same time.
What is unexpected is that even though the driver session uses the default
retry policy, the driver doesn't retry the request on node A. The request
is guaranteed to succeed on node A because it's the only node adding group0
entries.
The driver doesn't retry the request on node A because of a missing
`wait_for_cql_and_get_hosts` call. We add it in this commit. We also reconnect
the driver just in case to prevent hitting scylladb/python-driver#295.
Moreover, we can revert the workaround from
4c9efc08d8, as the fix from this commit also
prevents DROP KEYSPACE failures.
The commit has been tested in byo with `_concurrent_ddl_retries{0}` to
verify that node A really can't hit group 0 concurrent modification error
and always receives the ALTER TABLE request from the driver. All 300 runs in
each build mode passed.
Fixes #25938
Closes scylladb/scylladb#28632
When `test_autoretrain_dict` was originally written, the default
`sstable_compression_user_table_options` was `LZ4Compressor`. The
test assumed (correctly) that initially the compression doesn't use
a trained dictionary, and later in the test scenario, it changed
the algorithm to one with a dictionary.
However, the default `sstable_compression_user_table_options` is now
`LZ4WithDictsCompressor`, so the old assumption is no longer correct.
As a result, the assertion that data is initially not compressed well
may or may not fail depending on dictionary training timing.
To fix this, this commit explicitly sets `ZstdCompressor`
as the initial `sstable_compression_user_table_options`, ensuring that
the assumption that initial compression is without a dictionary
is always met.
Note: `ZstdCompressor` differs from the former default `LZ4Compressor`.
However, it's a better choice — the test aims to show the benefit of
using a dictionary, not the benefit of Zstd over LZ4 (and the test uses
ZstdWithDictsCompressor as the algorithm with the dictionary).
Fixes: https://github.com/scylladb/scylladb/issues/28204
Backport: 2025.4, as test already failed there (and also backport to 2026.1 to make everything consistent).
Closes scylladb/scylladb#28625
* github.com:scylladb/scylladb:
test: explicitly set compression algorithm in test_autoretrain_dict
test: remove unneeded semicolons from python test
Harden get_scylla_2025_1_executable() by improving error reporting when subprocesses fail,
increasing curl's retry count for more resilient downloads, and enabling --retry-all-errors to retry on all failures.
Fixes https://github.com/scylladb/scylladb/issues/27745
Backport: no, it's not a bug fix
Closes scylladb/scylladb#28628
* github.com:scylladb/scylladb:
test: pylib: retry on all errors in get_scylla_2025_1_executable curl's call
test: pylib: increase curl's number of retries when downloading scylla
test: pylib: improve error reporting in get_scylla_2025_1_executable
Previously, global_tablet_token_metadata_barrier() could proceed with
fencing even if some nodes did not acknowledge the barrier_and_drain.
This could cause problems:
* In scylladb/scylladb#26864, replica locks did not provide mutual
exclusion, because “fenced out” requests from old topology versions
could run in parallel with requests using newer versions.
* In scylladb/scylladb#26375, the barrier could succeed even though we
did not wait for closed sessions to become unused. This could leave
aborted repair or streaming tasks running concurrently after a tablet
transition was aborted, and thus running concurrently with the next
transition.
In this commit we add a parameter drain_all_nodes: bool to
the global_token_metadata_barrier function. If this parameter is set,
the barrier waits for all nodes to acknowledge the barrier_and_drain
round of RPCs. If any of the nodes are not accessible or throw an error,
such errors are rethrown to the caller. We set this parameter only in
global_tablet_token_metadata_barrier since for topology migrations
the old behavior should be preserved. In case of errors, the tablet
migration is blocked until the problem goes away by itself or the
problematic node is added to the ignore_nodes list.
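A simplified Python stand-in for the semantics (barrier_and_drain is a stub here, not the real RPC):
```
import asyncio

async def barrier_and_drain(node: str) -> None:
    await asyncio.sleep(0)  # stub; the real RPC may raise for a dead node

async def global_token_metadata_barrier(nodes, drain_all_nodes: bool) -> None:
    results = await asyncio.gather(
        *(barrier_and_drain(n) for n in nodes), return_exceptions=True)
    errors = [r for r in results if isinstance(r, BaseException)]
    # With drain_all_nodes (the tablets case), any node that failed to
    # acknowledge the drain fails the whole barrier; otherwise errors
    # from unreachable nodes are tolerated, as before.
    if drain_all_nodes and errors:
        raise errors[0]

asyncio.run(global_token_metadata_barrier(["n1", "n2"], drain_all_nodes=True))
```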
The test_fenced_out_on_tablet_migration_while_handling_paxos_verb is
removed: with tablets, we now drain all nodes, so after a successful
barrier_and_drain round there can be no coordinators with an old
topology version. The fence_token check after executing a request on
a replica is therefore unnecessary for tablets, but still required for
vnodes, where topology changes do not wait for all nodes.
Topology fencing is covered by test_fence_lwt_during_bootstrap.
Fixes scylladb/scylladb#26864
Fixes scylladb/scylladb#26375
Add explicit erm-holding variables in all replica-side RPC handlers.
This is required to ensure that tablet migration waits for in-flight
replica requests even if a non-replica coordinator has been fenced out.
Holding erms on the replica side may increase the global-barrier wait
time, since the barrier must drain these requests. We believe this
is acceptable because:
* We already hold erms during replica-side request execution, but in
an ad-hoc, non-systemic way in lower layers of storage_proxy
(e.g. in sp::mutate_locally and do_query_tablets).
* Replica requests are bounded by replica-side timeouts, so the
global-barrier wait time cannot exceed the maximum of these timeouts.
For Paxos verbs, we use token_metadata_guard, which wraps the ERM and
automatically refreshes it when tablet migration does not affect the
current token; see the token_metadata_guard comments for details.
We use this guard only for Paxos verbs because regular reads and writes
already hold raw erms in storage_proxy and on the coordinators.
The erms must be held in all RPC handlers that support fencing — that
is, those with a fencing_token parameter in storage_proxy.idl.
Counter updates already hold erms in
mutate_counter_on_leader_and_replicate.
Fix test_tablets2::test_timed_out_reader_after_cleanup: the tablets
barrier now waits for all nodes. As a result, the replica read
is expected to finish, rather than fail due to the tablet having
moved as it did previously. The test is renamed to
test_tablets_barrier_waits_for_replica_erms to better reflect its
purpose.
Refs scylladb/scylladb#26864
Before waiting on stale_versions_in_use(), we log the stale versions
the barrier_and_drain handler will wait for, along with the number of
token_metadata references representing each version.
To achieve this, we store a pointer to token_metadata in
version_tracker, traverse the _trackers list, and output all items
with a version smaller than the latest. Since token_metadata
contains the version_tracker instance, it is guaranteed to remain
alive during traversal. To count references, token_metadata now
inherits from enable_lw_shared_from_this.
This helps diagnose tablet migration stalls and allows more
deterministic tests: when a barrier is expected to block, we can
verify that the log contains the expected stale versions rather
than checking that the barrier_and_drain is blocked on
stale_versions_in_use() for a fixed amount of time.
One of the tests checks that the number of partition keys is greater
than 2, but the method that creates the table can return one with fewer
keys. This leads to flakiness; to avoid it, this PR ensures that the table
has at least 3 partition keys.
Closes scylladb/scylladb#28636
on_update_view() currently runs its serialized logic inline via with_semaphore()
from a detached callback path, while create/drop already use dedicated async
dispatchers.
Refactor update handling to follow the same pattern:
- add dispatch_update_view(sstring ks_name, sstring view_name)
- move update logic into that coroutine
- acquire the existing view-builder lock via get_or_adopt_view_builder_lock()
- keep existing behavior for missing base/view state
- keep background invocation semantics from on_update_view()
This aligns the update/create/drop flows, keeps async lifecycle handling consistent, and is a first step toward fixing a shutdown issue.
The twcs compaction tests open more than 1024 files (not
so good), and will fail in a user session with the default
soft limit (1024).
Attempt to raise the limit so the tests pass. On a modern
systemd installation the hard limit is >500,000, so this
will work.
There's no problem in dbuild since it raises the file limit
globally.
Most likely, the root cause of the flaky test was that the TLS handshake hung for an extended period (60s). This caused
the test case to fail because the ANN request duration exceeded the test case timeout.
The PR introduces two changes:
* Mitigation of the hanging TLS handshake: This issue likely occurred because the test performed certificate rewrites
simultaneously with ANN requests that utilize those certificates.
* Production code fix: This addresses a bug where the TLS handshake itself was not covered by the connection timeout.
Since tls::connect does not perform the handshake immediately, the handshake only occurs during the first write
operation, potentially bypassing the connect timeout.
Fixes: #28012
Backport to 2026.01 and 2025.04 is needed, as these branches are also affected and may experience CI flakiness due to this test.
Closes scylladb/scylladb#28617
* github.com:scylladb/scylladb:
vector_search: Fix missing timeout on TLS handshake
vector_search: test: Fix flaky cert rewrite test
CI currently fails in release and debug modes if the PR only changes
a test run only in dev mode. There is no reason to wait for the CI fix,
as there is no reason to run this test only in dev mode in the first
place. The test is very fast.
The test was marked with xfail in #28383, as it needed to be updated to
work with the Raft-based topology. We are doing that in this patch.
With the Raft-based topology, there is no reason to check that nodes with
different group0 IDs cannot merge their topology/token_metadata. That is
clearly impossible, as doing any topology change requires being in the
same group0. So, the original regression test doesn't make sense.
We can still test that nodes with different group0 IDs cannot gossip with
each other, so we keep the test. It's very fast anyway.
It's difficult to say whether our download backend would always return
a transient error correctly so that curl could retry. Instead, it's
more robust to always retry on error.
By default curl does exponential backoff, and we want to keep that,
but there is a delay cap of 10 minutes, so with 40 retries we'd wait
a long time; instead we set the cap to 60 seconds.
Total waiting time (excluding receiving request time):
before - 17m
after - 35m
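For illustration, a capped exponential backoff matches both quoted totals if the retry count went from 10 to 40 (an assumption; the actual mechanism is curl's built-in retry logic, not this sketch):
```
def total_backoff(retries: int, cap: float) -> float:
    delay, total = 1.0, 0.0
    for _ in range(retries):
        total += delay
        delay = min(delay * 2, cap)
    return total

print(total_backoff(10, cap=600) / 60)  # ~17 minutes (10-minute delay cap)
print(total_backoff(40, cap=60) / 60)   # ~35 minutes (60-second delay cap)
```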
When `test_autoretrain_dict` was originally written, the default
`sstable_compression_user_table_options` was `LZ4Compressor`. The
test assumed (correctly) that initially the compression doesn't use
a trained dictionary, and later in the test scenario, it changed
the algorithm to one with a dictionary.
However, the default `sstable_compression_user_table_options` is now
`LZ4WithDictsCompressor`, so the old assumption is no longer correct.
As a result, the assertion that data is initially not compressed well
may or may not fail depending on dictionary training timing.
To fix this, this commit explicitly sets `ZstdCompressor`
as the initial `sstable_compression_user_table_options`, ensuring that
the assumption that initial compression is without a dictionary
is always met.
Note: `ZstdCompressor` differs from the former default `LZ4Compressor`.
However, it's a better choice — the test aims to show the benefit of
using a dictionary, not the benefit of Zstd over LZ4 (and the test uses
ZstdWithDictsCompressor as the algorithm with the dictionary).
Fixes: scylladb/scylladb#28204
Some assertions in the Raft-based topology are likely to cause crashes of
multiple nodes due to the consistent nature of the Raft-based code. If the
failing assertion is executed in the code run by each follower (e.g., the code
reloading the in-memory topology state machine), then all nodes can crash. If
the failing assertion is executed only by the leader (e.g., the topology
coordinator fiber), then multiple consecutive group0 leaders will chain-crash
until there is no group0 majority.
Crashing multiple nodes is much more severe than necessary. It's enough to
prevent the topology state machine from making more progress. This will
naturally happen after throwing a runtime error. The problematic fiber will be
killed or will keep failing in a loop. Note that it should be safe to block
the topology state machine, but not the whole group0, as the topology state
machine is mostly isolated from the rest of group0.
We replace some occurrences of `on_fatal_internal_error` and `SCYLLA_ASSERT`
with `on_internal_error`. These are not all occurrences, as some fatal
assertions make sense, for example, in the bootstrap procedure.
This patch changes the load balancing simulator so that it computes
table load based on tablet sizes instead of tablet count.
best_shard_overcommit measured the minimal achievable overcommit in cases
where the number of tablets cannot be evenly distributed across
all the available shards. This is still the case, but instead of
computing it as an integer div_ceil() of the average shard load,
it is now computed by allocating the tablet sizes using the
largest-tablet-first method. From these, we can get the lowest
overcommit for the given set of nodes, shards and tablet sizes.
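A sketch of the largest-tablet-first idea (LPT-style assignment onto the least-loaded shard, then comparing the fullest shard to the average):
```
import heapq

def best_shard_overcommit(tablet_sizes, n_shards: int) -> float:
    # Place each tablet, largest first, on the currently least-loaded
    # shard, then report how far the fullest shard is above the average.
    shards = [0.0] * n_shards
    heapq.heapify(shards)
    for size in sorted(tablet_sizes, reverse=True):
        heapq.heappush(shards, heapq.heappop(shards) + size)
    return max(shards) / (sum(tablet_sizes) / n_shards)

print(best_shard_overcommit([100, 80, 60, 30, 30], n_shards=2))  # ~1.07
```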
This change adds a random tablet size generator. The tablet sizes are
created in load_stats.
Further changes to the load balancing simulator:
- apply_plan() updates the load_stats after a migration plan is issued by the
load balancer,
- a new command-line option controls the tablet size deviation factor.
With size based load balancing, we will have to move the tablet size in
load_stats after each internode migration issued by balance_tablets().
This will be done in a subsequent commit in apply_plan() which is
called from rebalance_tablets().
Currently, rebalance_tablets() is passed a load_stats_ptr which is
defined as:
using load_stats_ptr = lw_shared_ptr<const load_stats>;
Because this is a pointer to const, apply_plan() can't modify it.
So, we pass a reference to load_stats to rebalance_tablets() and create
a load_stats_ptr from it for each call to balance_tablets().
test_remove_node_violating_rf_rack_with_rack_list creates a cluster
with four nodes. One of the nodes is excluded, then another one is
stopped, excluded, and removed. If the two stopped nodes were both
voters, the majority is lost and the cluster loses its raft leader.
As a result, the node cannot be removed and the operation times out.
Add the 5th node to the cluster. This way the majority is always up.
Fixes: https://github.com/scylladb/scylladb/issues/28596.
Closes scylladb/scylladb#28610
The test creates a single node cluster, then creates 3 tables which
remain empty. Then it adds another node with half the disk capacity of
the first one, and then it waits for the balancer to migrate tablets to
the newly added node by calling the quiesce topology API. The number of
tablets on the smaller node should be exactly half the number of tablets
on the larger node.
After waiting for quiesce topology, we could have a situation where we
query the number of tablets from the node which still hasn't processed
the last tablet migrations and updated system.tablets.
This patch adds a read barrier so that both nodes see the same tablets
metadata before we query the number of tablets.
Fixes: SCYLLADB-603
Closes scylladb/scylladb#28598
Currently the TLS handshake in the vector search client does not have a timeout.
This is because tls::connect does not perform handshake itself; the handshake
is deferred until the first read/write operation is performed. This can lead to long
hangs on ANN requests.
This commit calls tls::check_session_is_resumed() after tls::connect
to force the handshake to happen immediately and to run under with_timeout.
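The shape of the fix, sketched with Python's asyncio (where the handshake happens inside the connect call, so the timeout covers both the TCP connect and the handshake):
```
import asyncio, ssl

async def connect_tls(host: str, port: int, timeout: float = 5.0):
    # Analogous to forcing the handshake right after tls::connect and
    # running it under with_timeout: nothing TLS-related escapes the timer.
    ctx = ssl.create_default_context()
    return await asyncio.wait_for(
        asyncio.open_connection(host, port, ssl=ctx), timeout)
```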
The test is flaky most likely because when TLS certificate rewrite
happens simultaneously with an ANN request, the handshake can hang for a
long time (~60s). This leads to a timeout in the test case.
This change introduces a checkpoint in the test so that it will
wait for the certificate rewrite to happen before sending an ANN request,
which should prevent the handshake from hanging and make the test more reliable.
Fixes: #28012
The test `test_sync_point` had a few shortcomings that made it flaky
or simply wrong:
1. We were verifying that hints were written by checking the size of
in-flight hints. However, that could potentially lead to problems
in rare situations.
For instance, if all of the hints failed to be written to disk, the
size of in-flight hints would drop to zero, but creating a sync point
would correspond to the empty state.
In such a situation, we should fail immediately and indicate what
the cause was.
2. A sync point corresponds to the hints that have already been written
to disk. The number of those is tracked by the metric `written`.
It's a much more reliable way to make sure that hints have been
written to the commitlog. That ensures that the sync point we'll
create will really correspond to those hints.
3. The auxiliary function `wait_for` used in the test works like this:
it executes the passed callback and looks at the result. If it's
`None`, it retries it. Otherwise, the callback is deemed to have
finished its execution and no further retries will be attempted.
Before this commit, we simply returned a bool, and so the code was
wrong. We improve it (see the sketch below).
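For illustration, the contract looks roughly like this (a simplified synchronous sketch, not the actual async helper):
```
import time

def wait_for(check, timeout: float, period: float = 0.5):
    # Retries while the callback returns None; any other value -- including
    # False! -- is treated as the final result, which is why returning a
    # bool from the callback was wrong.
    deadline = time.time() + timeout
    while time.time() < deadline:
        result = check()
        if result is not None:
            return result
        time.sleep(period)
    raise TimeoutError("wait_for timed out")
```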
---
Note that this fixes scylladb/scylladb#28203, which was a manifestation
of scylladb/scylladb#25879. We created a sync point that corresponded
to the empty state, and so it immediately resolved, even when node 3
was still dead.
As a bonus, we rewrite the auxiliary code responsible for fetching
metrics and manipulating sync points. Now it's asynchronous and
uses the existing standard mechanisms available to developers.
Furthermore, we reduce the time needed for executing
`test_sync_point` by 27 seconds.
---
The total difference in time needed to execute the whole test file
(on my local machine, in dev mode):
Before:
CPU utilization: 0.9%
real 2m7.811s
user 0m25.446s
sys 0m16.733s
After:
CPU utilization: 1.1%
real 1m40.288s
user 0m25.218s
sys 0m16.566s
---
Refs scylladb/scylladb#25879
Fixes scylladb/scylladb#28203
Backport: This improves the stability of our CI, so let's
backport it to all supported versions.
Closes scylladb/scylladb#28602
* github.com:scylladb/scylladb:
test: cluster: Reduce wait time in test_sync_point
test: cluster: Fix test_sync_point
test: cluster: Await sync points asynchronously
test: cluster: Create sync points asynchronously
test: cluster: Fetch hint metrics asynchronously
The ANN vector queries with all-zero vectors are allowed even on vector
indexes with similarity function set to cosine.
When enabling the rescoring option, those queries would fail, as the rescoring
calls the `similarity_cosine` function underneath, causing an `InvalidRequest`
exception: all-zero vectors were not allowed, matching Cassandra's behaviour.
To eliminate the discrepancy we want the all-zero vector `similarity_cosine` calls to pass,
but return NaN, as cosine similarity is mathematically ill-defined for zero vectors.
We decided not to use arbitrary values contrary to USearch, for which the distance
(not to be confused with similarity) is defined as cos(0, 0) = 0, cos(0, x) = 1 while
supporting the range of values [0, 2].
If we wanted to convert that to a similarity, it would mean sim_cos(0, x) = 0.5,
and there is no mathematical reasoning why that would be more similar than,
for example, vectors forming obtuse angles.
It's safe to assume that all-zero vectors shouldn't have any impact on cosine
similarity, therefore we return NaN and eliminate them from the best results.
Adjusted the tests accordingly to check both proper Cassandra and Scylla behaviour.
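The chosen semantics, sketched in plain Python (not the server implementation):
```
import math

def similarity_cosine(a, b) -> float:
    # Cosine similarity is undefined when either vector has zero magnitude,
    # so return NaN: such candidates sort out of the best results instead
    # of failing the whole query.
    na = math.sqrt(sum(x * x for x in a))
    nb = math.sqrt(sum(x * x for x in b))
    if na == 0.0 or nb == 0.0:
        return float("nan")
    return sum(x * y for x, y in zip(a, b)) / (na * nb)

print(similarity_cosine([0.0, 0.0], [1.0, 2.0]))  # nan
```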
Fixes: SCYLLADB-456
Commit ea8a661119 tried to reduce the dataset for restoration tests.
While doing so, it effectively disabled part of itself -- the checks for
streaming directions were never run after this change. The thing is that
this check only runs if the restored tablet count matches a hardcoded
value of 512. This was the real dataset size of the test before the
aforementioned commit, but after it the count changed to other values,
and the comparison with 512 became always False.
Fix it with a local variable to prevent such mistakes in the future.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
This method does two things -- checks that the data is indeed back, and
validates streaming directions. The latter is not quite about "data is
back", so it is better to have it as an explicit dedicated method.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
If everything is OK, the sync point will not resolve with node 3 dead.
As a result, the waiting will use all of the time we allocate for it,
i.e. 30 seconds. That's a lot of time.
There's no easy way to verify that the sync point will NOT resolve, but
let's at least reduce the waiting to 3 seconds. If there's a bug, it
should be enough to trigger it at some point, while reducing the average
time needed for CI.
The test had a few shortcomings that made it flaky or simply wrong:
1. We were verifying that hints were written by checking the size of
in-flight hints. However, that could potentially lead to problems
in rare situations.
For instance, if all of the hints failed to be written to disk, the
size of in-flight hints would drop to zero, but creating a sync point
would correspond to the empty state.
In such a situation, we should fail immediately and indicate what
the cause was.
2. A sync point corresponds to the hints that have already been written
to disk. The number of those is tracked by the metric `written`.
It's a much more reliable way to make sure that hints have been
written to the commitlog. That ensures that the sync point we'll
create will really correspond to those hints.
3. The auxiliary function `wait_for` used in the test works like this:
it executes the passed callback and looks at the result. If it's
`None`, it retries it. Otherwise, the callback is deemed to have
finished its execution and no further retries will be attempted.
Before this commit, we simply returned a bool, and so the code was
wrong. We improve it.
Note that this fixes scylladb/scylladb#28203, which was a manifestation
of scylladb/scylladb#25879. We created a sync point that corresponded
to the empty state, and so it immediately resolved, even when node 3
was still dead.
Refs scylladb/scylladb#25879
Fixes scylladb/scylladb#28203
To create a keyspace, there's the new_test_keyspace helper.
The table is created with a single cql.run_async with an explicit schema.
The dataset is populated with a single parallel INSERT as well.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
This is a preparational patch. The next one will need to replace
    foo()
    bar()
with
    with something() as s:
        foo()
        bar()
Effectively -- it only adds the `with something()` line. To avoid shifting the
whole file right together with that future change, do it here.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The previous calculation could produce more than 10,000 parts for large
uploads because we mixed values in bytes and MiB when determining the
part size. This could result in selecting a part size that still
exceeded the AWS multipart upload limit. The updated logic now ensures
the number of parts never exceeds the allowed maximum.
This change also aligns the implementation with the code comment: we
prefer a 50 MiB part size because it provides the best performance, and
we use it whenever it fits within the 10,000-part limit. If it does not,
we increase the part size (in bytes, aligned to MiB) to stay within the
limit.
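A sketch of the corrected calculation, keeping everything in bytes (constants taken from the description above):
```
MIB = 1024 * 1024
PREFERRED_PART_SIZE = 50 * MIB
MAX_PARTS = 10_000

def choose_part_size(total_size: int) -> int:
    # Everything in bytes -- the bug came from mixing bytes and MiB.
    if total_size <= PREFERRED_PART_SIZE * MAX_PARTS:
        return PREFERRED_PART_SIZE          # 50 MiB performs best
    min_part = -(-total_size // MAX_PARTS)  # ceil: smallest size that fits
    return -(-min_part // MIB) * MIB        # align up to a whole MiB

assert choose_part_size(100 * MIB) == 50 * MIB
assert choose_part_size(600_000 * MIB) == 60 * MIB  # still 10,000 parts
```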
Generalize error handling by creating an exception dispatcher which allows writing error handlers by sequentially applying them, the same way one would write `catch ()` blocks.
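The pattern can be illustrated in Python (a sketch of the idea, not the actual C++ dispatcher):
```
def dispatch_exception(exc, handlers):
    # Try handlers in order, like a chain of catch () blocks: the first
    # entry whose exception type matches handles the error.
    for exc_type, handler in handlers:
        if isinstance(exc, exc_type):
            return handler(exc)
    raise exc  # no handler matched; rethrow

print(dispatch_exception(FileNotFoundError("x"), [
    (PermissionError, lambda e: "denied"),
    (OSError, lambda e: "io-error"),   # FileNotFoundError is an OSError
]))
```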
Similarly to the previous patch, the handler can stream the map of build
statuses. Unlike the previous patch, it doesn't need to fmt::format() the key
and value, as these are strings already.
It could be a map_to_json<string, string> partial specialization, but
there's so far only one caller, so probably not worth it yet.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Like the get_token_endpoint handler, which streams the map it got from the
storage service, get_ownership and get_effective_ownership can do the same.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The get_token_endpoint handler converts iterator of std::map into
generated maplist_mapper type. Next patch will do the same for more
handlers, so it's good to have a helper converter for it.
As a nice side effect, it's possible to avoid multiline lambda argument
to stream_range_as_array().
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
This reverts commit bcd1758911, reversing
changes made to b2c2a99741.
There is a design decision not to introduce an additional test
orchestration tool for scylladb.git (see comments for #27499). One
commit has already been reverted in 55c7bc7. Recent CI runs made the
validator test flaky, so it is time to remove all remaining validator tests.
It needs a backport to 2026.1 to remove the remaining validator tests from there.
Fixes: VECTOR-497
Closes scylladb/scylladb#28568
When running a gdb command, we check that the string 'Error'
does not appear within the output. However, if the command output
includes the string 'Error' as part of its normal operation, this
generates a false positive. In fact the task_histogram can include
the string 'error::Error' from the Rust core::error module.
Allow for that and only match 'Error' that isn't 'error::Error'.
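One way to express this (a sketch; the actual test may use a different pattern) is a regex with a negative lookbehind:
```
import re

# Match 'Error' except when it is part of Rust's 'error::Error'.
ERROR_RE = re.compile(r"(?<!error::)Error")

assert ERROR_RE.search("Error: something broke")
assert not ERROR_RE.search("core::error::Error vtable")
```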
Fixes #28516.
Closes scylladb/scylladb#28574
The difference is very tiny:
@@ -1,12 +1,12 @@
 @pytest.mark.asyncio
 async def test_restore_primary_replica_same_...(manager: ManagerClient, object_storage):
     ''' comment '''
-    topology = topo(rf = 4, nodes = 8, racks = 2, dcs = 1)
-    scope = "rack"
+    topology = topo(rf = 4, nodes = 8, racks = 2, dcs = 2)
+    scope = "dc"
     ks = 'ks'
     cf = 'cf'
@@ -42,7 +42,7 @@ async def test_restore_primary_replica_s
     for r in res:
         nodes_by_operation[r[1].group(1)].append(r[1].group(2))
-    scope_nodes = set([ str(host_ids[s.server_id]) for s in servers if s.rack == servers[i].rack ])
+    scope_nodes = set([ str(host_ids[s.server_id]) for s in servers if s.datacenter == servers[i].datacenter ])
     for op, nodes in nodes_by_operation.items():
         logger.info(f'Operation {op} streamed to nodes {nodes}')
         assert len(nodes) == 1, "Each streaming operation should stream to exactly one primary replica"
The (removed in the above example) test description comments differ only
in their usage of "rack" and "dc" words.
Squashing them into one parametrized test makes perfect sense.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Tablet migration keeps an sstable snapshot during streaming, which may
cause a temporary increase in disk utilization if compaction is running
concurrently. SSTables compacted away are kept on disk until streaming
is done with them. The more tablets we allow to migrate concurrently,
the higher disk usage can rise. When the target tablet size is
configured correctly, every tablet should own about 1% of disk
space, so a concurrency of 4 shouldn't put us at risk. But the target
tablet size is not chosen dynamically yet, and it may not be aligned
with disk capacity.
Also, tablet sizes can temporarily grow above the target, up to 2x
before the split starts, and some more because splits take a while to
complete.
To reduce the impact of this, reduce the concurrency of migration.
A concurrency of 2 should still be enough to saturate resources on the
leaving shard.
Also, reducing concurrency means that load balancing is more
responsive to preemption. There will be less bandwidth sharing, so
scheduled migrations complete faster. This is important for scale-out,
where we bootstrap a node and want to start migrations to that new
node as soon as possible.
Refs scylladb/siren#15317
Different transitions have different weights, and limits are
configurable. We don't want a situation where a high-cost migration
is cut off by limits and the system can make no progress.
For example, repair uses weight 2 for read concurrency. Migrating
co-located tablets scales the cost by the number of co-located
tablets.
The test `test_size_based_load_balancing.py::test_balance_empty_tablets`
waits for tablet load stats to be refreshed and uses the
`short_tablet_stats_refresh_interval` injection to speed up the refresh
interval.
This injection has no effect; it was replaced by the
`tablet_load_stats_refresh_interval_in_seconds` config option (patch: 1d6808aec4),
so the test currently waits for 60 seconds (default refresh interval).
Use the config option. This reduces the execution time to ~8 seconds.
Fixes SCYLLADB-556.
Signed-off-by: Nikos Dragazis <nikolaos.dragazis@scylladb.com>
Closes scylladb/scylladb#28536
This field is only used to initialize the following _memtable_controller
one. It's simpler just to do the initialization with whatever value the
field itself is initialized with, and drop the field.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Closes scylladb/scylladb#28539
This patch fixes the few remaining cases of XPASS in test/cqlpy and test/alternator.
These are tests which, when written, reproduced a bug and therefore were marked "xfail", but some time later the bug was fixed and we either did not notice it was ever fixed, or just forgot to remove the xfail marker.
Removing the no-longer-needed xfail markers is good for test hygiene, but more importantly is needed to avoid regressions in those already-fixed areas (if a test is already marked xfail, it can start to fail in a new way and we wouldn't notice).
Backport not needed, xpass doesn't bother anyone.
Closes scylladb/scylladb#28441
* github.com:scylladb/scylladb:
test/cqlpy: remove xfail from tests for fixed issue 7972
test/cqlpy: remove xfail from tests for fixed issue 10358
test/cqlpy: remove xfail from passing test testInvalidNonFrozenUDTRelation
test/alternator: remove xfail from passing test_update_item_increases_metrics_for_new_item_size_only
The goal of this small pull request is to reproduce issue #28439, which found a bug in the Alternator Streams output when BatchWriteItem is called to write multiple items in the same partition, and always_use_lwt write isolation mode is used.
* The first patch reproduces this specific bug in Alternator Streams.
* The second patch adds missing (Fixes #28171) tests for BatchWriteItem in different write modes, and shows that BatchWriteItem itself works correctly - the bug is just in Alternator Streams' reporting of this write.
Closes scylladb/scylladb#28528
* github.com:scylladb/scylladb:
test/alternator: add test for BatchWriteItem with different write isolations
test/alternator: reproducer for Alternator Streams bug
It turns out that the cdc driver requires permissions to two additional system tables. This patch adds them to VECTOR_SEARCH_INDEXING and modifies the unit tests. The integration with vector store was tested manually, integration tests will be added in vector-store repository in a follow up PR.
Fixes: SCYLLADB-522
Closes scylladb/scylladb#28519
Alternator's various write operations have different code paths for the
different write isolation modes. Because most of the test suite runs in
only a single write mode (currently - only_rmw_uses_lwt), we already
introduced a test file test/alternator/test_write_isolation.py for
checking the different write operations in *all* four write isolation
modes.
But we missed testing one write operation - BatchWriteItem. This
operation isn't very "interesting" because it doesn't support *any*
read-modify-write option (it doesn't support UpdateExpression,
ConditionExpression or ReturnValues), but even without those, the
pure write code still has different code paths with and without LWT,
and should be tested. So we add the missing test here - and it passes.
In issue #28439 we discovered a bug that can be seen in Alternator
Streams in the case of BatchWriteItem with multiple writes to the
same partition and always_use_lwt mode. The fact that the test added
here passes shows that the bug is NOT in BatchWriteItem itself, which
works correctly in this case - but only in the Alternator Streams layer.
Fixes #28171
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
This patch adds a reproducer for an Alternator Streams bug described in
issue #28439, where the stream returns the wrong events (and fewer of
them) in the following specific combination of circumstances:
1. A BatchWriteItem operation writing multiple items to the *same*
partition.
2. The "always_use_lwt" write isolation mode is used. (the bug doesn't
occur in other write isolation modes).
We didn't catch this bug earlier because the Alternator Streams test
we had for BatchWriteItem had multiple items in multiple partitions,
and we missed the multiple-items-in-one-partition case. Moreover,
today we run all the tests in only_rmw_uses_lwt mode (in the past,
we did use always_use_lwt, but changed recently in commit e7257b1393
following commit 76a766c that changed test.py).
As issue #28439 explains, the underlying cause of the bug is that the
always_use_lwt causes the multiple items to be written with the same
timestamp, which confused the Alternator Streams code reading the CDC
log. The bug is not in BatchWriteItem itself, or in ScyllaDB CDC, but
just in the Alternator Streams layer.
The test in this patch is parameterized to run on each of the four
write isolation modes, and currently fails (and so marked xfail) just
for the one mode 'always_use_lwt'. The test is scylla_only, as its
purpose is to check the different write isolation modes - which don't
exist in AWS DynamoDB.
Refs #28439
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Improve event printing when a test in test_streams.py fails.
The new code will print both expected and received events (keys, previous
image, new image and type).
The new code will explicitly mark at which output event the comparison failed.
Fixes #28455
Closes scylladb/scylladb#28476
The usual Scylla shutdown in a cluster test takes ~2.1s. 2s come from
```
co_await sleep(std::chrono::milliseconds(_gcfg.shutdown_announce_ms));
```
as the default value of `shutdown_announce_in_ms` is 2000. This sleep
makes every `server_stop_gracefully` call 2s slower. There are ~300 such
calls in cluster tests (note that some come from `rolling_restart`). So,
it looks like this sleep makes cluster tests 300 * 2s = 10min slower.
Indeed, `./test.py --mode=dev cluster` takes 61min instead of 71min
on the potwor machine (the one in the Warsaw office) without it.
We set `shutdown_announce_in_ms` to 0 for all cluster tests to make them
faster.
The sleep is completely unnecessary in tests. Removing it could introduce
flakiness, but if that's the case, then the test for which it happens is
incorrect in the first place. Tests shouldn't assume that all nodes
receive and handle the shutdown message in 2s. They should use functions
like `server_not_sees_other_server` instead, which are faster and more
reliable.
Improvement of the tests running time, so no backport. The fix of
`test_tablets_parallel_decommission` may have to be backported to
2026.1, but it can be done manually.
Closes scylladb/scylladb#28464
* github.com:scylladb/scylladb:
test: pylib: scylla_cluster: set shutdown_announce_in_ms to 0
test: test_tablets_parallel_decommission: prevent group0 majority loss
test: delete test_service_levels_work_during_recovery
The handler appeared back in c9e710dca3. In this commit it performed the
"core" part of the task -- the do_build_range() method -- inside the
streaming sched group. The setup code seemingly was copied from the
view_builder::do_build_step() method and got the explicit switch of the
scheduling group.
The switch looks both justified and not. On one hand, it makes it
explicit that the activity runs in the streaming scheduling group. On the
other hand, the verb already uses RPC index 1, which is negotiated to
run in the streaming group anyway. On the "third hand", even though it is
explicit, the switch happens too late, as there are many other
activities performed by the handler that seem to also belong to the
same scheduling group but are not switched into it explicitly.
By and large, it seems better to avoid the explicit switch and rely on
the RPC-level negotiation-based sched group switching.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Closes scylladb/scylladb#28397
This is the continuation of #28363, this time about getting the gossiper scheduling group via database.
Several places that do it already have the gossiper at hand and should get the group from it instead.
Eventually, this will allow getting rid of database::get_gossip_scheduling_group().
Refining inter-components API, not backporting
Closes scylladb/scylladb#28412
* github.com:scylladb/scylladb:
gossiper: Export its scheduling group for those who need it
migration_manager: Reorder members
Recently we had a question whether key columns can have any supported
type. I knew that actually they can't - key columns can have only
the types S(tring), B(inary) or N(umber), and that is all. But it turns
out we never had a test that confirms this understanding is true.
We did have a test for it for GSI key types already,
test_gsi.py::test_gsi_invalid_key_types, but we didn't have one for the
base table. So in this patch we add this missing test, and confirm that,
indeed, both DynamoDB and Alternator refuse a key attribute with any
type other than S, B or N.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes scylladb/scylladb#28479
The current way checks for failures only during the test phase and will
miss cases when the failure happens in another phase. This PR eliminates
this, so every phase will have a modified node reporter to enrich the
JUnit XML report with the custom attribute function_path.
Closes scylladb/scylladb#28462
The current way always assumes that the error happened in the test file,
but that is not always true. This PR will show the error from the boost
logger where the error actually happened.
Closes scylladb/scylladb#28429
1. fmt::localtime is deprecated.
2. We should really print times in UTC, especially on the cloud.
3. The current log message does not print the timezone, so it'd be unclear
to anyone reading the log message whether the expiration time is in the
local timezone or in GMT/UTC.
Fixes the following warning:
```
gms/gossiper.cc:2428:28: warning: 'localtime' is deprecated [-Wdeprecated-declarations]
2428 | endpoint, fmt::localtime(clk::to_time_t(expire_time)), expire_time.time_since_epoch().count(),
| ^
/usr/include/fmt/chrono.h:538:1: note: 'localtime' has been explicitly marked deprecated here
538 | FMT_DEPRECATED inline auto localtime(std::time_t time) -> std::tm {
| ^
/usr/include/fmt/base.h:207:28: note: expanded from macro 'FMT_DEPRECATED'
207 | # define FMT_DEPRECATED [[deprecated]]
| ^
```
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Closes scylladb/scylladb#28434
Some storage_service rpc verbs may check that a handler is executed
inside the gossiper scheduling group. For that, the expected group is
grabbed from the database.
This patch puts the gossiper sched group into the debug namespace and makes
this check use it from there. It removes one more place that uses the
database as a config provider.
Refs #28410
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Closes scylladb/scylladb#28427
The test test_to_json_double used to fail due to #7972, but this issue
was already fixed in Scylla 5.1 and we didn't notice.
So remove the xfail marker from this test, and also update another test
which still xfails but no longer due to this issue.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
The tests testWithUnsetValues and testFilteringWithoutIndices used to fail
due to #10358, but this issue was already fixed three years ago, when the
UNSET-checking code was cleaned up, and the test is now passing.
So remove the xfail marker from these tests.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
The test testInvalidNonFrozenUDTRelation used to fail due to #10632
(an incorrectly-printed column name in an error message) and was marked
"xfail". But this issue has already been fixed two years ago, and
the test is now passing. So remove the xfail marker.
The test test_metrics.py::test_update_item_increases_metrics_for_new_item_size_only
tests whether the Alternator metrics report the exactly-DynamoDB-compatible
WCU number. It is parameterized with two cases - one that uses
alternator_force_read_before_write and one which doesn't.
The case that uses alternator_force_read_before_write is expected to
measure the "accurate" WCU, and currently it doesn't, so the test
rightly xfails.
But the case that doesn't use alternator_force_read_before_write is not
expected to measure the "accurate" WCU and has a different expectation,
so this case actually passes. But because the entire test is marked
xfail, it is reported as "XPASS" - unexpected pass.
Fix this by marking only the "True" case with xfail, while the "False"
case is not marked. After this pass, the True case continues to XFAIL
and the False case passes normally, instead of XPASS.
Also removed a sentence promising that the failing case will be solved
"by the next PR". Clearly this didn't happen. Maybe we even have such
a PR open (?), but it won't be "the next PR" even if merged today.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Paxos state tables are internal tables fully managed by Scylla;
they shouldn't be exposed to the user, nor should they be backed up.
This commit hides these kinds of tables from all listings, and if such a table
is directly described with `DESC ks."tbl$paxos"`, the description is generated
within a comment and a note for the user is added.
Fixes https://github.com/scylladb/scylladb/issues/28183
LWT on tablets and paxos state tables are present in 2025.4, so the patch should be backported to this version.
Closes scylladb/scylladb#28230
* github.com:scylladb/scylladb:
test/cqlpy: add reproducer for hidden Paxos table being shown by DESC
cql3/statements/describe_statement: hide paxos state tables
Copilot found these typos in comments and a variable name in alternator/,
so we might as well fix them.
There are no functional changes in this patch.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes scylladb/scylladb#28447
This patch series copies `guardrails_test.py` from scylla-dtest, fixes it and enables it.
The motivation is to unify the test execution of guardrails test, as some tests (`cqlpy/test_guardrail_...`) were already in scylladb repo, and some were in `scylla-dtest`.
Fixes: SCYLLADB-255
No backport, just test migration
Closes scylladb/scylladb#28454
* github.com:scylladb/scylladb:
test: refactor test_all_rf_limits in guardrails_test.py
test: specify exceptions being caught in guardrails_test.py
test: enable guardrails_test.py
test: add wait_other_notice to test_default_rf in guardrails_test.py
test: copy guardrails_test.py from scylla-dtest
Next Fedora will likely not have toxiproxy packaged [1]. Adapt
by installing it directly. To avoid changing the current toolchain,
add a ./install-dependencies --future option. This will allow us
to easily go back to the packages if the Fedora bug is fixed.
[1] https://bugzilla.redhat.com/show_bug.cgi?id=2426954
Closes scylladb/scylladb#28444
Modern toxiproxy interprets `-h` as help and requires the subcommand
subject (e.g. the proxy name) to be after the subcommand switches.
Arrange the command line in the way it likes, and spell out the
subcommands to be more comprehensible.
Closes scylladb/scylladb#28442
related PR: https://github.com/scylladb/scylladb/pull/27527
This PR changes test.py logic of parsing boost test cases to use -- --list_json_content
and passes boost labels as pytest markers.
Using -- --list_json_content is not ideal and currently requires implementing several [workarounds](https://github.com/scylladb/scylladb/pull/27527#issuecomment-3765499812), but having the ability to support boost labels in pytest is worth it, because now we can apply the tiering mechanism to the boost tests as well.
Fixes SCYLLADB-246
Closes scylladb/scylladb#28232
* github.com:scylladb/scylladb:
test: add nightly label
test.py: support boost labels in test.py
Hints destined for some other node can only be drained after the other node is no longer a replica of any vnode or tablet. When tablets are present, a node might still technically be a replica of some tablets after it moved to the left state. When it no longer is a replica of any tablet, it becomes "released" and the storage service generates a notification about it. Hinted handoff listens to this notification and kicks off draining hints after getting it.
The current implementation of the "released" notification would trigger every time the raft topology state is reloaded and a left node without any tokens is present in the raft topology. Although draining hints is idempotent, generating duplicate notifications is wasteful and recently became very noisy after the verbosity of the draining-related log messages was increased in 44de563. The verbosity increase itself makes sense, as draining is supposed to be a rare operation, but the duplicate notification bug now needs to be addressed.
Fix the duplicate notification problem by passing the list of previously released nodes to the `storage_service::raft_topology_update_ip` function and filtering based on it. If this function processes the topology state for the first time, it will not produce any notifications. This is fine, as hinted handoff is prepared to detect "released" nodes during the startup sequence in main.cc and start draining the hints there, if needed.
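The filtering boils down to a set difference against the previously seen state; a minimal sketch (per the description, the first pass is seeded so that nothing is reported):
```
def newly_released(previously_released: set, currently_released: set) -> set:
    # Only nodes that became released since the last topology reload
    # produce a notification.
    return currently_released - previously_released

assert newly_released({"n1"}, {"n1", "n2"}) == {"n2"}
assert newly_released({"n1", "n2"}, {"n1", "n2"}) == set()
```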
Fixes: scylladb/scylladb#28301
Refs: scylladb/scylladb#25031
The log messages added in 44de563 cause a lot of noise during topology operations and tablet migrations, so the fix should be backported to all affected versions (2025.4 and 2026.1).
Closes scylladb/scylladb#28367
* github.com:scylladb/scylladb:
storage_service: fix indentation after previous patch
raft topology: generate notification about released nodes only once
raft topology: extract "released" nodes calculation to external function
We currently make the local node the only token owner (that owns the
whole ring) in maintenance mode, but we don't update the topology properly.
The node is present in the topology, but in the `none` state. That's how
it's inserted by `tm.get_topology().set_host_id_cfg(host_id);` in
`scylla_main`. As a result, the node started in maintenance mode crashes
in the following way in the presence of a vnodes-based keyspace with the
NetworkTopologyStrategy:
```
scylla: locator/network_topology_strategy.cc:207:
locator::natural_endpoints_tracker::natural_endpoints_tracker(
const token_metadata &, const network_topology_strategy::dc_rep_factor_map &):
Assertion `!_token_owners.empty() && !_racks.empty()' failed.
```
Both `_token_owners` and `_racks` are empty. The reason is that
`_tm.get_datacenter_token_owners()` and
`_tm.get_datacenter_racks_token_owners()` called above filter out nodes
in the `none` state.
This bug basically made maintenance mode unusable in customer clusters.
We fix it by changing the node state to `normal`.
We also extend `test_maintenance_mode` to provide a reproducer for the bug.
Fixes #27988
This PR must be backported to all branches, as maintenance mode is
currently unusable everywhere.
Closes scylladb/scylladb#28322
* github.com:scylladb/scylladb:
test: test_maintenance_mode: enable maintenance mode properly
test: test_maintenance_mode: shutdown cluster connections
test: test_maintenance_mode: run with different keyspace options
test: test_maintenance_mode: check that group0 is disabled by creating a keyspace
test: test_maintenance_mode: get rid of the conditional skip
test: test_maintenance_mode: remove the redundant value from the query result
storage_proxy: skip validate_read_replica in maintenance mode
storage_service: set up topology properly in maintenance mode
Before this commit, `test_all_rf_limits` was implemented in a
repetitive manner, making it harder to understand how the guardrails
were tested. This commit refactors the test to reduce code redundancy
and verify the guardrails more explicitly.
Before this commit, the test caught a broad `Exception`. This change
specifies the expected exceptions to avoid a situation where the product
or test is broken and it goes undetected.
The usual Scylla shutdown in a cluster test takes ~2.1s. 2s come from
```
co_await sleep(std::chrono::milliseconds(_gcfg.shutdown_announce_ms));
```
as the default value of `shutdown_announce_in_ms` is 2000. This sleep
makes every `server_stop_gracefully` call 2s slower. There are ~300 such
calls in cluster tests (note that some come from `rolling_restart`). So,
it looks like this sleep makes cluster tests 300 * 2s = 10min slower.
Indeed, without the sleep, `./test.py --mode=dev cluster` takes 61min
instead of 71min on the potwor machine (the one in the Warsaw office).
We set `shutdown_announce_in_ms` to 0 for all cluster tests to make them
faster.
The sleep is completely unnecessary in tests. Removing it could introduce
flakiness, but if that's the case, then the test for which it happens is
incorrect in the first place. Tests shouldn't assume that all nodes
receive and handle the shutdown message in 2s. They should use functions
like `server_not_sees_other_server` instead, which are faster and more
reliable.
Both of the changed test cases stop two out of four nodes when there are
three group0 voters in the cluster. If one of the two live nodes is
a non-voter (node 1, specifically, as node 0 is the leader), a temporary
majority loss occurs, which can cause the following operations to fail.
In the case of `test_tablets_are_rebuilt_in_parallel`, the `exclude_node`
API can fail. In the case of `test_remove_is_canceled_if_there_is_node_down`,
removenode can fail with an unexpected error message:
```
"service::raft_operation_timeout_error (group
[46dd9cf1-fe21-11f0-baa0-03429f562ff5] raft operation [read_barrier] timed out)"
```
Somehow, these test cases are currently not flaky, but they become flaky in
the following commit.
We can consider backporting this commit to 2026.1 to prevent flakiness.
The test becomes flaky in one of the following commits. However, there is
no need to fix it, as we should delete it anyway. We are in the process of
removing the gossip-based topology from the code base, which includes the
recovery mode. We don't have to rewrite the test to use the new Raft-based
recovery procedure, as there is nothing interesting to test (no regression
to legacy service levels).
Add the nightly label for the test
test_foreign_reader_as_mutation_source
as an example of using boost labels as pytest markers.
Command to test:
```
./tools/toolchain/dbuild pytest --test-py-init --collect-only -q -m=nightly test/boost
```
Output:
```
boost/mutation_reader_test.cc::test_foreign_reader_as_mutation_source.debug.1
boost/mutation_reader_test.cc::test_foreign_reader_as_mutation_source.release.1
boost/mutation_reader_test.cc::test_foreign_reader_as_mutation_source.dev.1
```
The patch marks force-gossip-topology-changes as deprecated and removes
tests that use it. There is one test (test_different_group0_ids) which
is marked as xfail instead, since it looks like gossiper mode was used
there as a way to easily achieve a certain state, so more investigation
is needed to determine whether the test can be fixed to use raft mode instead.
Closes scylladb/scylladb#28383
This reverts commit 7bf7ff785a. The commit
tried to add clean shutdown to `scylla perf` paths, but forgot at least
`scylla perf-alternator --workload wr` which now crashes on uninitialized
`c.as`.
Fixes #28473
Closes scylladb/scylladb#28478
Add support for literals in the SELECT clause. This allows
SELECT fn(column, 4) or SELECT fn(column, ?).
Note, "SELECT 7 FROM tab" becomes valid in the grammar, but is still
not accepted because of failed type inference - we cannot infer the
type of 7, and don't have a favored type for literals (like C favors
int). We might relax this later.
In the WHERE clause (and, in Cassandra, also in the SELECT clause), type
hints can also resolve type ambiguity: (bigint)7 or (text)?. But this is
deferred to a later patch.
A few changes to the grammar are needed on top of adding a `value`
alternative to `unaliasedSelector`:
- vectorSimilarityArg gained access to `value` via `unaliasedSelector`,
so it loses that alternate to avoid ambiguity. We may drop
`vectorSimilarityArg` later.
- COUNT(1) became ambiguous via the general function path (since
function arguments can now be literals), so we remove this case
from the COUNT special cases, remaining with count(*).
- SELECT JSON and SELECT DISTINCT became "ambiguous enough" for
ANTLR to complain, though as far as I can tell `value` does not
add real ambiguity. The solution is to commit early (via "=>") to
a parsing path.
Due to the loss of count(1) recognition in the parser, we have to
special-case it in prepare. We may relax it to count any expression
later, like modern Cassandra and SQL.
Testing is awkward because of the type inference problem at the top level.
We test via the set_intersection() function and via lua functions.
Example:
```
cqlsh> CREATE FUNCTION ks.sum(a int, b int) RETURNS NULL ON NULL INPUT RETURNS int LANGUAGE LUA AS 'return a + b';
cqlsh> SELECT ks.sum(1, 2) FROM system.local;
ks.sum(1, 2)
--------------
3
(1 rows)
cqlsh>
```
(There are no suitable system functions!)
Fixes https://scylladb.atlassian.net/browse/SCYLLADB-296
Closes scylladb/scylladb#28256
`system.cluster_status` is missing the rack info compared to `nodetool status`,
which is supposed to be equivalent. This was probably an omission.
Closes scylladb/scylladb#28457
The handler of raft_topology_cmd::command::stream_ranges switches to
the streaming scheduling group to perform data streaming in it. It grabs the
group from the database's db_config, which is not great. There's a streaming
manager at hand in the storage service handlers; since the handler uses the
manager's functionality, it should use _its_ scheduling group.
This will help split the streaming scheduling group into more
elaborate groups under the maintenance supergroup: SCYLLADB-351
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Closes scylladb/scylladb#28363
Before this change, we could observe two identical
"starting auth service" messages in the log:
one from checkpoint(), the other from notify().
We remove the second one to stay consistent
with other services.
Closes scylladb/scylladb#28349
Adds --json-result option to perf-cql-raw and perf-alternator, the same as perf-simple-query has.
It is useful for automating test runs.
Related: https://scylladb.atlassian.net/browse/SCYLLADB-434
Backport: no, the original benchmark is not backported
Closes scylladb/scylladb#28451
* github.com:scylladb/scylladb:
test: perf: add example commands to perf-alternator and perf-cql-raw
test: perf: add option to write results to json in perf-cql-raw
test: perf: add option to write results to json in perf-alternator
test: perf: move write_json_result to a common file
When the topology coordinator refreshes load_stats, it caches the load_stats of every node. If a node becomes unresponsive and fresh load_stats cannot be read from it, the cached version of load_stats is used. This allows the load balancer to have at least some information about the table sizes and disk capacities of the host.
During the load_stats refresh, we aggregate the table sizes from all the nodes. This procedure calls db.find_column_family() for each table_id found in load_stats. This function throws if the table is not found, which causes the load_stats refresh to fail.
It is also possible for a table to have been dropped between the time load_stats was prepared on the host and the time it is processed on the topology coordinator. This would also cause an exception in the refresh procedure.
This PR fixes the problem by checking if the table still exists, as sketched below.
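A minimal sketch of the shape of the fix, with illustrative stand-ins for the load_stats and database types (the real code consults the database for table existence):
```
// Skip entries whose table no longer exists instead of letting the lookup
// throw; all names here are illustrative, not the actual ScyllaDB API.
#include <cstdint>
#include <map>
#include <set>

using table_id = int;

uint64_t aggregate_table_sizes(const std::map<table_id, uint64_t>& sizes_from_node,
                               const std::set<table_id>& existing_tables) {
    uint64_t total = 0;
    for (const auto& [id, size] : sizes_from_node) {
        if (!existing_tables.contains(id)) {
            continue; // table dropped after load_stats was produced -- skip it
        }
        total += size;
    }
    return total;
}
```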
Fixes: #28359
Closes scylladb/scylladb#28440
* github.com:scylladb/scylladb:
test: add test and reproducer for load_stats refresh exception
load_stats: handle dropped tables when refreshing load_stats
This patch adds a test and reproducer for the issue where the load_stats
refresh procedure throws exceptions if any of the tables have been
dropped since load_stats was produced.
We extend the test to provide a reproducer for #27988 and to avoid
similar bugs in the future.
The test slows down from ~14s to ~19s on my local machine in dev
mode. It seems reasonable.
In the following commit, we make the test run with multiple keyspaces,
and the old check becomes inconvenient. We also move it below to the
part of the code that won't be executed for each keyspace.
Additionally, we check if the error message is as expected.
This skip has already caused trouble.
After 0668c642a2, the skip was always hit, and
the test was silently doing nothing. This made us miss #26816 for a long
time. The test was fixed in 222eab45f8, but we
should get rid of the skip anyway.
We increase the number of writes from 256 to 1000 to make the chance of not
finding the key on server A even lower. If that still happens, it must be
due to a bug, so we fail the test. We also make the test insert rows until
server A is a replica of one row. The expected number of inserted rows is
a small constant, so it should, in theory, make the test faster and cleaner
(we need one row on server A, so we insert exactly one such row).
It's possible to make the test fully deterministic, by e.g., hardcoding
the key and tokens of all nodes via `initial_token`, but I'm afraid it would
make the test "too deterministic" and could hide a bug.
In maintenance mode, the local node adds only itself to the topology. However,
the effective replication map of a keyspace with tablets enabled contains all
tablet replicas. It gets them from the tablets map, not the topology. Hence,
`network_topology_strategy::sanity_check_read_replicas` hits
```
throw std::runtime_error(format("Requested location for node {} not in topology. backtrace {}", id, lazy_backtrace()));
```
for tablet replicas other than the local node.
As a result, all requests to a keyspace with tablets enabled and RF > 1 fail
in debug mode (`validate_read_replica` does nothing in other modes). We don't
want to skip maintenance mode tests in debug mode, so we skip the check in
maintenance mode.
We move the `is_debug_build()` check because:
- `validate_read_replicas` is a static function with no access to the config,
- we want the `!_db.local().get_config().maintenance_mode()` check to be
dropped by the compiler in non-debug builds.
We also suppress `-Wunneeded-internal-declaration` with `[[maybe_unused]]`.
We currently make the local node the only token owner (that owns the
whole ring) in maintenance mode, but we don't update the topology properly.
The node is present in the topology, but in the `none` state. That's how
it's inserted by `tm.get_topology().set_host_id_cfg(host_id);` in
`scylla_main`. As a result, the node started in maintenance mode crashes
in the following way in the presence of a vnodes-based keyspace with the
NetworkTopologyStrategy:
```
scylla: locator/network_topology_strategy.cc:207:
locator::natural_endpoints_tracker::natural_endpoints_tracker(
const token_metadata &, const network_topology_strategy::dc_rep_factor_map &):
Assertion `!_token_owners.empty() && !_racks.empty()' failed.
```
Both `_token_owners` and `_racks` are empty. The reason is that
`_tm.get_datacenter_token_owners()` and
`_tm.get_datacenter_racks_token_owners()` called above filter out nodes
in the `none` state.
This bug basically made maintenance mode unusable in customer clusters.
We fix it by changing the node state to `normal`. We also update its
rack, datacenter, and shard count. Rack and datacenter happen to be present
in the topology already, but there is nothing wrong with updating them
again. The shard count is missing, so we update it as well to avoid other
issues.
Fixes #27988
After guardrails_test.py has been migrated to test.py and fixed in
previous commits of this patch series, it can finally be enabled.
Fixes: SCYLLADB-255
This commit adds `wait_other_notice=True` to `cluster.populate` in
`guardrails_test.py`. Without this, `test_default_rf` sometimes fails
because `NetworkTopologyStrategy` setting fails before
the node knows about all other DCs.
Refs: SCYLLADB-255
This commit copies guardrails_test.py from dtest repository and
(temporarily) disables it, as it requires improvement in following
commits of this patch series before being enabled.
Refs: SCYLLADB-255
Schema is already a member of the select statement; avoiding
the call saves around 400 CPU instructions on the select
request hot path.
Closes scylladb/scylladb#28328
When the topology coordinator refreshes load_stats, it caches load_stats
for every node. If the node becomes unresponsive and fresh
load_stats cannot be read from it, the cached version of
load_stats will be used. This is to allow the load balancer to
have at least some information about the table sizes and disk capacities
of the host.
During load_stats refresh, we aggregate the table sizes from all the
nodes. This procedure calls db.find_column_family() for each table_id
found in load_stats. This function will throw if the table is not found.
This will cause load_stats refresh to fail.
It is also possible for a table to have been dropped between the time
load_stats has been prepared on the host, and the time it is processed
on the topology coordinator. This would also cause an exception in the
refresh procedure.
This patch fixes this problem by checking if the table still exists.
Vector Search feature needs to support creating vector indexes with additional
filtering column. There will be two types of indexes: global which indexes
vectors per table, and local which indexes vectors per partition key. The new
syntaxes are based on ScyllaDB's Global Secondary Index and Local Secondary
Index. Vector indexes don't use secondary index functionality in any way -
all indexing, filtering and processing data will be done on Vector Store side.
This patch allows creating vector indexes using this CQL syntax:
```
CREATE TABLE IF NOT EXISTS cycling.comments_vs (
commenter text,
comment text,
comment_vector VECTOR <FLOAT, 5>,
created_at timestamp,
discussion_board_id int,
country text,
lang text,
PRIMARY KEY ((commenter, discussion_board_id), created_at)
);
CREATE CUSTOM INDEX IF NOT EXISTS global_ann_index
ON cycling.comments_vs(comment_vector, country, lang) USING 'vector_index'
WITH OPTIONS = { 'similarity_function': 'DOT_PRODUCT' };
CREATE CUSTOM INDEX IF NOT EXISTS local_ann_index
ON cycling.comments_vs((commenter, discussion_board_id), comment_vector, country, lang)
USING 'vector_index'
WITH OPTIONS = { 'similarity_function': 'DOT_PRODUCT' };
```
Currently, if we run these queries to create indexes, we receive the following errors:
```
InvalidRequest: Error from server: code=2200 [Invalid query] message="Vector index can only be created on a single column"
InvalidRequest: Error from server: code=2200 [Invalid query] message="Local index definition must contain full partition key only. Redundant column: XYZ"
```
This commit refactors `vector_index::check_target` to correctly validate
the columns that make up the index. The vector store currently supports
filtering only by native types, so the column types are checked. The first
column in the list must be a vector (the index is built over these vectors),
so this is also checked.
The allowed column types are native types, excluding counter (it is not
possible to create a table with both a counter and a vector) and duration
(it is not possible to compare durations correctly; this type is not allowed
even in secondary indexes). The rules are sketched below.
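A hedged sketch of these rules; the enum and predicate are illustrative stand-ins, not the actual schema API used by `vector_index::check_target`:
```
// Illustrative column-kind classification, not the real type system.
enum class col_kind { vector, counter, duration, other_native, non_native };

bool valid_vector_index_column(col_kind k, bool is_first_column) {
    if (is_first_column) {
        // The target column holds the vectors the index is built over.
        return k == col_kind::vector;
    }
    // Filtering columns: native types only, excluding counter and duration.
    return k == col_kind::other_native;
}
```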
This commit adds a cqlpy test to check the errors raised while creating indexes.
Fixes: SCYLLADB-298
This needs to be backported to version 2026.1 as this is a fix for filtering support.
Closes scylladb/scylladb#28366
These were marked xfail due to #8077 (the column name was wrong),
but it was fixed long ago for 5.4 (exact commit not known).
Remove the xfail markers to prevent regressions.
Closes scylladb/scylladb#28432
This series optimizes role lookup by moving find_record into standard_role_manager and switching it to use the auth cache. This allows reverting can_login to its original simpler form, ensuring hot paths are properly cached while maintaining consistency via group0_guard.
Backport: no, it's not a bug fix.
Closes scylladb/scylladb#28329
* github.com:scylladb/scylladb:
auth: bring back previous version of standard_role_manager::can_login
auth: switch find_record to use cache
auth: make find_record and callers standard_role_manager members
The method was coroutinized by 6df07f7ff7. Back then coroutine::switch_to() wasn't available, and the code used with_scheduling_group() to call coroutinized lambdas. Those lambdas were implemented as on-stack variables to solve capture-list lifetime problems. As a result, the code looks like
```
auto flush = [] {
    ... // do the flushing
    auto post_flush = [] {
        ... // do the post-flushing
    };
    co_return co_await with_scheduling_group(group_b, post_flush);
};
co_return co_await with_scheduling_group(group_a, flush);
```
which is a bit clumsy. Now we have switch_to() and can make the code flow of this method more readable, like this:
```
co_await switch_to(group_a);
... // do the flushing
co_await switch_to(group_b);
... // do the post-flushing
```
Code cleanup, not backporting
Closes scylladb/scylladb#28430
* github.com:scylladb/scylladb:
table: Fix indentation after previous patch
table: Use coroutine::switch_to() in try_flush_memtable_to_sstable()
test_alternator_proxy_protocol starts a node and connects via the alternator ports.
Starting a node, by default, waits until the CQL ports are up. This does not guarantee
that the alternator ports are up (they will be up very soon after this), so there is a short
window where a connection to the alternator ports will fail.
Fix by adding a ServerUpState=SERVING mode, which waits for the node to report
to its supervisor (systemd, which we are pretending to be) that its ports are open.
The test is then adjusted to request this new ServerUpState.
Fixes #28210
Fixes #28211
Flaky tests are only in master and branch-2026.1, so backporting there.
Closes scylladb/scylladb#28291
* github.com:scylladb/scylladb:
test: test_alternator_proxy_protocol: wait for the node to report itself as serving
test: cluster_manager: add ability to wait for supervisor STATUS=serving
It allows dropping the local lambdas passed into with_scheduling_group()
calls. Overall the code flow becomes more readable.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
This commit moves the "Ungrouped properties" category to the end of the
properties list. The properties are now published in the documentation,
and it doesn't look good if the list starts with ungrouped properties.
This patch was taken over from Anna Stuchlik <anna.stuchlik@scylladb.com>.
Closes scylladb/scylladb#28343
Move to `replica/`, drop `flat` from name and drop unused usages as well as unused includes.
Code cleanup, no backport
Closes scylladb/scylladb#28353
* github.com:scylladb/scylladb:
replica/partition_snapshot_reader: remove unused includes
partition_snapshot_reader: remove "flat" from name
mv partition_snapshot_reader.hh -> replica/
This test case was observed to take over 2 minutes to run on CI
machines, contributing to already bloated CI run times.
Disable this test in debug mode. The test checks that memtable flush
is slowed down when compaction can't keep up, so it needs to
overwhelm the CPU by definition. On the other hand, this is not a
correctness test (there are such tests for the memtable and compaction
already), so it is not critical to run it in debug mode; it is not
expected to catch use-after-free bugs and the like.
Closes scylladb/scylladb#28407
This commit replaces the previous approach of running pytest inside
GDB’s Python interpreter. Instead, tests are executed by driving a
persistent GDB process externally using pexpect.
- pexpect: Python library for controlling interactive programs
(used here to send commands to GDB and capture its output)
- persistent GDB: keep one GDB session alive across multiple tests
instead of starting a new process for each test
Tests can now be executed via `./test.py gdb` or with
`pytest test/scylla_gdb`. This improves performance and
makes failures easier to debug since pytest no longer runs
hidden inside GDB subprocesses.
Closes scylladb/scylladb#24804
When reads arrive, they have to wait for admission on the reader
concurrency semaphore. If the node is overloaded, the reads will
be queued. They can time out while in the queue, but will not time
out once admitted.
Once the shard is sufficiently loaded, it is possible that most
queued reads will time out, because the average time it takes
for a queued read to be admitted is around that of the timeout.
If a read times out, any work we already did, or are about to do
on it is wasted effort. Therefore, the patch tries to prevent it
by checking if an admitted read has a chance to complete in time
and abort it if not. It uses the following criteria:
if a read's remaining time <= (the read's timeout on arrival at the semaphore) * (the live-updateable preemptive_abort_factor),
then the read is rejected and the next one from the wait list is considered, as sketched below.
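A minimal sketch of the admission-time check, with illustrative names (the real semaphore operates on its own permit and clock types):
```
// Reject an admitted read that is unlikely to complete before its deadline.
// A factor of 0.0 never rejects; 1.0 rejects any read that spent part of
// its timeout budget waiting in the queue.
#include <chrono>

bool should_shed_read(std::chrono::milliseconds remaining_until_timeout,
                      std::chrono::milliseconds timeout_on_arrival,
                      double preemptive_abort_factor) {
    auto threshold = std::chrono::duration_cast<std::chrono::milliseconds>(
            timeout_on_arrival * preemptive_abort_factor);
    return remaining_until_timeout <= threshold;
}
```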
Fixes https://github.com/scylladb/scylladb/issues/14909
Fixes: SCYLLADB-353
Backport is not needed. Better to first observe its impact.
Closes scylladb/scylladb#21649
* github.com:scylladb/scylladb:
reader_concurrency_semaphore: Check during admission if read may timeout
permit_reader::impl: Replace break with return after evicting inactive permit on timeout
reader_concurrency_semaphore: Add preemptive_abort_factor to constructors
config: Add parameters to control reads' preemptive_abort_factor
permit_reader: Add a new state: preemptive_aborted
reader_concurrency_semaphore: validate waiters counter when dequeueing a waiting permit
reader_concurrency_semaphore: Remove cpu_concurrency's default value
Contains various improvements to tablet load balancer. Batched together to save on the bill for CI.
Most notably:
- Make plan summary more concise, and print info only about present elements.
- Print rack name in addition to DC name when making a per-rack plan
- Print "Not possible to achieve balance" only when this is the final plan with no active migrations
- Print per-node stats when "Not possible to achieve balance" is printed
- amortize metrics lookup cost
- avoid spamming logs with per-node "Node {} does not have complete tablet stats, ignoring"
Backport to 2026.1, since the changes enhance debuggability and are relatively low risk.
Fixes #28423
Fixes #28422
Closes scylladb/scylladb#28337
* github.com:scylladb/scylladb:
tablets: tablet_allocator.cc: Convert tabs to spaces
tablets: load_balancer: Warn about incomplete stats once for all offending nodes
tablets: load_balancer: Improve node stats printout
tablets: load_balancer: Warn about imbalance only when there are no more active migrations
tablets: load_balancer: Extract print_node_stats()
tablet: load_balancer: Use empty() instead of size() where applicable
tablets: Fix redundancy in migration_plan::empty()
tablets: Cache pointer to stats during plan-making
tablets: load_balancer: Print rack in addition to DC when giving context
tablets: load_balancer: Make plan summary concise
tablets: load_balancer: Move "tablet_migration_bypass" injection point to make_plan()
Now that the sum function in the histogram uses true values instead of
an estimate, the test should reflect that.
Signed-off-by: Amnon Heiman <amnon@scylladb.com>
In fact, it's partially there already. When view_builder::start() is called, it first calls initialization code (the start_in_background() method), then kicks do_build_step(), which runs a background fiber to perform build steps. The starting code inherits its scheduling group from main(). And the step fiber code needs to run itself in a maintenance scheduling group, so it explicitly grabs one via database->db_config.
This PR mainly gets rid of the call to database::get_streaming_scheduling_group() from do_build_step() as preparation for splitting the streaming scheduling group into parts (see SCYLLADB-351). To make that happen, do_build_step() is patched to inherit its scheduling group from view_builder::start(), and start() itself is called by main from the maintenance scheduling group (like for other view building services).
New feature (nested scheduling group), not backporting
Closes scylladb/scylladb#28386
* github.com:scylladb/scylladb:
view_builder: Start background in maintenance group
view_builder: Wake-up step fiber with condition variable
In the lambda returned from make_streaming_consumer() there's a check that
the current scheduling group is the streaming one. It came from #17090, where
streaming code was launched in the wrong sched group, thus affecting user
groups in a bad way.
The check is nice and useful, but it abuses replica::database by getting
unrelated information from it.
To preserve the check and to stop using the database as a provider of configs,
keep the streaming scheduling group handle in the debug namespace. This
emphasises that this global variable is purely for debugging purposes.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Closes scylladb/scylladb#28410
Explain what automatic repair is and how to configure it. While at it, improve the existing repair documentation a bit.
Fixes: SCYLLADB-130
This PR missed the 2026.1 branch date, so it needs backport to 2026.1, where the auto repair feature debuts.
Closes scylladb/scylladb#28199
* github.com:scylladb/scylladb:
docs: add feature page for automatic repair
docs: inter-link incremental-repair and repair documents
docs: incremental-repair: fix curl example
One of the best features of the pytest framework is "assertion
rewriting": if your test does, for example, "assert a + 1 == b", the
assertion is "rewritten" so that if it fails it tells you not only
that "a+1" and "b" are not equal, but also what the non-equal values
are, how they differ (e.g., which elements of arrays differ) and
how each side of the equality was calculated.
But pytest can only "rewrite" assertions that it sees. If you call a
utility function checksomething() from another module and that utility
function calls assert, pytest will not be able to rewrite it, and you'll
get ugly, hard-to-debug assertion failures.
This problem is especially noticeable in tests we translated from
Cassandra, in test/cqlpy/cassandra_tests. Those tests use a bunch of
assertion-performing utility functions like assertRows() et al.
Those utility functions are defined in a separate source file,
porting.py, so by default do not get their assertions rewritten.
We had a solution for this: test/cqlpy/cassandra_test/__init__.py had:
pytest.register_assert_rewrite("cassandra_tests.porting")
This tells pytest to rewrite assertions in porting.py the first time
that it is imported.
It used to work well, but recently it stopped working. This is because
we changed the module paths recently, so the module should now be written
as test.cqlpy.cassandra_tests.porting.
I verified, by editing one of the cassandra_tests to include a failing
check, that this statement had indeed stopped working, and that fixing
the module path in this way solves it and makes assertion rewriting
work again.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes scylladb/scylladb#28411
Currently view_builder::start() is called in the default scheduling group.
Once it initializes itself, it wakes up the step fiber, which explicitly
switches to the maintenance scheduling group.
This explicit switch made sense before the previous patch, when the fiber
was implemented as a serialized action. Now the fiber starts directly
from the .start() method and can inherit its scheduling group from it.
That said, the main code calls view_builder::start() in the maintenance
scheduling group, killing two birds with one stone. First, the step fiber
no longer needs to borrow its scheduling group indirectly via the database.
Second, the start_in_background() code itself runs in a more suitable
scheduling group.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The view builder runs a background fiber that performs build steps. To kick
the fiber it used a serialized action, but that's overkill -- nobody
waits for the action to finish, except on stop, when it's joined.
This patch uses a condition variable to kick the fiber, and starts the fiber
instantly, in the place where the serialized action was first kicked, as
sketched below.
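A hedged sketch of the pattern using Seastar's condition_variable (the view builder specifics are simplified away):
```
// Background fiber kicked by a condition variable instead of a serialized
// action; do_build_step() and the stop flag are simplified placeholders.
#include <seastar/core/condition-variable.hh>
#include <seastar/core/coroutine.hh>
#include <seastar/core/future.hh>

class builder {
    seastar::condition_variable _cv;
    bool _stopped = false;

    seastar::future<> do_build_step(); // placeholder for the real work

public:
    seastar::future<> run_fiber() {
        while (!_stopped) {
            co_await do_build_step();
            co_await _cv.wait(); // sleep until kicked or stopped
        }
    }

    void kick() {
        _cv.signal(); // nobody waits for the step to finish
    }

    void request_stop() {
        _stopped = true;
        _cv.signal();
    }
};
```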
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
There are several places in the code that need to explicitly switch into
the gossiper scheduling group. For that they currently call the database to
provide the group, but it's better to get the gossiper sched group from the
gossiper itself, especially since all those places have the gossiper at hand.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
This is to initialize dependency references, in particular gossiper&,
before _group0_barrier. The latter will need to access this->_gossiper
in the next patch.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Explain what the feature is and how to configure it.
Inter-link all the repair related pages, so one can discover all about
repair, regardless of which page they land on.
When a shard on a replica is overloaded, it breaks down completely,
throughput collapses, latencies go through the roof and the
node/shard can even become completely unresponsive to new connection
attempts.
When reads arrive, they have to wait for admission on the reader
concurrency semaphore. If the node is overloaded, the reads will
be queued and thus they can time out while being in the queue or during
the execution. In the latter case, the timeout does not always
result in the read being aborted.
Once the shard is sufficiently loaded, it is possible that most
queued reads will time out, because the average time it takes
for a queued read to be admitted is around that of the timeout.
If a read times out, any work we already did, or are about to do
on it is wasted effort. Therefore, the patch tries to prevent it
by checking if an admitted read has a chance to complete in time
and abort it if not. It uses the following criteria:
if a read's remaining time <= (the read's timeout on arrival at the semaphore) * (the preemptive factor),
then the read is rejected and the next one from the wait list is
considered.
Evicting an inactive permit destroys the permit object when the
reader is closed, making any further member access invalid. Switch
from break to an early return to prevent any possible use-after-free
after evict() in the state::inactive timeout path.
The new parameter parametrizes the factor used to reject a read
during admission. Its value shall be between 0.0 and 1.0, where
- 0.0 means a read will never get rejected during admission
- 1.0 means a read will immediately get rejected during admission
Although passing values outside the interval is possible, such values
will have the exact same effect as if they were clamped to [0.0, 1.0].
A permit gets into the preemptive_aborted state when it:
- times out;
- gets rejected from execution because there is a high chance its
execution would not finish on time.
Being in this state means the permit was removed from the wait list,
its internal timer was canceled, and the semaphore's
`total_reads_shed_due_to_overload` statistic was increased.
Fix a subtle but damaging failure mode in the tablet migration state machine: when a barrier fails, the follow-up barrier is triggered asynchronously, and cleanup can get skipped for that iteration. On the next loop, the original failure may no longer be visible (because the failing node got excluded), so the tablet can incorrectly move forward instead of entering `cleanup_target`.
To make cleanup reliable, this PR:
- Adds an additional "fallback cleanup" stage, `write_both_read_old_fallback_cleanup`, that does not modify read/write selectors. This stage is safe to enter immediately after a barrier failure, and it funnels the tablet into cleanup with the required barriers.
- Avoids changing both read and write selectors in a single step when transitioning from `write_both_read_new` to `cleanup_target`. The fallback path updates selectors in a safe order: read first, then write.
- Allows a direct no-barrier transition from `allow_write_both_read_old` to `cleanup_target` after failure, because in that specific case `cleanup_target` doesn't change selectors and the hop is safe.
No need for backport. It's an improvement. Currently, tablets transition to `cleanup_target` eventually via failed streaming.
Closes scylladb/scylladb#28169
* github.com:scylladb/scylladb:
topology_coordinator: add write_both_read_old_fallback_cleanup state
topology_coordinator: allow cleanup_target transition from streaming/rebuild_repair without barrier
topology_coordinator: allow cleanup_target transition without barrier after failure in write_both_read_old
topology_coordinator: allow cleanup_target transition without barrier after failure in allow_write_both_read_old
This patch replaces simple counters with bytes_histogram for tracking
CQL request and response sizes, enabling better visibility into message
size distribution.
Changes:
- Replace request_size and response_size metrics with bytes_histogram in
cql_sg_stats::request_kind_stats
- Per-shard metrics continue to be reported as before
- QUERY, EXECUTE, and BATCH operations now report per-node, per-scheduling-group
histograms of bytes sent and received, providing detailed insight into these
operations
Other CQL operations (e.g., PREPARE, OPTIONS) are not included in per-node
histogram reporting as they are less performance-critical, but can be added
in the future if proven useful.
Metrics example:
```
# HELP scylla_transport_cql_request_bytes Counts the total number of received bytes in CQL messages of a specific kind.
# TYPE scylla_transport_cql_request_bytes counter
scylla_transport_cql_request_bytes{kind="BATCH",scheduling_group_name="sl:default",shard="0"} 129808
scylla_transport_cql_request_bytes{kind="EXECUTE",scheduling_group_name="sl:default",shard="0"} 227409
scylla_transport_cql_request_bytes{kind="PREPARE",scheduling_group_name="sl:default",shard="0"} 631
scylla_transport_cql_request_bytes{kind="QUERY",scheduling_group_name="sl:default",shard="0"} 2809
scylla_transport_cql_request_bytes{kind="QUERY",scheduling_group_name="sl:driver",shard="0"} 4079
scylla_transport_cql_request_bytes{kind="REGISTER",scheduling_group_name="sl:default",shard="0"} 98
scylla_transport_cql_request_bytes{kind="STARTUP",scheduling_group_name="sl:driver",shard="0"} 432
# HELP scylla_transport_cql_request_histogram_bytes A histogram of received bytes in CQL messages of a specific kind and specific scheduling group.
# TYPE scylla_transport_cql_request_histogram_bytes histogram
scylla_transport_cql_request_histogram_bytes_sum{kind="QUERY",scheduling_group_name="sl:driver"} 4079
scylla_transport_cql_request_histogram_bytes_count{kind="QUERY",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="1024.000000",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="2048.000000",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="4096.000000",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="8192.000000",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="16384.000000",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="32768.000000",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="65536.000000",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="131072.000000",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="262144.000000",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="524288.000000",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="1048576.000000",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="2097152.000000",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="4194304.000000",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="8388608.000000",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="16777216.000000",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="33554432.000000",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="67108864.000000",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="134217728.000000",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="268435456.000000",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="536870912.000000",scheduling_group_name="sl:driver"} 57
scylla_transport_cql_request_histogram_bytes_bucket{kind="QUERY",le="1073741824.000000",scheduling_group_name="sl:driver"} 57
```
Previously, histogram sums were estimated by multiplying bucket offsets
by their counts, which produces inaccurate results - typically too high
when using upper limits or too low when using lower limits.
This patch adds accurate sum tracking to approx_exponential_histogram:
- Adds a _sum member variable to track the actual sum of all values
- Implements sum() method to return the accumulated total
- Updates add() to increment _sum for each value
- Modifies to_metrics_histogram() helper to use the new sum() method
This change is important as histograms will be used instead of counters for
byte statistics, where accurate totals are essential for metrics reporting.
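A minimal sketch of the idea, with a simplified stand-in for approx_exponential_histogram (the bucket layout and template parameters are illustrative):
```
#include <array>
#include <cstddef>
#include <cstdint>

// Simplified exponential histogram that tracks the true sum of added values
// instead of estimating it from bucket offsets and counts.
template <uint64_t MinBucket, std::size_t NumBuckets>
class exp_histogram {
    std::array<uint64_t, NumBuckets> _buckets{};
    uint64_t _sum = 0; // actual accumulated total, not an estimate
public:
    void add(uint64_t value) {
        std::size_t i = 0;
        for (uint64_t limit = MinBucket; i + 1 < NumBuckets && value > limit; limit *= 2) {
            ++i;
        }
        ++_buckets[i];
        _sum += value; // the accurate-sum fix: accumulate each value as it arrives
    }
    uint64_t sum() const { return _sum; } // consumed by to_metrics_histogram()
};
```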
Signed-off-by: Amnon Heiman <amnon@scylladb.com>
For various use cases, we need to report byte histograms, such as for
request and reply message sizes.
This patch introduces bytes_histogram as a type alias for
approx_exponential_histogram, configured to track byte values from 1KB to
1GB with power-of-2 buckets (Precision=1).
This provides a convenient, performance-efficient histogram for
measuring message sizes, payload sizes, and other byte-based metrics.
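Reusing the illustrative template from the previous sketch, the alias might look like this (the real declaration's parameters differ):
```
// 21 power-of-2 buckets covering 1KB (2^10) through 1GB (2^30).
using bytes_histogram = exp_histogram<1024, 21>;
```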
Signed-off-by: Amnon Heiman <amnon@scylladb.com>
There are a few places that use raft_group0_client as a way to get to system_keyspace. Mostly they can live without it -- either the needed reference is already at hand, or it's (ab)used to get to the database reference. The only place that really needs the system keyspace is the state merger code, which needs the last state ID. For that, an explicit helper method is added to group0_client.
Refining API between components, not backporting
Closes scylladb/scylladb#28387
* github.com:scylladb/scylladb:
raft_group0_client: Dont export system keyspace
raft_group0_client: Add and use get_last_group0_state_id()
group0_state_machine: Call ensure_group0_sched() with data_dictionary
view_building_worker: Use its own system_keyspace& reference
During test.py run, noticed this warning:
```
10:38:22 test/cqlpy/cassandra_tests/validation/operations/insert_update_if_condition_test.py:14: 32 warnings
10:38:22 /jenkins/workspace/releng-testing/scylla-ci/scylla/test/cqlpy/cassandra_tests/validation/operations/insert_update_if_condition_test.py:14: PytestAssertRewriteWarning: Module already imported so cannot be rewritten: test.cqlpy.cassandra_tests.porting
10:38:22 pytest.register_assert_rewrite('test.cqlpy.cassandra_tests.porting')
```
The insert_update_if_condition_test.py was calling
pytest.register_assert_rewrite() for the porting module, but this
registration is already handled by cassandra_tests/__init__.py which
is automatically loaded before any test runs.
Closes scylladb/scylladb#28409
The include-what-you-use workflow fails with
```
Invalid workflow file: .github/workflows/iwyu.yaml#L25
The workflow is not valid. .github/workflows/iwyu.yaml (Line: 25, Col: 3): Error calling workflow 'scylladb/scylladb/.github/workflows/read-toolchain.yaml@257054deffbef0bde95f0428dc01ad10d7b30093'. The nested job 'read-toolchain' is requesting 'contents: read', but is only allowed 'contents: none'.
```
Fix by adding the correct permissions.
Closes scylladb/scylladb#28390
Enhance the skip_mode marker to accept either a single mode string
or a list of modes, allowing tests to be skipped across multiple
build configurations with a single marker.
Before:
@pytest.mark.skip_mode("dev", reason="...")
@pytest.mark.skip_mode("debug", reason="...")
After:
@pytest.mark.skip_mode(["dev", "debug"], reason="...")
This reduces duplication when the same skip condition applies
to multiple build modes.
Closes scylladb/scylladb#28406
The test is currently flaky. It tries to get the host ID of the bootstrapping
node via the REST API after the node crashes. This can obviously fail. The
test usually doesn't fail, though, as it relies on the host ID being saved
in `ScyllaServer._host_id` at this point by `ScyllaServer.try_get_host_id()`
repeatedly called in `ScyllaServer.start()`. However, with a very fast crash
and unlucky timings, no such call may succeed.
We deflake the test by getting the host ID before the crash. Note that at this
point, the bootstrapping node must be serving the REST API requests because
`await log.wait_for("finished do_send_ack2_msg")` above guarantees that the
node has started the gossip shadow round, which happens after starting the REST
API.
Fixes #28385
Closes scylladb/scylladb#28388
Currently, tablet_allocator switches to the streaming scheduling group that
it gets from the database. It's not nice to use the database as a provider of
configs/scheduling_groups.
This patch adds a background scheduling group for tablet allocator
configured via its config and sets it to streaming group in main.cc
code.
This will help split the streaming scheduling group into more
elaborate groups under the maintenance supergroup: SCYLLADB-351
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Closes scylladb/scylladb#28356
This PR reduces the runtime of `test_out_of_space_prevention.py` by addressing two main sources of overhead: slow “critical utilization” setup and delayed tablet load stats propagation. Combined, these changes cut the module’s total execution time from 324s to 185s.
Improvements. No backport is required.
Closes scylladb/scylladb#28396
* github.com:scylladb/scylladb:
test/storage: speed up out-of-space prevention tests by using smaller volumes
test/storage: reduce tablet load stats refresh interval to speed up OOS prevention tests
As a next step of the migration to the pytest runner, this PR moves
responsibility for nodetool test execution solely to pytest.
Closes scylladb/scylladb#28348
Otherwise, it may be only a temporary situation due to lack of
candidates, and may be unnecessarily alerting.
Also, print node stats to allow assessing how bad the situation is on
the spot. Those stats can hint to a cause of imbalance, if balancing
is per-DC and racks have different capacity.
Load-balancing can now be per-rack instead of per-DC, so just printing
"in DC" is confusing. If we're balancing a rack, we should print which
rack it is.
Before:
load_balancer - Prepared 1 migration plans, out of which there were 1 tablet migration(s) and 0 resize decision(s) and 0 tablet repair(s) and 0 rack-list colocation(s)
After:
load_balancer - Prepared plan: migrations: 1
We print only stats about elements which are present.
When building with `--disable-precompiled-header`, view.cc failed to
compile due to missing <seastar/coroutine/all.hh> include, which provides
`coroutine::all`.
The problem doesn't manifest when precompiled headers are used, which is
the default. So that's likely why it was missed by the CI.
Adding the explicit include fixes the build.
Fixes: scylladb/scylladb#28378
Ref: scylladb/scylladb#28093
No backport: This problem is only present in master.
Closes scylladb/scylladb#28379
Fixes #28398
When used as path elements in Google storage paths, the object names
need to be URL encoded. Because (a) the tests did not really use prefixes
including non-URL-valid chars (i.e. / etc.) and (b) the mock server used for
most testing does not enforce this particular aspect, this was missed.
Modified unit tests to use prefixing for all names, so when run
in real GS, any errors like this will show.
Fixes #28399
When iterating with a pager, the mock server and real GCS behave differently.
The latter will not give a pager token for the last page, only for the
penultimate one. This needs to be handled.
In production environments, we observed cases where the S3 client would repeatedly fail to connect due to DNS entries becoming stale. Because the existing logic only attempted the first resolved address and lacked a way to refresh DNS state, the client could get stuck in a failure loop.
Introduce an RR TTL and a connection-failure retry to (see the sketch after this list):
- re-resolve the RR in a timely manner
- forcefully reset and re-resolve addresses
- add a special case when the TTL is 0 and the record must be resolved for every request
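A minimal sketch of the TTL-driven re-resolution idea; resolve_all() and the surrounding types are hypothetical placeholders, not the actual dns_connection_factory API:
```
#include <chrono>
#include <string>
#include <vector>

// Hypothetical resolved-record state; the real code uses Seastar's resolver.
struct resolved_record {
    std::vector<std::string> addresses; // all resolved addresses, tried in turn
    std::chrono::seconds ttl{0};
};

class dns_state {
    resolved_record _cached{};
    std::chrono::steady_clock::time_point _expires{};

    resolved_record resolve_all(const std::string& host); // placeholder resolver

public:
    const resolved_record& get(const std::string& host) {
        auto now = std::chrono::steady_clock::now();
        // TTL == 0 is the special case: re-resolve for every request.
        if (_cached.addresses.empty() || _cached.ttl.count() == 0 || now >= _expires) {
            _cached = resolve_all(host);
            _expires = now + _cached.ttl;
        }
        return _cached;
    }

    // On connection failure: forcefully drop the cached record so the next
    // request re-resolves instead of looping on a stale address.
    void reset() {
        _cached = {};
    }
};
```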
Fixes: CUSTOMER-96
Fixes: CUSTOMER-139
Should be backported to 2025.3/4 and 2026.1 since we already encountered it in the production clusters for 2025.3
Closes scylladb/scylladb#27891
* github.com:scylladb/scylladb:
connection_factory: includes cleanup
dns_connection_factory: refine the move constructor
connection_factory: retry on failure
connection_factory: introduce TTL timer
connection_factory: get rid of shared_future in dns_connection_factory
connection_factory: extract connection logic into a member
connection_factory: remove unnecessary `else`
connection_factory: use all resolved DNS addresses
s3_test: remove client double-close
Use the new ServerUpState=SERVING mechanism to wait for the alternator
ports to be up, rather than relying on the default waiting for CQL,
which happens earlier and therefore opens a window where a connection to
the alternator ports will fail.
When running under systemd, ScyllaDB sends a STATUS=serving message
to systemd. Co-opt this mechanism by setting up NOTIFY_SOCKET, thus
making the cluster manager pretend it is systemd. Users of the cluster
manager can now wait for the node to report itself up, rather than
having to parse log files or retry connections.
This parameter was not mentioned anywhere in the documentation.
Add an explanation of this parameter: why we need it, what is the
default and how it can be changed.
Closes scylladb/scylladb#28132
Hints destined for some other node can only be drained after the other
node is no longer a replica of any vnode or tablet. When tablets
are present, a node might still technically be a replica of some tablets
after it moved to left state. When it no longer is a replica of any
tablet, it becomes "released" and storage service generates a
notification about it. Hinted handoff listens to this notification and
kicks off draining hints after getting it.
The current implementation of the "released" notification would trigger
every time raft topology state is reloaded and a left node without any
tokens is present in the raft topology. Although draining hints is
idempotent, generating duplicate notifications is wasteful and recently
became very noisy after the verbosity of the draining-related log
messages was increased in 44de563. The verbosity increase itself makes
sense, as draining is supposed to be a rare operation, but the duplicate
notification bug now needs to be addressed.
Fix the duplicate notification problem by passing the list of previously
released nodes to the `storage_service::raft_topology_update_ip`
function and filtering based on it. If this function processes the
topology state for the first time, it will not produce any
notifications. This is fine as hinted handoff is prepared to detect
"released" nodes during the startup sequence in main.cc and start
draining the hints there, if needed.
Fixes: #28301
Refs: #25031
The commit 59faa6d introduces a new parameter called cpu_concurrency
and sets its default value to 1, which violates commit fbb83dd, which
removed all default values from constructors except the one used by the
unit tests.
The patch removes the default value of the cpu_concurrency parameter
and alters tests to use the test dedicated reader_concurrency_semaphore
constructor wherever possible.
Tests in test_out_of_space_prevention.py spend a large fraction of
time creating a random “blob” file to cross the 0.8 critical disk
utilization threshold. With 100MB volumes this requires writing
~70–80MB of data, which is slow inside Docker/Podman-backed volumes.
Most tests only use ~11MB of data, so large volumes are unnecessary.
Reduce the test volume size to 20MB so the critical threshold is
reached at ~16MB and the blob file is much smaller.
This cuts ~5–6s per test.
Set `--tablet-load-stats-refresh-interval-in-seconds=1` for this module’s
clusters, applicable to all tests. This significantly reduces runtime
for the slowest cases:
- test_reject_split_compaction: 75.62s -> 23.04s
- test_split_compaction_not_triggered: 69.36s -> 22.98s
In the following commits we will need to compare the set of released
nodes before and after reload of raft topology state. Moving the logic
that calculates such a set to a separate function will make it easier to
do.
Now the system_keyspace reference is used internally by the client code
itself; there is no need to encourage other services to abuse it.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
There are several places that want to get the last state ID, and for that
they make raft_group0_client export a system_keyspace reference.
This patch adds a helper method to provide the needed ID.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
There's a validation that tables used by group0 commands are marked
with the respective prop. For it, the caller code needs to provide a
database reference, and it gets one via the client -> system_keyspace chain.
There's a more explicit way -- get the data_dictionary via the proxy.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Some code in the worker needs to mess with system_keyspace&. While
the worker object holds a reference to it, the code gets one via
group0 -> group0_client, which is a bit of an overkill.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
This patch set eliminates the special audit info guard previously used for batch statements
and simplifies the audit::inspect function by returning quickly if audit is not needed.
It saves around 300 instructions on a request's hot path.
Related: https://github.com/scylladb/scylladb/issues/27941
Backport: no, not a bug
Closes scylladb/scylladb#28326
* github.com:scylladb/scylladb:
audit: replace batch dynamic_cast with static_cast
audit: eliminate dynamic_cast to batch_statement in inspect
audit: cql: remove create_no_audit_info
audit: add batch bool to audit_info class
Currently it grabs one from the database, but it's not nice to use the
database as a config/sched-group provider.
This PR passes the scheduling group to use for sending hints via manager
which, in turn, gets one from proxy via its config (proxy config already
carries configuration for hints manager). The group is initialized in
main.cc code and is set to the maintenance one (nowadays it's the same
as streaming group).
This will help split the streaming scheduling group into more
elaborate groups under the maintenance supergroup: SCYLLADB-351
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Closes scylladb/scylladb#28358
consistent_cluster_management has been deprecated since scylla-5.2 and is no
longer used by ScyllaDB, so it should not be used by tests either.
Closes scylladb/scylladb#28340
These two streams mostly play together. The former provides an input_stream that reads from in-memory temporary buffers, the latter wraps it to limit the size of the provided temporary buffers. Both are used to test the contiguous data consumer; the buffer_input_stream also has a caller in the sstables reversing reader.
This PR removes the buffer_input_stream in favor of seastar memory_data_source, and moves the limiting_input_stream into test/lib.
Enhancing testing code, not backporting
Closes scylladb/scylladb#28352
* github.com:scylladb/scylladb:
code: Move limiting data source to test/lib
util: Simplify limiting_data_source API
util: Remove buffer_input_stream
test: Use seastar::util::temporary_buffer_data_source in data consumer test
sstables: Use seastar::util::as_input_stream() in mx reader
This compaction group testing is useless because the machinery for it
to work was removed. This was useful in the early tablet days, where
we wanted to test compaction groups directly. Today groups are stressed
and tested on every tablet test.
I see a ~40% reduction in run time after this patch, since database_test is
one of the most (if not the most) time-consuming in the boost suite.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Closes scylladb/scylladb#28324
db: view: refactor semaphore usage in create/drop view paths
Refactor the construction and usage of semaphore units in the create and drop view flows.
The previous semaphore handling was hard to follow (as noted while working on https://github.com/scylladb/scylladb/pull/27929), so this change restructures unit creation and movement to follow a clearer and symmetric pattern across shards.
The semaphore usage model is now documented with a detailed in-code comment to make the intended behavior and invariants explicit.
As part of the refactor, the control flow is modernized by replacing continuation-based logic with coroutine-style code, improving readability and maintainability.
Fixes: https://scylladb.atlassian.net/browse/SCYLLADB-250
backport: not required, this is a refactor
Closes scylladb/scylladb#28093
* github.com:scylladb/scylladb:
db: view: extend try/catch scope in handle_create_view_local -- the try/catch region is extended to cover step functions and inner helpers, which may throw or abort during view creation. This change is safe because we are just swallowing more parts that may throw due to semaphore abortion or any other abort request, and it doesn't change the logic
db: view: refine create/drop coroutine signatures -- refactor the create/drop coroutine interfaces to accept parameters as const references, enabling a clearer workflow and safer data flow.
db: view: switch from continuations to coroutines -- refactor the flow and style of create and drop view to use coroutines instead of continuations. This simplifies the logic, improves readability, and makes the code easier to maintain and extend. This commit also utilizes the get_view_builder_units function that was added in the previous commit, and introduces a new alias for the optional unit type for simpler and more readable functions that use it
db: view: introduce helper to acquire or reuse semaphore units -- introduce a small helper that acquires semaphore units when needed or reuses units provided by the caller. This centralizes semaphore handling, simplifies the current logic, and enables refactoring the view create/drop path to a coroutine-based implementation instead of continuation-style code.
db: view: add detailed comments on semaphore bookkeeping and serialized create/drop on shard 0
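A hedged sketch of the acquire-or-reuse helper described in the commits above, using Seastar semaphores (names and signatures are illustrative, not the actual ScyllaDB helper):
```
#include <seastar/core/coroutine.hh>
#include <seastar/core/semaphore.hh>
#include <optional>

// Alias mirroring the "optional units" idea mentioned in the commits above.
using opt_units = std::optional<seastar::semaphore_units<>>;

// Acquire units when the caller didn't pass any; reuse caller-provided
// units otherwise, so the semaphore is never acquired twice on one path.
seastar::future<seastar::semaphore_units<>>
acquire_or_reuse_units(seastar::semaphore& sem, opt_units provided) {
    if (provided) {
        co_return std::move(*provided);
    }
    co_return co_await seastar::get_units(sem, 1);
}
```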
Previously, we wanted to make minimal changes with regards to the new
unified auth cache. However, as a result, some calls on the hot path
were missed. Now we have switched the underlying find_record call
to use the cache. Since caching is now at a lower level, we bring
back the original code.
Since every write-type auth statement takes group0_guard at the beginning,
we hold read_apply_mutex and cannot have a running raft apply during our
operation. Therefore, the auth cache and internal CQL reads return the same,
consistent results. This makes it safe to read via cache instead of internal
CQL.
LDAP is an exception, but it is eventually consistent anyway.
The partition snapshot reader lives in mutation/; however, mutation/ is a
lower-level concept than a mutation reader. The next best place for this
reader is the replica/ directory, where the memtable, its main user,
also lives.
Also move the code to the replica namespace.
test/boost/mvcc_test.cc includes this header but doesn't use anything
from it. Instead of updating the include path, just drop the unused
include.
Filter the content of sstable(s), including or excluding the specified partitions. Partitions can be provided on the command line via `--partition`, or in a file via `--partitions-file`. Produces one output sstable per input sstable -- if the filter selects at least one partition in the respective input sstable. Output sstables are placed in the path provided via `--output-dir`. Use `--merge` to filter all input sstables combined, producing one output sstable.
Fixes: #13076
New functionality, no backport.
Closes scylladb/scylladb#27836
* github.com:scylladb/scylladb:
tools/scylla-sstable: introduce filter command
tools/scylla-sstable: remove --unsafe-accept-nonempty-output-dir
tools/scylla-sstable: make partition_set ordered
tools/scylla-stable: remove unused boost/algorithm/string.hpp include
Square brackets are generally not allowed in URIs, while pytest uses them
in the test name to show that there were additional parameters for the
same test. When such a test fails, Jenkins shows the directory correctly,
but attempting to download only that directory fails because of the
square brackets in the URI. This change substitutes round brackets for
the square ones.
Closes scylladb/scylladb#28226
Yet another barrier-failure scenario exists in the `write_both_read_new`
state. When the barrier fails, the tablet is expected to transition
to `cleanup_target`, but because barrier execution is asynchronous,
the cleanup transition can be skipped entirely and the tablet may
continue forward instead.
Both `write_both_read_new` and `cleanup_target` modify read and write
selectors. In this situation, a barrier is required, and transitioning
directly between these states without one is unsafe.
Introduce an intermediate `write_both_read_old_fallback_cleanup`
state that modifies only a read selector and can be entered without
a barrier (there is no need to wait for all nodes to start using the
"new" read selector). From there, the tablet can proceed to `cleanup_target`,
where the required barriers are enforced.
This also avoids changing both selectors in a single step. A direct
transition from `write_both_read_new` to `cleanup_target` updates
both selectors at once, which can leave coordinators using the old
selector for writes and the new selector for reads, causing reads to
miss preceding writes.
By routing through the fallback state, selectors are updated in
order—read first, then write—preserving read-after-write correctness.
In both `streaming` and `rebuild_repair` stages, the read/write
selectors are unchanged compared to the preceding stage. Because
entry into these stages is already fenced by a barrier from
`write_both_read_old`, and `cleanup_target` itself requires a
barrier, rolling back directly to `cleanup_target` is safe without
an additional barrier.
A similar barrier-failure scenario exists in the `write_both_read_old`
state. If the barrier fails, the tablet is expected to transition to
`cleanup_target`, but due to the barrier being evaluated asynchronously
the cleanup path can be skipped and the tablet may continue forward
instead.
In `write_both_read_old`, we already switched group0 writes from old
to both, while the barrier may not have executed yet. As a result,
nodes can be at most one step apart (some still use old, others use
both).
Transitioning to `cleanup_target` reverts the write selector back to
old. Nodes still differ by at most one step (old vs both), so the
transition is safe without an additional barrier.
This prevents cleanup from being skipped while keeping selector semantics
and barrier guarantees intact.
When a tablet is in `allow_write_both_read_old`, progressing normally
requires a barrier. If this first barrier fails, the tablet is supposed
to transition to `cleanup_target` on the next iteration:
```
case locator::tablet_transition_stage::allow_write_both_read_old:
    if (action_failed(tablet_state.barriers[trinfo.stage])) {
        if (check_excluded_replicas()) {
            transition_to_with_barrier(locator::tablet_transition_stage::cleanup_target);
            break;
        }
    }
    if (do_barrier()) {
        ...
    }
    break;
```
That transition itself requires a barrier, which is executed asynchronously.
Because the barrier runs in the background, the cleanup logic is skipped in
that iteration.
On the following iteration, `action_failed(barriers[stage])` no longer
returns true, since the node that caused the original barrier failure
has been excluded. The barrier is therefore observed as successful,
and the tablet incorrectly proceeds to the next stage instead of entering
`cleanup_target`.
Since `cleanup_target` does not modify read/write selectors, the transition
can be done safely without a barrier, simplifying the state machine and
ensuring cleanup is not skipped.
Without it, the tablet would still eventually reach `cleanup_target` via
`write_both_read_old` and `streaming`, but that path is unnecessary.
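A minimal sketch of the resulting handling, reusing the names from the snippet above (the barrier-free transition_to() helper is an assumption for illustration):
```
case locator::tablet_transition_stage::allow_write_both_read_old:
    if (action_failed(tablet_state.barriers[trinfo.stage])) {
        if (check_excluded_replicas()) {
            // cleanup_target leaves the read/write selectors untouched, so
            // no barrier is needed; the transition happens synchronously in
            // this iteration and can no longer be skipped.
            transition_to(locator::tablet_transition_stage::cleanup_target);
            break;
        }
    }
    if (do_barrier()) {
        ...
    }
    break;
```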
The try/catch region is extended to cover step functions and inner helpers,
which may throw or abort during view creation.
This change is safe because we are just swallowing more parts that may throw due to semaphore abortion
or any other abort request, and it doesn't change the logic.
Refactor the flow and style of create and drop view to use coroutines instead of continuations.
This simplifies the logic, improves readability, and makes the code
easier to maintain and extend. This commit also utilizes the get_view_builder_units function that was added in the previous commit.
This commit also introduces a new alias for the optional unit type, for simpler and more readable functions that use this type.
Introduce a small helper that acquires semaphore units when needed or
reuses units provided by the caller.
This centralizes semaphore handling, simplifies the current logic, and
enables refactoring the view create/drop path to a coroutine-based
implementation instead of continuation-style code.
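A minimal sketch of what such a helper can look like (names and signature are illustrative, not the actual Scylla API):
```
#include <seastar/core/coroutine.hh>
#include <seastar/core/future.hh>
#include <seastar/core/semaphore.hh>
#include <optional>
#include <utility>

using units = seastar::semaphore_units<seastar::semaphore_default_exception_factory>;
using opt_units = std::optional<units>;

// Acquire fresh units from `sem` unless the caller already holds some,
// in which case the caller-provided units are reused (moved out).
seastar::future<units> get_or_reuse_units(seastar::semaphore& sem, opt_units provided) {
    if (provided) {
        co_return std::move(*provided);
    }
    co_return co_await seastar::get_units(sem, 1);
}
```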
Commit 0156e97560 ("storage_proxy: cas: reject for
tablets-enabled tables") marked a bunch of LWT tests as
XFAIL with tablets enabled, pending resolution of #18066.
But since that event is now in the past, we undo the XFAIL
markings (or in some cases, use an any-keyspace fixture
instead of a vnodes-only fixture).
Ref #18066.
Closes scylladb/scylladb#28336
Only two tests use it now -- the limiting-data-source-test itself and a test
that validates the continuous_data_consumer template.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The source maintains "limit generator" -- a function that returns the
maximum size of bytes to return from the next buffer.
Currently all callers just return constant numbers from it. Passing a
function that returns non-constant one can, probably, be used for a
fuzzy test, but even the limiting-data-source-test itself doesn't do it,
so what's the point...
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
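For reference, the capping that the limiting source performs boils down to something like this (a minimal sketch against the seastar data_source API, with a constant limit instead of a generator):
```
#include <seastar/core/coroutine.hh>
#include <seastar/core/iostream.hh>
#include <seastar/core/temporary_buffer.hh>

// Cap every buffer returned from the underlying source at a constant
// max_size, keeping the remainder for the next get().
class limiting_source : public seastar::data_source_impl {
    seastar::data_source _underlying;
    seastar::temporary_buffer<char> _pending;
    size_t _max_size;
public:
    limiting_source(seastar::data_source ds, size_t max_size)
        : _underlying(std::move(ds)), _max_size(max_size) {}
    virtual seastar::future<seastar::temporary_buffer<char>> get() override {
        if (_pending.empty()) {
            _pending = co_await _underlying.get();
        }
        if (_pending.size() <= _max_size) {
            co_return std::move(_pending);
        }
        auto chunk = _pending.share(0, _max_size);
        _pending.trim_front(_max_size);
        co_return chunk;
    }
};
```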
The test creates buffer_data_source_impl and wraps it with limiting data
source. The former data_source duplicates the functionality of the
existing seastar temporary_buffer_data_source.
This patch makes the test code use seastar facility. The
buffer_data_source_impl will be removed soon.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Right now the code uses the make_buffer_input_stream() helper that creates
an input stream with buffer_data_source_impl inside, which, in turn,
provides the data_source_impl API over a single temporary_buffer.
Seastar has the very same facility, so it's better to use it. Eventually
the buffer_data_source_impl will be removed.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
We don't need a special guard value; it is
only filled for batch statements, for
which we can simply ignore the value.
Not having a special value allows us to return
fast when audit is not enabled.
The user can now discover the general explanation of repair when reading
about incremental repair, useful if they don't know what repair is.
The user can now discover incremental repair while reading the generic
repair procedure document.
This PR refactors the streaming subsystem to support direct download of fully contained sstables. Instead of streaming these files, they are downloaded and attached directly to their corresponding tables. This approach reduces overhead, simplifies logic, and improves efficiency. Expected node-scope restore performance improvement: ~4 times faster in the best-case scenario, when all sstables are fully contained.
1. Add storage options field to sstable: introduce a data member to store storage options, enabling distinction between local and object storage types.
2. Add method to create component source: extend the storage interface with a public method to create a data_source for any sstable component.
3. Inline streamer instance creation: remove make_sstable_streamer and inline its usage to allow different sets of arguments at call sites.
4. Skip streaming empty sstable sets: avoid unnecessary streaming calls when the sstable set is empty.
5. Enable direct download of contained sstables: replace streaming of fully contained sstables with direct download, attaching them to their corresponding table.
Fixes: https://scylladb.atlassian.net/browse/SCYLLADB-200
Refs: https://github.com/scylladb/scylladb/issues/23908
No need to backport as this code targets 2026.2 release (for tablet-aware restore)
Closes scylladb/scylladb#26834
* github.com:scylladb/scylladb:
tests: reuse test_backup_broken_streaming
streaming: enable direct download of contained sstables
storage: add method to create component source
streaming: keep sharded database reference on tablet_sstable_streamer
streaming: skip streaming empty sstable sets
streaming: inline streamer instance creation
tests: fix incorrect backup/restore test flow
Previously we only inspected std::system_error inside
std::nested_exception to support a specific TLS-related failure
mode. However, nested exceptions may contain any type, including
other restartable (retryable) errors. This change unwraps one
nested exception per iteration and re-applies all known handlers
until a match is found or the chain is exhausted.
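A minimal sketch of the unwrapping loop described above (the handler set and names are illustrative):
```
#include <exception>

// Illustrative: returns true if ep matches a known restartable error.
bool try_known_handlers(const std::exception_ptr& ep);

// Walk the nested-exception chain: at each level re-apply all known
// handlers; on no match, unwrap one nested level and try again, until
// a handler matches or the chain is exhausted.
bool is_restartable(std::exception_ptr ep) {
    while (ep) {
        if (try_known_handlers(ep)) {
            return true;
        }
        try {
            std::rethrow_exception(ep);
        } catch (const std::nested_exception& nested) {
            ep = nested.nested_ptr();   // unwrap one level per iteration
        } catch (...) {
            ep = nullptr;               // not nested: chain exhausted
        }
    }
    return false;
}
```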
Closes scylladb/scylladb#28240
In this PR, we fix two bugs present in `boost_test_tree_lister` that
affected the output of `--list_json_content` added in
scylladb/scylladb@afde5f668a:
* The labels test units use were duplicated in the output.
* If a test suite or a test file didn't contain any tests, it wasn't
listed in the output.
Refs scylladb/scylladb#25415
Backport: not needed. The code hasn't been used anywhere yet.
Closes scylladb/scylladb#28255
* github.com:scylladb/scylladb:
test/lib/boost_test_tree_lister.cc: Record empty test suites
test/lib/boost_test_tree_lister.cc: Deduplicate labels
Finishing the deprecation of the skip_mode function in favor of
pytest.mark.skip_mode. This PR only cleans up and migrates leftover
tests that still use the old skip_mode style.
Closes scylladb/scylladb#28299
Move state management from dns_connection_factory into the state class
itself, to encapsulate its internal state and stop managing it from the
`dns_connection_factory`.
Instead of streaming fully contained sstables, download them directly
and attach them to their corresponding table. This simplifies the
process and avoids unnecessary streaming overhead.
The class in question has internal implementations of in-memory data_sink_impl and data_source_impl. Seastar has generic implementations of the same facilities. From the "code re-use" perspective it makes sense to use the seastar ones for both; TODOs in the Scylla code support that.
Using newer seastar facilities, not backporting.
Closes scylladb/scylladb#28321
* github.com:scylladb/scylladb:
sstable: Replace buffer_data_sink_impl with seastar::util::basic_memory_data_sink
sstables: Use seastar::util::as_input_stream() and remove buffer_data_source_impl
There are two checks for live endpoints performed in test_gossiper.py,
but one of them somehow sits in test_gossiper_unreachable_endpoints.
This patch moves the live endpoints check into the live endpoints test.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Closes scylladb/scylladb#28224
This series implements the rescoring algorithm.
Index options that enable this functionality were introduced in an earlier PR, https://github.com/scylladb/scylladb/pull/28165.
When a vector index has quantization enabled, Vector Store uses a reduced vector representation to save memory, but this may degrade the correctness of ANN queries. For a quantized index we can enable the rescoring algorithm, which recalculates the similarity score from the full vector representation stored in Scylla and reorders the returned result set.
It also works with oversampling: we fetch more candidates from Vector Store, rescore them in Scylla, and return only the requested number of results.
Example:
Creating a Vector Index with Rescoring
```sql
-- Create a table with a vector column
CREATE TABLE ks.products (
    id int PRIMARY KEY,
    embedding vector<float, 128>
);

-- Create a vector index with rescoring enabled
CREATE INDEX products_embedding_idx ON ks.products (embedding)
USING 'vector_index'
WITH OPTIONS = {
    'similarity_function': 'cosine',
    'quantization': 'i8',
    'oversampling': '2.0',
    'rescoring': 'true'
};
```
1. **Quantization** (`i8`) compresses vectors in the index, reducing memory usage but introducing precision loss in distance calculations
2. **Oversampling** (`2.0`) retrieves 2× more candidates than requested from the vector store (e.g., `LIMIT 10` fetches 20 candidates)
3. **Rescoring** (`true`) recalculates similarity scores using full-precision (`f32`) vectors from the base table and re-ranks results
Query example:
```sql
-- Find 10 most similar products
SELECT id, similarity_cosine(embedding, [0.1, 0.2, ...]) AS score
FROM ks.products
ORDER BY embedding ANN OF [0.1, 0.2, ...]
LIMIT 10;
```
With rescoring enabled, the query:
1. Fetches 20 candidates from the quantized index (due to oversampling=2.0)
2. Reads full-precision embeddings from the base table
3. Recalculates similarity scores with full precision
4. Re-ranks and returns the top 10 results
In this implementation we use the CQL similarity function implementation to calculate the new score values and use them in post-query ordering. We add that column manually to the selection, but it has to be removed from the final response.
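A back-of-the-envelope sketch of that post-query flow (illustrative; the real code operates on Scylla's result_set and selection machinery):
```
#include <algorithm>
#include <cstddef>
#include <vector>

struct row {
    float score;   // recalculated full-precision similarity
    // ... user-selected columns ...
};

// Sort the oversampled candidates by the hidden similarity column and
// trim back to the user-requested limit; the hidden column itself is
// dropped before the rows are returned to the client.
std::vector<row> rescore_and_trim(std::vector<row> candidates, size_t limit) {
    std::sort(candidates.begin(), candidates.end(),
              [] (const row& a, const row& b) { return a.score > b.score; });
    if (candidates.size() > limit) {
        candidates.resize(limit);
    }
    return candidates;
}
```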
Follow-up https://github.com/scylladb/scylladb/pull/28165
Fixes https://scylladb.atlassian.net/browse/SCYLLADB-83
New feature - doesn't need backport.
Closes scylladb/scylladb#27769
* github.com:scylladb/scylladb:
vector_index: rescoring: Fetch oversampled rows
vector_index: rescoring: Sort by similarity column
select_statement: Modify `needs_post_query_ordering` condition
vector_index: rescoring: Add hidden similarity score column
vector_index: Refactor extracting ANN query information
The storage_proxy::stop() is not called by main (it is commented out due to #293), so the corresponding message injection is never hit. When the test releases paxos_state_learn_after_mutate, shutdown may already be in progress or even completed by the time we try to trigger the storage_proxy::stop injection, which makes the test flaky.
Fix this by completely removing the storage_proxy::stop injection. The injection is not required for test correctness. Shutdown must wait for the background LWT learn to finish, which is released via the paxos_state_learn_after_mutate injection. The shutdown process blocks on in-flight HTTP requests through seastar::httpd::http_server::stop and its _task_gate, so the HTTP request that releases paxos_state_learn_after_mutate is guaranteed to complete before the node is shut down.
Fixes scylladb/scylladb#28260
backport: 2025.4, the `test_lwt_shutdown` test was introduced in this version
Closes scylladb/scylladb#28315
* https://github.com/scylladb/scylladb:
storage_proxy: drop stop() method
test_lwt_shutdown: fix flakiness by removing storage_proxy::stop injection
It was observed twice that the test times out in debug mode.
Fix by increasing the timeout.
The test never expects a timeout, so increasing it won't increase
the test duration.
Fixes #28028
Closes scylladb/scylladb#28272
- Pass pytest request fixture into coro_task (used for scylla_tmp_dir
and core dump path)
- Rename duplicate `test_sstable_summary` that runs sstable-index-cache
to `test_sstable_index_cache` so both tests are collected
Refs https://github.com/scylladb/scylladb/issues/22501
Closes scylladb/scylladb#28286
The former accumulates sstable writer writes into a vector of temporary
buffers. Seastar has a generic memory data sink that provides a
sink accumulating a stream of bytes into any container.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The latter is used to wrap a vector of buffers into an input_stream.
Seastar already provides the very same functionality with the
convenient as_input_stream() helper.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
In 8df61f6d99 we changed the requirements for creating materialized
views and MV-based indexes - instead of requiring the
rf_rack_valid_keyspaces flag to be set, we now require the keyspace to
be RF-rack-valid at the time of creation, and it is enforced to remain
RF-rack-valid while the MV exists. This validation is done in the cql
create view/index statements.
The same should be done also for alternator - when creating a table with
GSI or LSI, or when adding a GSI to an existing table, previously we
required the flag rf_rack_valid_keyspaces to be set. Now we change it to
instead check if the keyspace is RF-rack-valid, and if not the operation
fails with an appropriate error.
Fixes https://github.com/scylladb/scylladb/issues/28214
backport to 2025.4 to add RF-rack-valid enforcements in alternator
Closes scylladb/scylladb#28154
* github.com:scylladb/scylladb:
locator: document the exception type of assert_rf_rack_valid_keyspace
alternator: don't require rf_rack flag for indexes, validate instead
When read or write operations are performed on a DC with RF=0 with LOCAL_QUORUM
or LOCAL_ONE consistency level, Cassandra throws an `Unavailable` exception.
Scylla allowed such read operations and failed write operations with a cryptic
"broken promise" error. This occurred because the initial availability
check passed (a quorum of 0 requires 0 replicas), but execution failed
later when no replicas existed to process the mutation.
This patch adds an explicit RF=0 validation for LOCAL_ONE and LOCAL_QUORUM that
throws before attempting operation execution.
The change also requires `test_query_dc_with_rf_0_does_not_crash_db` to be
upgraded. This testcase was asserting a somewhat similar scenario, but wasn't
taking into account the whole matrix of combinations:
- scenarios: successful vs unsuccessful operation outcome
- local consistency levels: LOCAL_QUORUM & LOCAL_ONE
- operations: SELECT (read) & INSERT (write)
and so it's been extended to cover both the pre-existing and the current issues
and the whole matrix of combinations.
Fixes: scylladb/scylladb#27893
A minor change, no need to backport.
Closes scylladb/scylladb#27894
* github.com:scylladb/scylladb:
db: fail reads and writes with local consistencty level to a DC with RF=0
db: consistency_level: split `local_quorum_for()`
db: consistency_level: fix nrs -> nts abbreviation
storage_proxy::stop() is not called by main (it is commented out due to #293),
so the corresponding message injection is never hit. When the test releases
paxos_state_learn_after_mutate, shutdown may already be in progress or even
completed by the time we try to trigger the storage_proxy::stop injection,
which makes the test flaky.
Fix this by completely removing the storage_proxy::stop injection.
The injection is not required for test correctness. Shutdown must wait for the
background LWT learn to finish, which is released via the
paxos_state_learn_after_mutate injection.
The shutdown process blocks on in-flight api HTTP requests through
seastar::httpd::http_server::stop and its _task_gate, so the
shutdown will not prevent the HTTP request that released the
paxos_state_learn_after_mutate from completing successfully.
Fixes scylladb/scylladb#28260
During the rewrite, --extra-scylla-cmdline-options was missed and was not passed to the tests that use pytest. As a result, there was no way to pass these parameters to Scylla via the command line; the tests themselves were not affected because they were using the parameters from the yaml file.
This PR fixes the issue, so it will be easier to modify the Scylla start parameters without modifying code.
No backport needed, only framework enhancement.
Closes scylladb/scylladb#28156
* github.com:scylladb/scylladb:
test.py: do not crash when there is no boost log
test.py: pass correctly extra cmd line arguments
Before this change, the test function `_verify_tasks_processed_metrics`
verified that after service level reconfiguration, a given number of
`scylla_scheduler_tasks_processed` were processed by a given scheduling
group. Moreover, the check verified that another scheduling group
didn't process a high number of requests. The second check was vulnerable
to flakiness, because sometimes additional load caused extensive work
in the second scheduling group (e.g. password hashing in `sl:driver`
due to new connections being created).
To avoid test failures, this commit changes which metric is verified:
instead of `scylla_scheduler_tasks_processed`, the metric
`scylla_transport_cql_requests_count` is checked. This prevents similar
problems, because there is no reason for a high number of
requests to be processed by the second scheduling group. Moreover,
it allows decreasing the number of requests that are sent for
verification, and thus speeds up the test.
Fixes: scylladb/scylladb#27715
Closes scylladb/scylladb#28318
In this PR we add a basic implementation of the strongly-consistent tables:
* generate raft group id when a strongly-consistent table is created
* persist it into system.tables table
* start raft groups on replicas when a strongly-consistent tablet_map reaches them
* add strongly-consistent version of the storage_proxy, with the `query` and `mutate` methods
* the `mutate` method submits a command to the tablets raft group, the query method reads the data with `raft.read_barrier()`
* strongly-consistent versions of the `select_statement` and `modification_statement` are added
* a basic `test_strong_consistency.py/test_basic_write_read` is added to check that we can write and read data in a strongly consistent fashion.
Limitations:
* for now the strongly consistent tables can have tablets only on shard zero. This is because we (ab/re) use the existing raft system tables which live only on shard0. In the next PRs we'll create separate tables for the new tablets raft groups.
* No Scylla-side proxying - the test has to figure out who is the leader and submit the command to the right node. This will be fixed separately.
* No tablet balancing -- migration/split/merges require separate complicated code.
The new behavior is hidden behind `STRONGLY_CONSISTENT_TABLES` feature, which is enabled when the `STRONGLY_CONSISTENT_TABLES` experimental feature flag is set.
Requirements, specs and general overview of the feature can be found [here](https://scylladb.atlassian.net/wiki/spaces/RND/pages/91422722/Strong+Consistency). Short term implementation plan is [here](https://docs.google.com/document/d/1afKeeHaCkKxER7IThHkaAQlh2JWpbqhFLIQ3CzmiXhI/edit?tab=t.0#heading=h.thkorgfek290)
One can check the strongly consistent writes and reads locally via cqlsh:
scylla.yaml:
```
experimental_features:
- strongly-consistent-tables
```
cqlsh:
```
CREATE KEYSPACE IF NOT EXISTS my_ks WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 1} AND tablets = {'initial': 1} AND consistency = 'local';
CREATE TABLE my_ks.test (pk int PRIMARY KEY, c int);
INSERT INTO my_ks.test (pk, c) VALUES (10, 20);
SELECT * FROM my_ks.test WHERE pk = 10;
```
Fixes SCYLLADB-34
Fixes SCYLLADB-32
Fixes SCYLLADB-31
Fixes SCYLLADB-33
Fixes SCYLLADB-56
backport: no need
Closes scylladb/scylladb#27614
* https://github.com/scylladb/scylladb:
test_encryption: capture stderr
test/cluster: add test_strong_consistency.py
raft_group_registry: disable metrics for non-0 groups
strong consistency: implement select_statement::do_execute()
cql: add select_statement.cc
strong consistency: implement coordinator::query()
cql: add modification_statement
cql: add statement_helpers
strong consistency: implement coordinator::mutate()
raft.hh: make server::wait_for_leader() public
strong_consistency: add coordinator
modification_statement: make get_timeout public
strong_consistency: add groups_manager
strong_consistency: add state_machine and raft_command
table: add get_max_timestamp_for_tablet
tablets: generate raft group_id-s for new table
tablet_replication_strategy: add consistency field
tablets: add raft_group_id
modification_statement: remove virtual where it's not needed
modification_statement: inline prepare_statement()
system_keyspace: disable tablet_balancing for strongly_consistent_tables
cql: rename strongly_consistent statements to broadcast statements
This patch adds links to the Vector Search documentation that is hosted
together with Scylla Cloud docs to the CQL documentation.
It also makes the note about supported capabilities consistent and
removes the experimental label, as the feature is GAed.
Fixes: SCYLLADB-371
Closes scylladb/scylladb#28312
Filter the content of sstable(s), including or excluding the specified
partitions. Partitions can be provided on the command line via
`--partition`, or in a file via `--partitions-file`.
Produces one output sstable per input sstable -- if the filter selects
at least one partition in the respective input sstable.
Output sstables are placed in the path provided via `--output-dir`.
Use `--merge` to filter all input sstables combined, producing one
output sstable.
The function assert_rf_rack_valid_keyspace uses the exception type
std::invalid_argument when the RF-rack validation fails. Document it and
change all callers to catch this specific exception type when checking
for RF-rack validation failures, so that other exception types can be
propagated properly.
In 8df61f6d99 we changed the requirements for creating materialized
views and MV-based indexes - instead of requiring the
rf_rack_valid_keyspaces flag to be set, we now require the keyspace to
be RF-rack-valid at the time of creation, and it is enforced to remain
RF-rack-valid while the MV exists. This validation is done in the cql
create view/index statements.
The same should be done also for alternator - when creating a table with
GSI or LSI, or when adding a GSI to an existing table, previously we
required the flag rf_rack_valid_keyspaces to be set. Now we change it to
instead check if the keyspace is RF-rack-valid, and if not the operation
fails with an appropriate error.
So far, with oversampling, the extended set of keys was returned from VS,
but the query to the base table was still limited by the query `limit`.
Now, for rescoring, we want to fetch rows for all the keys returned from VS.
Later, however, we need to restore the command limit, to trim the result_set accordingly.
For non-rescoring scenarios we directly trim the key set returned from VS if it happens to exceed the query limit.
With this change rescoring validation tests (except `no_nulls_in_rescored_results`) pass fully.
Fixes https://scylladb.atlassian.net/browse/SCYLLADB-83
This patch implements the second part of rescoring - ordering results by the similarity column added in an earlier patch.
For this purpose we define `_ordering_comparator`, which enables the pre-existing post-query ordering functionality.
However, no additional test passes yet, as they all include oversampling, which will be the subject of the following patches.
Our plan for rescoring is to use the existing post-query ordering mechanism to sort (and trim) the result_set by the similarity column.
For the general SELECT case this ordering is permitted only for queries with IN on the partition key and an ORDER BY, which is checked in `needs_post_query_ordering`.
Recently this check was overridden for ANN queries in https://github.com/scylladb/scylladb/pull/28109 to enable IN queries handled by VS without excessive post-processing.
In this patch we revert that change - the ANN case will be handled by the general check.
However, we change the condition: post-processing is enabled whenever `_ordering_comparator` is set.
In the current implementation `_ordering_comparator` is created only in `select_statement::prepare` with `get_ordering_comparator`,
under the same conditions as were checked in `needs_post_query_ordering`, so this change should be transparent for general SELECT.
For ANN queries it is also not set (yet), so it will not influence ANN filtering, but we confirm that this functionality still works
by adding a filtering test: `test/vector_search/filter_test.cc::vector_store_client_test_filtering_ann_cql`.
Rescoring ordering for ANN queries will be enabled when we add `_ordering_comparator` in the following patch.
Rescoring consists of recalculating the similarity score and reordering results based on it.
In this patch we add calculation of the similarity score as a hidden (non-serialized) column; the following patch will add reordering.
Normal ordering uses `add_column_for_post_processing`, however this works only for regular columns, not functions.
So we create the column together with the user-requested columns (this also forces the use of `selection_with_processing`) and hide it later.
This also requires special handling for the 'SELECT *' case - we need to manually add all columns before adding the similarity column.
In case the user already asks for the similarity score in the SELECT clause, this value will be calculated twice - this should be optimized in future patches.
When setting up coredump handling, if there are old mounts in a deleted state (e.g. from an older installation),
systemd might fail to activate the new `.mount` unit properly because it assumes the path is already mounted.
Explicitly unmount `/var/lib/systemd/coredump` before proceeding with the setup to ensure a clean state.
Fix: scylladb/scylla-enterprise#5692
Closes scylladb/scylladb#28300
Allows other topology operations to execute while tablets are being
drained on decommission. In particular, bootstrap on scale-out. This
is important for elasticity.
Allows multiple decommission/removenode to happen in parallel, which
is important for efficiency.
Flow of decommission/removenode request:
1) pending and paused, has tablet replicas on target node.
Tablet scheduler will start draining tablets.
2) No tablets on target node, request is pending but not paused
3) Request is scheduled, node is in transition
4) Request is done
Nodes are considered draining as soon as there is a leave or remove
request on them. If there are tablet replicas present on the target
node, the request is in a paused state and will not be picked by
topology coordinator. The paused state is computed from topology state
automatically on reload.
When request is not paused, its execution starts in
write_both_read_old state. The old tablet_draining state is not
entered (it's deprecated now).
Tablet load balancing will yield the state machine as soon as some
request is no longer paused and ready to be scheduled, based on
standard preemption mechanics.
Fixes #21452
Closes scylladb/scylladb#24129
* https://github.com/scylladb/scylladb:
docs: Document parallel decommission and removenode and relevant task API
test: Add tests for parallel decommission/removenode
test: util: Introduce ensure_group0_leader_on()
test: tablets: Check that there are no migrations scheduled on draining nodes
test: lib: topology_builder: Introduce add_draining_request()
topology_coordinator, tablets: Fail draining operations when tablet migration fails due to critical disk utilization
tablets: topology_coordinator: Refactor to propagate reason for migration rollback
tablet_allocator: Skip co-location on draining nodes
node_ops: task_manager_module: Populate entity field also for active requests
tasks: node_ops: Put node id in the entity field
tasks, node_ops: Unify setting of task_stats in get_status() and get_stats()
topology: Protect against empty cancelation reason
tasks, topology: Make pending node operations abortable
doc: topology-over-raft.md: Fix diagram for replacing, tablet_draining is not engaged
raft_topology, tablets: Drain tablets in parallel with other topology operations
virtual_tables: Show draining and excluded fields in system.cluster_status and system.load_by_node
locator: topology: Add "draining" flag to a node
topology_coordinator: Extract generate_cancel_request_update()
storage_service: Drop dependency in topology_state_machine.hh in the header
locator: Extract common code in assert_rf_rack_valid_keyspace()
topology_coordinator, storage_service: Validate node removal/decommission at request submission time
This flag was added to operations which have an --output-dir
command-line argument. These operations write sstables and need a
directory to write them to. Back in the numeric-generation world this
posed a problem: if the directory contained any sstable, a generation
clash was almost guaranteed, because each scylla-sstable command
invocation would start output generations from 1. To avoid this, an empty
output directory was a requirement, with the
--unsafe-accept-nonempty-output-dir allowing for a force-override.
Now, in the timeuuid generation days, all this is not necessary anymore:
generations are unique, so it is not a problem if the output directory
already contains sstables: the probability of a generation clash is almost
0. Even if it happens, the tool will simply fail to write the new
sstable with the clashing generation.
Remove this historic relic of a flag and the related logic; it is just a
pointless nuisance nowadays.
When read or write operations are performed on a DC with RF=0 with LOCAL_QUORUM
or LOCAL_ONE consistency level, Cassandra throws an `Unavailable` exception.
Scylla allowed such read operations and failed write operations with a cryptic
"broken promise" error. This occurred because the initial availability
check passed (a quorum of 0 requires 0 replicas), but execution failed
later when no replicas existed to process the mutation.
This patch adds an explicit RF=0 validation for LOCAL_ONE and LOCAL_QUORUM that
throws before attempting operation execution.
The change also requires `test_query_dc_with_rf_0_does_not_crash_db` to be
upgraded. This testcase was asserting a somewhat similar scenario, but wasn't
taking into account the whole matrix of combinations:
- scenarios: successful vs unsuccessful operation outcome
- local consistency levels: LOCAL_QUORUM & LOCAL_ONE
- operations: SELECT (read) & INSERT (write)
and so it's been extended to cover both the pre-existing and the current issues
and the whole matrix of combinations.
Fixes: scylladb/scylladb#27893
The core of `local_quorum_for()` has been extracted to
`get_replication_factor_for_dc()`, which is going to be used later,
while `local_quorum_for()` itself has been recreated using the extracted
part.
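A sketch of the resulting split (illustrative signatures; the quorum arithmetic is the standard RF/2 + 1):
```
#include <cstddef>
#include <string>

struct network_topology_strategy;  // stand-in for the real locator type

// Illustrative: the DC-lookup core, extracted so it can be reused later.
size_t get_replication_factor_for_dc(const network_topology_strategy& nts,
                                     const std::string& dc);

// local_quorum_for() recreated on top of the extracted part.
size_t local_quorum_for(const network_topology_strategy& nts, const std::string& dc) {
    return get_replication_factor_for_dc(nts, dc) / 2 + 1;
}
```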
`network_topology_strategy` was abbreviated as `nrs`, not `nts`. I
think someone incorrectly assumed it's 'network Replication strategy', hence
nrs.
When one request is super slow and the request rate is high,
in theory we can have a collision on the id. This patch
avoids that by reusing ids and aborting when there
is no free one (unlikely).
This commit avoids leaking the seastar::async future from two benchmark
tools: perf-alternator and perf-cql-raw. Additionally, it adds an
abort_source for fast and clean shutdown.
The test is currently flaky. With `remove_dead_nodes_with == "remove"`,
it sends several ALTER KEYSPACE requests. The request performed just
after adding 3 new nodes can unexpectedly be sent twice to two
different nodes by the driver. The second receiver rejects the request
through the new guardrail added in 2e7ba1f8ce,
and the test fails.
This has been acknowledged as a bug in the Python driver. It shouldn't
retry non-idempotent requests with the default retry policy. There could
be one more bug in the driver, as it looks like the driver decides to
resend the request after it disconnects from the first receiver. The
first receiver has just bootstrapped, so the driver shouldn't disconnect.
We deflake the test by reconnecting the driver before performing the
problematic ALTER KEYSPACE request.
The change has been tested in byo, as the failure reproduces only in CI.
Without the change, the test fails once in ~250 runs in dev mode. With
the change, more than 1000 runs passed.
Fixes#27862
No backport needed as 2e7ba1f8ce is only
in master.
Closes scylladb/scylladb#28290
This patch adds a reproducer test showing issue #28183 - that when LWT
is used, hidden tables "...$paxos" are created but they are unexpectedly
shown by DESC TABLES, DESC SCHEMA and DESC KEYSPACE.
The new test was failing (in three places) on Scylla, as those internal
(and illegally-named) tables are listed, and passes on Cassandra
(which doesn't add hidden tables for LWT).
The commit also contains another test, which verifies that a direct
description of the paxos state table is wrapped in a comment.
Refs #28183.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
For the purpose of rescoring we will need to know whether the query is an ANN query,
and to access the index options earlier in `select_statement::prepare` than before.
This patch refactors the extraction of this information into a new helper structure,
`ann_ordering_info`, and uses it consistently.
Add enforce_rack_list option. When the option is set to true,
all tablet keyspaces have rack list replication factor.
When the option is on:
- CREATE STATEMENT always auto-extends rf to rack lists;
- ALTER STATEMENT fails when there is numeric rf in any DC.
The flag is set to false by default and a node needs to be restarted
in order to change its value. Starting a node with enforce_rack_list
option will fail, if there are any tablet keyspaces with numeric rf
in any DC.
enforce_rack_list is a per-node option, and a user needs to ensure
that no tablet keyspace is altered or created while nodes in
the cluster have inconsistent values of it.
Mark rf_rack_valid_keyspaces as deprecated.
Fixes: https://github.com/scylladb/scylladb/issues/26399.
New feature; no backport needed
Closes scylladb/scylladb#28084
* github.com:scylladb/scylladb:
test: add test for enforce_rack_list option
db: mark rf_rack_valid_keyspaces as deprecated
config: add enforce_rack_list option
Revert "alternator: require rf_rack_valid_keyspaces when creating index"
Adds a "sstables" array member to manifest.json.
For each sstables, keep the following metadata:
id - a uuid for the sstable (the sstable identifier
if the use-sstable-identifier option was used, otherwise
the sstable uuid generation)
toc_name - the name of the TOC.txt file
data_size and index_size - in bytes
first_token and last_token - of the sstable first and last keys.
Fixes: SCYLLADB-196
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Add a table member to manifest.json with the keyspace_name,
table_name, table_id, tablets_type, and, for tablets-enabled tables, get
tablet_count on each shard and write the minimum to manifest.json.
For vnodes-based tables, tablet_count=0.
For now, `tablets_type` may be either `none` for vnodes tables, or
`powof2` for tablets tables. In the future, when we support arbitrary
tablet boundaries, this will be reflected here, and it is likely we
would back up the whole tablets map separately to get all tablet boundaries.
Fixes SCYLLADB-195
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
And keep the options for now in the local_snapshot_writer.
The options will be used by following patches to pass
extra metadata like the snapshot creation time, expiration time, etc.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
If tablets are enabled via db::config, add the `tablets = {'enabled': true}`
option when creating a keyspace, even if `cql_test_config.initial_tablets`
is disengaged.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Add metadata about the node: host_id, datacenter, and rack.
This enables dc- or rack- aware restore.
Today this information is "encoded" into the snapshot hierarchy
prefixes, but if all manifest files would be stored in a flat
directory, we'd need to encode that metadata in the object name,
but it'd be better for the manifest contents to be self descriptive.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Add metadata about the manifest itself:
A version and the manifest scope (currently "node",
but in the future, may also be "shard", or "tablet")
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Validate the manifest.json format by loading it using rjson::parse
and then validate its contents to ensure it lists exactly the
SSTables present in the snapshot directory.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
The test is currently flaky. It tries to get the host ID of the bootstrapping
node via the REST API after the node crashes. This can obviously fail. The
test usually doesn't fail, though, as it relies on the host ID being saved
in `ScyllaServer._host_id` at this point by `ScyllaServer.try_get_host_id()`
repeatedly called in `ScyllaServer.start()`. However, with a very fast crash
and unlucky timings, no such call may succeed.
We deflake the test by getting the host ID before the crash. Note that at this
point, the bootstrapping node must be serving the REST API requests because
`await coordinator_log.wait_for("delay_node_bootstrap: waiting for message")`
above guarantees that the node has submitted the join topology request, which
happens after starting the REST API.
Fixes #28227
Closes scylladb/scylladb#28233
In PR 5b6570be52 we introduced the config option `sstable_compression_user_table_options` to allow adjusting the default compression settings for user tables. However, the new option was hooked into the CQL layer and applied only to CQL base tables, not to the whole spectrum of user tables: CQL auxiliary tables (materialized views, secondary indexes, CDC log tables), Alternator base tables, Alternator auxiliary tables (GSIs, LSIs, Streams).
This gap also led to inconsistent default compression algorithms after we changed the option’s default algorithm from LZ4 to LZ4WithDicts (adf9c426c2).
This series introduces a general “schema initializer” mechanism in `schema_builder` and uses it to apply the default compression settings uniformly across all user tables. This ensures that all base and aux tables take their default compression settings from config.
Fixes#26914.
Backport justification: LZ4WithDicts is the new default since 2025.4, but the config option exists since 2025.2. Based on severity, I suggest we backport only to 2025.4 to maintain consistency of the defaults.
Closes scylladb/scylladb#27204
* github.com:scylladb/scylladb:
db/config: Update sstable_compression_user_table_options description
schema: Add initializer for compression defaults
schema: Generalize static configurators into schema initializers
schema: Initialize static properties eagerly
db: config: Add accessor for sstable_compression_user_table_options
test: Check that CQL and Alternator tables respect compression config
Remove the `make_sstable_streamer` function and inline its usage where
needed. This change allows passing different sets of arguments
directly at the call sites.
When working directly with sstable components, the provided name should
be only the file name without path prefixes. Any prefixing tokens
belong in the 'prefix' argument, as the name suggests.
The test is currently flaky. It incorrectly assumes that a read with
CL=LOCAL_ONE will see the data inserted by a preceding write with
CL=LOCAL_ONE in the same datacenter with RF=2.
The same issue has already been fixed for CL=ONE in
21edec1ace. The difference is that
for CL=LOCAL_ONE, only dc1 is problematic, as dc2 has RF=1.
We fix the issue for CL=LOCAL_ONE by skipping the check for dc1.
Fixes#28253
The fix addresses CI flakiness and only changes the test, so it
should be backported.
Closesscylladb/scylladb#28274
Add a basic test that creates a strongly consistent keyspace and table,
writes some data, and verifies that the same data can be read back.
Since Scylla-side request proxying is not yet implemented, writes are
handled only on the leader node. The test uses the existing
`/raft/leader_host` REST endpoint to determine the leader of the tablets
Raft group.
The `raft::server` registers metrics using the `server_id` label. When
both a group0 Raft server and the tablets Raft server are created on
the same node/shard, duplicate metrics cause conflicts.
This commit temporarily disables metrics for non-0 groups. A proper fix
will likely require adding a `group_id` label in the future.
We use decoration instead of inheritance, since inheritance already
serves to differentiate statement types (modification_statement has
update_statement and delete_statement as descendants). A better
solution would likely involve refactoring modification_statement and
extracting the mutation-generation logic into a reusable component
shared by both eventual and strongly consistent statements.
Introduce two helper methods that will be used for strongly consistent
select_statement and modification_statement.
redirect_statement() forwards the request to another shard or node.
Currently, only shard forwarding is implemented; node-level proxying
will be added in follow-up PRs.
is_strongly_consistent() will be used in the prepare() method of raw
statements to determine whether a strongly consistent statement should
be created for the given CQL statement.
To guarantee monotonic mutation timestamps, we compute the maximum
timestamp used so far for the current tablet. This is done by calling
read_barrier() on the tablet’s Raft group server and extracting the
maximum timestamp from the local database via
table::get_max_timestamp_for_tablet().
Because read_barrier() may take a while, we perform it proactively in a
dedicated fiber, leader_info_updater, rather than during the mutation
request. This fiber is started when the Raft group server starts for a
tablet. It reacts to wait_for_state_change(), computes the maximum
timestamp, and stores it per term.
The new groups_manager::begin_mutate() function checks whether the
maximum timestamp has already been computed for the current term. If
not, it asks the client to wait. This two-step interface (synchronous
begin_mutate() + asynchronous wait on the need_wait_for_leader future)
is needed because the term can change at any asynchronous point.
If begin_mutate() were asynchronous, the client would need to recheck
the term after `co_await begin_mutate()`.
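A sketch of the two-step call pattern described above (the surrounding types are illustrative stand-ins, not the actual interfaces):
```
#include <seastar/core/coroutine.hh>
#include <seastar/core/future.hh>
#include <cstdint>
#include <optional>
#include <utility>

// Illustrative stand-ins for the real strong_consistency types.
struct begin_mutate_result {
    std::optional<seastar::future<>> need_wait_for_leader; // engaged if the caller must wait
    int64_t max_timestamp = 0;  // max timestamp known for the current term
};

struct groups_manager {
    begin_mutate_result begin_mutate();               // synchronous: checks the current term only
    seastar::future<> submit(int64_t max_timestamp);  // submits the command to the raft group
};

// The two-step pattern: a synchronous begin_mutate() plus an asynchronous
// wait, retried in a loop because the term can change at any asynchronous
// point -- after waiting, the state must be rechecked.
seastar::future<> mutate(groups_manager& mgr) {
    for (;;) {
        auto res = mgr.begin_mutate();
        if (!res.need_wait_for_leader) {
            co_await mgr.submit(res.max_timestamp);
            co_return;
        }
        co_await std::move(*res.need_wait_for_leader);
    }
}
```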
We currently do not handle raft::commit_status_unknown. We rethrow it to
the CQL client, which must check whether the command was applied and
retry if necessary. Handling this inside Scylla would require persisting
a deduplication key after applying the mutation, which introduces write
amplification. Additionally, connection breaks between Scylla and the
driver can always occur, so the client must be prepared to verify the
command status regardless.
When a strongly consistent request arrives at a node, we
need to know which replica is the leader, since such requests
are generally executed only on the leader. If a leader has
not yet been elected, we must wait. This commit exposes
wait_for_leader() so it can be used for that purpose.
We cannot rely solely on wait_for_state_change(), because it does not
trigger when some other node becomes a leader.
Add the `coordinator` class, which will be responsible for coordinating
reads and writes to strongly consistent tables. This commit includes
only the boilerplate; the methods will be implemented in separate
commits.
These commands will be used by strongly consistent tablets to submit
mutations to Raft. A simple state_machine implementation is introduced
to apply these commands.
We apply commands in batches to reduce commitlog I/O overhead. The
batched variant of database::apply has known atomicity issues. For
example, it does not guarantee atomicity under memory pressure: some
mutations may be published to the memtable while others are blocked in
run_when_memory_available. We will address these issues later.
Strongly consistent writes require knowing the maximum timestamp of
locally applied mutations to guarantee monotonically increasing
timestamps for subsequent writes.
This commit adds a function that returns the maximum timestamp for a
given tablet.
Why it is safe to use this function with deleted cells:
* Tombstones are included in memtable.get_max_timestamp() calculations.
* The maximum timestamp of a memtable is used to initialize the maximum
timestamp of the resulting sstable.
* During compaction, a new sstable’s maximum timestamp is initialized as
the maximum of the contributing sstables.
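A sketch of the computation described above (the container types and iteration helpers are illustrative, not the actual Scylla API):
```
#include <algorithm>
#include <cstdint>
#include <vector>

// Illustrative stand-ins; the real code iterates the table's memtables
// and sstable set for the tablet's token range.
struct memtable { int64_t get_max_timestamp() const; };
struct sstable  { int64_t max_timestamp() const; };

int64_t get_max_timestamp_for_tablet(const std::vector<memtable*>& memtables,
                                     const std::vector<sstable*>& sstables) {
    int64_t max_ts = INT64_MIN;  // api::min_timestamp in the real code
    for (auto* mt : memtables) {
        max_ts = std::max(max_ts, mt->get_max_timestamp());
    }
    for (auto* sst : sstables) {
        // sstable max timestamps are seeded from memtables at flush time and
        // combined from the inputs at compaction, so tombstones are covered.
        max_ts = std::max(max_ts, sst->max_timestamp());
    }
    return max_ts;
}
```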
This commit adds a `consistency` field to `tablet_replication_strategy`.
In upcoming commits we'll use this field to determine if a
`raft_group_id` should be generated for a new table.
Add a `raft_group_id` column to `system.tablets` and to the `tablet_map`
class. The column is populated only when the
`strongly_consistent_tables` feature is enabled.
This feature is currently disabled by default and is enabled only when
the user sets the `STRONGLY_CONSISTENT_TABLES` experimental flag.
The `raft_group_id` column is added to `system.tablets` only when this
flag is set. This allows the schema to evolve freely while the feature
is experimental, without requiring complex migrations.
This is a refactoring/simplification commit.
There are many 'prepare' functions in this class that don't
meaningfully differ from each other. The prepare_statement() adds
accidental complexity by adding a level of indirection -- the reader
has to jump between the call site and the function body to reconstruct
the full picture.
In preparation for upcoming work on strongly consistent queries in
Scylla, this commit renames the existing `strongly_consistent`
statements to `broadcast_statements` to avoid confusion.
The old code paths are kept temporarily, as they may be useful for
reference or for copying parts during the implementation of the new
strongly consistent statements.
This patch changes the layout of user-facing scheduling groups from
/
`- statement
`- sl:default
`- sl:*
`- other groups (compaction, streaming, etc.)
into
/
`- user (supergroup)
   `- statement
   `- sl:default
   `- sl:*
`- other groups (compaction, streaming, etc.)
The new supergroup has 1000 static shares and is name-less, in the sense
that it only has a variable in the code to refer to it and is not exported
via metrics (should be fixed in seastar if we want to).
The moved groups don't change their names or shares, they only move inside
the scheduling hierarchy.
The goal of the change is to improve resource consumption of sl:*
groups. Right now activities in low-shares service levels are scheduled
on par with e.g. streaming activity, which is considered a low-prio
one. Moving all sl:* groups into their own supergroup with 1000
shares changes the meaning of sl:* shares. From now on these share
values describe the priorities of service levels relative to each other, and the
user activities compete with the rest of the system with 1000 shares,
regardless of how many service levels there are.
Unit tests keep their user groups under the root supergroup (for simplicity)
Unit tests keep their user groups under root supergroup (for simplicity)
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Closes scylladb/scylladb#28235
Currently, raft-based node operations with streaming use topology guards, but repair-based don't.
Topology guards ensure that if a respective session is closed (the operation has finished), each leftover operation being a part of this session fails. Thanks to that we won't incorrectly assume that e.g. the old rpc received late belongs to the newly started operation. This is especially important if the operation involves writes.
Pass a topology_guard down from raft_topology_cmd_handler to repair tasks. Repair tasks already support topology guards.
Fixes: https://github.com/scylladb/scylladb/issues/27759
No topology_guard in any version; needs backport to all versions
Closes scylladb/scylladb#27839
* github.com:scylladb/scylladb:
service: use session variable for streaming
service: pass topology guard to RBNO
backup and restore tests. This made the testing times explode
with both cluster/object_store/test_backup.py and
cluster/test_refresh.py taking more than an hour each to complete
under test.py and around 14min under pytest directly.
This was painful especially in CI because it runs tests under test.py which
suffers from the issue of not being able to run test cases from within
the same file in parallel (a fix is attempted in 27618).
This patch reduces the dataset of these tests to the minimum and
gets rid of one of the tested topology as it was redundant.
The test times are reduced to 2min under pytest and 14 mins under
test.py.
Signed-off-by: Robert Bindar <robert.bindar@scylladb.com>
Closes scylladb/scylladb#28280
This series introduces `rescoring` index option.
There is no rescoring algorithm implementation yet.
This series prepares it by:
- adding new index option
- adding documentation
- adding tests for option handling
- adding tests for the rescoring implementation - at this point they report errors and are marked as expected failures, because rescoring is not implemented yet.
Follow-up https://github.com/scylladb/scylladb/pull/27677
Fixes https://scylladb.atlassian.net/browse/SCYLLADB-293
Fixes https://scylladb.atlassian.net/browse/SCYLLADB-294
No backporting - it is a new feature.
Closes scylladb/scylladb#28165
* github.com:scylladb/scylladb:
vector_search: Add more rescoring validation tests
vector_search: Add rescoring validation test
vector_search: doc: Document new index option
vector_search: test: Add `rescoring` index option test
vector_index: introduce rescoring option
vector_index: improve options validation
The streamed_mutation_freezer class uses a deque to avoid large
allocations, but fails as seen in the referenced issue when the
vector backing the deque grows too large. This may be a problem
in itself, but the issue doesn't provide enough information to tell.
Fix the immediate problem by switching to chunked_vector, which
is better in avoiding large allocations. We do lose some early-free
in serialize_mutation_fragments(), but since most of the memory should
be in the clustering row itself, not in the deque/chunked_vector holding
it, it should not be a problem.
Fixes #28275
Closes scylladb/scylladb#28281
Auth v2 migration uses non-paged queries via `execute_internal` API.
This commit changes it to use `query_internal` instead, which uses
paging under the hood.
Fixes: https://github.com/scylladb/scylladb/issues/27577
A minor enhancement, no need to backport.
Closes scylladb/scylladb#25395
* github.com:scylladb/scylladb:
auth: use paged internal queries during migration
auth: move some code in migrate_to_auth_v2 up
auth: re-align pieces of migrate_to_auth_v2
cql: extend `query_internal` with `query_state` param
This reverts commit c8cff94a5a.
Re-enabling incremental repair on master with "Aborting on shard 0 during
scaleout + repair #26041" and "Failure to attach sstables in streaming consumer
leaves sealed sstables on disk #27414" fixed.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Closes scylladb/scylladb#28120
Since #28109 was merged, those tests started to pass as we allow
the filtering on primary key columns within ANN vector queries.
Closes scylladb/scylladb#28231
Adding tests for specific cases of rescoring processing:
- wildcard selection - "SELECT * ..." takes a slightly different rescoring path; we want to confirm that it is handled correctly.
- calculating similarity with other vectors in the SELECT clause should not influence ANN ordering.
- NULL handling - results whose score is NULL for any reason should be filtered out.
As rescoring is not implemented yet, the tests use boost::unit_test::expected_failures
to mark the reported errors as expected.
Verify that vector store results will be correctly rescored and reordered
according to the rescoring algorithm.
As rescoring is not implemented yet, the tests use `boost::unit_test::expected_failures`
to mark the reported errors as expected.
The first test checks rescoring with a simple selection list.
The second makes sure that rescoring is not triggered for quantization=f32 - the full representation of vectors.
The third repeats the first, but additionally returns the similarity score value.
This patch adds a vector index option that enables rescoring - recalculation of the similarity metric and re-ranking of quantized VS candidates.
Quantization is a necessary condition for rescoring - this is checked in the convenience function `is_rescoring_enabled`.
Rescoring itself is not implemented yet - it will come in the following patches.
Fixes https://scylladb.atlassian.net/browse/SCYLLADB-294
Add a `prepared_filter` class which handles the preparation, construction
and caching of the Vector Search filtering-compatible JSON object.
If no bind markers are found in the SELECT statement, the JSON object is built
once at prepare time and cached for use during execution calls.
Adjust tests accordingly to use prepared filters.
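A hypothetical sketch of the caching idea (class shape and names are illustrative, not the actual implementation):
```
#include <optional>
#include <string>

class prepared_filter_sketch {
    std::optional<std::string> _cached_json; // built at prepare time when possible
public:
    // Called once when the SELECT statement is prepared.
    void prepare(bool has_bind_markers) {
        if (!has_bind_markers) {
            // No bind markers: the restrictions are fully known now, so the
            // Vector Search filter JSON can be built once and reused.
            _cached_json = build_json();
        }
    }
    // Called on every execution.
    std::string get_filter() const {
        // With bind markers, the JSON depends on the bound values and must
        // be rebuilt per execution.
        return _cached_json ? *_cached_json : build_json();
    }
private:
    std::string build_json() const { return "{}"; } // stand-in for the real conversion
};
```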
Follow-up: #28109
Fixes: SCYLLADB-299
Paxos state tables are internal tables fully managed by Scylla;
they shouldn't be exposed to the user, nor should they be backed up.
This commit hides these tables from all listings, and if such a table
is directly described with `DESC ks."tbl$paxos"`, the description is generated
within a comment and a note for the user is added.
Fixes scylladb/scylladb#28183
During the rewrite, --extra-scylla-cmdline-options was missed and was not
passed to the tests that use pytest. As a result, there was no way to
pass these parameters to Scylla via the command line; the tests
were not affected because they were using the parameters from the yaml
file. This PR fixes the issue, making it easier to modify the Scylla
start parameters without modifying code.
Move Vector Search filter functions from `cql3::restrictions` to
the `vector_search` namespace, as it's a better place given
their purpose.
The effective name has changed from `cql3::restrictions::to_json`
to `vector_search::to_json`, which clearly signals that the JSON
object will be used for Vector Search.
Rename the auxiliary functions to use the `to_json` suffix instead of
a variety of verbs, as these functions all focus on building a JSON
object from different structures. The old naming emphasized too
much distinction between functions that do pretty much
the same thing.
Follow-up: #28109
The API contract in partition_version.hh states that when dealing with
evictable entries, a real cache tracker pointer has to be passed to all
methods that ask for it. The nonpopulating reader violates this, passing
a nullptr to the snapshot. This was observed to cause a crash when a
concurrent cache read accessed the snapshot with the null tracker.
A reproducer is included which fails before and passes after the fix.
Fixes: #26847
Closes scylladb/scylladb#28163
Bound_weight and partition_region are defined in both paging_state.idl.hh and
position_in_partition.idl.hh. This isn't currently causing any issues, but if
a future RPC uses both the paging_state and position_in_partition, after
including both files we'll get a duplicate error.
In this patch we prevent this by removing the definitions from paging_state.idl.hh
and including position_in_partition.idl.hh in their place.
Closes scylladb/scylladb#28228
Use the session that was retrieved at the beginning of the handler for
node operations with streaming, to ensure that the session id won't
change in between.
Currently, raft-based node operations with streaming use topology
guards, but repair-based don't.
Topology guards ensure that if a respective session is closed
(the operation has finished), each leftover operation being a part
of this session fails. Thanks to that we won't incorrectly assume
that e.g. an old RPC received late belongs to the newly started
operation. This is especially important if the operation involves
writes.
Pass a topology_guard down from raft_topology_cmd_handler to repair
tasks. Repair tasks already support topology guards.
Fixes: https://github.com/scylladb/scylladb/issues/27759
Mark the rf_rack_valid_keyspaces option as deprecated. Users should
use the enforce_rack_list option instead.
The option can still be used and its behavior does not change.
Docs are updated accordingly.
Add the enforce_rack_list option. When the option is set to true,
all tablet keyspaces have a rack-list replication factor.
When the option is on:
- CREATE statements always auto-extend the rf to rack lists;
- ALTER statements fail when there is a numeric rf in any DC.
The flag is set to false by default and a node needs to be restarted
in order to change its value. Starting a node with the enforce_rack_list
option will fail if there are any tablet keyspaces with a numeric rf
in any DC.
enforce_rack_list is a per-node option, and a user needs to ensure
that no tablet keyspace is altered or created while nodes in
the cluster don't have a consistent value.
Currently the suite generates config in the old format, and only a single
test validates that using the new format "works".
This change updates the suite (mainly the MinioServer::create_conf()
method) to generate the endpoint config in the new format.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Closes scylladb/scylladb#28113
The datagram_channel::send() method that sends net::packet-s is
deprecated in favor of the span<temporary_buffer> one. Auditing code
still uses the former -- it constructs a packet from a formatted
string by copying the string into the packet's fragment, then sends it.
This patch releases the string into a temporary_buffer and then passes
a one-element span to send().
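A rough sketch of the resulting shape, assuming the span<temporary_buffer> overload mentioned above (the audit line itself is illustrative):
```
#include <seastar/core/coroutine.hh>
#include <seastar/core/sstring.hh>
#include <seastar/core/temporary_buffer.hh>
#include <seastar/net/api.hh>
#include <array>
#include <span>

using namespace seastar;

future<> send_audit_line(net::datagram_channel& chan, socket_address dst, sstring line) {
    // For the sketch we copy the string into a temporary_buffer; the actual
    // patch releases the string's storage into the buffer to avoid the copy.
    temporary_buffer<char> buf(line.data(), line.size());
    std::array<temporary_buffer<char>, 1> frags{std::move(buf)};
    // A one-element span instead of constructing a net::packet.
    co_await chan.send(dst, std::span(frags));
}
```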
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Closes scylladb/scylladb#28198
do_query() is a coroutine but uses some continuations to take
advantage of exceptions being propagated via future::then() without
being thrown. We can accomplish the same thing with a nested coroutine
and coroutine::try_future(), simplifying the code.
While this area isn't performance-intensive, we're also not adding
allocations. The coroutine frame may add an allocation, but since read_page()
certainly does not return immediately, the following then() would allocate
as well. Since we eliminated that then(), the change is at least neutral
allocation-wise.
Closes scylladb/scylladb#28258
Consider the following scenario:
1. Let nodes A,B,C form a cluster with RF=3
2. Write query with CL=QUORUM is submitted and is acknowledged by
nodes B,C
3. Follow-up read query with CL=QUORUM is sent to verify the write
from the previous step
4. Coordinator sends data/digest requests to the nodes A,B. Since the
node A is missing data, digest mismatches and data reconciliation
is triggered
5. The node A or B fails, becomes unavailable, etc.
6. During reconciliation, data requests are sent to nodes A and B and fail,
   failing the entire read query
When the above scenario happens, the tests using `start_writes()` fail
with the following stacktrace:
```
...
> await finish_writes()
test/cluster/test_tablets_migration.py:259:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
test/pylib/util.py:241: in finish
await asyncio.gather(*tasks)
test/pylib/util.py:227: in do_writes
raise e
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
worker_id = 1
...
> rows = await cql.run_async(rd_stmt, [pk])
E cassandra.ReadFailure: Error from server: code=1300 [Replica(s) failed to execute read] message="Operation failed for test_1767777001181_bmsvk.test - received 1 responses and 1 failures from 2 CL=QUORUM." info={'consistency': 'QUORUM', 'required_responses': 2, 'received_responses': 1, 'failures': 1}
```
Note that when a node failure happens before/during a read query,
there is no test failure, as speculative retries are enabled
by default; an additional data/digest read is sent to the third
remaining node.
However, the speculative read is cancelled the moment the read
query reaches CL, which may trigger a read-repair.
This change:
- Retries the verification read in start_writes() on failure to mitigate
races between reads and node failures
- Adds additional logging to correlate Python exceptions with Scylla logs
Fixes https://github.com/scylladb/scylladb/issues/27478
Fixes https://github.com/scylladb/scylladb/issues/27974
Fixes https://github.com/scylladb/scylladb/issues/27494
Fixes https://github.com/scylladb/scylladb/issues/23529
Note that this change mitigates test flakiness observed during tablet transitions.
However, it serves as a workaround for a higher-level issue
https://github.com/scylladb/scylladb/issues/28125
Closes scylladb/scylladb#28140
Auth v2 migration uses non-paged queries via `execute_internal` API.
This commit changes it to use `query_internal` instead, which uses
paging under the hood.
Fixes: scylladb/scylladb#27577
Just move the touched code above so the next commit is more readable.
But this has a drawback: previously, if the returned rows were empty,
this code was not executed, but now it is, independently of the query
results. This shouldn't be a big deal, though, as auth shouldn't be
empty.
This patch adds vector index options that enable quantization and oversampling.
The specific quantization value will be used internally by the vector store.
In the current implementation, get_oversampling lets us decide how many times more candidates
to retrieve from the vector store - the final response is still trimmed to the given limit.
It is a first step towards rescoring - recalculation of the similarity metric and re-ranking.
Without rescoring, oversampling will also be further optimized to happen internally in the vector store.
`test/vector_search/rescoring_test.cc` implements basic tests of added functionality.
New options are documented in `docs/cql/secondary-indexes.rst`.
Fixes https://scylladb.atlassian.net/browse/SCYLLADB-82
Ref https://scylladb.atlassian.net/browse/SCYLLADB-83
New feature - no backporting
Closes scylladb/scylladb#27677
* github.com:scylladb/scylladb:
vector_search: doc: Document new index options
vector_search: test: Test oversampling
vector_search: test: Add rescoring index options test
vector_search: test: Extract Configure utility to shared header
vector_index: introduce `quantization` and `oversampling` options
Cassandra changed their system tables in 3.0. We migrated to the new system table layout in 2017, in ScyllaDB 2.0.
System tables introduced in Cassandra 3.0, as well as the 3.0 variant of pre-existing system tables were added to the db::system_table::v3 namespace.
We ended up adding some new ScyllaDB-only system tables to this namespace as well.
As the dust settled, most of the v3 system tables ended up being either simple aliases to non-v3 tables, or new tables.
Either way, the codebase has used just one variant of each table for a long time now, so the v3:: distinction is pointless.
Remove the v3 namespace and unify the table listing under the top-level db::system_keyspace scope.
Code cleanup, no backport
Closes scylladb/scylladb#28146
* github.com:scylladb/scylladb:
db/system_keyspace: move remining tables out of v3 keyspace
db/system_keyspace: relocate truncated() and commitlog_cleanups()
db/system_keyspace: drop v3::local()
db/system_keyspace: remove duplicate table names from v3
Before this commit, if a test file or a test suite didn't include
any actual test cases, it was ignored by `boost_test_tree_lister`.
However, this information is useful; for example, it allows us to tell
if the test file the user wants to run doesn't exist or simply doesn't
contain any tests. The kind of error we would return to them should be
different depending on which situation we're dealing with.
We start including those empty suites and files in the output of
`--list_json_content`.
---
Examples (with additional formatting):
* Consider the following test file, `test/boost/dummy_test.cc` [1]:
```
BOOST_AUTO_TEST_SUITE(dummy_suite1)
BOOST_AUTO_TEST_SUITE(dummy_suite2)
BOOST_AUTO_TEST_SUITE_END()
BOOST_AUTO_TEST_SUITE_END()
BOOST_AUTO_TEST_SUITE(dummy_suite3)
BOOST_AUTO_TEST_SUITE_END()
```
Before this commit:
```
$ ./build/debug/test/boost/dummy_test -- --list_json_content
[{"file": "test/boost/dummy_test.cc", "content": {"suites": [], "tests": []}}]
```
After this commit:
```
$ ./build/debug/test/boost/dummy_test -- --list_json_content
[{"file":"test/boost/dummy_test.cc", "content": {"suites": [
{"name": "dummy_suite1", "suites": [
{"name": "dummy_suite2", "suites": [], "tests": []}
], "tests": []},
{"name": "dummy_suite3", "suites": [], "tests": []}
], "tests": []}}]
```
* Consider the same test file as in Example 1, but also assume it's compiled
into `test/boost/combined_tests`.
Before this commit:
```
$ ./build/debug/test/boost/combined_tests -- --list_json_content | grep dummy
$
```
After this commit:
```
$ ./build/debug/test/boost/combined_tests -- --list_json_content
[..., {"file": "test/boost/dummy_test.cc", "content": {"suites": [
{"name": "dummy_suite1", "suites":
[{"name": "dummy_suite2", "suites": [], "tests": []}],
"tests": []},
{"name": "dummy_suite3", "suites": [], "tests": []}],
"tests":[]}}, ...]
```
[1] Note that the example is simplified. As of now, it's not possible to use
`--list_json_content` with a file without any Boost tests. That will
result in the following error: `Test setup error: test tree is empty`.
Refs scylladb/scylladb#25415
In scylladb/scylladb@afde5f668a, we
implemented custom collection of information about Boost tests
in the repository. The solution boiled down to traversing through
the test tree via callbacks provided by Boost.Test and calling that
code from a global fixture. This way, the code is called automatically
by the framework.
Unfortunately, for an unknown reason, this leads to labels of test units
being duplicated. We haven't found the root cause yet and so we
deduplicate the labels manually.
---
Example (with additional formatting):
Consider the following test in the file `test/boost/dummy_test.cc`:
```
SEASTAR_TEST_CASE(dummy_case, *boost::unit_test::label("mylabel1")) {
return make_ready_future();
}
```
Before this commit:
```
$ ./build/dev/test/boost/dummy_test -- --list_json_content
[{"file": "test/boost/dummy_test.cc", "content": {"suites": [],
"tests": [{"name": "dummy_case", "labels": "mylabel1,mylabel1"}]}
}]
```
After this commit:
```
$ ./build/dev/test/boost/dummy_test -- --list_json_content
[{"file": "test/boost/dummy_test.cc", "content": {"suites": [],
"tests": [{"name": "dummy_case", "labels": "mylabel1"}]}
}]
```
Refs scylladb/scylladb#25415
The loops in `ongoing_rf_change()` perform explicit yields, but they
also perform coroutine operations which can yield implicitly. The
explicit yields are redundant.
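A minimal sketch of the pattern being removed (do_work() is a stand-in; assuming seastar's coroutine::maybe_yield() was the explicit yield):
```
#include <seastar/core/coroutine.hh>
#include <seastar/coroutine/maybe_yield.hh>
#include <vector>

using namespace seastar;

static future<> do_work(int) { co_return; }    // stand-in for the real loop body

future<> process_all(std::vector<int> items) {
    for (auto item : items) {
        co_await do_work(item);                // can already yield implicitly
        // co_await coroutine::maybe_yield(); // redundant explicit yield, dropped
    }
}
```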
Signed-off-by: Nikos Dragazis <nikolaos.dragazis@scylladb.com>
`effective_capacity` is a value used in size-based load balancing. It is the sum of a node's available disk space and all of its tablet sizes; for example, a node with 800 GiB of free disk and 200 GiB of tablets has an effective_capacity of 1 TiB.
This change adds this value to the virtual table `system.load_per_node`. This can be useful for debugging size-based load balancing.
Size-based load balancing is currently only on master, so no backport is needed.
Closes scylladb/scylladb#28220
* github.com:scylladb/scylladb:
docs: add effective_capacity to system keyspace docs
virtual_table: add effective_capacity to load_per_node
This test has to be adjusted in lock-step with scylladb.git, due to changes in https://github.com/scylladb/scylladb/pull/27836. It is simpler to just take the time and import it, so https://github.com/scylladb/scylladb/pull/27836 can patch all the affected tests, including this one.
All code is imported verbatim, then patched later, such that the series remains bisectable.
dtest import, no backport needed
Closes scylladb/scylladb#28085
* github.com:scylladb/scylladb:
test/cluster/dtest: remove is_win() and users
test/cluster/dtest/scrub_test.py: add license blurb
test/cluster/dtest: import scrub_test.py
test/cluster/dtest/ccmlib: scylla_node.py: adapt run_scylla_sstable() at al
test/cluster/dtest/ccmlib: scylla_node.py: import run_scylla_sstable()
The original scrub test was written by the Cassandra project, hence there
are two license notices: one for the original work by Cassandra
(2015) and one for our modifications on top (2021).
Also imported are its dependencies: get_sstables() and __gather_sstables().
The code is imported verbatim, but doesn't work yet (and has no users yet either).
It will be patched to work in the next commit.
The last remaining tables in the v3 keyspace are those that are genuinely
distinct -- added by Cassandra 3.0 or >= ScyllaDB 2.0.
Move these out of the v3 keyspace too; with this, the v3 keyspace is
defunct and removed.
The name variables of these tables are outside the v3 namespace, but the
methods defining their schemas are in the v3 namespace. Relocate the
methods out of the v3 namespace, to the scope where the name variables
live.
The methods are moved to the private: part of system_keyspace, as they
don't have external users currently.
Remove those table names that are effectively just an alias of their
counterpart outside of the v3 namespace (struct).
scylla_local() is made public. Currently it is private, but it has
external users, working around the private designation by using the
public v3::scylla_local() alias. This change just makes the existing
status clear.
This patch adds vector index options that enable quantization and oversampling.
The specific quantization value will be used internally by the vector store.
In the current implementation, `get_oversampling` lets us decide how many times more candidates
to retrieve from the vector store - the final response is still trimmed to the given limit.
It is a first step towards rescoring - recalculation of the similarity metric and re-ranking.
Without rescoring, oversampling will also be further optimized to happen internally in the vector store.
Fixes https://scylladb.atlassian.net/browse/SCYLLADB-82
Ref https://scylladb.atlassian.net/browse/SCYLLADB-83
Currently, we only know about long reclaims from lsa-timing stall
reports. Shorter reclaims can go under the radar.
Those metrics will help to assess increases in LSA activity, which
translates to a higher CPU cost of a workload.
reclaim tracks memory which goes to the standard allocator, e.g. when
entering an allocating_section or in the background reclaimer.
evict/compact count activity towards building the LSA reserve, on
allocating_section entry or on a naked LSA allocation.
Closes scylladb/scylladb#27774
The way test.py runs the test/cqlpy tests requires that tests end their
session with all keyspaces deleted. If we forget to delete a keyspace,
test.py suspects that some test failed and reports a failure. As reported in
issue #26291, the test file test/cqlpy/test_describe.py caused this check
to trigger, so this file was added to the blacklist "dirties_cluster"
in suite.yaml to force test.py to ignore this problem.
I believe the cause of the problem was as follows: test_describe.py
didn't really leave any undeleted keyspace. Rather, test_describe.py had
one test which used "USE" and this broke DESC KEYSPACES (Refs #26334) -
which test.py used to see which keyspaces remained.
We solved this problem not just once, but twice:
1. In pull request #26345, I fixed the test not to use "USE" on the main
CQL session.
2. In pull request #27971, I fixed DESC KEYSPACES implementation so even
if "USE" was in effect, it will return the correct results.
I checked manually, and after removing test_describe.py from the
dirties_cluster blacklist, all cqlpy tests now pass, without
spurious failures in the test following test_describe.py. So it's time
to remove it from the blacklist.
Fixes #26291
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes scylladb/scylladb#27973
Commit d54d409 (audit: write out to both table and syslog) unified
create_audit and start_audit, which moved the audit service creation later
in the startup sequence. This broke startup when audit is enabled because
view_builder prepares CQL queries before start_audit runs, and
query preparation calls audit_instance().local_is_initialized()
which crashes on the non-existent sharded service.
Move start_audit to run before view_builder::start() and other components
that may prepare CQL queries during their initialization.
Fixes SCYLLADB-252
Closes scylladb/scylladb#28139
Our glossary is stuck in the past, still discussing token ownership in
terms of vnodes and cluster synchronization in terms of gossip.
This patch tries to improve this a bit, although much more work needs to
be done.
The term `Tablet` is added and the definition of `Token` and `Token
Range` is rephrased to be tablet inclusive.
The term `Cluster` is changed to mention raft as the synchronization
mechanism instead of gossip.
One outstanding problem is that our general architecture page describing
the ring architecture is still vnode-only. We have a separate Tablets
page, but the two don't link to each other and most documentation refers
only to the former. A casual reader might spend a lot of
time on our documentation pages without even seeing the word: tablets.
Closes scylladb/scylladb#28170
The loop that unwraps nested exceptions rethrows the nested exception and saves a pointer to the temporary std::exception& inner on the stack, then continues. This pointer thus ends up pointing to a released temporary.
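A distilled version of the pattern (illustrative; per the description above, the pointer outlives the caught object):
```
#include <exception>

const std::exception* innermost_sketch(const std::exception& outer) {
    const std::exception* cur = &outer;
    while (auto nested = dynamic_cast<const std::nested_exception*>(cur)) {
        try {
            nested->rethrow_nested();
        } catch (const std::exception& inner) {
            cur = &inner; // bug: `inner` refers to the in-flight exception
                          // object, which is released when the handler
                          // exits, so `cur` dangles on the next iteration
        }
    }
    return cur;
}
// A fix holds each level via std::exception_ptr (or copies what it needs)
// instead of keeping a raw pointer into the handler.
```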
Closes scylladb/scylladb#28143
reader_permit::release_base_resources() is a soft evict for the permit:
it releases the resources acquired during admission. This is used in
cases where a single process owns multiple permits, creating a risk of
deadlock, as is the case for repair. In this case,
release_base_resources() acts as a manual eviction mechanism to prevent
permits blocking each other from admission.
Recently we found a bad interaction between release_base_resources() and
permit eviction. Repair uses both mechanisms: it marks its permits as
inactive and later it also uses release_base_resources(). This practice
might be worth reconsidering, but the fact remains that there is a bug
in the reader permit which causes the base resources to be released
twice when release_base_resources() is called on an already evicted
permit. This is incorrect and is fixed in this patch.
Improve release_base_resources():
* make _base_resources const
* move signal call into the if (_base_resources_consumed()) { }
* use reader_permit::impl::signal() instead of
reader_concurrency_semaphore::signal()
* all places where base resources are released now call
release_base_resources()
A reproducer unit test is added, which fails before and passes after the
fix.
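A simplified sketch of the double-release hazard and the guard against it (members and names are illustrative, not the actual reader_permit internals):
```
struct permit_sketch {
    const int _base_resources = 1;        // made const by the patch
    bool _base_resources_consumed = true; // true while admission units are held

    void release_base_resources(int& semaphore_units) {
        if (!_base_resources_consumed) {
            return; // already released, e.g. by eviction; without this guard
                    // the units would be returned to the semaphore twice
        }
        _base_resources_consumed = false;
        semaphore_units += _base_resources;
        signal(); // notify waiters once, from inside the guarded path
    }
    void signal() {}
};
```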
Fixes: #28083
Closes scylladb/scylladb#28155
This is a translation of Cassandra's CQL unit test source file
validation/operations/InsertUpdateIfConditionTest.java into our cqlpy
framework.
This test file checks various LWT conditional updates. After that
file became too big, the Cassandra developers split parts from it -
moving tests for LWT with collections, UDTs, and static columns to
separate test files - which I already translated (pull request #13663).
This patch translates the remaining, main, LWT tests.
Strangely, this test file also has, in the middle of the file, several
tests for conditional schema changes, like CREATE KEYSPACE IF NOT EXISTS,
a feature which has *nothing* to do with LWT so really didn't belong in
this file. But I translated those as well.
These new tests all pass on both ScyllaDB and Cassandra, and have not
uncovered any new bug.
However, these tests do demonstrate yet again something that users and
developers of ScyllaDB's LWT must be aware of: Whereas usually
ScyllaDB's goal has been compatibility with Cassandra's CQL, in LWT
this has *not* been the case: ScyllaDB deviated from Cassandra's
behavior in its LWT implementation in several places. These intentional
deviations were documented in docs/kb/lwt-differences.rst.
Accordingly, the tests here include almost a hundred (!) modifications
(search for "if is_scylla") to allow the same test to pass on both
ScyllaDB and Cassandra, as well as many comments explaining the types
of differences we're seeing.
Although these deviations from Cassandra compatibility are known and
intentional, it's worth listing here the ones re-discovered by these
new tests:
1. On a successful conditional write, Cassandra returns just true, Scylla
also returns the old contents of the row.
2. Similarly, in an IF EXISTS write that failed (the row did not exist),
Cassandra returns just false, Scylla also returns extra null values for
each and every column of the row.
3. Cassandra allows in "IF v IN (?, ?)" to bind individual values to
UNSET_VALUE and skips them, Scylla treats this as an error. Refs #13659.
4. When there are static columns, Scylla's LWT response returns the static
column first, Cassandra returns the modified column first. Since both
also say which columns they return, neither is more correct than the other,
and normally users will address specific columns by name, not by position.
5. docs/kb/lwt-differences.rst explains that "the returned result set
contains an old row for every conditional statement in the batch".
Beyond this difference, non-conditional updates in the batch will
also get a row in Scylla's result. Refs #27955.
6. For batch statements, ScyllaDB allows mixing `IF EXISTS`, `IF NOT EXISTS`,
and other conditions for the same row. Cassandra doesn't, so checks that
these combinations are not allowed were commented out.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes scylladb/scylladb#27961
This PR marks system_replicated_keys as a system keyspace.
It was missing when the keyspace was added.
A side effect of that is that metrics that are not supposed to be reported are.
Fixes #27903
Closes scylladb/scylladb#27954
* github.com:scylladb/scylladb:
distributed_loader: system_replicated_keys as system keyspace
replicated_key_provider: make KSNAME public
In storage_service::raft_topology_cmd_handler we pass a lambda
wrapped in coroutine::lambda to a function that creates streaming_task_impl.
The lambda is kept in streaming_task_impl that invokes it in its run
method.
The lambda captures may be destroyed before the lambda is called, leading
to a use-after-free.
Do not wrap the lambda passed to streaming_task_impl in coroutine::lambda,
so that the lambda's lifetime is dissociated from the calling statement.
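For context, a sketch of why the wrapping was unsafe here (simplified; coroutine::lambda only references the closure, it does not copy it):
```
#include <seastar/core/coroutine.hh>
#include <seastar/core/future.hh>

using namespace seastar;

future<int> safe_use(int a) {
    // OK: the wrapped lambda is invoked and awaited within the same
    // statement that creates it, so the closure is still alive.
    co_return co_await coroutine::lambda([&a]() -> future<int> {
        co_return a + 1;
    })();
}

// Unsafe shape (what the handler did): storing the coroutine::lambda result
// in a task object and invoking it later from run(), after the temporary
// closure it references has been destroyed.
```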
Fixes: https://github.com/scylladb/scylladb/issues/28200.
Closes scylladb/scylladb#28201
Currently, if an rf change request is paused, it immediately changes
system_schema.keyspaces to use a rack list for this keyspace.
If the request is aborted, the co-location might not be finished.
Hence, we can end up with an inconsistent schema and tablet replica state.
Update the system_schema.keyspaces only after the co-location is done (and
not when it's started).
Fixes: https://github.com/scylladb/scylladb/issues/28167
No backport needed; changes that introduced a bug are only on master
Closes scylladb/scylladb#28168
* github.com:scylladb/scylladb:
service: fin indentation
test: add test_numeric_rf_to_rack_list_conversion_abort
service: tasks: fix type of global_topology_request_virtual_task
service: do not change the schema while pausing the rf change
Refs #27429
Re-implement the dtest of the same name as a scylla pytest, using a python-level network proxy instead of tcpdump etc.,
both to avoid sudo and to ensure we don't race.
It juggles different listen_address and broadcast_address values to insert a proxy measuring RPC traffic.
Note: the measuring relies on python network IO not splitting data chunks, since we don't really have a packet-level view of the connections.
Note that a scylla change is required to make the ip address magic work, otherwise the topology mechanism gets
confused. This should maybe be looked into more at some point, since we should be more resilient against various services in scylla binding to different addresses.
When this test is merged, we can drop the flaky test from dtest. And hope no new flakiness comes from this one...
Closes scylladb/scylladb#28133
* github.com:scylladb/scylladb:
test/cluster/test_internode_compression: Transpose test from dtest
gossiper/main: Extend special treatment of node ID resolve for rpc_address
All the tests under test/cqlpy/cassandra_tests/ were translated from
Cassandra's unit tests, originally written in Java, into our own test
framework, and accordingly carry a clear mention of their origin and
original license.
However, we did modify these original tests - even if the modification
was slight and mostly straightforward. Therefore I was asked to also
mention our own copyright (and license) for these modifications.
So this patch adds to every file in test/cqlpy/cassandra_tests/ text like:
# Modifications: Copyright 2026-present ScyllaDB
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
with the appropriate year instead of 2026.
Fixes #28215
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes scylladb/scylladb#28216
Many tests want to assume that the group0 leader runs on a particular
server, typically the first server in the list.
And they cannot be easily made to work with an arbitrary leader, because
they set up a particular topology and then stop particular nodes, and
want to assume the leader is stable. They open the leader's log and
expect things to appear in that log.
It's much easier to ensure the leader than to prepare tests to
handle failovers.
In case of decommission, it's not desirable because it's less urgent.
In case of removenode, it leads to failure of removenode operation
because scheduled co-locating migration will fail if the destination
is on the excluded node, and this failure will be interpreted as drain
failure and coordinator will cancel the request.
Not a problem before "parallel decommission" because this failure is
only a streaming failure, not a barrier failure, so exception doesn't
escape into the catch clause in transition stage handler, and the
migration is simply rolled back. Once draining happens in the tablet
migration track, streaming failure will be interpreted as drain
failure and cancel the request.
Reaching critical disk utilization on the destination means the draining
either caused it, or at least works against relieving it. So it's
better to cancel those requests. In case of decommission, if critical
disk utilization was caused by it due to not enough capacity, aborting
the decommission will bring capacity back to the system and rebalancing
will relieve the critical disk utilization.
In case of decommission, it's not desirable because it's less
urgent.
In case of removenode, it leads to failure of removenode operation
because scheduled co-locating migration will fail if the destination
is on the excluded node, and this failure will be interpreted as drain
failure and coordinator will cancel the request.
Not a problem before "parallel decommission" because this failure is
only a streaming failure, not a barrier failure, so exception doesn't
escape into the catch clause in transition stage handler, and the
migration is simply rolled back. Once draining happens in the tablet
migration track, streaming failure will be interpreted as drain
failure and cancel the request.
We want to be able to cancel decommission when it's still in the
tablet draining phase. Such a request is in a pending and paused
state, and can be safely canceled. We set the node's "draining" flag
back to false.
Allows other topology operations to execute while tablets are being
drained on decommission. In particular, bootstrap on scale-out. This
is important for elasticity.
Allows multiple decommission/removenode to happen in parallel, which
is important for efficiency.
Flow of decommission/removenode request:
1) pending and paused, has tablet replicas on target node.
Tablet scheduler will start draining tablets.
2) No tablets on target node, request is pending but not paused
3) Request is scheduled, node is in transition
4) Request is done
Nodes are considered draining as soon as there is a leave or remove
request on them. If there are tablet replicas present on the target
node, the request is in a paused state and will not be picked by
topology coordinator. The paused state is computed from topology state
automatically on reload.
When request is not paused, its execution starts in
write_both_read_old state. The old tablet_draining state is not
entered (it's deprecated now).
Tablet load balancing will yield the state machine as soon as some
request is no longer paused and ready to be scheduled, based on
standard preemption mechanics.
The test case test_explicit_tablet_movement_during_decommission is
removed. It verifies that tablet move API works during tablet draining
transition. After this PR, we no longer enter this transition, so the
test doesn't work. It loses its purpose, because movement during
normal tablet balancing is not special and tested elsewhere.
They are being drained of tablet replicas, tablet scheduler works to
move replicas away from such nodes. This state is set at the
beginning of decommission and removenode operations.
After parallel tablet draining, the validation at the time the request
starts executing is too late; tablets will already be drained.
This trips tests which expect validation failure, but get tablet
draining failure instead.
Also, in case of decommission, it's a waste to go through draining
only to discover that the operation has to be rolled back due to
validation.
So avoid submitting a request altogether if it's invalid.
The validation at request execution start remains, for extra safety.
validate_removing_node() was extracted out of topology_coordinator,
so that it can be called by storage_service on non-coordinator.
Some tests need adjusting for the fact that after failed removenode
the node may still not be marked as excluded, so we need to explicitly
exclude it or add to the list of ignored nodes in the next removenode
operation.
Since the Vector Store service filtering API has been implemented (scylladb/vector-store#334), the Scylla-side part needs implementing.
This patch implements parsing of `statement_restrictions` into Vector Store filtering API compatible JSON objects.
Those objects should be added to ANN query vector POST requests as `filter` object.
After this patch, the subset of all operations ([Vector Search Filtering Milestone 1](https://scylladb.atlassian.net/wiki/spaces/RND/pages/156729450/Vector+Search+Filtering+Design+Document#Milestone-1)) happy path should be completed, allowing users to filter on primary key columns with single column `=` and `IN` or multiple column `()=()` and `() IN ()`.
The restrictions for other operations should be implemented in a PR on Vector Store service side.
---
This PR implements parsing the `statement_restrictions` into Vector Store filtering API compatible JSON objects.
The JSON objects are created and used in ANN vector queries with filtering.
It closes the Scylla side implementation of Vector Search filtering milestone 1.
Unit tests for `statement_restrictions` parsing are added. Integration tests will be added on Vector Store service side PR.
---
Fixes: SCYLLADB-249
New feature, should land into 2026.1
Closes scylladb/scylladb#28109
* github.com:scylladb/scylladb:
docs: update documentation on filtering with vector queries
test/vector_search: add test for filtered ANN with VS mock
test/vector_search: add restriction to JSON conversion unit tests
vector_search: cql: construct and use filter in ANN vector queries
select_statement: do not require post query ordering for vector queries
vector_search: add `statement_restrictions` to JSON parsing
seastar dd46b6f..e00f1513
```
e00f1513 Merge 'net: Add DNS TTL to the net::hostent' from Ernest Zaslavsky
8a69e1f4 net: extract common implementation of inet_address::find_all
cb469fd1 net: deprecate the addr_list in hostent
1d59c0ca net: expose DNS TTL via net::hostent
3c6d919f http: add virtual close() to connection_factory
bbd0001a Revert "net: expose DNS TTL via net::hostent"
```
Closes scylladb/scylladb#28147
Workload prioritization was added in scylladb/scylladb#22031.
The functionality of updating service levels was implemented as
a lambda coroutine, leaving room for the lambda coroutine fiasco.
The problem was noticed and addressed in scylladb/scylladb#26404.
There are currently three functions that call switch_tenant:
- update_user_scheduling_group_v1 and update_user_scheduling_group_v2
use the deducing this (this auto self) to ensure the proper
lifecycle of the lambda capture.
- update_control_connection_scheduling_group doesn’t use the deducing
this, but the lambda captures only `this`, which is used before
the first possible coroutine preemption. Therefore, it doesn’t seem
that any memory corruption or undefined behavior is possible here.
Nevertheless, it seems better to start using the deducing this in
update_control_connection_scheduling_group as well, to avoid problems
in the future if someone modifies the code and forgets to add it.
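A minimal sketch of the deducing-this pattern (illustrative, requires C++23; not the actual service code):
```
#include <seastar/core/coroutine.hh>
#include <seastar/core/future.hh>
#include <seastar/coroutine/maybe_yield.hh>

using namespace seastar;

future<int> update_sketch(int tenant) {
    // The closure is passed by value as the explicit object parameter
    // `self`, so its captures live in the coroutine frame and survive
    // suspension -- unlike a plain lambda coroutine, whose captures die
    // with the temporary closure object.
    return [tenant](this auto self) -> future<int> {
        co_await coroutine::maybe_yield(); // suspension point
        co_return tenant;                  // capture accessed via self: still valid
    }();
}
```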
Fixes: SCYLLADB-284
Closes scylladb/scylladb#28158
The `make_key` lambda erroneously allocates a fixed 8-byte buffer
(`sizeof(s.size())`) for variable-length strings, potentially causing
uninitialized bytes to be included. If such bytes exist and they are
not valid UTF-8 characters, deserialization fails:
```
ERROR 2026-01-16 08:18:26,062 [shard 0:main] testlog - snapshot_list_contains_dropped_tables: cql env callback failed, error: exceptions::invalid_request_exception (Exception while binding column p1: marshaling error: Validation failed - non-UTF8 character in a UTF8 string, at byte offset 7)
```
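A distilled illustration of the bug class (stand-in code, not the actual lambda):
```
#include <algorithm>
#include <cstring>
#include <memory>
#include <string>

std::unique_ptr<char[]> make_key_sketch(const std::string& s, size_t& out_len) {
    out_len = sizeof(s.size());                   // bug: sizeof(size_t) == 8 bytes,
                                                  // regardless of the string length
    auto buf = std::make_unique<char[]>(out_len); // longer strings get truncated,
    std::memcpy(buf.get(), s.data(),              // shorter ones are padded with
                std::min(out_len, s.size()));     // bytes that are not part of s
    return buf;
    // The fix is simply `out_len = s.size();` so the buffer matches the string.
}
```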
Fixes #28195.
Signed-off-by: Nikos Dragazis <nikolaos.dragazis@scylladb.com>
Closes scylladb/scylladb#28197
Continuing the read once it is aborted (e.g. due to timeout) is a waste
of resources, as the produced results will be discarded.
Poll the permit's abort exception in the memtable and cache reader's
fill_buffer(). This results in one poll per buffer filled (8KB of data).
We already have a similar poll for sstable readers, as disk reads are
usually much heavier and therefore it is more important to stop them
ASAP after abort. Cache and memtable reads are usually quick, but not
always, hence it is important to also have polling in the cache and
memtable readers.
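A sketch of the shape of the change (the permit type here is a stand-in for the real reader permit):
```
#include <seastar/core/coroutine.hh>
#include <seastar/core/future.hh>
#include <exception>

using namespace seastar;

struct permit_sketch {
    std::exception_ptr abort_ex; // set when the read is aborted (e.g. timeout)
    void check_abort() const {   // stand-in for the permit's abort-polling helper
        if (abort_ex) {
            std::rethrow_exception(abort_ex);
        }
    }
};

future<> fill_buffer_sketch(permit_sketch& permit) {
    permit.check_abort(); // one poll per ~8KB buffer: stop early instead of
                          // producing results that would only be discarded
    // ... produce up to 8KB of fragments into the buffer, as before ...
    co_return;
}
```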
Refs: #11469
Fixes: #28148
Closes scylladb/scylladb#28149
0ede8d154b introduced the dev doc for size
based load balancing, but also added spelling errors.
This PR fixes these errors.
Closes scylladb/scylladb#28196
Currently, the type of global_topology_request_virtual_task isn't
taken out of the std::variant before printing, which results in
a task of type variant(actual_type).
Retrieve the type from the variant before printing it as the task type.
Currently, if an rf change request is paused, it immediately changes
system_schema.keyspaces to use a rack list for this keyspace.
If the request is aborted, the co-location might not be finished.
Hence, we can end up with an inconsistent schema and tablet replica state.
Update the system_schema.keyspaces only after the co-location is done (and
not when it's started).
We switched to size-based load balancing, which has stricter
requirements for load stats: we now need not only per-node
stats, but also per-tablet stats.
Bootstrapping a node triggers stats refresh, but allocating tablets on
table creation didn't. So after creating a table, load balancer
couldn't make progress for up to 60s (stats refresh period).
This makes tests take longer, and can even cause failures if tests are
using a low-enough timeout.
Fixes https://github.com/scylladb/scylladb/issues/27921
No backport, because only master is vulnerable (size-based load balancing).
Closes scylladb/scylladb#27926
* https://github.com/scylladb/scylladb:
test: cluster: Add reproducer for missed notification in topology coordinator
topology_coordinator: Wake up the state machine after stats refresh
topology_coordinator: Move tablet_load_stats_refresh_before_rebalancing injection earlier
topology_coordinator: Fix potential missed notification
topology_coordinator: Refresh load stats after table is created or altered
tablets: Do a group0 read barrier on tablet load stats refresh
topology_coordinator: Ensure stats are refreshed in the gossip scheduling group
test: Use ManagerClient.{disable,enable}_tablet_balancing()
test: Add missing calls to disable_tablet_balancing() in tests which use move_tablet() API
test: pylib: Introduce ManagerClient.{disable,enable}_tablet_balancing()
Add a description of available filtering options with ANN vector queries.
Provide an example of such query and a reference to `WHERE` clause restrictions.
Add `filter` option in `ann()` function to write the filter JSON
object as the POST request in ANN vector queries.
Adjust existing `vector_store_client_test` tests accordingly.
As there is only one `ORDER BY` clause with `ANN OF` ordering supported
in ANN vector queries, there is no need to require post query ordering
for the ANN vector queries. The standard ordering is not allowed here.
In fact the ordering is done on the Vector Store service side within
the ANN search, so that the returned primary keys are already sorted
accordingly.
If left unchanged, the filtering with `IN` clauses would cause
a `bad_function_call` server error as the filtering with `IN` clauses
require the post query ordering in a standard case.
Such rebuild has no read_from replica, but we know the tablet size will be 0.
If we don't, stats will be incomplete until the next refresh.
This is important for test cases which do removenode or replace while
all replicas are down. So for example test_replace from
test_tablets_removenode.py, which uses RF=1 and replaces a node.
Without this, the test waits for 60s needlessly after the first round
of rebuilding migrations before scheduling more migrations. This can
cause the test to time out.
Fixes #28115
Closes scylladb/scylladb#28121
Most likely 817fdad uncovered the fact that our choice of primary replica was resonating with tablet allocation: we were ending up picking the same replica as primary within a scope instead of rotating primaryship among all replicas in the scope.
This created situations where, for instance, restoring into 9 nodes with primary_replica_only=true would put all data into 3 nodes, leaving the other 6 unused. The balancing of the dataset was then performed by the subsequent repair step.
This PR fixes this by changing the formula for picking the primary replica out of the set of eligible replicas within the passed scope.
The PR also extends the testing scenarios in `test_backup.py` so we get to run restore for a set of topologies, for all combinations of scope, primary_replica_only and min_tablet_counts.
Most of the work was done by @bhalevy [here](https://github.com/scylladb/scylladb/compare/master...bhalevy:scylla:load-balance-primary-replica); this PR just split it up and did touchups here and there.
Fixes #27281
Closes scylladb/scylladb#27397
* github.com:scylladb/scylladb:
test: reduce dataset and number of test cases or debug builds
test: bump repair timeout up, it's sometimes not enough in CI
test: refactor test_refresh.py to match test_restore_with_streaming_scopes.
test: extend test_restore_with_streaming_scopes
test: Adjust test_restore_primary_replica_different_dc_scope_all
test: Refactor restoring code in test_backup to match SM pattern
test: add check_mutation_replicas calls after fresh creation of dataset
test: extend create_dataset to accept consistency_level
test: refactor check_mutation_replicas so it's more readable
test: make create_dataset async and refactor so it's configurable
test: use defaultdict in collect_mutations
test: add log marks to facilitate reusing server for restore
locator: tablets: Distribute data evenly among primary replicas during restore
To configure S3 storage, one needs to do
```
object_storage_endpoints:
- name: s3.us-east-1.amazonaws.com
port: 443
https: true
aws_region: us-east-1
```
and for GCS it's
```
object_storage_endpoints:
- name: https://storage.googleapis.com:443
type: gs
credentials_file: <gcp account credentials json file>
```
This PR updates the S3 part to look like
```
object_storage_endpoints:
- name: https://s3.us-east-1.amazonaws.com:443
aws_region: us-east-1
```
fixes: #26570
This is the 2nd attempt; the previous one (#27360) was reverted because it always reported endpoint configs in the new format via API and CQL, even if the endpoint was configured the old way. This "broke" scylla manager and some dtests. This version has that bug fixed, and endpoints are reported in the same format they were configured with.
About the correctness of the changes:
No modifications to existing tests are made here, so the old format is respected correctly (as far as it's covered by tests). To prove the new format works, the test_get_object_store_endpoints test is extended to validate both options. Some preparations for this test come on their own with PR #28111, to show that they are valid and pass before changing the core code.
Enhancing the way configuration is made; likely no need to backport.
Closes scylladb/scylladb#28112
* github.com:scylladb/scylladb:
test: Validate S3 endpoints new format works
docs: Update docs according to new endpoints config option format
object_storage: Create s3 client with "extended" endpoint name
s3/storage: Tune config updating
sstable: Shuffle args for s3_client_wrapper
test: Rename badconf variable into objconf
test: Split the object_store/test_get_object_store_endpoints test
When a counter write times out (due to rpc::timeout_error or timed_out_error),
the code was throwing mutation_write_timeout_exception but not marking the
write_timeouts metric. This resulted in counter write timeouts not being
counted in the scylla_storage_proxy_coordinator_write_timeouts metric.
Regular writes go through mutate_internal -> mutate_end, which catches
mutation_write_timeout_exception and marks the metric. However, counter
writes use a separate code path (mutate_counters) that has its own
exception handling but was missing the metric update.
This fix adds get_stats().write_timeouts.mark() before throwing the
timeout exception in the counter write path, consistent with how the
CAS path handles cas_write_timeouts.
Refs: https://scylladb.atlassian.net/browse/SCYLLADB-245
Signed-off-by: Yaniv Kaul <yaniv.kaul@scylladb.com>
Closes scylladb/scylladb#28019
A replaced node may have a pending operation on it. The replace operation
will move the node into the 'left' state and the request will never be
completed. Moreover, the code does not expect a left node to have a
request: it will try to process the request and will crash because the
node for the request will not be found.
The patch checks if the replaced node has a pending request and completes
it with failure. It also changes the topology loading code to skip requests
for nodes that are in the left state. This is not strictly needed, but
makes the code more robust.
Fixes #27990
Closes scylladb/scylladb#28009
Refs #22733.
Adds a runtime warning and docs noting that the replicated provider is deprecated and will be removed.
Fixes #27292
Closes scylladb/scylladb#27270
* github.com:scylladb/scylladb:
docs::encryption: Add warning that replicated provider is deprecated
ent::encryption: Switch default key provider from replicated to local
replicated_key_provider: Add deprecation warning on usage
Refs #27429
re-implement the dtest of the same name as a scylla pytest, using
a python-level network proxy instead of tcpdump etc., both to avoid
sudo and to ensure we don't race.
v2:
* Included positive test (mode=all)
`test_schema_versioning_with_recovery` is currently flaky. It performs
a write with CL=ALL and then checks if the schema version is the same on
all nodes by calling `verify_table_versions_synced`. All nodes are expected
to sync their schema before handling the replica write. The node in
RECOVERY mode should do it through a schema pull, and other nodes should do
it through a group 0 read barrier.
The problem is in `verify_local_schema_versions_synced` that compares the
schema versions in `system.local`. The node in RECOVERY mode updates the
schema version in `system.local` after it acknowledges the replica write
as completed. Hence, the check can fail.
We fix the problem by making the function wait until the schema versions
match.
Note that RECOVERY mode is about to be retired together with the whole
gossip-based topology in 2026.2. So, this test is about to be deleted.
However, we still want to fix it, so that it doesn't bother us in older
branches.
Fixes #23803
Closes scylladb/scylladb#28114
Problem
-------
Secondary indexes are implemented via materialized views under the
hood. The way an index behaves is determined by the configuration
of the view. Currently, it can be modified by performing the CQL
statement `ALTER MATERIALIZED VIEW` on it. However, that raises some
concerns.
Consider, for instance, the following scenario:
1. The user creates a secondary index on a table.
2. In parallel, the user performs writes to the base table.
3. The user modifies the underlying materialized view, e.g. by setting
the `synchronous_updates` to `true` [1].
Some of the writes that happened before step 3 used the default value
of the property (which is `false`). That had an actual consequence
on what happened later on: the view updates were performed
asynchronously. Only after step 3 had finished did it change.
Unfortunately, as of now, there is no way to avoid a situation like
that. Whenever the user wants to configure a secondary index they're
creating, they need to do it in another schema change. Since it's
not always possible to control how the database is manipulated in
the meantime, it leads to problems like the one described.
That's not all, though. The fact that it's not possible to configure
secondary indexes is inconsistent with other schema entities. When
it comes to tables or materialized views, the user always have a means
to set some or even all of the properties during their creation.
Solution
--------
The solution to this problem is extending the `CREATE INDEX` CQL
statement by view properties. The syntax is of form:
```
> CREATE INDEX <index name>
> .. ON <keyspace>.<table> (<columns>)
> .. WITH <properties>
```
where `<properties>` corresponds to both index-specific and view
properties [2, 3]. View properties can only be used with indexes
implemented with materialized views; for example, it will be impossible
to create a vector index when specifying any view property (see
examples below).
When a view property is provided, it will be applied when creating the
underlying materialized view. The behavior should be similar to how
other CQL statements responsible for creating schema entities work.
High-level implementation strategy
----------------------------------
1. Make auxiliary changes.
2. Introduce data structures representing the new set of index
properties: both index-specific and those corresponding to the
underlying view.
3. Extend `CREATE INDEX` to accept view properties.
4. Extend `DESCRIBE INDEX` and other `DESCRIBE` statements to include
view properties in their output.
User documentation is also updated at each of these steps to reflect the
corresponding changes.
Implementation considerations
-----------------------------
There are a number of schema properties that are now obsolete. They're
accepted by other CQL statements, but they have no effect. They
include:
* `index_interval`
* `replicate_on_write`
* `populate_io_cache_on_flush`
* `read_repair_chance`
* `dclocal_read_repair_chance`
If the user tries to create a secondary index specifying any of those
keywords, the statement will fail with an appropriate error (see
examples below).
Unlike materialized views, we forbid specifying the clustering order
when creating a secondary index [4]. This limitation may be lifted
later on, but it's a detail that may or may not prove troublesome. It's
better to postpone covering it to when we have a better perspective on
the consequences it would bring.
Examples
--------
Good examples
```
> CREATE INDEX idx ON ks.t (v);
> CREATE INDEX idx ON ks.t (v) WITH comment = 'ok view property';
> CREATE INDEX idx ON ks.t (v)
.. WITH comment = 'multiple view properties are ok'
.. AND synchronous_updates = true;
> CREATE INDEX idx ON ks.t (v)
.. WITH comment = 'default value ok'
.. AND synchronous_updates = false;
```
Bad examples
```
> CREATE INDEX idx ON ks.t (v) WITH replicate_on_write = true;
SyntaxException: Unknown property 'replicate_on_write'
> CREATE INDEX idx ON ks.t (v)
.. WITH OPTIONS = {'option1': 'value1'}
.. AND comment = 'some text';
InvalidRequest: Error from server: code=2200 [Invalid query]
message="Cannot specify options for a non-CUSTOM index"
> CREATE CUSTOM INDEX idx ON ks.t (v)
.. WITH OPTIONS = {'option1': 'value1'}
.. AND comment = 'some text';
InvalidRequest: Error from server: code=2200 [Invalid query]
message="CUSTOM index requires specifying the index class"
> CREATE CUSTOM INDEX idx ON ks.t (v)
.. USING 'vector_index'
.. WITH OPTIONS = {'option1': 'value1'}
.. AND comment = 'some text';
InvalidRequest: Error from server: code=2200 [Invalid query]
message="You cannot use view properties with a vector index"
> CREATE INDEX idx ON ks.t (v) WITH CLUSTERING ORDER BY (v ASC);
InvalidRequest: Error from server: code=2200 [Invalid query]
message="Indexes do not allow for specifying the clustering order"
```
and so on. For more examples, see the relevant tests.
References:
[1] https://docs.scylladb.com/manual/branch-2025.4/cql/cql-extensions.html#synchronous-materialized-views
[2] https://docs.scylladb.com/manual/branch-2025.4/cql/secondary-indexes.html#create-index
[3] https://docs.scylladb.com/manual/branch-2025.4/cql/mv.html#mv-options
[4] https://docs.scylladb.com/manual/branch-2025.4/cql/dml/select.html#ordering-clause
Fixes scylladb/scylladb#16454
Backport: not needed. This is an enhancement.
Closes scylladb/scylladb#24977
* github.com:scylladb/scylladb:
cql3: Extend DESC INDEX by view properties
cql3: Forbid using CLUSTERING ORDER BY when creating index
cql3: Extend CREATE INDEX by MV properties
cql3/statements/create_index_statement: Allow for view options
cql3/statements/create_index_statement: Rename member
cql3/statements/index_prop_defs: Re-introduce index_prop_defs
cql3/statements/property_definitions: Add extract_property()
cql3/statements/index_prop_defs.cc: Add namespace
cql3/statements/index_prop_defs.hh: Rename type
cql3/statements/view_prop_defs.cc: Move validation logic into file
cql3/statements: Introduce view_prop_defs.{hh,cc}
cql3/statements/create_view_statement.cc: Move validation of ID
schema/schema.hh: Do not include index_prop_defs.hh
A data_sink that stores buffers into an in-memory collection has
appeared in seastar recently. Scylla has a similar thing that uses
memory_data_sink_buffer as a container, so it's possible to drop the
data_sink_impl itself in favor of the seastar implementation.
For that to work, there should be an append_buffers() overload for the
aforementioned container. For a nice implementation, the container, in
turn, needs a push_back() method and a value_type trait. The method
already exists but is called put(), so just rename it. There's one more
user of this method in the S3 client, and it can enjoy the added
append_buffers() helper.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Closes scylladb/scylladb#28124
The instructions for building optimized clang neglected to mention
that the clang version to be built must be specified. Correct that.
Closes scylladb/scylladb#28135
fmt::localtime() is now deprecated, users should migrate to equivalents
from the standard libraries.
std::localtime is not thread safe, so a local wrapper is introduced,
based on the thread-safe localtime_r() (from libc).
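A minimal sketch of such a wrapper (assuming the POSIX localtime_r mentioned above):
```
#include <ctime>

// Thread-safe alternative to std::localtime / fmt::localtime: localtime_r
// writes into caller-provided storage instead of a shared static buffer.
inline std::tm local_time(std::time_t t) {
    std::tm tm{};
    localtime_r(&t, &tm);
    return tm;
}
```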
Closes scylladb/scylladb#27821
In PR 5b6570be52 we introduced the config option
`sstable_compression_user_table_options` to allow adjusting the default
compression settings for user tables. However, the new option was hooked
into the CQL layer and applied only to CQL base tables, not to the whole
spectrum of user tables: CQL auxiliary tables (materialized views,
secondary indexes, CDC log tables), Alternator base tables, Alternator
auxiliary tables (GSIs, LSIs, Streams).
Fix this by moving the logic into the `schema_builder` via a schema
initializer. This ensures that the default compression settings are
applied uniformly regardless of how the table is created, while also
keeping the logic in a central place.
Register the initializer at startup in all executables where schemas are
being used (`scylla_main()`, `scylla_sstable_main()`, `cql_test_env`).
Finally, remove the ad-hoc logic from `create_table_statement`
(redundant as of this patch), remove the xfail markers from the relevant
tests and adjust `test_describe_cdc_log_table_create_statement` to
expect LZ4WithDicts as the default compressor.
Fixes #26914.
Signed-off-by: Nikos Dragazis <nikolaos.dragazis@scylladb.com>
Extend the `static_configurator` mechanism to support initialization of
arbitrary schema properties, not only static ones, by passing a
`schema_builder` reference to the configurator interface.
As part of this change, rename `static_configurator` to
`schema_initializer` to better reflect its broader responsibility.
Add a checkpoint/restore mechanism to allow de-registering an
initializer (useful for testing; will be used in the next patch).
Signed-off-by: Nikos Dragazis <nikolaos.dragazis@scylladb.com>
Schemas maintain a set of so-called "static properties". These are not
user-visible schema properties; they are internal values carried by
in-memory `schema` objects for convenience (349bc1a9b6,
https://github.com/scylladb/scylladb/pull/13170#issuecomment-1469848086).
Currently, the initialization of these properties happens when a
`schema_builder` builds a schema (`schema_builder::build()`), by
invoking all registered "static configurators".
This patch moves the initialization of static properties into the
`schema_builder` constructor. With this change, the builder initializes
the properties once, stores them in a data member, and reuses them for
all schema objects that it builds. This doesn't affect correctness as
the values produced by static configurators are "static" by
nature; they do not depend on runtime state.
In the next patch, we will replace the "static configurator" pattern
with a more general pattern that also supports initialization of regular
schema properties, not just static ones. Regular properties cannot be
initialized in `build()` because users may have already explicitly set
values via setters, and there is no way to distinguish between default
values and explicitly assigned ones.
Signed-off-by: Nikos Dragazis <nikolaos.dragazis@scylladb.com>
The `sstable_compression_user_table_options` config option determines
the default compression settings for user tables.
In patch 2fc812a1b9, the default value of this option was changed from
LZ4 to LZ4WithDicts and a fallback logic was introduced during startup
to temporarily revert the option to LZ4 until the dictionary compression
feature is enabled.
Replace this fallback logic with an accessor that returns the correct
settings depending on the feature flag. This is cleaner and more
consistent with the way we handle the `sstable_format` option, where the
same problem appears (see `get_preferred_sstable_version()`).
As a consequence, the configuration option must always be accessed
through this accessor. Add a comment to point this out.
Signed-off-by: Nikos Dragazis <nikolaos.dragazis@scylladb.com>
In patches 11f6a25d44 and 7b9428d8d7 we added tests to verify that
auxiliary tables for both CQL and Alternator have the same default
compression settings as their base tables. These tests do not check
where these defaults originate from; they just verify that they are
consistent.
Add some more tests to verify the actual source of the defaults, which
is expected to be the `sstable_compression_user_table_options`
from the configuration. Unlike the previous tests, these tests require
dedicated Scylla instances with custom configuration, so they must be
placed under `test/cluster/`.
Mark them as xfail-ing. The marker will be removed later in this series.
Signed-off-by: Nikos Dragazis <nikolaos.dragazis@scylladb.com>
Read timeouts are a common occurrence and they typically occur when the replica is overloaded. So throwing exceptions for read timeouts is very harmful. Be careful not to throw exceptions while propagating them up the future chain. Add a test to enforce this and detect regressions.
Fixes: scylladb/scylladb#25062
Improvement, normally not a backport candidate, but we may decide to backport if customer(s) are found to suffer from this.
Closes scylladb/scylladb#25068
* github.com:scylladb/scylladb:
reader_permit: remove check_abort()
test/boost/database_test: add test for read timeout exceptions
sstables/mx/reader: don't throw exceptions on the read-path
readers/multishard: don't throw exceptions on the read-path
replica/table: don't throw exceptions on the read-path
multishard_mutation_query: fix indentation
multishard_mutation_query: don't throw exceptions on the read-path
service/storage_proxy: don't throw exceptions on the full-scan path
cql3/query_processor: don't throw exceptions on the read-path
reader_permit: add get_abort_exception()
Refs #27429
If running with broadcast_address != listen/cql/rpc address, topology
gets confused about the varying addresses. We need to special-case
resolving both addresses as "self", i.e. extend the broadcast_address
treatment to cql_address as well.
Added export of this via gossiper for symmetry.
At the end of the test case, the framework greps logs for errors and
backtraces. The servers are still running at this point. Some test
cases enable debug-level logging. If servers manage to produce new
lines faster than the python script processes them, the grep will never
return.
Protect against this by grepping over a file snapshot.
Fixes #28086
Closes scylladb/scylladb#28088
This patch adds a new document, docs/alternator/network.md,
explaining the various mechanisms that can be used to reduce
network usage in Alternator. It explains compression of requests
and responses, header reduction, rack-aware routing, and RPC compression.
Many of these topics - especially support in the client libraries -
are work in progress, so some details are still missing in the new
document. Still, I think it is a good start that can be improved
later.
Fixes #27915.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes scylladb/scylladb#27927
Refactor streams.cc - turn `.then` calls into coroutines.
Reduces amount of clutter, lambdas and referenced variables.
Note - the code is kept at the same indentation level to ease review,
the next commit will fix this.
When this column and relevant SUPPORTED key were added, the
documentation was mistakenly put in the section about shard awareness
extension. This commit moves the documentation into a dedicated section.
I also expanded it to describe both the new column and the new SUPPORTED
key.
Extend the test_get_object_store_endpoints() test to configure S3
endpoints in full-url format and check that they are rendered properly
via API/CQL.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
For this, add the s3::client::make(endpoint, ...) overload that accepts
endpoint in proto://host:port format. Then it parses the provided url
and calls the legacy one, that accepts raw host string and config with
port, https bit, etc.
The generic object_storage_endpoint_param no longer needs to carry the
internal s3::endpoint_config, the config option parsing changes
respectively.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Don't prepare s3::endpoint_config from generic code, just pass the region
and iam_role_arn (those that can potentially change) to the callback.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Make it construct like gs_client_wrapper -- with generic endpoint param
reference and make the storage-specific casts/gets/whatever internally.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
It tests two things -- the way object storage config is represented via
API and CQL (from system.config) and that updating config affects CREATE
KEYSPACE CQL (with keyspace storage options).
It's better to split the test, as its former part is going to be
extended to validate old/new config formats (see #26570).
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
to test restoring with a different min_tablet_count
than the schema was originally created with.
Signed-off-by: Robert Bindar <robert.bindar@scylladb.com>
This patch refactors the restoring code in cluster/test_backup.py
so it better matches the way SM works.
The patch also refactors test_restore_with_streaming_scopes so as to
facilitate running restore scenarios under all supported scopes
with or w/o primary_replica_only enabled by reusing the servers
and backups for a topology. This allows us to test a lot more scenarios
without making the test impossibly slow.
split from bhalevy/load-balance-primary-replica
Signed-off-by: Robert Bindar <robert.bindar@scylladb.com>
to validate that mutation assertions are sane
split from bhalevy/load-balance-primary-replica
Signed-off-by: Robert Bindar <robert.bindar@scylladb.com>
Most likely 817fdad uncovered the fact that our choice of
primary replica was resonating with tablet allocation and we were ending up
picking the same replica as primary within a scope instead of rotating
primaryship among all replicas in the scope.
This created situations where, for instance, restoring into a 9-node cluster
with primary_replica_only=true would put all data into 3 nodes, leaving
the other 6 unused. The balancing of the dataset was performed by the
subsequent repair step.
split from bhalevy/load-balance-primary-replica
Fixes #27281
Signed-off-by: Robert Bindar <robert.bindar@scylladb.com>
Address all errors reported by CodeQL on https://github.com/scylladb/scylladb/security/quality.
This is a mixed bag, with some harmless issues, while others are severe problems which will result in the code breaking (if it is even run). I suspect some of the more severe problems were found in dead code that is not used at all -- hence nobody noticed.
Still, these issues are good to fix, so we can reduce noise in the reports and improve the maintainability of the code.
Code cleanup, no backport
Closes scylladb/scylladb#27838
* github.com:scylladb/scylladb:
pgo/pgo.py: don't mutate input params
test/pylib/coverage_utils.py: profdata_to_lcov: don't mutate defaulted param
test/cluster/dtest/tools/misc.py: add type annotations to list_to_hashed_dict()
idl-compiler.py: raise TypeError instead of raw str
test/pylib/lcov_utils.py: don't call set when iterating over it
configure.py: move away from .format(**locals())
test/cluster/object_store/conftest.py: add missing call to parent constructor
idl-compiler.py: add missing call to parent class constructor
tools/scyllatop/fake.py: pass correct number of args to _add_metric
This patch adds a second reproducer for issue #25839, which is about
scanning a secondary index which returns partial results. The new test
uses count(*) without requesting the rows themselves, but still has the
same problem of counting only part of the rows. This is the problem that
a user reported in issue #28026.
Unlike the previous test, this test works correctly on older versions
of Scylla - by using larger data, like on Cassandra - without changing
a configuration variable that did not yet exist. So with this test we
can confirm that this bug is a Scylla 5.2 regression:
test/cqlpy/run --release 5.1 test_secondary_index.py::test_short_count
passes, while
test/cqlpy/run --release 5.2 test_secondary_index.py::test_short_count
fails. It also fails on master, so the new test is marked "xfail".
Refs #25839
Refs #28026
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes scylladb/scylladb#28108
Addresses outstanding review comments from PR #22961 where SSL field
collection was refactored into generic_server::connection base class.
This patch consists of minor cosmetic enhancements for increased
readability, mainly, with some minor fixups explained in specific
commits.
Cosmetic changes, no need to backport.
Closes scylladb/scylladb#27575
* github.com:scylladb/scylladb:
test_ssl: fix indentation
generic_server: improve logging broken TLS connection
test_ssl: improve timeout and readability
alternator/server: update SSL comment
This method can cause performance regressions if used in the wrong place
-- namely if it is used to abort reads by throwing the abort exception.
Exceptions should be propagated during reads without throwing them,
otherwise they cause extra CPU load, making a bad situation worse.
Remove this method so it doesn't accidentally get more users; migrate the
remaining users to get_abort_exception().
Read timeouts shouldn't result in exceptions being thrown; exceptions should
be propagated solely via futures, otherwise they put extra strain on the
system at the worst possible time: when it is already overloaded enough
that reads have started to time out.
The test covers both single partition reads and full scans, with two
scenarios:
* timeout while the read is queued
* timeout when the read is already ongoing
If the read is aborted via the permit (due to timeout) don't throw the
abort exception, instead propagate it via the future chain.
Also, use try_catch<> instead of try ... catch to decorate
malformed_sstable_exception with the file name.
Use coroutine::try_future() to avoid exceptions taking flight and
triggering expensive stack-unwinding.
Especially bad for common exceptions like timeouts.
Use coroutine::as_future() to avoid exceptions taking flight and
triggering expensive stack-unwinding.
Especially bad for common exceptions like timeouts.
Not using coroutine::try_future(), because on the error path, the
querier has to be closed.
Use coroutine::try_future() to avoid exceptions taking flight and
triggering expensive stack-unwinding.
Especially bad for common exceptions like timeouts.
Use coroutine::try_future() to avoid exceptions taking flight and
triggering expensive stack-unwinding.
Especially bad for common exceptions like timeouts.
Use coroutine::try_future() to avoid exceptions taking flight and
triggering expensive stack-unwinding.
Especially bad for common exceptions like timeouts.
Will replace check_abort(). The latter throws an exception which is
something we want to avoid when a read is aborted, in particular when it
times out.
Also add a convenience get_abort_exception() method to mutation_reader.
We add a test that validates that indexed queries
do not throw a warning related to vector search paging
Fixes: SCYLLADB-248
Closes scylladb/scylladb#28077
We have a test in test_compressed_response.py that reproduces a bug
where in Alternator's signature checking code, if a header had multiple
consecutive spaces its signature isn't checked correctly.
This patch fixes this and that xfailing test begins to pass.
But it turns out that the handling of multiple consecutive spaces in
headers when calculating the authentication signature is just one example
of "header canonization" that the AWS Signature V4 specification requires
us to do. There are additional types of header canonization that Alternator
must do, and this patch also adds new tests in test_authorization.py for
checking *all* the types of canonization.
Fortunately, for all other types of canonizations, we already handled
them correctly - Alternator already lowercases header names, sorts them
alphabetically and removes leading and trailing spaces before calculating
the signature. So most of the new tests added pass also without this patch,
and only one of them, test_canonization_middle_whitespace, needs this
patch to pass. As usual, all the new tests also pass on DynamoDB.
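For reference, a minimal Python sketch of the canonization rules these tests exercise (this illustrates the AWS Signature V4 rules; it is not Alternator's actual C++ code):
```python
import re

def canonical_headers(headers: dict[str, str]) -> str:
    # SigV4 canonization: lowercase the header name, strip leading/trailing
    # whitespace from the value, and collapse runs of inner spaces into one;
    # the entries are then sorted by header name.
    canonical = {
        name.lower(): re.sub(r" +", " ", value.strip())
        for name, value in headers.items()
    }
    return "".join(f"{name}:{canonical[name]}\n" for name in sorted(canonical))

# A value with two consecutive spaces must be signed as if it had one --
# the middle-whitespace case fixed by this patch.
print(canonical_headers({"X-Amz-Target": "DynamoDB_20120810.Scan  now",
                         "Host": "example.com"}))
```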
Fixes#27775
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes scylladb/scylladb#28102
With the new UI, Jenkins escapes HTML tags during rendering to prevent
XSS. This will show just the link, without a custom name, as a string that can
be copied and then pasted to navigate to the failed directory.
Closes scylladb/scylladb#28062
To fix the problem, we need to remove the first, redundant definition of
test_gossiper_unreachable_endpoints (lines 19-24). The second definition
(lines 25-40) should be retained as it has more substantial test logic.
No other code changes or imports are needed, as the test logic is
preserved fully in the retained definition.
Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
Closes scylladb/scylladb#27632
Potential fix for code scanning alert no. 167: Workflow does not contain permissions
Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
Closes scylladb/scylladb#27819
Potential fix for code scanning alert no. 145: Workflow does not contain permissions
Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
Closes scylladb/scylladb#27808
Try to teach CoPi a bit about how we'd like to see it implement tests, according to this repo's best practices.
Signed-off-by: Yaniv Kaul <yaniv.kaul@scylladb.com>
Closes scylladb/scylladb#28032
Following 954f2cbd2f, which added proxy protocol v2 listeners
for CQL, we do the same for alternator. We add two optional ports
for plain and TLS-wrapped HTTP.
We test each new port, that the old ports still work, and that
mixing up a port with no proxy protocol and a connection with proxy
protocol (or the opposite) fails. The latter serves to show
that the testing strategy is valid and doesn't just pass whatever
happens. We also verify that the correct addresses (and TLS mode)
show up in system.clients.
Closes scylladb/scylladb#27889
Currently Alternator supports compressed requests in the gzip format
with "Content-Encoding: gzip". We did not support any other compression
formats.
It turns out that DynamoDB also supports the "deflate" encoding.
The "deflate" format is just a small variant of gzip and also supported
by the same zlib library that we already use, so it is very easy
to add support for it as well. So this patch adds it.
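A quick Python illustration of the two encodings (zlib.compress() emits exactly the zlib-wrapped "deflate" format; the actual request call is only sketched in a comment):
```python
import gzip
import zlib

body = b'{"TableName": "tbl"}'

gzip_body = gzip.compress(body)     # for "Content-Encoding: gzip" (already supported)
deflate_body = zlib.compress(body)  # for "Content-Encoding: deflate" (added by this patch)

# e.g., with the python "requests" library:
# requests.post(url, data=deflate_body,
#               headers={"Content-Encoding": "deflate"})
```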
Beyond compatibility with DynamoDB, another benefit of this patch is
symmetry with our response compression support (PR #27454), where
we supported both gzip and deflate compression of responses - so
we should support the same for requests.
This patch also adds tests for Content-Encoding: deflate, which pass
on DynamoDB (proving that "deflate" is indeed supported there).
On Alternator the new tests failed before this patch and pass with
this patch.
Refs #27243 (which asks to support more compression formats).
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes scylladb/scylladb#27917
This is a translation of Cassandra's CQL unit test source file
validation/operations/InsertInvalidateSizedRecordsTest.java into our
cqlpy framework.
This is one of the tests added to Cassandra as part of the vector
search work, but actually has nothing to do with vector search -
it checks what happens when key columns of different types exceed
their maximum size (64KB).
Unfortunately, each one of the tests added here *fails* on ScyllaDB,
providing more reproducers for two already known issues (which
already had plenty of reproducers...):
Refs #8627 Cleanly reject updates with indexed values where value > 64k
Refs #12247 Better error reporting for oversized keys during INSERT
One of the tests also fails on Cassandra, due to CASSANDRA-19270.
It is not clear to me how this unit test actually passed on Cassandra,
I can only guess that the Python driver somehow makes the request
differently than what the Java unit tests use to make requests to
Cassandra.
One of the tests in the original Cassandra source file I did not
translate, readingEmptyStringsForDifferentTypes, because it tests
cqlsh, not pure CQL.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes scylladb/scylladb#27944
It is considered a dangerous practice with possible unintended
side-effects, affecting later calls to the same function.
Found by CodeQL "Modification of parameter with default".
It is considered a dangerous practice as it creates a side-effect for
later calls to the same function.
Create a new variable instead and mutate that. Also remove the unused
update_known_ids parameter, which defaults to True and no caller changes
it. Passing False to this param also seems to have no effect. Instead of
trying to guess what the desired effect of passing False is and fixing
it, just remove this unused param.
Found by CodeQL "Modification of parameter with default".
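The pitfall in a nutshell (a generic illustration, not the actual code from this repo):
```python
# Dangerous: the default list is created once, at definition time, and is
# shared by every call that doesn't pass `acc` explicitly.
def collect_bad(item, acc=[]):
    acc.append(item)
    return acc

# Safe: never mutate the default; create a fresh container per call.
def collect_good(item, acc=None):
    acc = [] if acc is None else acc
    acc.append(item)
    return acc

assert collect_bad(1) == [1]
assert collect_bad(2) == [1, 2]  # state leaked from the previous call
assert collect_good(2) == [2]    # no leakage
```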
To hopefully shut up CodeQL "Iterable can be either a string or a sequence".
This change makes the code more readable anyway, so it is more than just
a gratuitous change to make some code-scanner happy.
Unlike in C++, in Python one can only throw objects which inherit from
Exception. The message complains about a wrong type, so wrap it in a
TypeError before passing it to raise.
Found by CodeQL "Illegal raise".
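A generic illustration of the fix (hypothetical names, not the actual idl-compiler.py code):
```python
def check_node_type(node, expected_type):
    if not isinstance(node, expected_type):
        # Illegal in Python 3 -- only Exception subclasses can be raised:
        #   raise "unexpected node type: %s" % type(node)
        # Correct -- wrap the message in an Exception subclass first:
        raise TypeError(f"unexpected node type: {type(node)}")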
Use f-strings instead; they are just as convenient, with the added bonus
of editors providing syntax highlighting for them.
Additionally, this shuts up CodeQL complaint about "Suspicious unused
loop iteration variable" in loops where the loop variable was passed to
format indirectly via **locals().
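A generic before/after illustration (hypothetical names):
```python
name, count = "ranges", 3

# Before: passes *all* locals to format(); opaque to editors and linters.
old = "processing {name}: {count} items".format(**locals())

# After: the referenced variables are explicit and syntax-highlighted.
new = f"processing {name}: {count} items"

assert old == new
```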
Replace manual init of parent fields.
Found by CodeQL: "Missing call to superclass `__init__` during object
initialization".
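A generic illustration of the pattern (not the actual conftest code):
```python
class Base:
    def __init__(self):
        self.ready = True  # parent-owned field

class Bad(Base):
    def __init__(self, name):
        self.name = name   # Base.__init__ never runs: `ready` is missing

class Good(Base):
    def __init__(self, name):
        super().__init__() # initialize parent fields first
        self.name = name

assert Good("x").ready
assert not hasattr(Bad("x"), "ready")
```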
The secret_key is now initialized to server.secret_key, instead of
server.access_key. This probably fixes a (benign) bug.
Since Scylla 6.0, service levels are managed by Raft group0.
This patch updates the table name used by service levels and adds a
paragraph describing service levels on Raft.
Fixes scylladb/scylladb#18177
Closes scylladb/scylladb#26556
Write the boost logs to stdout in HRF format and to a file in XML format. The XML file will be used for parsing and providing the error information in the summary section of the failure.
Fixes: https://github.com/scylladb/scylladb/issues/28045
Framework enhancements, no need to backport.
Closes scylladb/scylladb#28107
* github.com:scylladb/scylladb:
test.py: remove XML log from fail summary
test.py: fix truncated boost output to stdout file
Otherwise, the coordinator may not react to changing stats after explicit
calls to trigger_load_stats_refresh() done on node replace or table
creation, if stats take longer to refresh than it takes the
coordinator to go idle.
The periodic refresh does wake up the topology coordinator, so the
issue is not dramatic in production, but it's annoying in tests, which
take longer because of that.
Fixes #25163
Refreshing stats will signal _topo_sm.event, so do it before waiting
for the event, to avoid busy-looping in the coordinator.
This will produce lots of logs in test cases which enable debug-level
logging in the raft logger.
Refs #28086
Checking for work is not atomic, so there is room for a missed
notification, especially as notifications are not always triggered
from fibers which take the group0 guard.
Fix by subscribing for the event before checking for work.
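The pattern, sketched in Python asyncio terms purely for illustration (the actual coordinator is seastar C++):
```python
import asyncio

async def coordinator_loop(event: asyncio.Event, has_work, do_work):
    while True:
        # Subscribe first: clearing the event *before* checking for work means
        # a notification arriving between the check and the wait sets the
        # event again and cannot be lost.
        event.clear()
        if has_work():
            await do_work()
            continue
        # Checking first and only then clearing/waiting would drop any
        # notification delivered in between -- the missed-wakeup race.
        await event.wait()
```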
Fixes #27958
We switched to the size-based load balancing, which has stricter
requirements for load stats. We now need not only per-node
stats (capacity), but also per-tablet stats.
Bootstrapping a node triggers stats refresh, but allocating tablets on
table creation didn't. So after creating a table, load balancer
couldn't make progress for up to 60s (stats refresh period).
This makes tests take longer, and can even cause failures if tests are
using a low-enough timeout.
Fixes #27921
Stats refresh will be triggered on topology coordinator by events like
allocating new tablets on table creation. For refresh to be effective,
all replicas must see the new tablets, otherwise stats will be
incomplete.
If a test tries to move a tablet, it assumes the tablets are stable.
This fixes flakiness exposed by size-based load-balancing and a later
change to refresh stats sooner.
It's a global operation, so we can use any server.
It's not only convenient. The call via api.disable_tablet_balancing()
confuses people into thinking that it's a per-server operation. This leads
to a proliferation of code which does it needlessly on all servers.
Change the behavior of capturing the boost log output. With this
change boost will output its logging to stdout in HRF format and to
a tempfile in XML format. This will help with easier debugging, as all
messages will be in the output file and still in the fail summary.
This moves the responsibility for running python tests to pytest, in the same manner as was done with boost tests. From this commit, test.py is no longer responsible for running python tests and relies completely on pytest.
This is another step towards unification of test execution.
Convert the skip_mode function to a `pytest.mark`, so it can be used to annotate the whole module instead of each test explicitly.
NOTE: this is a breaking change. From this commit, several directories with tests will require a path to the file to launch the test. Affected directories
test/alternator
test/broadcast_tables
test/cql
test/cqlpy
test/rest_api
Changes only in framework, so no backport.
This PR will increase the number of tests by 30, due to how test.py and pytest discover tests. test.py counts a file as a test, and when skip is used in suite.yaml it excludes the tests from discovery completely.
pytest, on the other hand, counts each test function as a test and uses the skip_mode mark: it will still discover the tests but skip them during execution, hence the difference.
test.py output before PR:
```bash
> ./test.py --mode=release rest_api/test_compaction_task rest_api/test_task_manager --list --no-gather-metrics
```
test.py output in this PR:
```bash
> ./test.py --mode=release test/rest_api/test_compaction_task.py test/rest_api/test_task_manager.py --list
rest_api/test_compaction_task.py::test_global_major_keyspace_compaction_task.release.1
rest_api/test_compaction_task.py::test_major_keyspace_compaction_task.release.1
rest_api/test_compaction_task.py::test_cleanup_keyspace_compaction_task.release.1
rest_api/test_compaction_task.py::test_offstrategy_keyspace_compaction_task.release.1
rest_api/test_compaction_task.py::test_rewrite_sstables_keyspace_compaction_task.release.1
rest_api/test_compaction_task.py::test_reshaping_compaction_task.release.1
rest_api/test_compaction_task.py::test_resharding_compaction_task.release.1
rest_api/test_compaction_task.py::test_regular_compaction_task.release.1
rest_api/test_compaction_task.py::test_compaction_task_abort.release.1
rest_api/test_compaction_task.py::test_major_keyspace_compaction_task_async.release.1
rest_api/test_compaction_task.py::test_cleanup_keyspace_compaction_task_async.release.1
rest_api/test_compaction_task.py::test_offstrategy_keyspace_compaction_task_async.release.1
rest_api/test_compaction_task.py::test_rewrite_sstables_keyspace_compaction_task_async.release.1
rest_api/test_compaction_task.py::test_compaction_progress[major_keyspace_compaction_task_impl_run_fail].release.1
rest_api/test_compaction_task.py::test_compaction_progress[shard_major_keyspace_compaction_task_impl_run_fail].release.1
rest_api/test_compaction_task.py::test_compaction_progress[table_major_keyspace_compaction_task_impl_run_fail].release.1
rest_api/test_task_manager.py::test_task_manager_modules.release.1
rest_api/test_task_manager.py::test_task_manager_tasks.release.1
rest_api/test_task_manager.py::test_task_manager_status_running.release.1
rest_api/test_task_manager.py::test_task_manager_status_done.release.1
rest_api/test_task_manager.py::test_task_manager_status_failed.release.1
rest_api/test_task_manager.py::test_task_manager_not_abortable.release.1
rest_api/test_task_manager.py::test_task_manager_wait.release.1
rest_api/test_task_manager.py::test_task_manager_ttl.release.1
rest_api/test_task_manager.py::test_task_manager_user_ttl.release.1
rest_api/test_task_manager.py::test_task_manager_sequence_number.release.1
rest_api/test_task_manager.py::test_task_manager_recursive_status.release.1
rest_api/test_task_manager.py::test_module_not_exists.release.1
rest_api/test_task_manager.py::test_task_folding.release.1
rest_api/test_task_manager.py::test_abort_on_unregistered_task.release.1
```
Fixes: https://github.com/scylladb/scylladb/issues/27716
Closes scylladb/scylladb#26395
* github.com:scylladb/scylladb:
test.py: fix test_vector_similarity.py
docs: add directories excluded from test.py
test.py: prevent file descriptors leaking
test.py: capture print inside the test
test.py: do not print header for collection with test.py
test.py: remove not supported functionality
test.py: switch of execution of several test directories by test.py runner
test.py: integrate python tests to be executed with pytest runner
test.py: fix test/vector_search_validator to be able to run with pytest
test.py: prepare base class for migration
test.py: move environment preparation to one method
test.py: introduce new environment variable TESTPY_PREPARED_ENVIRONMENT
repair: Implement auto repair for tablet repair
This patch implements the basic auto repair support for tablet repair.
It was decided to add no per table configuration for the initial
implementation, so two scylla yaml config options are introduced to set
the default auto repair configs for all the tablet tables.
- auto_repair_enabled_default
Set true to enable auto repair for tablet tables by default. The value
will be overridden by the per keyspace or per table configuration which
is not implemented yet.
- auto_repair_threshold_default_in_seconds
Set the default time in seconds for the auto repair threshold for tablet
tables. If the time since last repair is bigger than the configured
time, the tablet is eligible for auto repair. The value will be
overridden by the per keyspace or per table configuration which is not
implemented yet.
The following metrics are added:
- auto_repair_needs_repair_nr
The number of tablets with auto repair enabled that need repair
- auto_repair_enabled_nr
The number of tablets with auto repair enabled
The metrics are useful to tell if auto repair is falling behind.
In the future, more auto repair scheduling will be added, e.g.,
scheduling based on the repaired and unrepaired sstable set size,
tombstone ratio and so on, in addition to the time based scheduling.
Fixes SCYLLADB-99
New feature. No backport.
Closes scylladb/scylladb#27534
* github.com:scylladb/scylladb:
topology_coordinator: Add metrics for tablet repair
repair: Implement auto repair for tablet repair
This PR:
- Replaces a fixed version name with the variable for the current version in the instructions for installing a non-default version with Web Installer. This will make using the installer more user-friendly.
- Removes the instruction for Open Source from the Web Installer docs.
Fixes https://github.com/scylladb/scylladb/issues/28005
Fixes https://github.com/scylladb/scylladb/issues/28079
Closes scylladb/scylladb#28046
* github.com:scylladb/scylladb:
doc: remove the instruction for Open Source from the Web Installer docs
doc: add the version variable to the Web Installer instructions
- scylla_tablet_ops_failed
Number of failed tablet {auto, user} repair
- scylla_tablet_ops_succeeded
Number of succeeded tablet {auto, user} repair
Currently auto_repair and user_repair tablet tasks are added. We can add
more tablet tasks later, e.g., rebuild, migration.
Create and drop view operations are currently performed on all shards, and their execution is not fully serialized. On slower processors this can lead to interleavings that leave stale entries in `system.scylla_views_builds_in_progress`.
A problematic sequence looks like this:
* `on_create_view()` runs on shard 0 → entries for shard 0 and shard 1 are created
* `on_drop_view()` runs on shard 0 → entry for shard 0 is removed
* `on_create_view()` runs on shard 1 → entries for shard 0 and shard 1 are created again
* `on_drop_view()` runs on shard 1 → entry for shard 1 is removed, while the shard 0 entry remains
This results in a leftover row in `system.scylla_views_builds_in_progress`, causing `view_build_test.cc` to get stuck indefinitely waiting for an eventual state, until eventually terminated by CI.
This patch fixes the issue by fully serializing all view create and drop operations through shard 0. Shard 0 becomes the single execution point and notifies the other shards to perform their work in order; requests originating on other shards are funneled through shard 0.
The new process:
- view_builder::on_create_view(...) runs only on shard 0 and kicks off dispatch_create_view(...) in the background.
- dispatch_create_view(...) (shard 0) first checks should_ignore_tablet_keyspace(...) and returns early if needed.
- dispatch_create_view(...) calls handle_seed_view_build_progress(...) on shard 0. That:
- writes the global “build progress” row across all shards via _sys_ks.register_view_for_building_for_all_shards(...).
- After seeding, dispatch_create_view(...) broadcasts to all shards with container().invoke_on_all(...).
- Each shard runs handle_create_view_local(...), which:
- waits for pending base writes/streams, flushes the base,
- resets the reader to the current token and adds the new view,
- handles errors and triggers _build_step to continue processing.
Drop view
- view_builder::on_drop_view(...) runs only on shard 0 and kicks off dispatch_drop_view(...) in the background.
- dispatch_drop_view(...) (shard 0) first checks should_ignore_tablet_keyspace(...) and returns early if needed.
- It broadcasts handle_drop_view_local(...) to all shards with invoke_on_all(...).
- Each shard runs handle_drop_view_local(...), which:
- removes the view from local build state (_base_to_build_step and _built_views) by scanning existing steps,
- ignores missing keyspace cases.
- After all shards finish local cleanup, shard 0 runs handle_drop_view_global_cleanup(...), which:
- removes global build progress, built‑view state, and view build status in system tables,
Shutdown
- drain() waits on _view_notification_sem before _sem so in‑flight dispatches finish before bookkeeping is halted.
In addition, the test is adjusted to remove the long eventual wait (596.52s / 30 iterations) and instead rely on the default wait of 17 iterations (~4.37 minutes), eliminating unnecessary delays while preserving correctness.
Fixes: https://github.com/scylladb/scylladb/issues/27898
Backport: not required as the problem happens on master
Closes scylladb/scylladb#27929
To prepare for implementation of filtering we skip validation
of where clauses in vector search queries. All queries that would
be blocked by the lack of ALLOW FILTERING will now pass through.
Fixes: VECTOR-410
Closes scylladb/scylladb#27758
We can run Alternator's tests against DynamoDB with `test/alternator/run --aws`, and our intention is that all except a few specially marked should pass on DynamoDB - indicating that the test itself is correct and checks compatibility with DynamoDB and not with some misunderstood spec.
Before this patch series, almost two dozen Alternator's tests failed on DynamoDB. This series fixes most of them.
Refs #26079 (it fixes almost all the problems but probably not all of them so let's keep the issue open for a while longer)
Closes scylladb/scylladb#27995
* github.com:scylladb/scylladb:
test/alternator: fix some expected error messages to fit DynamoDB
test/alternator: fix compressed request test on non-us-east1
test/alternator: fix test's expected error message on DynamoDB
test/alternator: mark Alternator-only test scylla_only
test/alternator: fix test on DynamoDB
test/alternator: increase wait_for_gsi() timeout
test/alternator: fix test passing a spurious parameter
These tools are deprecated and no longer shipped by ScyllaDB packages.
They no longer support the latest SSTable versions and ScyllaDB-only
features, like encryption and dictionary based compression.
Remove them from the documentation.
Closes scylladb/scylladb#27608
The interface of Jenkins has changed, and the instructions for creating
a token are out-of-date. This commit updates them.
Closes scylladb/scylladb#28054
There is a known limitation of xdist:
since it performs test discovery in each worker and then compares the result with the master, the discovered lists of tests must be identical. Sets do not guarantee order, so they should not be used for parametrized tests, because test discovery under xdist will fail.
This PR just converts a set to a dict, to eliminate the issue mentioned above.
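A minimal illustration (hypothetical parameter values; with hash randomization each worker process may iterate a set of strings in a different order, while dicts preserve insertion order):
```python
import pytest

# Bad: each xdist worker may see a different iteration order, so workers
# disagree with the master about the collected test ids and discovery fails.
MODES_SET = {"release", "debug", "dev"}

# Good: dict keys keep insertion order, so every worker collects the same ids.
MODES = dict.fromkeys(["release", "debug", "dev"])

@pytest.mark.parametrize("mode", MODES)
def test_mode_is_known(mode):
    assert mode in MODES
```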
Unused imports, unused variables and such.
Initially, there were no functional changes, just to get rid of some standard CodeQL warnings.
I've then broken the CI, as apparently there's an install-time(!?) Python script creation for the sole purpose of product
naming. I changed it - we have it in etcdir, as SCYLLA-PRODUCT-FILE.
So I added (copied from a different script) a get_product() helper function in scylla_util.py and used it instead.
While at it, I also fixed the too-broad import from scylla_util, which 'forced' me to also fix other specific imports (such as shutil).
Improvement - no need to backport.
Signed-off-by: Yaniv Kaul <yaniv.kaul@scylladb.com>
Closes scylladb/scylladb#27883
We currently do it only for a bootstrapping node, which is a bug. The
missing IP can cause an internal error, for example, in the following
scenario:
- replace fails during streaming,
- all live nodes are shut down before the rollback of replace completes,
- all live nodes are restarted,
- live nodes start hitting internal error in all operations that
require IP of the replacing node (like client requests or REST API
requests coming from nodetool).
We fix the bug here, but we do it separately for replace with different
IP and replace with the same IP.
For replace with different IP, we persist the IP -> host ID mapping
in `system.peers` just like for bootstrap. That's necessary, since there
is no other way to determine IP of the replacing node on restart.
For replace with the same IP, we can't do the same. This would require
deleting the row corresponding to the node being replaced from
`system.peers`. That's fine in theory, as that node is permanently
banned, so its IP shouldn't be needed. Unfortunately, we have many
places in the code where we assume that IP of a topology member is always
present in the address map or that a topology member is always present in
the gossiper endpoint set. Examples of such places:
- nodetool operations,
- REST API endpoints,
- `db::hints::manager::store_hint`,
- `group0_voter_handler::update_nodes`.
We could fix all those places and verify that drivers work properly when
they see a node in the token metadata, but not in `system.peers`.
However, that would be too risky to backport.
We take a different approach. We recover IP of the replacing node on
restart based on the state of the topology state machine and
`system.peers` just after loading `system.peers`.
We rely on the fact that group 0 is set up at this point. The only case
where this assumption is incorrect is a restart in the Raft-based
recovery procedure. However, hitting this problem then seems improbable,
and even if it happens, we can restart the node again after ensuring
that no client and REST API requests come before replace is rolled back
on the new topology coordinator. Hence, it's not worth complicating the
fix (by e.g. looking at the persistent topology state instead of the
in-memory state machine).
Fixes #28057
Backport this PR to all branches as it fixes a problematic bug.
Closes scylladb/scylladb#27435
* github.com:scylladb/scylladb:
gossiper: add_saved_endpoint: make generations of excluded nodes negative
test: introduce test_full_shutdown_during_replace
utils: error_injection: allow aborting wait_for_message
raft topology: preserve IP -> ID mapping of a replacing node on restart
Fixes #27992
When doing a commit log oversized allocation, we lock out all other writers by grabbing
the _request_controller semaphore fully (max capacity).
We thereafter assert that the semaphore is in fact zero. However, due to how the bookkeeping
works here, the semaphore can in fact become negative (some paths will not
actually wait for the semaphore, because this could deadlock).
Thus, if, after we grab the semaphore and execution actually returns to us (task schedule),
new_buffer via segment::allocate is called (due to a non-fully-full segment), we might
in fact grab the segment overhead from zero, resulting in a negative semaphore.
The same problem applies later when we try to sanity check the return of our permits.
The fix is trivial: just accept less-than-zero values, and take the same possible less-than-zero value
into account in the exit check (returning units).
Added whitebox (special callback interface for sync) unit test that provokes/creates
the race condition explicitly (and reliably).
Closes scylladb/scylladb#27998
The current code:
```
try:
cql.execute(f"INSERT INTO {cf} (pk, t) VALUES (-1, 'x')", host=host[0], execution_profile=cl_one_profile).result()
except Exception:
pass
```
contains a typo: `host=host[0]`, which throws an exception because a Host
object is not subscriptable. The test does not fail because the except
block is too broad and suppresses all exceptions.
Fixing the typo alone is insufficient. The write still succeeds because
the remaining nodes are UP and the query uses CL=ONE, so no failure
should be expected.
Another source of flakiness is data verification:
```
SELECT * FROM {cf} WHERE pk = 0;
```
Even when a coordinator is explicitly provided, using CL=ONE does not
guarantee a local read. The coordinator may forward the read request to
another replica, causing the verification to fail nondeterministically.
This patch rewrites the tests to address these issues:
- Fix the typo: `host[0]` to `hosts[0]`
- Verify data using `MUTATION_FRAGMENTS({cf})` which guarantees a local
read on the coordinator node
- Reconnect the driver after node restart (see the sketch below)
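A hedged sketch of the resulting pattern, reusing the names (`cf`, `hosts`, `cl_one_profile`) from the snippet above:
```python
# The typo fixed: hosts[0], not host[0]; and no blanket `except` around it.
cql.execute(f"INSERT INTO {cf} (pk, t) VALUES (-1, 'x')",
            host=hosts[0], execution_profile=cl_one_profile)

# MUTATION_FRAGMENTS() reads the coordinator's own sstables/memtables, so
# unlike a CL=ONE SELECT it cannot be silently forwarded to another replica.
rows = list(cql.execute(f"SELECT * FROM MUTATION_FRAGMENTS({cf}) WHERE pk = 0",
                        host=hosts[0]))
assert rows
```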
Fixes https://github.com/scylladb/scylladb/issues/27933
Closes scylladb/scylladb#27934
With the migration to pytest, file descriptors would stay open for the whole life of the process. Previously this was not an issue, because test.py executed each file with Popen, so descriptors were freed when the process finished. With the new approach they remain held. This change eliminates that.
Fix an issue where getting the cluster failed and we then tried to set it dirty while it was None.
Put the cluster into the pool only if it was actually created.
In its current state pytest does not support ordering of execution, so this parameter is removed. There is no big need for it, due to the difference in what pytest and test.py count as a test: pytest runs test functions in the threads, while test.py executed test files in the threads. That's why pytest's way is more granular and allows filling the threads better.
Remove the skip node, since it is already added as a pytest mark for each test in the file.
Remove pool_size, since it is not used by pytest at all. Pytest uses
xdist to set the number of threads instead of the pool_size used by test.py.
With this commit test.py loses the ability to run tests by itself, always delegating execution to pytest.
NOTE: this is a breaking change. From this commit, several directories
with tests will require a path to the file to launch the test.
Affected directories
test/alternator
test/broadcast_tables
test/cql
test/cqlpy
test/rest_api
With this commit test.py will delegate test execution to pytest. However, it will still be able to run tests by itself.
When a test name like `broadcast_tables/test_broadcast_tables` is provided, it will execute the test with the test.py runner, but if a path to the file is provided, like `test/broadcast_tables/test_broadcast_tables.py`, it will delegate execution to pytest.
`--test-py-init` tells pytest to run the session in test.py-compatible mode
Update the help text for the name parameter of test.py about changes
in how it works and which directories are served by pytest
The build_mode fixture has dynamic scope; it depends on how pytest is
executed. When executed through test.py the scope will be session, and
since that is broader than package everything works fine, while with pure
pytest it will fail because build_mode will have module scope.
This fix allows running tests with pure pytest; this is needed for migrating
tests to be executed by the pytest runner instead of test.py.
Since all tests share the same base class, and some of the tests are executed by test.py and some with pytest, we need to handle two places where the configuration can be located: suite.yaml and test_config.yaml.
After the full migration, the suite.yaml case will be removed.
Since these two methods should anyway be called one after the other in both cases (when test.py executes a test and when pytest executes a test), merge them into one. Additionally, set an environment variable to show the underlying pytest process that the environment was already prepared and there is no need to clean directories or start additional services.
Introduce a new environment variable that will be used to signal to the pytest runner that the environment was already prepared by test.py. This is needed to be able to run a test with both pytest and test.py (which actually runs pytest underneath).
Previously we were logging a broken TLS connection which was then
logged again later, so now instead of logging we construct an
exception with a message extended with TLS info, which will later be
caught, with its full message still logged.
1. With this change the test really waits 10s, previously (in case
something went wrong), the timeout could take way more than that.
2. Added `else` to above `if` to increase clarity of execution flow -
it doesn't change logic, but makes it more clear.
This patch implements the basic auto repair support for tablet repair.
It was decided to add no per table configuration for the initial
implementation, so two scylla yaml config options are introduced to set
the default auto repair configs for all the tablet tables.
- auto_repair_enabled_default
Set true to enable auto repair for tablet tables by default. The value
will be overridden by the per keyspace or per table configuration which
is not implemented yet.
- auto_repair_threshold_default_in_seconds
Set the default time in seconds for the auto repair threshold for tablet
tables. If the time since last repair is bigger than the configured
time, the tablet is eligible for auto repair. The value will be
overridden by the per keyspace or per table configuration which is not
implemented yet.
The following metrics are added:
- auto_repair_needs_repair_nr
The number of tablets with auto repair enabled that need repair
- auto_repair_enabled_nr
The number of tablets with auto repair enabled
The metrics are useful to tell if auto repair is falling behind.
In the future, more auto repair scheduling will be added, e.g.,
scheduling based on the repaired and unrepaired sstable set size,
tombstone ratio and so on, in addition to the time based scheduling.
Fixes SCYLLADB-99
Allow creating materialized views and secondary indexes in a tablets keyspace only if it's RF-rack-valid, and enforce RF-rack-validity while the keyspace has views by restricting some operations:
* Altering a keyspace's RF if it would make the keyspace RF-rack-invalid
* Adding a node in a new rack
* Removing / Decommissioning the last node in a rack
Previously the config option `rf_rack_valid_keyspaces` was required for creating views. We now remove this restriction - it's not needed because we always maintain RF-rack-validity for keyspaces with views.
The restrictions are relevant only for keyspaces with numerical RF. Keyspaces with rack-list-based RF are always RF-rack-valid.
Fixes scylladb/scylladb#23345
Fixes https://github.com/scylladb/scylladb/issues/26820
backport to relevant versions for materialized views with tablets since it depends on rf-rack validity
Closes scylladb/scylladb#26354
* github.com:scylladb/scylladb:
docs: update RF-rack restrictions
cql3: don't apply RF-rack restrictions on vector indexes
cql3: add warning when creating mv/index with tablets about rf-rack
service/tablet_allocator: always allow tablet merge of tables with views
locator: extend rf-rack validation for rack lists
test: test rf-rack validity when creating keyspace during node ops
locator: fix rf-rack validation during node join/remove
test: test topology restrictions for views with tablets
test: add test_topology_ops_with_rf_rack_valid
topology coordinator: restrict node join/remove to preserve RF-rack validity
topology coordinator: add validation to node remove
locator: extend rf-rack validation functions
view: change validate_view_keyspace to allow MVs if RF=Racks
db: enforce rf-rack-validity for keyspaces with views
replica/db: add enforce_rf_rack_validity_for_keyspace helper
db: remove enforce parameter from check_rf_rack_validity
test: adjust test to not break rf-rack validity
Disabling of balancing waits for the topology state machine to become idle, to guarantee that no migrations are happening or will happen after the call returns. But it doesn't interrupt the scheduler, which means the call can take an arbitrary amount of time. It may wait for a tablet repair to finish, which can take many hours.
We should do it via topology request, which will interrupt the tablet scheduler.
Enabling of balancing can be immediate.
Fixes https://github.com/scylladb/scylladb/issues/27647
Fixes #27210
Closes scylladb/scylladb#27736
* https://github.com/scylladb/scylladb:
test: Verify that repair doesn't block disabling of tablet load balancing
tablets: Make balancing disabling call preempt tablet transitions
VECTOR_SEARCH_INDEXING permission didn't work on cdc tables as we mistakenly checked for vector indexes on the cdc table instead of the base.
This patch fixes that and adds a test that validates this behavior.
Fixes: VECTOR-476
Closes scylladb/scylladb#28050
Call discover_staging_sstables in view_update_generator::start() instead
of in the constructor, because the constructor is called during
initialization before sstables are loaded.
The initialization order was changed in 5d1f74b86a and caused this
regression. It means the view update generator won't discover staging
sstables on startup and view updates won't be generated for them. It
also causes issues in sstable cleanup.
view_update_generator::start() is called in a later stage of the
initialization, after sstable loading, so do the discovery of staging
sstables there.
Fixes scylladb/scylladb#27956
Closes scylladb/scylladb#27970
Currently, database::truncate_table_on_all_shards calls the table::can_flush only on the coordinator shard
and therefore it may miss shards with dirty data if the coordinator shard happens to have empty memtables, leading to clearing the memtables with dirty data rather than flushing them.
This change fixes that by making flush safe to be called, even if the memtable list is empty, and calling it on every shard that can flush (i.e. seal_immediate_fn is engaged).
Also, change database_test::do_with_some_data to use random keys instead of hard-coded key names, to reproduce this issue with `snapshot_list_contains_dropped_tables`.
Fixes #27639
* The issue exists since forever and might cause data loss due to wrongly clearing the memtable, so it needs backport to all live versions
Closes scylladb/scylladb#27643
* github.com:scylladb/scylladb:
test: database_test: do_with_some_data: randomize keys
database: truncate_table_on_all_shards: drop outdated TODO comment
database: truncate_table_on_all_shards: consider can_flush on all shards
memtable_list: unify can_flush and may_flush
test: database_test: add test_flush_empty_table_waits_on_outstanding_flush
replica: table, storage_group, compaction_group: add needs_flush
test: database_test: do_with_some_data_in_thread: accept void callback function
2-DC cluster parallel non-RBNO rebuild failure when expanding RF in DC2.
Steps to reproduce:
1. Provision a cluster with 2 datacenters and at least 2 nodes in the second datacenter.
2. Let’s assume datacenter names are "dc1" and "dc2".
3. Create a keyspace ("keyspace1") with RF=0 in dc2.
4. Populate some data into dc1.
5. Change keyspace1 replication in dc2 to 2.
6. On 2 nodes in dc2 run the following command in parallel:
nodetool rebuild --source-dc dc1
Parallel execution of rebuilds is not possible with RBNO enabled.
This test is the repro for #27804
Closes scylladb/scylladb#27747
Currently the function uses a regular expression
to check the system log for a specific message.
This is tangential to the ability to cleanly abort the restore task, plus the regular expression has a syntax error:
```
test/cluster/object_store/test_backup.py:534
/home/bhalevy/dev/scylla/test/cluster/object_store/test_backup.py:534: SyntaxWarning: "\(" is an invalid escape sequence. Such sequences will not work in the future. Did you mean "\\("? A raw string is also an option.
await wait_for_first_completed([l.wait_for("Failed to handle STREAM_MUTATION_FRAGMENTS \(receive and distribute phase\) for .+: Streaming aborted", timeout=10) for l in logs])
```
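For reference, the escape-sequence warning itself could be silenced with a raw string, though this change removes the log check altogether:
```python
# r-prefix: "\(" reaches the regex engine as an escaped parenthesis instead of
# being treated as an (invalid) string escape sequence.
expr = r"Failed to handle STREAM_MUTATION_FRAGMENTS \(receive and distribute phase\) for .+: Streaming aborted"
```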
This change modernizes the implementation by:
- using auto_dc_rack for manager.servers_add
- using new_test_keyspace to generate and auto-delete the keyspace
- using asyncio.gather and a prepared statement to insert the data
- simplifying the keys and values by NOT using os.urandom (which is notoriously slow)
- inserting fewer keys in debug mode
- removing the log check
With that, the test can be reenabled in all modes.
* No backport needed since the test was disabled
Closes scylladb/scylladb#27892
* github.com:scylladb/scylladb:
test_backup: do_abort_restore: reduce data footprint
test_backup: do_abort_restore: use error injection
test_backup: do_abort_restore: use asyncio for cql
test_backup: do_abort_restore: use new_test_keyspace
test_backup: do_abort_restore: use logger rather than print
test_backup: do_abort_restore: pass auto_rack_dc to servers_add
This patch adds tablet repair progress report support so that the user
could use the /task_manager/task_status API to query the progress.
In order to support this, a new system table is introduced to record the
user request related info, i.e., the start of the request and the end of the
request.
The progress is accurate when tablet split or merge happens in the
middle of the request, since the tokens of the tablet are recorded when
the request is started and when repair of each tablet is finished. The
original tablet repair is considered as finished when the finished
ranges cover the original tablet token ranges.
After this patch, the /task_manager/task_status API will report correct
progress_total and progress_completed.
Fixes #22564
Fixes #26896
Closes scylladb/scylladb#27679
It was observed:
```
test_repair_disjoint_row_2nodes_diff_shard_count was spuriously failing due to
segfault.
backtrace pointed to a failure when allocating an object from the chain of
freed objects, which indicates memory corruption.
(gdb) bt
at ./seastar/include/seastar/core/shared_ptr.hh:275
at ./seastar/include/seastar/core/shared_ptr.hh:430
Usual suspect is use-after-free, so ran the reproducer in the sanitize mode,
which indicated shared ptr was being copied into another cpu through the
multi shard writer:
seastar - shared_ptr accessed on non-owner cpu, at: ...
--------
seastar::smp_message_queue::async_work_item<mutation_writer::multishard_writer::make_shard_writer...
```
The multishard writer itself was fine, the problem was in the streaming consumer
for repair copying a shared ptr. It could work fine with the same smp setting, since
there would be only 1 shard in the consumer path, from the rpc handler all the way
to the consumer. But with mixed smp settings, the ptr would be copied into the
cpus involved, and since the shared ptr is not cpu safe, the refcount change
can go wrong, causing double free, use-after-free.
To fix, we pass a generic incremental repair handler to the streaming
consumer. The handler is safe to be copied to different shards. It will
be a no op if incremental repair is not enabled or on a different shard.
A reproducer test is added. The test could reproduce the crash
consistently before the fix and work well after the fix.
Fixes #27666
Closes scylladb/scylladb#27870
The skip_mode function works only on functions and only in cluster tests. This is OK
when we need to skip one test, but it's not possible to use it with pytestmark
to automatically mark all tests in the file. The goal of this PR is to migrate
skip_mode to a dynamic pytest.mark that can be used as an ordinary mark.
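A sketch of the intended usage, assuming the mark keeps skip_mode's (mode, reason) arguments (hypothetical test and reason):
```python
import pytest

# Mark every test in the module at once -- the use case this PR enables:
pytestmark = pytest.mark.skip_mode("release", "error injection is not available in release mode")

# Or keep marking a single test, as the skip_mode decorator allowed before:
@pytest.mark.skip_mode("release", "error injection is not available in release mode")
async def test_one_thing(manager):
    ...
```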
Closes scylladb/scylladb#27853
[avi: apply to test/cluster/test_tablets.py::test_table_creation_wakes_up_balancer]
The test is flaky, with failures in:
for server in servers:
> await check_node_log_for_failed_mutations(manager, server)
test/cluster/test_topology_ops_encrypted.py:84:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
manager = <test.pylib.manager_client.ManagerClient object at 0xffff602e8590>
server = ServerInfo(server_id=1769, ip_addr='127.82.127.43', rpc_address='127.82.127.43', datacenter='DEFAULT_DC', rack='DEFAULT_RACK', pid=186578)
async def check_node_log_for_failed_mutations(manager: ManagerClient, server: ServerInfo):
logging.info(f"Checking that node {server} had no failed mutations")
log = await manager.server_open_log(server.server_id)
occurrences = await log.grep(expr="Failed to apply mutation from", filter_expr="(TRACE|DEBUG|INFO)")
> assert len(occurrences) == 0
E AssertionError
test/cluster/util.py:319: AssertionError
As diagnosed by Gleb in https://github.com/scylladb/scylladb/issues/27942#issuecomment-3710013625:
"The fencing errors here look legit given that we do not wait for all
requests to complete while shutting down the storage proxy. The
scenario is this:
Test does writes to rf=3 keyspace with cl=one. One node is shutting
down while there is a tablet migration. Tablet migration executes
barrier and drain which fails on a node that is been shutdown. The
topology coordinator proceeds fencing the old topology, but there
still can be un-handled mutation requests from the shutting down node
on other nodes and they will generate fencing errors like they should.
They way to avoid it (though it is benign) is to wait for all outgoing
storage proxy requests to complete during shutdown, but even then the
error may still happen since a request may timeout before it is
processed by the other side, so it may be completed by a storage proxy
coordinator side, but still not handled by replica side. This what we
have fencing for in the first place."
Fix by disabling background tablet migrations, so that there are no
topology barriers concurrent with node shutdown.
Fixes #27942
Closes scylladb/scylladb#28034
The driver must see server_c before we stop server_a, otherwise
there will be no live host in the pool when we attempt to drop
the keyspace:
```
@pytest.mark.asyncio
async def test_not_enough_token_owners(manager: ManagerClient):
"""
Test that:
- the first node in the cluster cannot be a zero-token node
- removenode and decommission of the only token owner fail in the presence of zero-token nodes
- removenode and decommission of a token owner fail in the presence of zero-token nodes if the number of token
owners would fall below the RF of some keyspace using tablets
"""
logging.info('Trying to add a zero-token server as the first server in the cluster')
await manager.server_add(config={'join_ring': False},
property_file={"dc": "dc1", "rack": "rz"},
expected_error='Cannot start the first node in the cluster as zero-token')
logging.info('Adding the first server')
server_a = await manager.server_add(property_file={"dc": "dc1", "rack": "r1"})
logging.info('Adding two zero-token servers')
# The second server is needed only to preserve the Raft majority.
server_b = (await manager.servers_add(2, config={'join_ring': False}, property_file={"dc": "dc1", "rack": "rz"}))[0]
logging.info(f'Trying to decommission the only token owner {server_a}')
await manager.decommission_node(server_a.server_id,
expected_error='Cannot decommission the last token-owning node in the cluster')
logging.info(f'Stopping {server_a}')
await manager.server_stop_gracefully(server_a.server_id)
logging.info(f'Trying to remove the only token owner {server_a} by {server_b}')
await manager.remove_node(server_b.server_id, server_a.server_id,
expected_error='cannot be removed because it is the last token-owning node in the cluster')
logging.info(f'Starting {server_a}')
await manager.server_start(server_a.server_id)
logging.info('Adding a normal server')
await manager.server_add(property_file={"dc": "dc1", "rack": "r2"})
cql = manager.get_cql()
await wait_for_cql_and_get_hosts(cql, [server_a], time.time() + 60)
> async with new_test_keyspace(manager, "WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 2} AND tablets = { 'enabled': true }") as ks_name:
test/cluster/test_not_enough_token_owners.py:57:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/usr/lib64/python3.14/contextlib.py:221: in __aexit__
await anext(self.gen)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
manager = <test.pylib.manager_client.ManagerClient object at 0x7f37efe00830>
opts = "WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 2} AND tablets = { 'enabled': true }"
host = None
@asynccontextmanager
async def new_test_keyspace(manager: ManagerClient, opts, host=None):
"""
A utility function for creating a new temporary keyspace with given
options. It can be used in a "async with", as:
async with new_test_keyspace(ManagerClient, '...') as keyspace:
"""
keyspace = await create_new_test_keyspace(manager.get_cql(), opts, host)
try:
yield keyspace
except:
logger.info(f"Error happened while using keyspace '{keyspace}', the keyspace is left in place for investigation")
raise
else:
> await manager.get_cql().run_async("DROP KEYSPACE " + keyspace, host=host)
E cassandra.cluster.NoHostAvailable: ('Unable to complete the operation against any hosts', {<Host: 127.69.108.39:9042 dc1>: ConnectionException('Pool for 127.69.108.39:9042 is shutdown')})
test/cluster/util.py:544: NoHostAvailable
```
Fixes #28011
Closes scylladb/scylladb#28040
This commit replaces a fixed version name with the variable for the current version
in the instructions for installing a non-default version with the Web Installer.
This makes using the installer more user-friendly.
Fixes https://github.com/scylladb/scylladb/issues/28005
With randomized keys, and since we're inserting only 2 keys,
it is possible that they would end up owned only by a single shard,
reproducing #27639 in snapshot_list_contains_dropped_tables.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
The comment was added in 83323e155e
Since then, table::seal_active_memtable was improved to guarantee
waiting on outstanding flushes on success (see d55a2ac762), so
we can remove this TODO comment (it is also not covered by any issue,
so nobody plans to ever work on it).
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
can_flush might return a different value for each shard,
so check it right before deciding whether to flush or clear a memtable
shard.
Note that under normal conditions can_flush would always return true
now that it checks only the presence of the seal memtable function
rather than checking memtable_list::empty().
Fixes#27639
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Now that we have a unit test proving that it's safe to flush an
empty memtable list there is no need to distinguish between
may_flush and can_flush.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
A table needs a flush if not all of its memtable lists are empty.
To be used in the next patch for a unit test.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Many test cases already assume `func` is being called in a seastar
thread, and although the function they pass returns a (ready) future,
it serves no purpose other than to conform to the interface.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
This `raft_topology_update_ip` call always returns after `t.find(raft_id)`
returns `nullptr`, so it effectively does nothing. It's not a bug, since
there is no reason to update `system.peers` for left nodes anyway. We
delete the rows corresponding to left nodes in `process_left_node` (called
just above).
Closes scylladb/scylladb#27899
The service levels cache is empty after an upgrade to consistent topology
if no mutations are committed to `system.service_levels_v2` or a rolling
restart is not done.
To fix the bug, this commit adds service levels cache reloading after
upgrading the SL data accessor to v2 in `storage_service::topology_state_load()`.
Fixes SCYLLADB-90
before doing migration to raft
There is no need to call `service_level_controller::upgrade_to_v2()`
on every topology state load, we only need to do it once.
All tests I am fixing in this patch do pass for me on DynamoDB, but
other developers report that they fail because some DynamoDB servers
apparently use slightly different error messages, with less detail about
the cause of an error. For example, some of our tests currently expect
an error message that looks like:
An error occurred (ValidationException) when calling the Query
operation: Invalid operator used in KeyConditionExpression:
attribute_exists
But some servers don't report the ": attribute_exists" at the end, so
we can't use the word "attribute_exists" in the test to recognize
the correct error, and need to use a different word (one which both
versions of DynamoDB, and Alternator, all print).
As another example, the good old DynamoDB error:
An error occurred (ValidationException) when calling the Query
operation: 1 validation error detected: Value 'DOG' at
'conditionalOperator' failed to satisfy constraint: Member must
satisfy enum value set: [OR, AND]
Got replaced by the following less informative message:
An error occurred (ValidationException) when calling the Query
operation: Failed to satisfy constraint: Member must satisfy enum
value set: [ALL, OR]'
So we need to fix the test to allow it too.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
The test test_compressed_request.py::test_compressed_request coerces
boto3 to send a compressed request, and wrongly used region_name=us-east-1
to set up the connection. Theoretically, this doesn't matter because
we also set the correct URL (for either Alternator or the desired region
in AWS). But in fact it does matter, because region name is part of the
request's signature, and DynamoDB refuses the request if it comes to
a different region than it is signed for. So this test fails when run
on DynamoDB on any other region except us-east-1.
The fix is simple - don't use the constant "us-east-1", but pick up the
correct region name from the original connection.
The functions new_dynamodb_session(), new_dynamodb() and
new_dynamodb_streams() had the same bug and we fix them too, but it didn't
break any test, because the only tests using these functions were
Scylla-only, so the AWS region problem didn't apply to them.
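A minimal sketch of the pattern, with a hypothetical helper name and client setup (not the tests' actual fixtures):
```python
import boto3

# Hypothetical helper: derive a new client from an existing boto3 resource,
# reusing its region instead of hard-coding 'us-east-1'. The region matters
# because it is baked into the request's SigV4 signature, even when an
# explicit endpoint URL is used.
def new_client_like(dynamodb, endpoint_url):
    region = dynamodb.meta.client.meta.region_name
    return boto3.client('dynamodb', endpoint_url=endpoint_url, region_name=region)
```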
Replace -1 with 0 for the liveness check operation to avoid triggering digest validation failures. This prevents rare fatal errors when the cluster is recovering and ensures the test does not violate append_seq invariants.
The value -1 was causing invalid digest results in the append_seq structure, leading to assertion failures. This could happen when the sentinel value was the first (or only) element being appended, resulting in a digest that did not match the expected value.
By using 0 instead, we ensure that the digest calculations remain valid and consistent with the expected behavior of the test.
The specific value of the sentinel is not important, as long as it is a valid elem_t that does not violate the invariants of the append_seq structure. In particular, the sentinel value is typically used only when no valid result is received from any server in the current loop iteration, in which case the loop will retry.
Fixes: scylladb/scylladb#27307
Backporting to active branches - this is a test-only fix (low risk) for a flaky test that exists in older branches (thus affects the CI of active branches).
Closes scylladb/scylladb#28010
* https://github.com/scylladb/scylladb:
test/raft: use valid sentinel in liveness check to prevent digest errors
test/raft: improve debugging in randomized_nemesis_test
The Alternator test test_tag.py::test_tag_lsi_gsi expects to see an
error - it's not allowed to set a tag on a GSI or LSI - but the error
message that DynamoDB prints recently changed - instead of saying
"ResourceArn" the new error message says "resource arn".
Change the test to allow both forms, so it will pass on both Alternator
(which still uses the word ResourceArn - which is the name of the
parameter) and on DynamoDB (which uses "resource arn").
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
The test test_batch.py::test_batch_write_item_large_broken_connection
failed on DynamoDB (Refs #26079). It turns out this test has many
problems:
1. This test wrongly assumes a batch write needs to complete in one
attempt - and this fails on DynamoDB with low WCU capacity where
the batch needs to be resumed in multiple requests. Using boto3's
batch_writer() fixes this problem.
2. This test has NOTHING to do with batches - so is mis-named and
mis-placed. The batch write is just a way to prepare some data
in the table, and the real test is about Query'ing the data back
and observing the long response and reproducing issue #14454.
I did not rename or move the test, but left a comment explaining
the situation.
3. This test is written to assume the Query's response uses HTTP
chunked encoding. Which isn't actually true for DynamoDB, at least
not at the time of this writing. So the test fails on DynamoDB.
For the last reason, I made this test scylla_only. This test can't
really be run on DynamoDB without rewriting it.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
The test test_batch.py::test_batch_write_item_large often fails when
running on DynamoDB, and this patch fixes it. The test checks that a
large but not over-the-limits large batch works. However, "works" only
means that the batch is not an error - it doesn't guarantee that all the
items in the batch are performed. If the WCU limits of the table are
exceeded, DynamoDB may perform only part of the batch and return the
remaining items as UnprocessedItems. This not only can happen, it
usually does happen on DynamoDB - because a new on-demand-billing table
always starts with a very low WCU capacity.
So in this patch we update the test to recognize and perform the
UnprocessedItems, instead of assuming it is empty.
The test continues to pass on Alternator, and finally passes on
DynamoDB.
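A minimal sketch of such a retry loop (the table name and item layout here are invented, not the test's):
```python
import boto3

# Resubmit whatever DynamoDB reports back as UnprocessedItems until the
# batch fully drains, instead of assuming one call writes everything.
client = boto3.client('dynamodb')  # or an Alternator endpoint via endpoint_url
request_items = {'tbl': [{'PutRequest': {'Item': {'p': {'S': str(i)}}}}
                         for i in range(25)]}  # 25 is the per-batch limit
response = client.batch_write_item(RequestItems=request_items)
while response.get('UnprocessedItems'):
    response = client.batch_write_item(RequestItems=response['UnprocessedItems'])
```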
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
In Alternator tests, the wait_for_gsi() utility function is used in
tests that add a GSI to an existing table, to wait for this new GSI
to become ready. Although this takes a fraction of a second on
Alternator, we noticed that this takes many minutes (!) on DynamoDB
so we used an absurdly high 10 minute timeout to allow tests to also
pass on DynamoDB.
But it turns out that 10 minutes wasn't absurdly high enough, and
tests using it in test_gsi_updatetable.py started to fail on DynamoDB.
Empirically, 10 minutes was enough in the past but it seems that today
adding a GSI to an empty table routinely takes as much as 20 minutes.
So this patch increases the wait_for_gsi() timeout to a whopping 30
minutes. After this patch, the tests in test_gsi_updatetable.py which
used to fail - test_gsi_backfill_with_lsi,
test_gsi_backfill_with_real_column, test_gsi_creates_and_deletes and
test_gsi_backfill_oversized_key now all pass on DynamoDB - but each
takes more than 20 minutes to pass.
To allow the test to fail much more quickly on Alternator (where
creating a GSI takes a fraction of a second), we set a much lower
but still very high timeout when running on Alternator - 60 seconds.
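A rough sketch of what such a polling helper can look like (illustrative only, not the test suite's actual implementation):
```python
import time

# Poll DescribeTable until the named GSI reports ACTIVE, or give up.
def wait_for_gsi_sketch(client, table_name, gsi_name, timeout):
    deadline = time.time() + timeout
    while time.time() < deadline:
        desc = client.describe_table(TableName=table_name)['Table']
        gsis = desc.get('GlobalSecondaryIndexes', [])
        if any(g['IndexName'] == gsi_name and g['IndexStatus'] == 'ACTIVE'
               for g in gsis):
            return
        time.sleep(1)
    raise TimeoutError(f'GSI {gsi_name} not ACTIVE after {timeout}s')
```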
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Currently, tablet allocation intentionally ignores the current load
(introduced by commit 1e407ab), which can cause identical shard
selection when allocating a small number of tablets in the same topology.
When a tablet allocator is asked to allocate N tablets (where N is smaller
than the number of shards on a node), it selects the first N lowest shards.
If multiple such tables are created, each allocator run picks the same
shards, leading to tablet imbalance across shards.
This change initializes the load sketch with the current shard load,
scaled into the [0,1] range, ensuring that allocation remains even
while starting from the globally least-loaded shards.
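A toy model of the idea (illustrative only, not the balancer's actual data structures):
```python
# Seed the per-shard load sketch with current tablet counts scaled into
# [0, 1], so consecutive small allocations don't all land on the same
# lowest shards, while still starting from the least-loaded ones.
def seed_sketch(tablets_per_shard):
    peak = max(tablets_per_shard) or 1
    return [n / peak for n in tablets_per_shard]

def allocate(sketch, n):
    # Pick the n least-loaded shards, counting tablets placed in this run too.
    chosen = []
    for _ in range(n):
        shard = min(range(len(sketch)), key=lambda s: sketch[s])
        sketch[shard] += 1  # one whole tablet added on that shard
        chosen.append(shard)
    return chosen
```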
Fixes https://github.com/scylladb/scylladb/issues/27620
Closes scylladb/scylladb#27802
In 12dcf79c60, we avoid the ccache masquerade directory
when choosing sccache, as that would give us a double-caching
effect: first sccache is called, then clang++ is looked up,
finding ccache masquerading as clang++. We solved that by
converting the name clang++ to the absolute path /usr/bin/clang++
(or whatever), skipping over the masquerade directory in $PATH.
It turns out that we need to do the same for ccache. That commit
changed the compile command to 'ccache clang++', and ccache will
look up clang++ in $PATH, finding itself in the masquerade directory.
Fix that by avoiding the masquerade directory if a compiler cache is
specified explicitly or is found with --compiler-cache=auto.
Closes scylladb/scylladb#27996
This pull request introduces HTTP response compression to Alternator, allowing responses (both string and chunked) to be compressed using `gzip` or `deflate` when requested by clients and when the response size exceeds configurable thresholds.
* Added new source files `http_compression.cc` and `http_compression.hh` implementing compression logic, including parsing client `Accept-Encoding` headers, selecting compression algorithms, and compressing response bodies using zlib (see the sketch after this list).
* Added two new configuration options to `db::config` (`alternator_response_gzip_compression_level` and `alternator_response_gzip_compression_threshold_in_bytes`) to control the compression level (level 0 disables compression) and the minimum response size for compression.
* Added tests showing compliance with DynamoDB behavior.
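A rough sketch of the selection logic described above (illustrative Python, not the C++ implementation; q-values are ignored, and the 4096-byte default mirrors the DynamoDB behavior noted in the tests):
```python
import gzip, zlib

# Choose a response encoding from the client's Accept-Encoding header and
# skip compression for bodies below a size threshold.
def pick_encoding(accept_encoding: str, body: bytes, threshold: int = 4096):
    if len(body) < threshold:
        return None
    offered = {token.split(';')[0].strip().lower()
               for token in accept_encoding.split(',')}
    for encoding in ('gzip', 'deflate'):
        if encoding in offered:
            return encoding
    return None

def compress(body: bytes, encoding: str) -> bytes:
    # zlib.compress() produces the "deflate" (zlib-wrapped) format.
    return gzip.compress(body) if encoding == 'gzip' else zlib.compress(body)
```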
Fixes #27246
New feature - no backporting
Closes scylladb/scylladb#27454
* github.com:scylladb/scylladb:
alternator/http_compression: Add compression of streamed response
alternator/http_compression: Add implementation od gzip/deflate of string response
alternator/http_compression: Add handling of Accept-Encoding header
test/alternator: add tests for compressed responses
Move the post-condition check before the assertion to ensure it is
always executed first. Before, the wrong value could be passed to the
digest_remove assertion, making the pre-check trigger there instead of
the post-check as expected.
Also, add a check in the append_seq constructor to ensure that the
digest value is valid when creating an append_seq object.
Disable load balancing to avoid the balancer moving the tablet from a
node with less to a node with more available disk space. Otherwise, the
move_tablet API can fail (if the tablet is already in transition) or
be a no-op (in case the tablet has already been migrated).
Fixes: #27980
Closes scylladb/scylladb#27993
Fix the race condition where the process finishes while the test is trying
to check its descriptors. Now, instead of failing the whole loop, it
will continue to iterate over the rest of the processes to find the needed
one.
Closes scylladb/scylladb#27994
To avoid surprises when libstdc++, clang, or other components
in the toolchain introduce regressions, we introduce a "future
toolchain". This builds on the Fedora version under active
development, and the development branches of gcc and llvm.
The future toolchain is not intended to be frozen. Rather,
periodically we will build the future toolchain, then build
ScyllaDB and run its unit tests under that toolchain, then
discard it. Any problems will then have to be tracked down
by a developer and either reported to the source repository,
or fixed in ScyllaDB.
Closes scylladb/scylladb#27964
It should be possible to return the similarity of vectors in CQL statements following the [Cassandra compatible syntax](https://cassandra.apache.org/doc/latest/cassandra/getting-started/vector-search-quickstart.html#query-vector-data-with-cql):
```
SELECT comment, similarity_cosine(comment_vector, [0.1, 0.15, 0.3, 0.12, 0.05])
FROM cycling.comments_vs;
```
Although the calculations are slow, and we already have calculated results returned via the Vector Store API,
we need this functionality, as it allows us to calculate the similarity of vectors not stored in vector indexes.
It will be needed for [quantization and rescoring](https://scylladb.atlassian.net/wiki/spaces/RND/pages/195985800/Quantization+and+Rescoring).
The feature is also a nice-to-have in testing, as requested many times by the testing and CX teams.
The optimized version, utilizing already calculated distances from Vector Store without the need for rescoring, will come soon after via https://github.com/scylladb/scylladb/pull/27991.
---
The patch adds functions:
- `similarity_cosine(<vector>, <vector>)`,
- `similarity_euclidean(<vector>, <vector>)`,
- `similarity_dot_product(<vector>, <vector>)`
Where `<vector>` is either a column of type `VECTOR<FLOAT, N>` or a vector-of-floats literal.
These functions can be called in every `SELECT` query, not only ANN vector queries, as opposed to https://github.com/scylladb/scylladb/pull/25993.
The similarity calculations are inspired by [USearch's implementation](
a2f1759910/include/usearch/index_plugins.hpp (L1304-L1385)) and made compatible with [Cassandra's documentation](https://cassandra.apache.org/doc/5.0/cassandra/developing/cql/functions.html#vector-similarity-functions).
This guarantees the results in ScyllaDB are calculated using the exact same algorithms as used in Vector Store indexes.
---
Fixes: SCYLLADB-88
Fixes: SCYLLADB-89
New feature, should land into 2026.1
Closes scylladb/scylladb#27524
* github.com:scylladb/scylladb:
docs: add vector similarity functions documentation
test/cqlpy: add similarity functions correctness tests
test/cqlpy: add similarity functions invalid call tests
cql3: introduce similarity functions syntax
vector_similarity_fcts: introduce similarity functions
vector_similarity_fcts: retrieve similarity function argument types
vector_similarity_fcts: add calculating similarity between vectors
This patch modifies the RESTful API handler which disables tablet
balancing to use a topology request to wait for already-running tablet
transitions. Before, it was just waiting for the topology to be idle, so
it could wait much longer than necessary, also for operations which
are not affected by the flag, like repair. And repair can take hours.
A new request type is introduced for this synchronization: noop_request.
It will preempt the tablet scheduler, and when the request executes,
we know all later tablet transitions will respect the "balancing
disabled" flag, and only things which are unaffected by the flag,
like repair, will be scheduled.
Fixes #27647
The test test_streams.py::test_streams_putitem_new_item_overrides_old_lsi
failed on DynamoDB (Refs #26079) because we passed an unused parameter
NonKeyAttributes to the Projection setting an LSI. NonKeyAttributes is
only allowed when ProjectionType=INCLUDE, but we used ProjectionType=ALL.
DynamoDB refuses to create an LSI with such inconsistent parameters,
and we just need to remove this unnecessary parameter from this test.
The reason why this test didn't fail on Alternator is that Alternator
doesn't yet support or even parse the Projection parameter (Refs #5036).
We also add an xfailing test (passes on DynamoDB, fails on Alternator)
checking that a spurious NonKeyAttributes parameter is rejected. When
we get around to implement the projection feature (#5036), this will
be yet another acceptance test for this feature.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
The semaphore has detection and protection against regular resource
leaks, where some resources go unaccounted for and are not released by
the time the semaphore is destroyed. There is no detection or protection
against negative leaks: where resources are "made up" out of thin air. This
kind of leak looks benign at first sight, a few extra resources won't
hurt anyone so long as it is a small amount. But it turns out that even a
single extra count resource can defeat a very important anti-deadlock
protection in can_admit_read(): the special case which admits a new
permit regardless of memory resources, when all original count resources
are available. This check uses ==, so if resources > original, the
protection is defeated indefinitely. Instead of just changing == to >=,
we add detection of such negative leaks to signal(), via
on_internal_error_noexcept().
At this time I still don't know how this negative leak happens (the code
doesn't confess), but with this detection, hopefully we'll get a clue from
tests or the field. Note that on_internal_error_noexcept() will not
generate a coredump, unless ScyllaDB is explicitly configured to do so.
In production, it will just generate an error log with a backtrace.
The detection also clamps the _resources to _initial_resources, to
prevent any damage from the negative leak.
I just noticed that there is no unit test for the deadlock protection
described above, so one is added in this PR, even if only loosely
related to the rest of the patch.
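A toy model of the detect-and-clamp idea (illustrative only, not ScyllaDB's actual semaphore):
```python
# signal() detects a "negative leak" (more resources returned than
# originally existed), reports it, and clamps back to the initial amount.
class CountingSemaphore:
    def __init__(self, initial):
        self._initial = initial
        self._available = initial

    def wait(self, n=1):
        assert self._available >= n, "a real implementation would block here"
        self._available -= n

    def signal(self, n=1):
        self._available += n
        if self._available > self._initial:
            # Made-up resources would defeat an `available == initial`
            # anti-deadlock check, so report loudly and clamp.
            print(f"internal error: negative leak of "
                  f"{self._available - self._initial} resource(s)")
            self._available = self._initial
```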
Fixes: SCYLLADB-163
Closes scylladb/scylladb#27764
The previous commit added the means to decide whether the client asks for compression, and with which algorithm.
This patch adds actual compression of responses based on the zlib library.
For now, only string (not chunked) responses are compressed.
Several previously defined tests start to pass.
This is an initial patch to add support for Alternator's compressed responses.
The actual compression (gzip, deflate) will be added in the following commits.
The main functionality added in this commit is parsing of the Accept-Encoding header,
which indicates the compression algorithms supported by the client.
In this commit we also add configuration parameters for response gzip/deflate compression.
They allow enabling/disabling compression, setting the level, and setting a size threshold below which a response is not compressed.
With the current implementation it is possible to decide compression for each response, but this is not used yet.
Adds a set of tests that:
1. Show how DynamoDB handles response compression.
It supports 'gzip' and 'deflate' compression, which can be selected by providing the 'Accept-Encoding' header. It only encodes responses above 4096 B.
- `test_compressed_response`, `test_compressed_response_large` show compression for various response sizes.
- `test_accept_encoding_header` focuses on testing various values of the Accept-Encoding header.
- `test_multiple_accept_encoding_headers` verifies behaviour with repeated Accept-Encoding headers.
2. Will confirm the implementation of response compression in Alternator (#27246)
In addition to the above tests, we check Alternator-specific expectations:
- `test_chunked_response_compression` makes sure that compression will also work for chunked responses.
- `test_set_compression_options` checks config options to set the response size threshold for compression and the compression level.
3. `test_signature_trims_accept_encoding_spaces` reveals Alternator's bug in signature verification (#27775)
This reverts commit 1bb897c7ca, reversing
changes made to 954f2cbd2f. It makes
incompatible changes to the object storage configuration format, breaking
tests [1]. It's likely that it doesn't break any production configuration,
but we can't be sure.
Fixes #27966
Closes scylladb/scylladb#27969
To make the test fast, in particular in debug mode,
insert fewer keys and do not rely on os.urandom,
which is notoriously slow.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Currently the test depends on timing and enough inserted
data to abort the restore tasks at exactly the right time.
This is flaky in nature, so instead, use error injection
to synchronize the abort with mutation streaming.
Note that with that we no longer get the STREAM_MUTATION_FRAGMENTS
log message, so waiting for it is dropped from the test.
The most important thing is that some restore tasks must fail.
(Unfortunately, we cannot guarantee that all of them would fail.)
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Use the more modern asyncio facility to run cql queries
and a prepared statement to insert data into the table.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
To generate a multi-rack cluster; otherwise we get the following error:
```
E cassandra.protocol.ConfigurationException: <Error from server: code=2300 [Query invalid because of configuration issue] message="Replication factor 3 exceeds the number of racks (1) in dc datacenter1">
```
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
With the additional file_stat overload introduced in
[Update seastar submodule](3e9b071838),
use the opened directory for more efficient, relative-path based stat.
* Enhancement, no backport needed
Closes scylladb/scylladb#27967
* github.com:scylladb/scylladb:
table: get_snapshot_details: use relative-path based file_stat
table: get_snapshot_details: fix warning in exists_in_dir
table: get_snapshot_details: fix staging dir calculation
backup: process_snapshot_dir: use relative-path based file_stat
directory_lister: add ctor with opened directory
If a CQL session USEs a keyspace and then calls DESC TABLES, the user
expects to see only the tables in the chosen keyspace. However, calling
DESC KEYSPACES should still list all the keyspaces - returning
just the USEd one is not useful - and is also not what Cassandra does.
We had an xfailing test test_describe.py::test_keyspaces_with_use which
reproduces this bug (and passes on Cassandra).
In this patch we fix this bug. The fix is simple - USE should affect
DESC statements, but be ignored for DESC KEYSPACES. We can then remove
the xfail marker from the test.
The patch also includes a new test for the DESC TABLES case, where the
USE *does* have an effect, as I wanted to make sure the patch doesn't
break this case. As usual, the new test passes on both Cassandra and
ScyllaDB.
Fixes #26334
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes scylladb/scylladb#27971
Context
-------
The procedure of hint draining boils down to the following steps:
1. Drain a hint sender. That should get rid of all hints stored
for the corresponding endpoint.
2. Remove the hint directory corresponding to that endpoint.
Obviously, it gets more complex than this high-level perspective.
Without blurring the view, the relevant information is that step 1
in the algorithm above may not be executed.
Breaking it down, it comprises two calls to
`hint_sender::send_hints_maybe()`. The function is responsible for
sending out hints, but it's not unconditional and will not be performed
if any of the following bullets is not satisfied:
* `hint_sender::replay_allowed()` is not `true`. This can happen when
hint replay hasn't been turned on yet.
* `hint_sender::can_send()` is not `true`. This can happen if the
corresponding endpoint is not alive AND it hasn't left the cluster
AND it's still a normal token owner.
There is one more relevant point: sending hints can be stopped if
replaying hints fails and `hint_sender::send_hints_maybe()` returns
`false`. However, that's not possible in the case of draining.
In that case, if Scylla comes across any failure, it'll simply delete
the corresponding hint segment. Because of that, we ignore it and
only focus on the two bullets.
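A pseudocode distillation of that gating (the member names follow the prose above, not the actual C++):
```python
# Hints are only sent when replay is allowed and the target qualifies.
def send_hints_maybe(sender) -> bool:
    if not sender.replay_allowed():
        return False  # hint replay hasn't been turned on yet
    if not sender.can_send():
        # The endpoint is not alive AND hasn't left the cluster AND is
        # still a normal token owner, so hints cannot be sent right now.
        return False
    return sender.send_all_pending_hints()
```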
---
Why is it a problem?
--------------------
If a hint directory is not purged of all hint segments in it,
any attempt to remove it will fail and we'll observe an error like this:
```
Exception when draining <host ID>: std::filesystem::__cxx11::filesystem_error
(error system:39, filesystem error: remove failed: Directory not empty [<path>])
```
The folder with the remaining hints will also stay on disk, which is, of
course, undesired.
---
When can it happen?
-------------------
As highlighted in the Context section of this commit message, the
key part of the code that can lead to a dangerous situation like that
is `hint_sender::send_hints_maybe()`. The function is called twice when
draining a hint endpoint manager: once to purge all of the existing
hints, and another time after flushing all hints stored in commitlog
instances, but not listed by `hint_sender` yet. If any of those calls
misbehaves, we may end up with a problem. That's why it's crucial to
ensure that the function always goes through ALL of the hints.
Dangerous situations:
1. We try to drain hints before hint replay is allowed. That will
violate the first bullet above.
2. The node we're draining is dead, but it hasn't left the cluster,
and it still possesses some tokens.
---
How do we solve that?
---------------------
Hint replay is turned on in `main.cc`. Once enabled, it cannot be
disabled. So to address the first bullet above, it suffices to ensure
that no draining occurs beforehand. It's perfectly fine to prevent it.
Soon after hint replay is allowed, `main.cc` also asks the hint manager
to drain all of the endpoint managers whose endpoints are no longer
normal token owners (cf. `db::hints::manager::drain_left_nodes()`).
The other bullet is more tricky. It's important here to know that
draining is only initiated in three situations:
1. As part of the call to `storage_service::notify_left()`.
2. As part of the call to `storage_service::notify_released()`.
3. As part of the call to `db::hints::manager::drain_left_nodes()`.
The last one is trivially non-problematic. The nodes that it'll try to
drain are no longer normal token owners, so `can_send()` must always
return `true`.
The second situation is similar. As we read in the commit message of
scylladb/scylladb@eb92f50413, which
introduced the notion of released nodes, the nodes are no longer
normal token owners:
> In this patch we postpone the hint draining for the "left" nodes to
> the time when we know that the target nodes no longer hold ownership
> of any tokens - so they're no longer referenced in topology. I'm
> calling such nodes "released".
I suggest reading the full commit message there because the problems
described there are somewhat similar to the ones these changes try to solve.
Finally, the first situation: unfortunately, it's more tricky. The same
commit message says:
> When a node is being replaced, it enters a "left" state while still
> owning tokens. Before this patch, this is also the time when we start
> draining hints targeted to this node, so the hints may get sent before
> the token ownership gets migrated to another replica, and these hints
> may get lost.
This suggests that `storage_service::notify_left()` may be called when
the corresponding node still has some tokens! That's something that may
prevent properly draining hints.
Fortunately, no hope is lost. We only drain hints via `notify_left()`
when hinted handoff hasn't been upgraded to being host-ID-based yet.
If it has, draining always happens via `notify_released()`.
When I write this commit message, all of the supported versions of
Scylla 2025.1+ use host-ID-based hinted handoff. That means that
problems can only arise when upgrading from an older version of Scylla
(2024.1 downwards). Because of that, we don't cover it. It would most
likely require more extensive changes.
---
Non-issues
----------
There are notions that are closely related to sending hints. One of them
is the host filter that hinted handoff uses. It decides which endpoints
are eligible for receiving hints, and which are not. Fortunately, all
endpoints rejected by the host filter lose their hint endpoint managers
-- they're stopped as part of that procedure. What's more, draining
hints and changing the host filter cannot be happening at the same time,
so it cannot lead to any problems.
The solution
------------
To solve the described issue, we simply prevent draining hints before
hint replay is allowed. No reproducer test is attached because it's not
feasible to write one.
Fixes scylladb/scylladb#27693
Closes scylladb/scylladb#27713
With the additional file_stat overload introduced in
3e9b071838, use the opened
directory for more efficient, relative-path based stat.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
The functor is called both on the data directory and
on the staging directory, so the warning printed if the
found file is not the same inode should print the given path,
not datadir / name (as was copy and pasted).
Refs #27635
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
staging is based off of datadir, not snapshot_dir.
The issue was introduced in f5ca3657e2.
Refs #27635
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
With the additional file_stat overload introduced in
3e9b071838, use the opened
directory for more efficient, relative-path based stat.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
This ctor allows the caller to open the directory first,
on its own, and pass it down to the directory_lister.
Once all callers use this ctor we can get rid of
the delayed open in the get() method.
Also, it can be used to replace full-path based file_stat calls
on listed entries with file_stat(directory, name) calls
that are based on statat() and a relative path name that is present
in the listed directory entry.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Move KSNAME constant from internal static to public member of
replicated_key_provider_factory class.
It will be used to identify it as a system keyspace.
Signed-off-by: Amnon Heiman <amnon@scylladb.com>
Add documentation in `functions.rst` as the CQL reference
for the vector similarity functions.
This includes the syntax, example usage, and prerequisites
for the parameters.
Add `calculate_similarity` function for testing purposes.
Add tests checking if the CQL-returned values match the calculated
ones with precision up to the 5th decimal place.
The tests should also be run on Cassandra to check compatibility
with their responses.
The similarity function syntax is:
`similarity_<metric_name>(<vector>, <vector>)`
Where `<metric_name>` is one of `cosine`, `euclidean` and `dot_product`
matching the intended similarity metric to be used within calculations.
Where `<vector>` is either a vector column name or vector literal.
Add a `vectorSimilarityArgs` symbol that is an extension of `selectionFunctionArgs`,
but allows using a `value` as an argument as well as an `unaliasedSelector`.
This is needed because the similarity function syntax allows both arguments to be
vector values, so the grammar needs to recognize a vector literal there as well.
Since this patch actually adds support for `SELECT`s with constants,
return true instead of throwing an error when trying to convert the function call
to a constant.
This patch introduces scalar functions `similarity_cosine()`,
`similarity_euclidean()`, and `similarity_dot_product()`
which should return a float - similarity of the given vectors
calculated according to the function's similarity metric.
The argument types of these functions are retrieved with
`retrieve_vector_arg_types`, but shall be assignable to
`vector<float, N>` where `N` is the same for both arguments.
This patch introduces a dimensionality check during the execution
of those functions.
This patch retrieves the argument types for similarity functions.
Newly introduced `retrieve_vector_arg_types` function checks if
the provided arguments are vectors of floats and if
both the vector values match the same type (dimension).
If so, we know the exact type and set it as the function arguments type.
Otherwise, if the exact type is unknown, but we can assign to vector<float, N>,
then the dimensionality check will be done during execution of
the similarity function.
This also takes care of null values and bind variables the same way
as implemented in Cassandra, to stay compatible:
if we can infer the type from one argument, then the other
may be unknown (null or ?).
Additionally this patch adds `test_assignment_any_vector` function
which tests the weak assignment to vector<float, N> as mentioned
above.
This commit introduces `compute_cosine_similarity`, `compute_euclidean_similarity`,
`compute_dot_product_similarity` functions to calculate the vectors similarity
in respective metric.
The similarity is a float value in the range [0, 1], indicating how similar the vectors are.
Values closer to 1 indicate greater similarity.
The `dot_product` similarity requires L2-normalized vectors as arguments.
The similarity is calculated based on jVector's implementation, used by Cassandra:
f967f1c924/jvector-base/src/main/java/io/github/jbellis/jvector/vector/VectorSimilarityFunction.java (L36-L69)
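For illustration, a sketch of the three metrics following the jVector formulas linked above (an assumption based on that link, not code copied from ScyllaDB):
```python
import math

# Each metric maps into [0, 1], with values closer to 1 meaning "more similar".
def similarity_cosine(a, b):
    dot = sum(x * y for x, y in zip(a, b))
    norms = math.sqrt(sum(x * x for x in a)) * math.sqrt(sum(y * y for y in b))
    return (1.0 + dot / norms) / 2.0

def similarity_dot_product(a, b):
    # Assumes L2-normalized inputs, as required above.
    return (1.0 + sum(x * y for x, y in zip(a, b))) / 2.0

def similarity_euclidean(a, b):
    return 1.0 / (1.0 + sum((x - y) ** 2 for x, y in zip(a, b)))
```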
Different DynamoDB operations have different settings allowed for
their "ReturnValues" argument. In particular, some operations allow
ReturnValues=UPDATED_OLD but the DeleteItem operation *does not*.
We have a test, test_delete_item_returnvalues, aimed to verify this
but it had a typo and didn't actually check "UPDATED_OLD". This patch
fixes this typo.
The test still passes because the code itself (executor.cc,
delete_item_operation's constructor) has the correct check - it was
just the test that was wrong.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes scylladb/scylladb#27918
* tools/cqlsh scylladb/scylla-cqlsh@9e5a91d7...scylladb/scylla-cqlsh@5a1d7842 (9):
> fix wrong reference in copyutil.py
> Add GitHub Action workflow to create releases on new tags
> test_copyutil.py: introdcue test for ImportTask
> fix(copyutil.py): avoid situatuions file might be move withing multiple processes
> Fix Unix socket port display in show_host() method
> Merge pull request #157 from scylladb/alert-autofix-1
.github/workflows/build-push.yml: Potential fix for code scanning alert no. 1: Workflow does not contain permissions
> .github/workflows/dockerhub-description.yml: Potential fix for code scanning alert no. 9: Workflow does not contain permissions
> test_cqlsh_output: skip some cassandra 5.0 table options
> tests: template compression cql to use `class` insted of `sstable_comprission`
> Pin Cassandra version to 5.0 for reproducible builds
> Remove scylla-enterprise integration test and update Cassandra to latest
Closes scylladb/scylladb#27924
Like C, Python supports some escape sequences in strings such as the
familiar "\n" that converts to a newline character.
Originally, when backslash was used before a random character, for
example, "\.", Python used to just use these literal characters
backslash and dot, in the string - and not make a fuss about it.
This made it ok to use a string like "hi\.there" as a regular expression.
We have a few instances of this in our Python tests.
But recent releases of Python started to produce ugly warnings about
these cases. The error message looks like:
SyntaxWarning: "\." is an invalid escape sequence. Such sequences
will not work in the future. Did you mean "\\."? A raw string is
also an option.
Indeed in most cases the easiest solution is to use a "raw string",
a string literal preceded with r. For example, r"hi\.there". In such
strings Python doesn't replace escape sequences like \n in the string,
and also leaves the \. unchanged for the regular expression to see.
So in this patch we use raw strings in all places in test/ where Python
warns about this problem.
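For example:
```python
import re

# "\." in a normal string literal now draws a SyntaxWarning; a raw string
# keeps the backslash intact for the regex engine, with no warning.
pattern = re.compile(r"hi\.there")  # matches the literal text "hi.there"
assert pattern.match("hi.there")
assert not pattern.match("hixthere")
```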
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes scylladb/scylladb#27856
In afb96b6387, we added support for sccache. As a side effect
it changed the method of invoking ccache from transparent via PATH
(if it contains /usr/lib64/ccache) to explicit, by changing the compiler
command line from 'clang++' (which may or may not resolve to the ccache
binary) to 'ccache /usr/local/bin/clang++', which always invokes ccache.
In the default dbuild configuration, PATH does not contain /usr/lib64/ccache,
so ccache isn't invoked by default. Users can change this via the
SCYLLADB_DBUILD environment variable.
As a result of ccache being suddenly enabled for dbuild builds, ccache
will now attempt to create ~/.cache/ccache. Under docker, this does
not work, because we bind-mount ~/.cache/dbuild. Docker will create the
intermediate ~/.cache, but under the root user, not $USER. The intermediate
directory being root-owned prevents ~/.cache/ccache from being created.
Under podman, this does work, because everything runs under the container's
root user.
The fix is to bind-mount the entire ~/.ccache into the container. This
not only lets ccache create the directory, it will also find an existing
~/.cache/ccache directory and use it, enabling reuse across invocations.
Since ccache will now respect configuration changes without access to
its configuration file (notably, the maximum cache size), we also
bind-mount ~/.config.
Since ~/.ccache and ~/.config are not automatically created, we create
them explicitly so the bind mounts can work. This is for new nodes enlisted
from the cloud; developer machines will have those directories preexisting.
Note that the ccache directory used to be ~/.ccache, but was later changed.
Had the author known, we would have bind-mounted ~/.cache much earlier.
Fixes #27919.
Closes scylladb/scylladb#27920
Consider this:
- n1 is a coordinator and schedules tablet repair
- n1 detects tablet repair failed, so it schedules tablet transition to end_repair state
- n1 loses leadership and n2 becomes the new topology coordinator
- n2 runs end_repair on the tablet with session_id=00000000-0000-0000-0000-000000000000
- when a new tablet repair is scheduled, it hangs, since the lock is already taken because it was not removed in the previous step
To fix, we use the global_tablet_id to index the lock instead of the
session id.
In addition, we retry the repair_update_compaction_ctrl verb in case of
error to ensure the verb is eventually executed. The verb handler is
also updated to check if it is still in end_repair stage.
Fixes #26346
Closes scylladb/scylladb#27740
* seastar f0298e40...4dcd4df5 (29):
> file: provide a default implementation for file_impl::statat
> util: Genralize memory_data_sink
> defer: Replace static_assert() with concept
> treewide: drop the support of fmtlib < 9.0.0
> test: Improve resilience of netsed scheduling fairness test
> Merge 'file: Use query_device_alignment_info in blkdev_alignments ' from Kefu Chai
file: Put alignment helpers in anonymous namespace
file: Use query_device_alignment_info in blkdev_alignments
> Merge 'file: Query physical block size and minimum I/O size' from Kefu Chai
file: Apply physical_block_size override to filesystem files
file: Use designated initializers in xfs_alignments
iotune: Add physical block size detection
disk_params: Add support for physical_block_size overrides from io_properties.yaml
block_device: Query alignment requirements separately for memory and I/O
> Merge 'json: formatter: fix formatting of std:string_view' from Benny Halevy
json: formatter: fix formatting of std:string_view
json: formatter: make sure std::string_view conforms to is_string_like
Fixes#27887
> demos:improve the output of demo_with_io_intent() in file_demo
> test: Add accept() vs accept_abort() socket test
> file: Refine posix_file_impl alignments initialization
> Add file::statat and a corresponding file_stat overload
> cmake: don't compile memcached app for API < 9
> Merge 'Revert to ~old lifetime semantics for lvalues passed to then()-alikes' from Travis Downs
future: adjust lifetime for lvalue continuations
future: fix value class operator()
> pollable_fd: Unfriend everything
> Merge 'file: experimental_list_directory: use buffered generator' from Benny Halevy
file: experimental_list_directory: use buffered generator
file: define list_directory_generator_type
> Merge 'Make datagram API use temporary_buffer<>-s' from Pavel Emelyanov
net: Deprecate datagram::get_data() returning packet
memcache: Fix indentation after previous patch
memcache: Use new datagram::get_buffers() API
dns: Use new datagram::get_buffers() API
tests: Use new datagram::get_buffers() API
demo: Use new datagram::get_buffers() API
udp: Make datagram implementations return span of temporary_buffer-s
> Merge 'Remove callback from timer_set::complete()' from Pavel Emelyanov
reactor: Fix indentation after previous patch
timers: Remove enabling callback from timer_set::complete()
> treewide: avoid 'static sstring' in favor of 'constexpr string_view'
> resource: Hide hwloc from public interface
> Merge 'Fix handle_exception_type for lvalues' from Travis Downs
futures_test: compile-time tests
function_traits: handle reference_wrapper
> posix_data_sink_impl: Assert to guard put UB
> treewide: fix build with `SEASTAR_SSTRING` undefined
> avoid deprecation warnings for json_exception
> `util/variant_utils`: correct type deduction for `seastar::visit`
> net/dns: fixed socket concurrent access
> treewide: add missing headers
> Merge 'Remove posix file helper file_read_state class' from Pavel Emelyanov
file: Remove file_read_state
test: Add a test for posix_file_impl::do_dma_read_bulk()
> membarrier: simplify locking
Adjust scylla to the following changes in seastar:
- file_stat became polymorphic
- needs explicit inference in table::snapshot_exists, table::get_snapshot_details
- file::experimental_list_directory now returns list_directory_generator_type
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Closes scylladb/scylladb#27916
We should check that the test feature is disabled on all nodes after a partial
upgrade. This hardens the test a bit, although the old code wasn't that bad,
since enabled features are a part of the group 0 state shared by all nodes.
Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
Closes scylladb/scylladb#27654
The boost test view_schema_test.cc::node_view_update_backlog can be
flaky if the test machine has a hiccup of 100ms, and this patch fixes
it:
The test is a unit test for db::view::node_update_backlog, which is
supposed to cache the backlog calculation for a given interval. The
test asks to cache the backlog for 100ms, and then without sleeping
at all tries to fetch a value again and expect the unchanged cached
value to be returned. However, if the test run experiences a context
switch of 100ms, it can fail, and it did once as reported in #27876.
The fix is to change the interval in this test from 100ms to something
much larger, like 10 seconds. We don't sleep this amount - we just need
the second fetch to happen *before* 10 seconds has passed, so there's
no harm in using a very large interval.
However, the second half of this test wants to check that after the
interval is over, we do get a new backlog calculation. So for the
second half of this test we can and should use a shorter interval -
e.g., 10ms. We don't care if the test machine is slow or context switched;
for this half of the test we want to sleep *more* than 10ms, and
that's easy.
The fixed test is faster than the old one (10ms instead of 100ms) and
more reliable on a shared test machine.
Fixes #27876.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes scylladb/scylladb#27878
The refresh api is expected to automatically delete
the sstable files from the uploads/ dir. Verify that.
The code that does that is currently called by
sstables_loader::load_new_sstables:
```c++
if (load_and_stream) {
...
co_await loader.load_and_stream(ks_name, cf_name, table_id, std::move(sstables_on_shards[this_shard_id()]), primary_replica_only(primary), true /* unlink */, scope, {});
```
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Closes scylladb/scylladb#27586
The current state (after PR #26836) is that Alternator tables are
created by default using tablets. But due to issue #23838, Alternator
Streams cannot be enabled on a table that uses tablets... An attempt to
enable Streams on such a table results in a clear error:
"Streams not yet supported on a table using tablets (issue #23838).
If you want to use streams, create a table with vnodes by setting
the tag 'system:initial_tablets' set to 'none'."
But users should be able to learn this fact from the documentation -
not just retroactively from an error message. This is especially important
because a user might create and fill a table using tablets, and only get
this error when attempting to enable Streams on the existing table -
when it is too late to change anything.
So this patch adds a paragraph on this to compatibility.md, where
several other requirements of Alternator Streams are already mentioned.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes scylladb/scylladb#27000
Irritated by prevailing spellchecker comments attached to every PR, I aim to fix them all.
No need to backport, just cosmetic changes.
Closes scylladb/scylladb#27897
* github.com:scylladb/scylladb:
treewide: fix some spelling errors
codespell: ignore `iif` and `tread`
The explanation is in the new comment in `gossiper::add_saved_endpoint`.
We add a test for this change. It's "extremely white-box", but it's better
than nothing.
We currently do it only for a bootstrapping node, which is a bug. The
missing IP can cause an internal error, for example, in the following
scenario:
- replace fails during streaming,
- all live nodes are shut down before the rollback of replace completes,
- all live nodes are restarted,
- live nodes start hitting internal error in all operations that
require IP of the replacing node (like client requests or REST API
requests coming from nodetool).
We fix the bug here, but we do it separately for replace with different
IP and replace with the same IP.
For replace with different IP, we persist the IP -> host ID mapping
in `system.peers` just like for bootstrap. That's necessary, since there
is no other way to determine IP of the replacing node on restart.
For replace with the same IP, we can't do the same. This would require
deleting the row corresponding to the node being replaced from
`system.peers`. That's fine in theory, as that node is permanently
banned, so its IP shouldn't be needed. Unfortunately, we have many
places in the code where we assume that IP of a topology member is always
present in the address map or that a topology member is always present in
the gossiper endpoint set. Examples of such places:
- nodetool operations,
- REST API endpoints,
- `db::hints::manager::store_hint`,
- `group0_voter_handler::update_nodes`.
We could fix all those places and verify that drivers work properly when
they see a node in the token metadata, but not in `system.peers`.
However, that would be too risky to backport.
We take a different approach. We recover IP of the replacing node on
restart based on the state of the topology state machine and
`system.peers` just after loading `system.peers`.
We rely on the fact that group 0 is set up at this point. The only case
where this assumption is incorrect is a restart in the Raft-based
recovery procedure. However, hitting this problem then seems improbable,
and even if it happens, we can restart the node again after ensuring
that no client and REST API requests come before replace is rolled back
on the new topology coordinator. Hence, it's not worth complicating the
fix (by e.g. looking at the persistent topology state instead of the
in-memory state machine).
The default error message of `closed_error` is "connection is closed".
It lacks the host ID and the IP address of the connected node, which
makes debugging harder. Also, it can be more specific when
`closed_error` is thrown due to the local node shutting down.
Fixes #16923
Closes scylladb/scylladb#27699
The storage::snapshot() is used in two different modes - one to save an sstable as a snapshot somewhere, and another to create a copy of an sstable. The latter use-case is "optimized" by snapshotting an sstable under a new generation, but that's only true for local storage. While snapshot is not implemented for S3 storage, _cloning_ an sstable stored on S3 is not necessarily going to be the same as doing a snapshot.
Another sign of snapshot and clone being different is that calling snapshot() for a snapshot itself and for a clone uses two very different sets of arguments - snapshotting specifies a relative name and omits the new generation, while cloning doesn't need a "name" and instead provides a generation. Recently (#26528) cloning got an extra "leave_unsealed" tag, which makes no sense for snapshotting.
Having said that, this PR introduces the sstables::storage::clone() method and modifies both callers and implementations according to the above features of each. As a result, the code logic in both methods becomes much simpler, and a bunch of bool classes and "_tag" helper structures goes away.
Improving internal APIs, no need to backport
Closes scylladb/scylladb#27871
* github.com:scylladb/scylladb:
sstables, storage: Drop unused bool classes and tags
sstables/storage: Drop create_links_common() overloads
sstable: Simplify storage::snapshot()
sstables: Introduce storage::clone()
This reverts commit a5edbc7d612df237a1dd9d46fd5cecf251ccfd13.
Why re-enabling table audit
---------------------------
Audit has been disabled (scylladb/scylla-enterprise/pull/3094) over many concerns raised against the table implementation, e.g. scylladb/scylla-enterprise/issues/2939 and scylladb/scylla-enterprise/issues/2759, plus there's a whole outstanding backlog of issues. One of the concerns was also a possible loss of availability; since then we migrated the audit keyspace from SimpleStrategy RF=1 to NetworkTopologyStrategy RF=3 (scylladb/scylla-enterprise/pull/3399) and stopped failing queries when auditing fails (scylladb/scylla-enterprise/pull/3118 & scylladb/scylla-enterprise/pull/3117), which improves the situation but doesn't address all the concerns. Eventually we want to use syslog as audit's sink, but it's not fully ready just yet, so we'll restore table audit for now to increase security, and switch to syslog later. BTW, cloud will enable table audit for the AUTH category (scylladb/sre-ops-automation/issues/2970) separately from this effort.
Performance considerations
We assume that the events for the enabled categories, i.e. DCL, DDL, AUTH & ADMIN, should appear at about the same, low cadence, with AUTH perhaps having the biggest impact of them all under some workloads. The performance penalty of enabling just the AUTH category [has been measured](https://scylladb.atlassian.net/wiki/spaces/RND/pages/148308005/Audit+performance+impact+test): while authentication throughput and read/write throughput remain stable, the queries' P99 latency may increase by a couple of percent in the most hardcore scenarios.
Fixes: https://github.com/scylladb/scylladb/issues/26020
Gradually re-enabling audit feature, no need to backport.
Closes scylladb/scylladb#27262
* github.com:scylladb/scylladb:
doc: audit: set audit as enabled by default
Reapply "audit: enable some subset of auditing by default"
Currently, the tablet load balancer performs capacity based balancing by collecting the gross disk capacity of the nodes, and computes balance assuming that all tablet sizes are the same.
This change introduces size-based load balancing. The load balancer does not assume identical tablet sizes any more, and computes load based on actual tablet sizes.
The size-based load balancer computes the difference between the most and least loaded nodes in the balancing set (nodes in a DC, or nodes in a rack in case of `rf-rack-valid-keyspaces`) and stops further balancing if this difference is below the config option `size_based_balance_threshold_percentage`.
This config option does not apply to the absolute load, but instead to the percentage of how much the most loaded node is more loaded than the least loaded node:
`delta = (most_loaded - least_loaded) / most_loaded`
If this delta is smaller than the config threshold, the balancer will consider the nodes balanced.
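To make the stop condition concrete, here is a rough Python sketch (invented names, not the actual load balancer code):
```
def nodes_balanced(node_loads, threshold_percentage):
    # node_loads: per-node load computed from actual tablet sizes
    most_loaded = max(node_loads)
    least_loaded = min(node_loads)
    if most_loaded == 0:
        return True  # nothing stored yet, nothing to balance
    delta = (most_loaded - least_loaded) / most_loaded
    # stop balancing once the relative difference drops below the
    # configured size_based_balance_threshold_percentage
    return delta * 100 < threshold_percentage
```
For example, with node loads of 90 and 80 the delta is about 11%, so a threshold of 10 would still allow further migrations.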
This PR is a part of a series of PRs which are based on top of each other.
- First part for tablet size collection via load_stats: #26035
- Second part reconcile load_stats: #26152
- The third part for load_sketch changes: #26153
- The fourth part which performs tablet load balancing based on tablet size: #26254
- The fifth part changes the load balancing simulator: #26438
This is a new feature, backport is not needed.
Fixes #26254
Closes scylladb/scylladb#26254
* github.com:scylladb/scylladb:
test, load balancing: add test for table balance
load_balancer: add cluster feature for size based balancing
load_balancer: implement size-based load balancing
config: add size based load balancing config params
load_stats: use trinfo to decide how to reconcile tablet size
load_sketch: use tablet sizes in load computation
load_stats: add get_tablet_size_in_transition()
- table, storage_group: add compaction_group_count
- And use it to reserve vector capacity before adding an item per compaction_group
- table: reduce allocations by using for_each_compaction_group rather than compaction_groups()
- compaction_groups() may allocate memory, but when called from a synchronous call site, the caller can use for_each_compaction_group instead.
* Improvement, no backport needed
Closes scylladb/scylladb#27479
* github.com:scylladb/scylladb:
table: reduce allocations by using for_each_compaction_group rather than compaction_groups()
replica: storage_group: rename compaction_groups to compaction_groups_immediate
Raft topology goes over all nodes in the 'left' state and triggers a 'remove
node' notification in case the id/ip mapping is available (meaning the node
left recently), but the problem is that, since the mapping is not removed
immediately, when multiple nodes are removed in succession a notification
for the same node can be sent several times. Fix that by sending the
notification only if the node still exists in the peers table. It will
be removed by the first notification, and the following notifications will
not be sent.
Closes scylladb/scylladb#27743
Add table size to DescribeTable's reply in Alternator
Fills DescribeTable's reply with missing field TableSizeBytes.
- add helper class simple_value_with_expiry, which is like std::optional
but the value put into it has a timeout.
- add ignore_errors to the estimate_total_sstable_volume function - if set
to true, the function will catch errors during RPC and ignore them,
substituting 0 for the missing value.
- add a reference to storage_service to the executor class (needed to call
the estimate_total_sstable_volume function).
- add fill_table_description and create_table_on_shard0 as non-static
methods to the executor class
- calculate TableSizeBytes value for a given table and return it as
part of DescribeTable's return value. The value calculated is cached for
approximately 6 hours (as per DescribeTable's specification).
The algorithm is as follows (a sketch follows this list):
- if the requested value is in cache and is still valid it's returned,
nothing else happens.
- otherwise:
- every shard of every node is requested to calculate the size of its data
- if an error happens, it is ignored and we assume the given
shard has a size of 0
- all such values are summed, producing the total size
- the produced value is returned to the caller
- on the node where the size calculation happened, every shard is requested
to cache the produced value with a 6-hour timeout.
- if the next call comes for a different shard on the same node that
doesn't yet have a cached value, the shard will request the value to
be calculated again. The new value will overwrite the old one on
every shard on this node.
- if the next call comes to a different node, the calculation
will happen from scratch, possibly producing a different
value. The value will have its own timeout; there's no attempt made
to synchronize the value between nodes.
- add an alternator_describe_table_info_timeout_in_seconds parameter, which
controls how long DescribeTable's table information is held
in cache. Default is 6 hours.
- update the test to use the parameter
`alternator_describe_table_info_timeout_in_seconds` - setting it to 0
and forcing memtables to be flushed to disk allows checking that the table
size has grown.
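A minimal Python sketch of the caching flow described in the list above (hypothetical names; the real implementation is C++ and per-shard):
```
import time

class SimpleValueWithExpiry:
    # rough analog of the described helper: an optional value
    # that behaves as unset once its deadline passes
    def __init__(self):
        self._value = None
        self._deadline = 0.0

    def emplace(self, value, validity_seconds):
        self._value = value
        self._deadline = time.monotonic() + validity_seconds

    def get(self):
        if time.monotonic() < self._deadline:
            return self._value
        return None  # expired: as if never set

table_size_cache = SimpleValueWithExpiry()

def describe_table_size(estimate_total_sstable_volume):
    cached = table_size_cache.get()
    if cached is not None:
        return cached  # still valid, nothing else happens
    # every shard of every node computes its size; RPC errors count as 0
    size = estimate_total_sstable_volume(ignore_errors=True)
    table_size_cache.emplace(size, 6 * 3600)  # ~6 hours, per the spec
    return size
```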
Fixes #7551
Closes scylladb/scylladb#24634
* github.com:scylladb/scylladb:
alternator: fix invalid rebase
Update tests
Update documentation
Add table size to DescribeTable's output
Promote fill_table_description and create_table_on_shard0 to methods
Modify estimate_total_sstable_volume to opt ignore errors
Add alternator_describe_table_info_cache_validity_in_seconds config option
Add ref to service::storage_service to executor
Add simple_value_with_expiry util class
Prevent a stall when the group0 history is too long by using unfreeze_gently()
rather than the synchronous unfreeze() function.
Fixes #27872
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Closes scylladb/scylladb#27873
Modify the `storage_service::estimate_total_sstable_volume` function to
optionally ignore errors (substituting 0 instead) when the `ignore_errors`
parameter is set to `yes`.
Add a `simple_value_with_expiry` utility class, which functions like
a `std::optional` with an added timeout. When emplacing a value, the user
needs to provide a timeout, after which the value expires (in which case
the `simple_value_with_expiry` object behaves as if it was never set
at all).
Add boost tests for the new class.
There's a seastar helper that does the same; no need to carry yet another
implementation.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Closes scylladb/scylladb#27851
Group0 commands consist of one or more mutations and are supposed to be
atomic - i.e. the data structures that reflect the group0 tables' state
are not supposed to be updated while only some mutations of a command
are applied, and the logic responsible for that is not supposed to observe
an inconsistent state of the group0 tables.
It turns out that this assumption can be broken if a node crashes in the
middle of applying a multi-mutation group0 command. Because these
mutations are, in general, applied separately, only some mutations might
survive a crash and a restart, so the group0 tables might be in an
inconsistent state. The current logic of group0_state_machine will
attempt to read the group0 tables' state as it was left after restart,
so it may observe inconsistent state.
This can confuse the node as it may observe a state that it was not
supposed to observe, or the state will just outright break some
invariants and trigger some sanity checks. One of those was observed in
https://github.com/scylladb/scylladb/issues/26945, where a command from the CDC generation
publisher fiber was partially applied. The fiber, in addition to
publishing generations, removes old, expired generations as well.
Removal is done by removing data that describes the generation from
cdc_generations_v3 and by removing the generation's ID from the
committed generation list in the topology table. If only the first
mutation gets through but not the other one, on reload the node will see
a committed CDC generation without data, which will trigger an
on_internal_error check.
Fix this by delaying the moment when the in-memory data structures are
first loaded. In 579dcf187a, a mechanism was introduced which persists the
commit index before applying commands that are considered committed.
Starting a raft server waits until commands are replayed up to that
point. The fix is to start the group0_state_machine in a mode which only
applies mutations - the aforementioned mechanism will re-apply the
commands which will, thanks to the mutation idempotency, bring the
group0 to a consistent state. After the group0 is known to be in
consistent state (so, after raft::server_impl::start) the in-memory data
structures of group0 are loaded for the first time.
There is an exception, however: schema tables. Information about schema
is actually loaded into memory earlier than the moment when group0 is
started. Applying changes to schema is done through the migration
manager module which compares the persisted state before and after the
schema mutations are applied and acts on that. Refactoring migration
manager is out of scope of this PR. However, this is not a problem
because the migration manager takes care to apply all of the mutations
given in a command in a single commitlog segment, so the initial schema
loading code should not see an inconsistent state due to the state being
partially applied.
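Conceptually, the recovery scheme looks like this (a Python sketch with invented names, not the actual code):
```
def start_group0(raft_log, persisted_commit_index, state_machine):
    # Phase 1: mutations-only mode. Re-applying already-committed
    # commands is safe because the mutations are idempotent; a command
    # that was applied partially before the crash gets completed here.
    for entry in raft_log.entries_up_to(persisted_commit_index):
        state_machine.apply_mutations(entry.command)
    # Phase 2: only now are the group0 tables known to be consistent,
    # so the in-memory structures are loaded for the first time.
    state_machine.load_in_memory_state()
```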
The fix is accompanied by a reproducer of scylladb/scylladb#26945.
Fixes: scylladb/scylladb#26945
This is not a regression, so no need to backport.
Closes scylladb/scylladb#27528
* github.com:scylladb/scylladb:
test: cluster: test for recovery after partial group0 command
group0_state_machine: remove obsolete comment about group0 consistency
group0_state_machine: don't update in-memory state machine until start
group0_state_machine: move reloading out of std::visit
service: raft: add state machine ref to raft_server_for_group
The method in question returns a coroutine generator that co_yields
directory_entry-s. In case the method is not implemented, seastar
creates a fallback generator that calls the existing subscription-based
list_directory() and co_yields the entries. And since the checked file
doesn't yet have it, the fallback generator is used, thus skipping the
lower file's yielding lister. Not nice.
This patch implements the generator lister for the checked file, thus making
full use of the lower file's generator lister too.
A side note. It's not enough to implement it the way list_directory() does, i.e.
```
return do_io_check([] {
    return lower_file->experimental_list_directory();
});
```
since io-checking will _not_ happen on the directory reading itself, as it's
supposed to. This is a problem of the check_file::list_directory()
implementation -- it only checks for exceptions when creating the
subscription (and that really never happens), but reading the directory
itself happens without io checks.
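In Python terms, the difference is roughly the following (illustrative sketch; `io_check` stands in for the error-translating wrapper):
```
def listing_checked_at_creation_only(lower):
    # the wrapper runs only while the generator object is created;
    # errors raised during the actual directory reads escape it
    return io_check(lambda: lower.list_directory())

def listing_checked_per_entry(lower):
    # every step of the iteration -- i.e. every directory read --
    # goes through the wrapper, which is what the checked file needs
    it = iter(lower.list_directory())
    while True:
        entry = io_check(lambda: next(it, None))
        if entry is None:
            break
        yield entry
```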
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Closes scylladb/scylladb#27850
This change adds a boost test which validates the resulting table
balance of size based load balancing. The threshold was set to a
conservative 1.5 overcommit to avoid flakiness.
This patch adds a cluster feature size_based_load_balancing which, until
enabled, will force capacity based balancing. This is needed because
during rolling upgrades some of the nodes will have incomplete data in
load_stats (missing tablet sizes and effective_capacity) which are
needed for size based balancing to make good decisions and issue correct
migrations.
This change introduces tablet size based load balancing. It is an
extension of capacity based balancing with the addition of actual tablet
sizes.
It computes the difference between the most and least loaded nodes in
the DC and stops further balancing if this difference is below the
config option size_based_balance_threshold_percentage.
This config option does not apply to the absolute load, but instead to
the percentage of how much the most loaded node is more loaded than the
least loaded node:
delta = (most_loaded - least_loaded) / most_loaded
If this delta is smaller than the config threshold, the balancer will
consider the nodes balanced.
This change adds:
- The config parameter force_capacity_based_balancing which, when
enabled, performs capacity based balancing instead of size based.
- The config parameter size_based_balance_threshold_percentage which
sets the balance threshold for the size based load balancer.
- The config parameter minimal_tablet_size_for_balancing which sets the
minimal tablet size for the load balancer.
This patch corrects the way update_load_stats_on_end_migration() decides
which tablet transition occurred, in order to reconcile tablet sizes in
load_stats. Before, the transition kind was inferred from the value of
leaving and pending replicas. This patch changes this to use the value
of trinfo.transition.
In case of a rebuild, and in case there is only one replica, the new
tablet size will be set to 0.
This patch adds a method to load_stats which searches for the tablet
size during tablet transition. In case of tablet migration, the tablet
will be searched on the leaving replica, and during rebuild we will
return the average tablet size of the pending replicas.
Recently, test/cluster/test_tablet.py::test_orphaned_sstables_on_startup started
spinning in the log browsing code, part of the test library that looks into log files
for expected or unexpected patterns. This reproduced somewhat in continuous
integration, and very reliably for me locally.
The test was introduced in fa10b0b390, a year ago.
There are two bugs involved: first, we're looking for crashes in this test,
even though it is in fact expected to crash. The node expectedly fails with an
on_internal_error. Second, the log browsing code contains an infinite loop
if the crash backtrace happens to be the last thing in the log. The series
fixes both bugs.
Fixes #27860.
While the bad code exists in release branches, it doesn't trigger there so far, so it's best
to only backport the fix if it starts manifesting there.
Closes scylladb/scylladb#27879
* github.com:scylladb/scylladb:
test: pylib: log_browsing: fix infinite loop in find_backtraces()
test: pylib/log_browsing, cluster/test_tablets: don't look for expected crashes
There's a bunch of tagged create_links_common() overloads that call the
most generic one with properly crafted arguments and the link_mode.
Callers of those one-liners can craft the args themselves.
As a result, there's only one create_links_common() overload, and callers
explicitly specify what they want from it.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Now there are only two callers left -- sstable::snapshot() and
sstable::seal(), which wants to auto-backup the sealed sstable.
The snapshot arguments are:
- relative path, use _base_dir
- no new generation provided
- no leave-unsealed tag
With that, the implementation of filesystem_storage::snapshot() is as
simple as
- prepare full path relative to _base_dir
- touch new directory
- call create_links_common()
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
And call it from sstable::clone() instead of storage::snapshot().
The clone arguments are:
- target directory is storage::prefix(), that's _dir itself
- new generation is always provided, no need for optional
- leave_unsealed bool flag
With that, the implementation of filesystem_storage::clone() is as
simple as calling create_links_common(), forwarding the args and _dir to it.
The unification of the leave_unsealed branches will come a bit later, making
this code even shorter.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The test boost/error_injection_test.cc::test_inject_future_disabled
checks what happens when a sleep injection is *disabled*: the test
has a 10-millisecond-sleep injection and measures how long it takes.
The test expects it to take less than 10 milliseconds - in fact it
should take almost zero. But this is not guaranteed - on a slow debug
build and an overcommitted server this do-nothing injection can take
some time, and in one run (#27798) it took 14 milliseconds - and the
test failed.
The solution is easy - make the sleep-that-doesn't-happen much longer -
e.g., 10 whole seconds. Since this sleep still doesn't happen, we
expect the injection to return in less - much less - than 10 seconds.
This 10 seconds is so ridiculously high that we don't expect the do-nothing
injection to take that long, not even on a ridiculously busy test machine.
Fixes #27798
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes scylladb/scylladb#27874
The find_backtraces() function uses a very convoluted loop to
read the log file. The loop fails to terminate if the last thing
in the log file is the backtrace, since the loop termination condition
(`not line`) continues to be true.
It's not clear why this did not reliably hit before, but it now
reliably reproduces for me on both x86 and aarch64. Perhaps timing
changed, or perhaps previously we had more text on the log.
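The shape of the fix, as a sketch (not the actual test-library code): the read loop must treat EOF as a terminating condition instead of retrying forever.
```
def collect_backtrace_lines(log_file):
    lines = []
    while True:
        line = log_file.readline()
        if not line:
            # EOF: if the backtrace is the very last thing in the
            # log, stop here rather than spinning on readline()
            break
        if looks_like_backtrace(line):  # hypothetical matcher
            lines.append(line)
    return lines
```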
test_tablets.test_orphaned_sstables_on_startup verifies that an
on_internal_error("Unable to load SSTable...") is generated when
an sstable outside a tablet boundary is found on startup.
The test indeed finds the error, but then proceeds to hang in
find_backtraces(), or fail if find_backtraces() is fixed, since
it finds an unexpected (for it) crash.
Fix this by not looking for crashes if a new option expected_crash
is set. Set it for this test.
This reverts commit caa0cbe328. It is
either extremely slow or broken. I was never able to get it to
run on an r8gd.8xlarge (on the NVMe disk). Even when it passes,
it is very slow.
Test script:
```
git submodule update --recursive || exit 125
rm -rf build
d() { ./tools/toolchain/dbuild -it -- "$@"; }
d ./configure.py --mode release || exit 125
d ninja release-build || exit 125
d ./test.py --mode release
```
Ref #27858
Ref #27859
Ref #27860
Traversing the span's freelist is known to generate "Cannot access
memory at address ..." errors, which is especially annoying when it
results in failed CI. Make this loop more robust: catch gdb.error coming
from it and just log a warning that some listed objects in the span may
be free ones.
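A sketch of the intended robustness, using gdb's Python API (`span_freelist` is a hypothetical stand-in for the existing freelist walker):
```
import gdb

def dump_span_objects(span):
    try:
        for obj in span_freelist(span):  # hypothetical walker
            print(obj)
    except gdb.error as err:
        # freelist pointers may reference inaccessible memory;
        # warn instead of failing the whole command (and the CI run)
        print(f"warning: could not walk span freelist ({err}); "
              "some listed objects in the span may be free")
```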
Fixes: #27681
Closes scylladb/scylladb#27805
Previously, concurrent test runs shared the same filename for downloaded bucket files. This caused concurrent writers to operate on the same file, leading to file corruption. In some cases, this manifested as test failures and intermittent std::bad_alloc exceptions.
Change Description
This change ensures that each test instance uses a unique filename for downloaded bucket files.
By isolating file writes per test execution, concurrent runs no longer interfere with each other.
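For instance, a per-execution filename can be derived like this (a sketch, not the actual test code):
```
import pathlib
import uuid

def unique_download_path(tmp_dir, bucket):
    # a fresh UUID per test execution guarantees that concurrent
    # runs never write to the same downloaded-bucket file
    return pathlib.Path(tmp_dir) / f"{bucket}-{uuid.uuid4().hex}.download"
```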
Fixes: #27824
backport not required
Closes scylladb/scylladb#27843
compaction_groups_immediate() may allocate memory, but when called from a
synchronous call site, the caller can use for_each_compaction_group
instead to iterate over the compaction groups with no extra allocations.
Calling compaction_groups_immediate() is still required from an async
context when we want to "sample" the compaction groups
so we can safely iterate over them and yield in the inner loop.
Also, some performance-insensitive call sites using
compaction_groups_immediate have been left as they are
to keep them simple.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Improve scylla fiber's ability to traverse through coroutines.
Add a --direction command-line parameter to scylla-fiber.
Fix out-of-date permit collection in scylla read-stat and improve the printout.
scylla-gdb.py improvements, no backport needed
Closes scylladb/scylladb#27766
* github.com:scylladb/scylladb:
scylla-gdb.py: scylla read-stats: include all permit lists
scylla-gdb.py: scylla fiber: add --direction command-line param
scylla-gdb.py: scylla fiber: add support for traversing through coroutines backward
Mention the type of batch: Logged or Unlogged. The size error (warn/fail on
too large a size) has different significance depending on the type.
Refs: #27605
Closes scylladb/scylladb#27664
Coerce the return value of config.getoption("--repeat") to int to avoid:
```
Traceback (most recent call last):
File "/usr/bin/pytest", line 8, in <module>
sys.exit(console_main())
~~~~~~~~~~~~^^
File "/usr/lib/python3.14/site-packages/_pytest/config/__init__.py", line 201, in console_main
code = main()
File "/usr/lib/python3.14/site-packages/_pytest/config/__init__.py", line 175, in main
ret: ExitCode | int = config.hook.pytest_cmdline_main(config=config)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^
File "/usr/lib/python3.14/site-packages/pluggy/_hooks.py", line 512, in __call__
return self._hookexec(self.name, self._hookimpls.copy(), kwargs, firstresult)
~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/lib/python3.14/site-packages/pluggy/_manager.py", line 120, in _hookexec
return self._inner_hookexec(hook_name, methods, kwargs, firstresult)
~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/lib/python3.14/site-packages/pluggy/_callers.py", line 167, in _multicall
raise exception
File "/usr/lib/python3.14/site-packages/pluggy/_callers.py", line 121, in _multicall
res = hook_impl.function(*args)
File "/usr/lib/python3.14/site-packages/_pytest/helpconfig.py", line 154, in pytest_cmdline_main
config._do_configure()
~~~~~~~~~~~~~~~~~~~~^^
File "/usr/lib/python3.14/site-packages/_pytest/config/__init__.py", line 1118, in _do_configure
self.hook.pytest_configure.call_historic(kwargs=dict(config=self))
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/lib/python3.14/site-packages/pluggy/_hooks.py", line 534, in call_historic
res = self._hookexec(self.name, self._hookimpls.copy(), kwargs, False)
File "/usr/lib/python3.14/site-packages/pluggy/_manager.py", line 120, in _hookexec
return self._inner_hookexec(hook_name, methods, kwargs, firstresult)
~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/lib/python3.14/site-packages/pluggy/_callers.py", line 167, in _multicall
raise exception
File "/usr/lib/python3.14/site-packages/pluggy/_callers.py", line 121, in _multicall
res = hook_impl.function(*args)
File "/home/bdenes/ScyllaDB/scylladb/scylladb/test/pylib/runner.py", line 206, in pytest_configure
config.run_ids = tuple(range(1, config.getoption("--repeat") + 1))
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~
TypeError: can only concatenate str (not "int") to str
```
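The shape of the fix (a sketch; alternatively the option could be registered with `type=int` so pytest does the coercion itself):
```
def pytest_configure(config):
    # getoption() hands back the raw command-line string here, so
    # coerce it before using it in range() arithmetic
    repeat = int(config.getoption("--repeat"))
    config.run_ids = tuple(range(1, repeat + 1))
```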
Closes scylladb/scylladb#27649
The script API is 500+ lines long in an already too long and hard to navigate document. Extract it to a separate document, making both documents shorter and easier to navigate.
Documentation refactoring, no backport needed.
Closes scylladb/scylladb#27609
* github.com:scylladb/scylladb:
docs: scylla-sstable-script-api.rst: add introduction and title
docs: scylla-sstable.rst: extract script API to separate document
docs: scylla-sstable: prepare for script API extract
If the table uses UDTs, include the descriptions of these (CREATE TYPE
statements) in the schema dump. Without these the schema is not useful.
Closes scylladb/scylladb#27559
The method in question knows that it writes snapshots to the local filesystem and makes active use of this. This PR relaxes this assumption and splits the logic into two parts -- one that orchestrates the sstables snapshot and collects the necessary metadata, and one that writes the metadata itself.
Closes scylladb/scylladb#27762
* github.com:scylladb/scylladb:
table: Move snapshot_file_set to table.cc
table: Rename and move snapshot_on_all_shards() method
table: Ditch jsondir variable
table, sstables: Pass snapshot name to sstable::snapshot()
table: Use snapshot_writer in write_manifest()
table: Use snapshot_writer in write_schema_as_cql()
table: Add snapshot_writer::sync()
table: Add snapshot_writer::init()
table: Introduce snapshot_writer
table: Move final sync and rename seal_snapshot()
table: Hide write_schema_as_cql()
table: Hide table::seal_snapshot()
table: Open-code finalize_snapshot()
table: Fix indentation after previuous patch
table: Use smp::invoke_on_all() to populate the vector with filenames
table: Don't touch dir once more on seal_snapshot()
table: Open-code table::take_snapshot() into caller lambda
table: Move parts of table::take_snapshot to sstables_manager
table: Introduce table::take_snapshot()
table: Store the result of smp::submit_to in local variable
Remove many unused "import" statements or parts of import statements.
All of them were detected by Copilot, but I verified each one manually
and prepared this patch.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes scylladb/scylladb#27676
The file test/alternator/test_transact.py accidentally had two tests
with the same name, test_transact_get_items_projection_expression.
This means the first of the two tests was ignored and never run.
This patch renames the second of the two to a more appropriate
(and unique...) name.
I verified that after this change the number of tests in this file
grows by one, and that still all tests pass on DynamoDB and fail
(as expected by xfail) on Alternator.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes scylladb/scylladb#27702
The db::config::object_storage_endpoints parameter is live-updateable, but when the update really happens, the new endpoints may fail to propagate to non-zero shards because of the way db::config sharding is implemented.
Refs: #7316
Fixes: #26509
Backport to 2025.3 and 2025.4; AFAIK there are setups with object storage configs for native backup.
Closes scylladb/scylladb#27689
* github.com:scylladb/scylladb:
sstables/storage_manager: Fix configured endpoints observer
test/object_store: Add test to validate how endpoint config update works
Currently, we support ccache as the compiler cache. Since it is transparent, nothing
much is needed to support it.
This series adds support for sccache[1] and prefers it over ccache when it is installed.
sccache brings the following benefits over ccache:
1. Integrated distributed build support similar to distcc, but with automatic toolchain packaging and a scheduler
2. Rust support
3. C++20 modules (upcoming[2])
It is the C++20 modules support that motivates the series. C++20 modules have the potential to reduce
build times, but without a compiler cache and distributed build support, they come with too large
a penalty. This removes the penalty.
The series detects that sccache is installed, selects it if so (and if not overridden
by a new option), enables it for C++ and Rust, and disables ccache transparent
caching if sccache is selected.
Note: this series doesn't add sccache to the frozen toolchain or add dbuild support. That
is left for later.
[1] https://github.com/mozilla/sccache
[2] https://github.com/mozilla/sccache/pull/2516
Toolchain improvement, won't be backported.
Closes scylladb/scylladb#27834
* github.com:scylladb/scylladb:
build: apply sccache to rust builds too
build: prevent double caching by compiler cache
build: allow selecting compiler cache, including sccache
Remove many unused "import" statements or parts of import statements.
All of them were detected by Copilot, but I verified each one manually
and prepared this patch.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes scylladb/scylladb#27675
Commit d3efb3ab6f added a streaming session for rebuild, but it set
the session at request submission time. The session should be set when
the request starts executing, so this patch moves it to the correct
place.
Closes scylladb/scylladb#27757
Unused imports, unused variables and such.
No functional changes, just to get rid of some standard CodeQL warnings.
Benign - no need to backport.
Signed-off-by: Yaniv Kaul <yaniv.kaul@scylladb.com>
Closes scylladb/scylladb#27801
Under podman, we already own /sys/fs/cgroup. Run the chown command only
under docker where the container does not map the host user to the
container root user.
The chown process is sometimes observed to fail with EPERM (see issue).
But it's not needed, so avoid it.
Fixes #27837.
Closes scylladb/scylladb#27842
Auth cache loading at startup races between the
auth service and the raft code, and it doesn't support
concurrency, causing it to crash.
We can't easily remove either of the call sites, as during
raft recovery the snapshot is not loaded and we rely
on loading the cache via the auth service. Therefore we add a
semaphore.
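The idea, in asyncio terms (a conceptual sketch; the real code uses a seastar semaphore):
```
import asyncio

_auth_cache_lock = asyncio.Lock()

async def load_auth_cache(loader):
    # serialize the auth-service and raft-recovery callers: the
    # underlying loader does not tolerate concurrent invocations
    async with _auth_cache_lock:
        await loader()
```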
Fixes https://github.com/scylladb/scylladb/issues/27540
Closes scylladb/scylladb#27573
This patch was suggested and prepared by copilot; I am writing the commit
message because the original one was worthless.
In commit cf138da, for an unexplained reason, a loop waiting until the
expected value appears in a materialized view was replaced by a call to
wait_for_view_built(). The old loop code was left behind in a comment,
and this commented-out code is now bothering our AI. So let's delete the
commented-out code.
Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
Closes scylladb/scylladb#27646
To configure S3 storage, one needs to do
```
object_storage_endpoints:
- name: s3.us-east-1.amazonaws.com
port: 443
https: true
aws_region: us-east-1
```
and for GCS it's
```
object_storage_endpoints:
- name: https://storage.googleapis.com:443
type: gs
credentials_file: <gcp account credentials json file>
```
This PR updates the S3 part to look like
```
object_storage_endpoints:
- name: https://s3.us-east-1.amazonaws.com:443
aws_region: us-east-1
```
fixes: #26570
Not-yet released feature, no need to backport. Old configs are not accepted any longer. If it's needed, then this decision needs to be revised.
Closes scylladb/scylladb#27360
* github.com:scylladb/scylladb:
object_storage: Temporarily handle pure endpoint addresses as endpoints
code: Remove dangling mentions of s3::endpoint_config
docs: Update docs according to new endpoints config option format
object_storage: Create s3 client with "extended" endpoint name
test: Add named constants for test_get_object_store_endpoints endpoint names
s3/storage: Tune config updating
sstable: Shuffle args for s3_client_wrapper
For deployments fronted by a reverse proxy (haproxy or privatelink), we want to
use proxy protocol v2 so that client information in system.clients is correct and so
that the shard-aware selection protocol, which depends on the source port, works
correctly. Add proxy-protocol enabled variants of each of the existing native transport
listeners.
Tests are added to verify this works. I also manually tested with haproxy.
New feature, no backport.
Closes scylladb/scylladb#27522
* github.com:scylladb/scylladb:
test: add proxy protocol tests
config, transport: support proxy protocol v2 enhanced connections
As noticed by copilot, two tests in test_guardrail_compact_storage.py
could never fail, because they used `pytest.fail` instead of the
correct `pytest.fail()`. Unfortunately, Python has a footgun
where if it sees a bare function name without parentheses, instead of
complaining it evaluates the function object and then ignores it,
and absolutely nothing happens.
So let's add the missing `()`. The test still passes, but now it at
least has a chance of failing if we have a regression.
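The footgun, side by side (a sketch with a hypothetical condition):
```
import pytest

def test_sketch():
    if something_went_wrong():        # hypothetical check
        pytest.fail                   # BUG: the bare name is evaluated
                                      # and silently discarded
    if something_went_wrong():
        pytest.fail("it went wrong")  # correct: actually fails the test
```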
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes scylladb/scylladb#27658
during any jenkins job that triggers `test.py` we get:
```
/jenkins/workspace/releng-testing/byo/byo_build_tests_dtest/scylla/test/pylib/s3_proxy.py:152: SyntaxWarning: 'return' in a 'finally' block
```
The 'return' statement in the finally block was causing a SyntaxWarning.
Moving the return outside the finally block ensures proper exception
handling while maintaining the intended behavior.
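Before and after, as a sketch (`fetch_data` is a hypothetical helper):
```
# before: 'return' inside 'finally' (SyntaxWarning on recent Pythons);
# it also swallows any exception propagating out of the try block
def handle_broken(sock):
    response = None
    try:
        response = fetch_data(sock)
    finally:
        sock.close()
        return response  # exceptions from fetch_data() vanish here

# after: cleanup still runs on every path, exceptions propagate
def handle_fixed(sock):
    try:
        response = fetch_data(sock)
    finally:
        sock.close()
    return response
```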
Closes scylladb/scylladb#27823
sstable_validation_test tests the `scylla sstable validate` command
by passing it intentionally corrupted sstables. It uses an sstable
cache to avoid re-creating the same sstables. However, the cache
does not consider the sstable version, so if called twice with the
same inputs for different versions, it will return an sstable with
the original version for both calls. As a result, `ms` sstables
were not tested. Fix this bug by adding the sstable version (and
the schema for good measure) to the cache key.
An additional bug, hidden by the first, was that we corrupted the
sstable by overwriting its Index.db component. But `ms` sstables
don't have an Index.db component, they have a Partitions.db component.
Adjust the corrupting code to take that into account.
With these two fixes, test_scylla_sstable_validate_mismatching_partition_large
fails on `ms` sstables. Disable it for that version. Since it was
previously practically untested, we're not losing any coverage.
Fixing this test unblocks further work on making pytest take charge
of running the tests. pytest exposed this problem, likely by running
it on different runners (and thus reducing the effectiveness of the
cache).
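The gist of the cache-key fix, as a sketch:
```
_sstable_cache = {}

def cached_corrupt_sstable(schema, version, corruption, make):
    # the version (and, for good measure, the schema) must be part of
    # the key; keying on the corruption kind alone silently reuses an
    # sstable that was built for a different sstable version
    key = (schema, version, corruption)   # assumes hashable inputs
    if key not in _sstable_cache:
        _sstable_cache[key] = make(schema, version, corruption)
    return _sstable_cache[key]
```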
Fixes #27822.
Closes scylladb/scylladb#27825
* seastar 7ec14e83...f0298e40 (8):
> Merge 'coroutine/try_future: call set_current_task() when resuming the coroutine' from Botond Dénes
coroutine/try_future: call set_current_task() when resuming the coroutine
core: move set_current_task() out-of-line
> stop_signal: stop including reactor.hh
> cmake: Mark hwloc headers as system includes to suppress warnings
> build: explicitly enable vptr sanitizer
> httpd: Add API to set tcp keepalive params
> Merge 'Make datagram_channel::send() use temporary_buffer-s' from Pavel Emelyanov
net: Remove no longer used to_iovec() helpers
net,code: Update callers to use new datagram_channel::send()
net: Introduce datagram_channel::send(span<temporary_buffer>) method
posix-stack: Make UDP socket implementation use wrapped_iovec
posix-stack: Introduce wrapped_iovec
> code: Move pollable_fd_state::write_all(const char*) from API level 9
> thread: Remove unused sched_group() helper
configure.py: added -lubsan to DEBUG sanitizer flags
Closes scylladb/scylladb#27511
This problem and its fix were suggested by copilot; I'm just writing the
cover letter.
test/nodetool/test_status.py has the silly statement `tokens == "?"` which
has no effect. Looking around the code suggested to me (and also to
Copilot, nice) that the correct intent was `assert tokens == "?"` and not,
say, `tokens = "?"`.
Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
Closes scylladb/scylladb#27659
Add a reproducer for scylladb/scylladb#26945. By using error injections,
the test triggers a situation where a command that removes an obsolete
CDC generation is partially applied, then the node is killed and brought
back. Thanks to the fix, restarting the node succeeds and does not
trigger any consistency checks in the group0 reload logic.
The comment is outdated. It is concerned about group0 consistency after
crash, and that re-applying committed commands may require a raft
quorum. First, 579dcf1 was introduced (long ago), which gets rid of the
need for a quorum, as the node persists the commit index before applying
the commands - so it knows up to which command it should re-apply on
restart. Second, the preceding commits in this PR make use of this
mechanism for group0.
Remove the comment as the concern was fully addressed. Additionally,
remove a mention of the comment in raft_group0_client.cc - although it
claims that the comment is placed in `group0_state_machine::apply`, it
has been moved to `merge_and_apply` in 96c6e0d (both comments were
originally introduced in 6a00e79).
In the next commit, we will adjust the logic so that it only reloads the
in-memory state when a flag is set. By moving the reload logic to one
place in `merge_and_apply`, the next commit will be able to reach its
goal by only adding a single `if`.
This reference will be used by the code that starts group0. It will
manually enable the in-memory state machine only after the group0 server
is fully started, which entails replaying the group0 commands that are,
locally, seen as committed - in order to repair any inconsistencies that
might have arisen due to some commands being applied only partially
(e.g. due to a crash).
On start, the manager creates an observer for the object_storage_endpoints
config parameter. The goal is to refresh the maintained set of endpoint
parameters and the client upon config change. The observer is created on
shard 0 only, and when kicked it calls manager.invoke_on_all to update
the manager on all shards.
However, there's a race here. The thing is that db::config values are
implicitly "sharded" under the hood with the help of a plain array. When
any code tries to read a value from db::config::something, the reading
code secretly gets the value from this inner array indexed by the
current shard id.
Next, when the config is updated, it first assigns new values to the [0]
element of the hidden array, then calls the broadcast_to_all_shards() helper
that copies the values from the zeroth slot to all the others. But the
manager's observer is triggered when the new value is assigned at index
zero, and if the invoke-on-all lambda (mentioned above) happens to be
faster than broadcast_to_all_shards(), the non-zero shards will read old
values from db::config's inner array.
The fix is to instantiate observer on all shards and update only local
shard, whenever this update is triggered.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
There's a test for backup with a non-existing endpoint/bucket/snapshot. It
checks that the API call to backup sstables properly fails in that case.
This patch adds a similar test for an "unconfigured endpoint", but it adds
the endpoint configuration on-the-fly and expects that the backup will
proceed after the config update.
Currently the test fails, as the config update only affects the config
itself; the storage_manager, which is in charge of maintaining endpoint
clients, is not really updated. The next patch will fix it.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Now it's database::snapshot_table_on_all_shards(). This is symmetric to
database::truncate_table_on_all_shards().
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Now the table::snapshot_on_all_shards() is storage-independent and can
stop maintaining the local path variable.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Currently sstable::snapshot() is called with the directory name to put
snapshots into. This patch changes it to accept a snapshot name instead.
This makes the table-sstable API unaware of the snapshot destination
storage type.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The manifest writing code is self-contained in the sense that it needs a
list of sstable files and an output_stream to write it to. The
snapshot_writer can provide an output_stream for a specific component; it
can be re-used by the manifest writing code, thus making it independent
from the local filesystem.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The schema writing code is self-contained in the sense that it needs the
schema description and an output_stream to write it to. Teach the
snapshot_writer to provide the output_stream and make write_schema_as_cql()
independent from the local filesystem.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
It's an abstract class that defines how to write data and metadata with
table snapshot. Currently it just replaces the storage_options checks
done by table::snapshot_on_all_shards(), but it will soon evolve.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The seal_snapshot() method syncs the directory at the end. Now that the
method is table.cc-local, it doesn't need to be that careful. It looks
nicer renamed to write_manifest(), with the caller syncing the directory
after calling it.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The method only needs the schema description from the table. The caller can
pre-get it and pass it as an argument. This makes it symmetric with
seal_snapshot() (which will be renamed soon) and reduces the table class
API size.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The method is static and has nothing to do with table. The
snapshot_file_set needs to become public, but it will be moved to
table.cc soon.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
There's a vector of foreign pointers to sets with sstable filenames
that's populated on all shards. The code does the invoke-on-all by hand
to grow the vector with push-back-s. However, if the vector is resized in
advance, shards will be able to just populate their own slots.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Now that the logic of take_snapshot() is split between two components
(table and sstables_manager), it's no longer useful.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Move the loop over vector of sstables that calls sstable->snapshot()
into sstables manager.
This makes it symmetric with sstables_manager::delete_atomically() and
allows for future changes.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The method returns the all-sstables vector with a guard that prevents this
list from being modified. Currently this is part of another existing
table::take_snapshot() method, but the newer, smaller one is more
atomic and self-contained; the next patches will benefit from it.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Add a new configuration option for selecting the compiler
cache. Prefer sccache if found, since it supports rust as
well as C++, has better support for distributed compilation,
and is slated to receive module support soon.
cmake is also supported.
Update the documentation about restrictions to tablets keyspaces related
to RF-rack.
* MV/SI require the keyspace to be RF-rack-valid
* topology operations are restricted if a keyspace has views to preserve
RF-rack-validity
When creating an index we validate that the keyspace is RF-rack-valid
and print a warning that the keyspace must remain RF-rack-valid.
This should apply only to indexes that are based on materialized views
for which there are consistency concerns when the keyspace is not
RF-rack-valid.
vector indexes are not based on materialized views, hence these
restrictions should not apply to them.
Creating a MV or index in a tablets-based keyspace now forces additional
restrictions on the keyspace. The keyspace must be RF-rack-valid and it
must remain RF-rack-valid while the view exists.
Add a CQL warning about these restrictions.
allow tablet merge of tables with views even if the
rf_rack_valid_keyspaces option is not set, because now keyspaces that
have views are enforced to always be rf-rack-valid, regardless of the
option value.
Extend the RF-rack validation in `assert_rf_rack_valid_keyspace` to
validate rack-list-based replication as well. Previously, validation was
done only for numeric replication.
If the replication is based on a rack list, we validate that all racks
that are required for replication are present in the topology rack map.
If some rack is needed for replication but is missing, or it doesn't
have normal token owner nodes, the validation fails with an error.
add tests that attempt to create a keyspace during different stages of
node join or remove, and verify that the rf-rack condition can't be
broken - either creating the keyspace should fail or the node operation
should fail, depending on the stage.
If a keyspace is created while a node is joining or being removed, it could
break the rf-rack invariant. For example:
1. We have 3 nodes in 3 racks, no keyspaces
2. A new node starts to join in a new rack - passes validation because
there are no keyspaces
3. Create a keyspace with rf=3 - passes validation because the joining
node is not a normal token owner yet
4. The new node becomes a normal token owner
5. The rf-rack invariant is broken. We have rf=3 and 4 racks
To fix this, we change the rf-rack check to consider a node as a token
owner if it's either a normal token owner or it has bootstrap tokens and
is about to become a normal token owner.
Now the condition can't be broken. Consider keyspace creation at
different stages of adding a node in our example:
* Before the node is assigned bootstrap tokens: the node is not
considered. We can create a keyspace with rf=3 as if the node doesn't
exist, and then node join will fail in the group0 operation that
assigns bootstrap tokens, because during this operation we check
rf-rack validity.
* Assigning bootstrap tokens is a single group0 operation that is
serialized with keyspace creation. During this operation we check that
adding the node as a token owner will maintain rf-rack validity for all
keyspaces.
* After the node is assigned bootstrap tokens and until it becomes a
normal token owner: it is considered as a transitioning token owner by
the rf-rack check and the rack is considered a transitioning rack. We
can't count the rack as a normal rack because the node join may still
fail and roll back. Trying to create a keyspace with either rf=3 or
rf=4 will fail because we can end up with either 3 or 4 racks.
Similarly, when removing a node, we validate that removing the node will
maintain rf-rack validity in the same group0 operation that changes the
node state to removing/decommissioning, after which the node becomes a
leaving endpoint, and it's not considered a normal token owner anymore
for the rf-rack check.
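The strengthened check can be pictured like this (a Python sketch of the idea for the join case, not the actual locator code):
```
def keyspace_creation_allowed(rf, normal_racks, transitioning_racks):
    # racks of nodes that already hold bootstrap tokens are
    # 'transitioning': the join may complete or roll back, so the
    # final rack count is uncertain
    min_racks = len(normal_racks)
    max_racks = len(normal_racks | transitioning_racks)
    # RF must equal the rack count in every possible outcome
    return rf == min_racks and rf == max_racks
```
In the example above, with 3 normal racks and 1 transitioning rack, both rf=3 and rf=4 are rejected, exactly as described.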
Add tests that verify the restrictions on topology operations when there
are keyspaces with tablets and materialized views.
For such keyspaces, RF=Racks must be enforced while they have
materialized views, therefore adding a node in a new rack or removing a
node that would eliminate a rack should be rejected.
add new tests for testing that RF-rack validity is maintained when doing
topology operations that may break them, such as adding nodes in new
racks or removing nodes.
when a new node joins or an existing node is removed / decommissioned,
check if the operation would violate the RF-rack-validity of some
keyspace. if so - reject the operation in order to preserve
RF-rack-validity.
Fixes scylladb/scylladb#23345
Fixes scylladb/scylladb#26820
add validation to node remove / decommission, similar to node validation
when a node joins.
when starting node remove or decommission, the validation function
checks if the operation is valid and can proceed. if not, it's aborted
with an error message.
we change the return type of validate_joining_node so that it will be
similar and consistent with the new validate_removing_node.
Extend the locator function assert_rf_rack_valid_keyspace to accept
arbitrary topology dc-rack maps and nodes instead of using the current
token metadata.
This allows us to add a new variant of the function that checks rf-rack
validity given a topology change that we want to apply. We will use it
to check that rf-rack validity will be maintained before applying the
topology change.
The possible topology changes for the check are node add and node remove
/ decommission. These operations can change the number of normal racks -
if a new node is added to a new rack, or the last node is removed from a
rack.
The function validate_view_keyspace checks if a keyspace is eligible for
having materialized views, and it is used for validation when creating a
MV or a MV-based index.
Previously, it was required that the rf_rack_valid_keyspaces option is
set in order for tablets-based keyspaces to be considered eligible, and
the RF-rack condition was enforced when the option is set.
Instead of this, we change the validation to allow MVs in a keyspace if
the RF-rack condition is satisfied for the keyspace - regardless of the
config option.
We remove the config validation for views on startup that validates the
option `rf_rack_valid_keyspaces` is set if there are any views with
tablets, since this is not required anymore.
We can do this without worrying about upgrades because this change will
be effective from 2025.4 where MVs with tablets are first out of
experimental phase.
We update the test for MV and index restrictions in tablets keyspaces
according to the new requirements.
* Create MV/index: previously the test checked that it's allowed only if
the config option `rf_rack_valid_keyspaces` is set. This is changed
now so it's always allowed to create MV/index if the keyspace is
RF-rack-valid. Update the test to verify that we can create MV/index
when the keyspace is RF-rack-valid, even if the rf_rack option is not
set, and verify that it fails when the keyspace is RF-rack-invalid.
* Alter: Add a new test to verify that while a keyspace has views, it
can't be altered to become RF-rack-invalid.
Extend the RF-rack-validity enforcement to keyspaces that have views,
regardless of the option `rf_rack_valid_keyspaces`.
Previously, RF-rack-validity was enforced when `rf_rack_valid_keyspaces`
was set for all keyspaces. Now we want to allow creating MVs in tablet
keyspaces that are RF-rack-valid and enforce the RF-rack-validity even
if the config option is not set.
Add the helper function enforce_rf_rack_validity_for_keyspace that
returns true if RF-rack-validity should be enforced for a keyspace, and
use it wherever we need to check this instead of checking the config
option directly.
This is useful because this condition is used in multiple places, and
having it defined in a single helper function will make it easier to
see and change the enforcement conditions.
simple refactoring: the enforce parameter is always given the value of
the `rf_rack_valid_keyspaces` option. remove the parameter and use the
option value directly from the db config.
this will be useful for a later change to the enforcement conditions.
the test test_unfinished_writes_during_shutdown starts 3 nodes in 3
racks and creates a keyspace with RF=3, then adds a new node in a 4th
rack. this breaks rf-rack validity for the keyspace.
we change it instead to add the new node in an existing rack. it doesn't
matter for the test - the test only wants to add a new node to trigger
some topology change.
The current code which collects permit stats is out-of-date (by a few
years), as it only iterates through _permit_list. There are 4 additional
lists that permits can be part of now (all intrusive). Include all of
these in the stat collection.
As a bonus, also print the semaphore pointer in the printout, so the
user can hand-examine it, should they wish to.
Can be "forward", "backward" or "both" (default).
Allows traversing the fiber in just one direction. Useful when scylla
fiber fails to traverse through a task and the user has to locate the
next one in the chain manually. When resuming from this next item, the
user might want to skip the already-seen part of the fiber, to save time
on the invocation.
Traversing through coroutines forward (finding task waiting on this
coroutine) is already supported. This patch adds support for traversing
through coroutines backwards (finding task waited on by coroutine).
Coroutines need special handling: the future<> object is most likely
allocated on the coroutine frame, so we have to search through that to
find it. When doing so, the first two pointers on the frame have to be
skipped: these are the pointers to .resume and .destroy respectively and
will halt the search algorithm if seen.
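In gdb-Python terms, the frame scan might look roughly like this (hypothetical helpers; the real scylla-gdb.py logic is more involved):
```
PTR_SIZE = 8

def scan_coroutine_frame(frame_addr, frame_size, is_task_pointer):
    # the first two slots of a coroutine frame are the .resume and
    # .destroy function pointers; they would derail the search, so
    # start scanning right after them
    for offset in range(2 * PTR_SIZE, frame_size, PTR_SIZE):
        candidate = read_pointer(frame_addr + offset)  # hypothetical
        if is_task_pointer(candidate):
            yield candidate
```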
Test that the new configuration options work and that we can
connect to them. Use direct connections with an inline implementation
of the proxy protocol and the CQL native protocol, since we want
to maintain direct control over the source port number (for shard-aware
ports). Also test we land on the expected shard.
We have four native transport ports: two for plain/TLS, and two
more for shard-aware (plain/TLS as well). Add four more that expect
the proxy protocol v2 header. This allows nodes behind a reverse
proxy to record the correct source address and port in system.clients,
and the shard-aware port to see the correct source port selection
made by the client.
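For reference, a PROXY protocol v2 header for TCP over IPv4 can be built in a few lines (a sketch following the published haproxy spec, not the actual test code):
```
import socket
import struct

PP2_SIGNATURE = b"\r\n\r\n\x00\r\nQUIT\n"

def proxy_v2_header(src_ip, src_port, dst_ip, dst_port):
    # what an inline test client would send before the CQL handshake
    ver_cmd = 0x21    # version 2, PROXY command
    fam_proto = 0x11  # AF_INET, STREAM (TCP over IPv4)
    addrs = (socket.inet_aton(src_ip) + socket.inet_aton(dst_ip)
             + struct.pack("!HH", src_port, dst_port))
    return (PP2_SIGNATURE
            + struct.pack("!BBH", ver_cmd, fam_proto, len(addrs))
            + addrs)
```
The server then records the source address and port from this header rather than from the TCP connection itself.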
We're extending the logic of DESCRIBE INDEX to include properties of the
underlying materialized view. Tests are provided to ensure the
implementation works as intended.
This is a temporary solution as handling this property may require
a bit more attention or at least a bit more focus. For now, let's
forbid using it so it's clear it won't get applied. A simple test
is provided to cover it.
We document the restriction.
After the previous patch that extended the grammar and provided
basic functionalities to accommodate properties of materialized views
in indexes, this commit takes another step and actually applies them
to the underlying view when it's being created.
We're providing validation tests for each property, with the single
exception of CLUSTERING ORDER BY. That one will be handled separately
in an upcoming commit.
We also update the user documentation.
We're allowing CREATE INDEX to accept the same set of properties as
materialized views do. Our goal is to give the user an ability to
configure the underlying materialized view of an index directly,
when creating it.
This commit doesn't do anything except for extending the grammar
and passing the right pieces of information to the right destinations.
There's no validation and the options have no effect yet. That will
be done in the following patch.
The type represents a mix of both index-specific and view properties.
Since we cannot easily distinguish which properties belong to which
entity, let's use this abstraction and filter them from the C++ level.
This is a prerequisite for extending the capabilities of CREATE INDEX
by allowing it to configure the underlying materialized view.
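A minimal sketch of the filtering idea, assuming string-keyed property maps and made-up names; the real types in the code differ:
```
#include <map>
#include <set>
#include <string>

struct split_props {
    std::map<std::string, std::string> index_props; // consumed by the index itself
    std::map<std::string, std::string> view_props;  // forwarded to the underlying view
};

// Partition a mixed property map by a known set of index-specific keys;
// everything else is assumed to target the materialized view.
split_props split(const std::map<std::string, std::string>& raw,
                  const std::set<std::string>& index_keys) {
    split_props out;
    for (const auto& [k, v] : raw) {
        (index_keys.contains(k) ? out.index_props : out.view_props).emplace(k, v);
    }
    return out;
}
```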
We rename the type `index_prop_defs` to `index_specific_prop_defs`.
The rationale for the change is to distinguish between properties
related directly to an index and properties related to the underlying
view (if applicable).
The type `index_prop_defs` will be re-introduced in an upcoming commit
where it'll encompass both index-related and view-related properties.
This is a prerequisite for it.
We're introducing a new type wrapping properties that can be used with
materialized views. Doing that, we achieve the following things:
(1) We can keep validation logic in one place.
(2) We differentiate between properties of a regular table and
properties of a materialized view.
(3) It provides better modularization and allows for reusing the code.
(4) It gets rid of inconsistencies in the existing code, e.g.
CREATE MV using one type for properties while ALTER MV uses another.
The actual end goal of this commit is to be able to reuse at least part
of the validation logic of MVs in CREATE INDEX and, when it gets added,
ALTER INDEX: we want to endow those statements with the ability to modify
the underlying materialized view without the user having to alter the
view directly.
This patch does NOT implement the whole validation logic yet. It will be
done in a following commit.
Refs scylladb/scylladb#16454
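As a rough illustration of the wrapper's shape (names, members and the validation rule are assumptions, not the actual implementation):
```
#include <map>
#include <set>
#include <stdexcept>
#include <string>

// One place that owns MV-applicable properties, so CREATE/ALTER
// MATERIALIZED VIEW and CREATE INDEX can share a single validation path.
class view_prop_defs {
    std::map<std::string, std::string> _props;
public:
    void add(std::string name, std::string value) {
        _props.insert_or_assign(std::move(name), std::move(value));
    }
    void validate(const std::set<std::string>& allowed) const {
        for (const auto& [name, value] : _props) {
            if (!allowed.contains(name)) {
                throw std::invalid_argument("unknown materialized view property: " + name);
            }
        }
    }
};
```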
The end goal of this commit is to extract the validation logic of the
options used for creating and altering an MV to a separate place, so it
can be called from different places in the code.
It will be useful when extending the capabilities of the CREATE INDEX
statement.
In this patch, we move the part of the validation responsible for
checking the ID option, keeping it close to the other parts that
validate the options in their "raw" form.
One of the upcoming commits will lead to a cyclic dependency
of headers because `schema.hh` includes `index_prop_defs.hh`.
To prevent that, we remove the include and replace it with
a manually added alias.
This is not a perfect solution, but doing it properly would
require comprehensive changes. We can do that in a separate
task.
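Schematically, the workaround might look like this; the alias shown is a placeholder, since the commit doesn't spell out the real one:
```
// schema.hh (sketch)
// Previously: #include "cql3/statements/index_prop_defs.hh"
// That include would become cyclic, so declare the needed alias manually
// and keep it in sync with the original header by hand.
#include <map>
#include <string>

using index_options_map = std::map<std::string, std::string>; // placeholder alias
```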
The script API section is 500+ lines long, in a document that is already
too long and hard to navigate. Extract it to a separate document, making
both documents shorter and easier to navigate.
We are about to extract the script API to a separate document. In
preparation, convert the soon-to-be cross-document references, so they
keep working after the extraction.
To keep backward compatibility, support:
- old configs, where the endpoint is just an address and the port is
  separate. In that case, format the "new" endpoint name from them.
- lookup by address only. In that case, scan all endpoints and check
  whether any of them matches the provided address.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
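A hedged sketch of the two compatibility paths; the types, names and map layout are assumptions for illustration:
```
#include <map>
#include <optional>
#include <string>
#include <fmt/core.h>

struct endpoint_info { std::string address; unsigned port; bool https; };

// Format the "new" endpoint name from an old-style address+port config.
std::string format_endpoint_name(const endpoint_info& e) {
    return fmt::format("{}://{}:{}", e.https ? "https" : "http", e.address, e.port);
}

std::optional<endpoint_info> lookup(const std::map<std::string, endpoint_info>& endpoints,
                                    const std::string& address) {
    if (auto it = endpoints.find(address); it != endpoints.end()) {
        return it->second; // exact (new-style) endpoint name match
    }
    for (const auto& [name, info] : endpoints) {
        if (info.address == address) {
            return info; // address-only fallback: scan all endpoints
        }
    }
    return std::nullopt;
}
```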
For this, add the s3::client::make(endpoint, ...) overload that accepts
the endpoint in proto://host:port format. It parses the provided URL and
calls the legacy overload, which accepts a raw host string and a config
with the port, the https bit, etc.
The generic object_storage_endpoint_param no longer needs to carry the
internal s3::endpoint_config; the config option parsing changes
accordingly.
Tests that generate the config files, as well as the docs, are updated.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
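The parsing step might look roughly like this; a sketch with assumed names, not the actual s3::client code:
```
#include <stdexcept>
#include <string>

struct parsed_endpoint { std::string host; unsigned port; bool https; };

// Split a proto://host:port string into the pieces the legacy factory needs.
parsed_endpoint parse_endpoint(const std::string& url) {
    auto sep = url.find("://");
    if (sep == std::string::npos) {
        throw std::invalid_argument("endpoint must be in proto://host:port format");
    }
    auto proto = url.substr(0, sep);
    auto rest = url.substr(sep + 3);
    auto colon = rest.rfind(':');
    if (colon == std::string::npos) {
        throw std::invalid_argument("endpoint is missing a port");
    }
    return parsed_endpoint{
        .host = rest.substr(0, colon),
        .port = static_cast<unsigned>(std::stoul(rest.substr(colon + 1))),
        .https = proto == "https",
    };
}
```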
Don't prepare s3::endpoint_config from generic code, just pass the region
and iam_role_arn (those that can potentially change) to the callback.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Make it construct like gs_client_wrapper -- with a generic endpoint param
reference -- and do the storage-specific casts/gets internally.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
- Strive for simplicity and clarity, add complexity only when clearly justified
- Question requests: don't blindly implement requests - evaluate trade-offs, identify issues, and suggest better alternatives when appropriate
- Consider different approaches, weigh pros and cons, and recommend the best fit for the specific context
## Test Philosophy
- Performance matters. Tests should run as quickly as possible. Sleeps in the code are highly discouraged and should be avoided, to reduce run time and flakiness.
- Stability matters. Tests should be stable. New tests should be executed at least 100 times to ensure they pass 100 out of 100 times (use --repeat 100 --max-failures 1 when running them).
- Unit tests should ideally test one thing and one thing only.
- Tests for bug fixes should be run before the fix, to show the failure, and after the fix, to show they now pass.
- Tests for bug fixes should state in their comments which bug (GitHub or JIRA issue) they test.
- Tests in debug mode are always slower, so if needed, reduce the number of iterations, rows, data, cycles, etc. in debug mode (see the sketch after this list).
- Tests should strive to be repeatable, and not use random input that will make their results unpredictable.
- Tests should consume as little resources as possible. Prefer running tests on a single node if it is sufficient, for example.
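A minimal sketch of the debug-mode scaling mentioned above, assuming a DEBUG macro is defined in debug builds (an assumption for illustration):
```
// Debug builds run much slower, so reduce the iteration count there while
// keeping the release-mode count high enough to be meaningful.
#ifdef DEBUG
constexpr int iterations = 100;
#else
constexpr int iterations = 10000;
#endif
```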
- name: Comment and close if author email is scylladb.com
  uses: actions/github-script@v7
  with:
    github-token: ${{ secrets.GITHUB_TOKEN }}
    script: |
      const issue = context.payload.issue;
      const actor = context.actor;
      // Get user data (only public email is available)
      const { data: user } = await github.rest.users.getByUsername({
        username: actor,
      });
      const email = user.email || "";
      console.log(`Actor: ${actor}, public email: ${email || "<none>"}`);
      // Only continue if email exists and ends with @scylladb.com
      if (!email || !email.toLowerCase().endsWith("@scylladb.com")) {
        console.log("User is not a scylladb.com email (or email not public); skipping.");
        return;
      }
      const owner = context.repo.owner;
      const repo = context.repo.repo;
      const issue_number = issue.number;
      const body = "Issues in this repository are closed automatically. Scylla associates should use Jira to manage issues.\nPlease move this issue to Jira https://scylladb.atlassian.net/jira/software/c/projects/SCYLLADB/list";
CLEANER_DIRS:test/unit exceptions alternator api auth cdc compaction db dht gms index lang message mutation mutation_writer node_ops raft redis replica service
// Note: we don't care when the notification of the other shards finishes, as long as it eventually does.
// It's possible to get into a race condition (the next DescribeTable goes to another shard that doesn't have
// the size yet, so it will calculate it again). This is not a problem, because that shard will call
// cache_newly_calculated_size_on_all_shards with an expiry, which is extremely unlikely to be exactly the same
// as the previous one, and all shards will keep the size that comes with the expiry further in the future.
// In case of the same expiry, some shards will have a different size, which means DescribeTable will return
// different values depending on the shard. This is also fine, as the specification doesn't give precision
// guarantees of any kind.
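The reconciliation rule the comment relies on can be sketched as follows; types and names are assumed:
```
#include <chrono>
#include <cstdint>

struct cached_size {
    uint64_t size = 0;
    std::chrono::steady_clock::time_point expiry{};
};

// Each shard keeps whichever cached value carries the later expiry; equal
// expiries can leave shards disagreeing, which the spec tolerates.
void merge(cached_size& local, const cached_size& incoming) {
    if (incoming.expiry > local.expiry) {
        local = incoming;
    }
}
```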
// Creating an index in tablets mode requires the keyspace to be RF-rack-valid.
// GSI and LSI indexes are based on materialized views, which require RF-rack-validity to avoid consistency issues.
co_return api_error::validation("GlobalSecondaryIndexes and LocalSecondaryIndexes with tablets require the rf_rack_valid_keyspaces option to be enabled.");
co_return api_error::unknown_operation("UpdateTimeToLive not yet supported. Experimental support is available if the 'alternator-ttl' experimental feature is enabled on all nodes.");
co_return api_error::unknown_operation("UpdateTimeToLive not yet supported. Upgrade all nodes to a version that supports it.");
// Should never happen - we verified the column's type
// before starting the scan.
[[unlikely]]
on_internal_error(tlogger, format("expiration scanner value of unsupported type {} in column {}", meta[*expiration_column]->type->cql3_type_name(), scan_ctx.column_name));
"description":"Set the incremental repair mode. Can be 'disabled', 'incremental', or 'full'. 'incremental': The incremental repair logic is enabled. Unrepaired sstables will be included for repair. Repaired sstables will be skipped. The incremental repair states will be updated after repair. 'full': The incremental repair logic is enabled. Both repaired and unrepaired sstables will be included for repair. The incremental repair states will be updated after repair. 'disabled': The incremental repair logic is disabled completely. The incremental repair states, e.g., repaired_at in sstables and sstables_repaired_at in the system.tablets table, will not be updated after repair. When the option is not provided, it defaults to 'disabled' mode.",
"description":"Set the incremental repair mode. Can be 'disabled', 'incremental', or 'full'. 'incremental': The incremental repair logic is enabled. Unrepaired sstables will be included for repair. Repaired sstables will be skipped. The incremental repair states will be updated after repair. 'full': The incremental repair logic is enabled. Both repaired and unrepaired sstables will be included for repair. The incremental repair states will be updated after repair. 'disabled': The incremental repair logic is disabled completely. The incremental repair states, e.g., repaired_at in sstables and sstables_repaired_at in the system.tablets table, will not be updated after repair. When the option is not provided, it defaults to incremental mode.",
"required":false,
"allowMultiple":false,
"type":"string",
@@ -3085,6 +3085,48 @@
}
]
},
{
"path":"/storage_service/tablets/snapshots",
"operations":[
{
"method":"POST",
"summary":"Takes the snapshot for the given keyspaces/tables. A snapshot name must be specified.",
"type":"void",
"nickname":"take_cluster_snapshot",
"produces":[
"application/json"
],
"parameters":[
{
"name":"tag",
"description":"the tag given to the snapshot",
"required":true,
"allowMultiple":false,
"type":"string",
"paramType":"query"
},
{
"name":"keyspace",
"description":"Keyspace(s) to snapshot. Multiple keyspaces can be provided using a comma-separated list. If omitted, snapshot all keyspaces.",
"required":false,
"allowMultiple":false,
"type":"string",
"paramType":"query"
},
{
"name":"table",
"description":"Table(s) to snapshot. Multiple tables (in a single keyspace) can be provided using a comma-separated list. If omitted, snapshot all tables in the given keyspace(s).",
throw exceptions::invalid_request_exception(fmt::format("TTL column {} must be of type timestamp, bigint or int, can't be {}", _ttl_change->text(), cdef->type->as_cql3_type().to_string()));
}
if (cdef->is_primary_key()) {
    throw exceptions::invalid_request_exception(fmt::format("Cannot use a primary key column {} as a TTL column", _ttl_change->text()));
}
if (cdef->is_static()) {
    throw exceptions::invalid_request_exception(fmt::format("Cannot use a static column {} as a TTL column", _ttl_change->text()));
throw exceptions::invalid_request_exception(fmt::format("Cannot set TTL column, table {}.{} already has a TTL column defined: {}", keyspace(), column_family(), *old_ttl_column));
throw exceptions::invalid_request_exception(fmt::format("Cannot unset TTL column, table {}.{} does not have a TTL column set", keyspace(), column_family()));
}
// tags_ptr is a constant pointer to schema data. To modify it, we
throw exceptions::invalid_request_exception(fmt::format("Cannot have more than one TTL column in a table. Saw {} and {}", _ttl_column->text(), def->text()));
}
// FIXME: find a way to check cql3_type::raw without fmt::format
throw exceptions::invalid_request_exception(fmt::format("TTL column '{}' must be of type timestamp, bigint or int, can't be {}", def->text(), type_name));