Revert "Update seastar submodule"

This reverts commit 2943d30b0c, which introduced a regression where --unsafe-bypass-fsync is not honored. Fixes https://scylladb.atlassian.net/browse/SCYLLADB-1496
Revert "alternator: optional stripping of http response headers"
2026-04-19 16:15:07 +00:00 · 2026-04-19 15:14:48 +03:00 · 2026-04-19 15:14:48 +03:00 · 2026-04-19 09:22:04 +03:00 · 2026-04-19 09:15:13 +03:00 · 2026-04-19 03:54:33 +02:00
379 changed files with 19008 additions and 5297 deletions
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -92,6 +92,10 @@ test/boost/querier_cache_test.cc @denesb
 # PYTEST-BASED CQL TESTS
 test/cqlpy/* @nyh

+# TEST FRAMEWORK
+test/pylib/* @xtrey
+test.py @xtrey
+
 # RAFT
 raft/* @kbr-scylla @gleb-cloudius @kostja
 test/raft/* @kbr-scylla @gleb-cloudius @kostja
--- a/.github/workflows/add-label-when-promoted.yaml
+++ b/.github/workflows/add-label-when-promoted.yaml
@@ -10,6 +10,9 @@ on:
    types: [labeled, unlabeled]
    branches: [master, next, enterprise]

+env:
+  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
+
 jobs:
  check-commit:
    runs-on: ubuntu-latest
@@ -30,7 +33,7 @@ jobs:
            echo "DEFAULT_BRANCH=master" >> $GITHUB_ENV
          fi
      - name: Checkout repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          repository: ${{ github.repository }}
          ref: ${{ env.DEFAULT_BRANCH }}
--- a/.github/workflows/backport-pr-fixes-validation.yaml
+++ b/.github/workflows/backport-pr-fixes-validation.yaml
@@ -5,6 +5,9 @@ on:
    types: [opened, reopened, edited]
    branches: [branch-*]

+env:
+  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
+
 jobs:
  check-fixes-prefix:
    runs-on: ubuntu-latest
@@ -13,7 +16,7 @@ jobs:
      issues: write
    steps:
      - name: Check PR body for "Fixes" prefix patterns
-        uses: actions/github-script@v7
+        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
        with:
          script: |
            const body = context.payload.pull_request.body;
--- a/.github/workflows/build-scylla.yaml
+++ b/.github/workflows/build-scylla.yaml
@@ -12,6 +12,9 @@ on:
        description: 'the md5sum for scylla executable'
        value: ${{ jobs.build.outputs.md5sum }}

+env:
+  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
+
 jobs:
  read-toolchain:
    uses: ./.github/workflows/read-toolchain.yaml
@@ -24,7 +27,7 @@ jobs:
    outputs:
      md5sum: ${{ steps.checksum.outputs.md5sum }}
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          submodules: recursive
      - name: Generate the building system
--- a/.github/workflows/check-license-header.yaml
+++ b/.github/workflows/check-license-header.yaml
@@ -9,6 +9,7 @@ env:
  HEADER_CHECK_LINES: 10
  LICENSE: "LicenseRef-ScyllaDB-Source-Available-1.1"
  CHECKED_EXTENSIONS: ".cc .hh .py"
+  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true

 jobs:
  check-license-headers:
@@ -19,7 +20,7 @@ jobs:

    steps:
      - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          fetch-depth: 0

@@ -40,7 +41,7 @@ jobs:

      - name: Comment on PR if check fails
        if: failure()
-        uses: actions/github-script@v7
+        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
        with:
          script: |
            const license = '${{ env.LICENSE }}';
--- a/.github/workflows/clang-nightly.yaml
+++ b/.github/workflows/clang-nightly.yaml
@@ -9,6 +9,7 @@ env:
  # use the development branch explicitly
  CLANG_VERSION: 21
  BUILD_DIR: build
+  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true

 permissions: {}

@@ -32,7 +33,7 @@ jobs:
    steps:
      - run: |
          sudo dnf -y install git
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          submodules: true
      - name: Install build dependencies
--- a/.github/workflows/clang-tidy.yaml
+++ b/.github/workflows/clang-tidy.yaml
@@ -18,6 +18,7 @@ env:
  BUILD_TYPE: RelWithDebInfo
  BUILD_DIR: build
  CLANG_TIDY_CHECKS: '-*,bugprone-use-after-move'
+  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true

 permissions: {}

@@ -42,7 +43,7 @@ jobs:
          IMAGE: ${{ needs.read-toolchain.image }}
        run: |
          echo ${{ needs.read-toolchain.image }}
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          submodules: true
      - run: |
--- a/.github/workflows/close_issue_for_scylla_associate.yml
+++ b/.github/workflows/close_issue_for_scylla_associate.yml
@@ -7,13 +7,16 @@ on:
 permissions:
  issues: write

+env:
+  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
+
 jobs:
  comment-and-close:
    runs-on: ubuntu-latest

    steps:
      - name: Comment and close if author email is scylladb.com
-        uses: actions/github-script@v7
+        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
--- a/.github/workflows/codespell.yaml
+++ b/.github/workflows/codespell.yaml
@@ -4,13 +4,15 @@ on:
    branches:
      - master
 permissions: {}
+env:
+  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
 jobs:
  codespell:
    name: Check for spelling errors
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v4
-      - uses: codespell-project/actions-codespell@master
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      - uses: codespell-project/actions-codespell@8f01853be192eb0f849a5c7d721450e7a467c579 # v2.2
        with:
          only_warn: 1
          ignore_words_list: "ans,datas,fo,ser,ue,crate,nd,reenable,strat,stap,te,raison,iif,tread"
--- a/.github/workflows/compare-build-systems.yaml
+++ b/.github/workflows/compare-build-systems.yaml
@@ -0,0 +1,44 @@
+# Runs scripts/compare_build_systems.py on PRs that touch either build system.
+name: Compare Build Systems
+
+on:
+  pull_request:
+    branches:
+      - master
+    paths:
+      - 'configure.py'
+      - '**/CMakeLists.txt'
+      - 'cmake/**'
+      - 'scripts/compare_build_systems.py'
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+env:
+  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
+
+# cancel the in-progress run upon a repush
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  read-toolchain:
+    uses: ./.github/workflows/read-toolchain.yaml
+  compare:
+    name: Compare configure.py vs CMake
+    needs:
+      - read-toolchain
+    runs-on: ubuntu-latest
+    container: ${{ needs.read-toolchain.outputs.image }}
+    steps:
+      # Pin the action to a full commit SHA rather than a mutable tag.
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          submodules: true
+      - name: Compare build systems
+        run: |
+          # Let git operate on the checked-out workspace inside the container.
+          git config --global --add safe.directory "$GITHUB_WORKSPACE"
+          python3 scripts/compare_build_systems.py --ci
--- a/.github/workflows/conflict_reminder.yaml
+++ b/.github/workflows/conflict_reminder.yaml
@@ -12,13 +12,16 @@ on:
  schedule:
    - cron: '0 10 * * 1'  # Runs every Monday at 10:00am

+env:
+  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
+
 jobs:
  notify_conflict_prs:
    runs-on: ubuntu-latest

    steps:
      - name: Notify PR Authors of Conflicts
-        uses: actions/github-script@v7
+        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
        with:
          script: |
            console.log("Starting conflict reminder script...");
--- a/.github/workflows/differential-shellcheck.yaml
+++ b/.github/workflows/differential-shellcheck.yaml
@@ -13,6 +13,9 @@ on:
 permissions:
  contents: read

+env:
+  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
+
 jobs:
  lint:
    runs-on: ubuntu-latest
@@ -21,12 +24,12 @@ jobs:
      security-events: write

    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          fetch-depth: 0

      - name: Differential ShellCheck
-        uses: redhat-plumbers-in-action/differential-shellcheck@v5
+        uses: redhat-plumbers-in-action/differential-shellcheck@d965e66ec0b3b2f821f75c8eff9b12442d9a7d1e # v5.5.6
        with:
          severity: warning
          token: ${{ secrets.GITHUB_TOKEN }}
--- a/.github/workflows/docs-pages.yaml
+++ b/.github/workflows/docs-pages.yaml
@@ -5,6 +5,7 @@ name: "Docs / Publish"
 env:
  FLAG: ${{ github.repository == 'scylladb/scylla-enterprise' && 'enterprise' || 'opensource' }}
  DEFAULT_BRANCH: ${{ github.repository == 'scylladb/scylla-enterprise' && 'enterprise' || 'master' }}
+  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true

 on:
  push:
@@ -25,17 +26,17 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          ref: ${{ env.DEFAULT_BRANCH }}
          persist-credentials: false
          fetch-depth: 0
      - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
          python-version: "3.12"
      - name: Install uv
-        uses: astral-sh/setup-uv@v6
+        uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8.0.0
      - name: Set up env
        run: make -C docs FLAG="${{ env.FLAG }}" setupenv
      - name: Build docs
--- a/.github/workflows/docs-pr.yaml
+++ b/.github/workflows/docs-pr.yaml
@@ -7,6 +7,7 @@ permissions:

 env:
  FLAG: ${{ github.repository == 'scylladb/scylla-enterprise' && 'enterprise' || 'opensource' }}
+  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true

 on:
  pull_request:
@@ -22,16 +23,16 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          persist-credentials: false
          fetch-depth: 0
      - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
          python-version: "3.12"
      - name: Install uv
-        uses: astral-sh/setup-uv@v6
+        uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8.0.0
      - name: Set up env
        run: make -C docs FLAG="${{ env.FLAG }}" setupenv
      - name: Build docs
--- a/.github/workflows/docs-validate-metrics.yml
+++ b/.github/workflows/docs-validate-metrics.yml
@@ -3,6 +3,9 @@ name: Docs / Validate metrics
 permissions:
  contents: read

+env:
+  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
+
 on:
  pull_request:
    branches:
@@ -21,12 +24,12 @@ jobs:

    steps:
    - name: Checkout code
-      uses: actions/checkout@v4
+      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      with:
        submodules: true

    - name: Set up Python
-      uses: actions/setup-python@v6
+      uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
      with:
        python-version: '3.10'

--- a/.github/workflows/iwyu.yaml
+++ b/.github/workflows/iwyu.yaml
@@ -13,6 +13,7 @@ env:
  # supposed to be processed by idl-compiler.py, so we don't check them using the cleaner
  CLEANER_DIRS: test/unit exceptions alternator api auth cdc compaction db dht gms index lang message mutation mutation_writer node_ops raft redis replica service
  SEASTAR_BAD_INCLUDE_OUTPUT_PATH: build/seastar-bad-include.log
+  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true

 permissions:
  contents: read
@@ -32,7 +33,7 @@ jobs:
    runs-on: ubuntu-latest
    container: ${{ needs.read-toolchain.outputs.image }}
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          submodules: true
      - name: Generate compilation database
@@ -89,7 +90,7 @@ jobs:
            | tee "$SEASTAR_BAD_INCLUDE_OUTPUT_PATH"
      - run: |
          echo "::remove-matcher owner=seastar-bad-include::"
-      - uses: actions/upload-artifact@v4
+      - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
        with:
          name: Logs
          path: |
--- a/.github/workflows/make-pr-ready-for-review.yaml
+++ b/.github/workflows/make-pr-ready-for-review.yaml
@@ -7,6 +7,7 @@ on:

 env:
  DEFAULT_BRANCH: 'master'
+  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true

 jobs:
  mark-ready:
@@ -17,7 +18,7 @@ jobs:

    steps:
      - name: Checkout repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          repository: ${{ github.repository }}
          ref: ${{ env.DEFAULT_BRANCH }}
--- a/.github/workflows/pr-require-backport-label.yaml
+++ b/.github/workflows/pr-require-backport-label.yaml
@@ -5,6 +5,8 @@ on:
    branches:
      - master
      - next
+env:
+  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
 jobs:
  label:
    if: github.event.pull_request.draft == false
@@ -15,7 +17,7 @@ jobs:
    steps:
      - name: Wait for label to be added
        run: sleep 1m
-      - uses: mheap/github-action-required-labels@v5
+      - uses: mheap/github-action-required-labels@0ac283b4e65c1fb28ce6079dea5546ceca98ccbe # v5.5.2
        with:
          mode: minimum
          count: 1
--- a/.github/workflows/read-toolchain.yaml
+++ b/.github/workflows/read-toolchain.yaml
@@ -7,6 +7,9 @@ on:
        description: "the toolchain docker image"
        value: ${{ jobs.read-toolchain.outputs.image }}

+env:
+  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
+
 jobs:
  read-toolchain:
    runs-on: ubuntu-latest
@@ -15,7 +18,7 @@ jobs:
    outputs:
      image: ${{ steps.read.outputs.image }}
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          sparse-checkout: tools/toolchain/image
          sparse-checkout-cone-mode: false
--- a/.github/workflows/seastar.yaml
+++ b/.github/workflows/seastar.yaml
@@ -13,6 +13,7 @@ concurrency:

 env:
  BUILD_DIR: build
+  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true

 jobs:
  read-toolchain:
@@ -29,12 +30,12 @@ jobs:
          - RelWithDebInfo
          - Dev
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          submodules: true
      - run: |
          rm -rf seastar
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          repository: scylladb/seastar
          submodules: true
--- a/.github/workflows/sync-labels.yaml
+++ b/.github/workflows/sync-labels.yaml
@@ -7,6 +7,9 @@ on:
  issues:
    types: [labeled, unlabeled]

+env:
+  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
+
 jobs:
  label-sync:
    if: ${{ github.repository == 'scylladb/scylladb' }}
@@ -21,7 +24,7 @@ jobs:
          GITHUB_CONTEXT: ${{ toJson(github) }}
        run: echo "$GITHUB_CONTEXT"
      - name: Checkout repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          sparse-checkout: |
            .github/scripts/sync_labels.py
--- a/.github/workflows/trigger_ci.yaml
+++ b/.github/workflows/trigger_ci.yaml
@@ -5,7 +5,10 @@ on:
    types: [opened, reopened, synchronize]
  issue_comment:
    types: [created]
-    
+
+env:
+  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
+
 jobs:
  trigger-ci:
    runs-on: ubuntu-latest
@@ -15,7 +18,7 @@ jobs:
          GITHUB_CONTEXT: ${{ toJson(github) }}
        run: echo "$GITHUB_CONTEXT"
      - name: Checkout PR code
-        uses: actions/checkout@v3
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          fetch-depth: 0  # Needed to access full history
          ref: ${{ github.event.pull_request.head.ref }}
--- a/.github/workflows/urgent_issue_reminder.yml
+++ b/.github/workflows/urgent_issue_reminder.yml
@@ -4,13 +4,16 @@ on:
  schedule:
    - cron: '10 8 * * *' # Runs daily at 8 AM

+env:
+  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
+
 jobs:
  reminder:
    runs-on: ubuntu-latest

    steps:
    - name: Send reminders
-      uses: actions/github-script@v7
+      uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
      with:
        script: |
          const labelFilters = ['P0', 'P1', 'Field-Tier1','status/release blocker', 'status/regression']; 
--- a/alternator/CMakeLists.txt
+++ b/alternator/CMakeLists.txt
@@ -9,6 +9,8 @@ target_sources(alternator
    controller.cc
    server.cc
    executor.cc
+    executor_read.cc
+    executor_util.cc
    stats.cc
    serialization.cc
    expressions.cc
--- a/alternator/attribute_path.hh
+++ b/alternator/attribute_path.hh
@@ -0,0 +1,253 @@
+/*
+ * Copyright 2019-present ScyllaDB
+ */
+
+/*
+ * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
+ */
+
+#pragma once
+
+#include <map>
+#include <memory>
+#include <optional>
+#include <string>
+#include <unordered_map>
+#include <variant>
+
+#include "utils/rjson.hh"
+#include "utils/overloaded_functor.hh"
+#include "alternator/error.hh"
+#include "alternator/expressions_types.hh"
+
+namespace alternator {
+
+// An attribute_path_map object is used to hold data for various attribute
+// paths (parsed::path) in a hierarchy of attribute paths. Each attribute path
+// has a root attribute, which is then modified by member and index operators -
+// for example in "a.b[2].c" we have "a" as the root, then ".b" member, then
+// "[2]" index, and finally ".c" member.
+// Data can be added to an attribute_path_map using attribute_path_map_add(),
+// which requires that paths with data not be *overlapping* or *conflicting*:
+//
+// 1. Two attribute paths which are identical or an ancestor of one another
+//    are considered *overlapping* and not allowed. If a.b.c has data,
+//    we can't add more data in a.b.c or any of its descendants like a.b.c.d.
+//
+// 2. Two attribute paths which need the same parent to have both a member and
+//    an index are considered *conflicting* and not allowed. E.g., if a.b has
+//    data, you can't add a[1]. The meaning of adding both would be that the
+//    attribute a is both a map and an array, which isn't sensible.
+//
+// These two requirements are common to the two places where Alternator uses
+// this abstraction to describe how a hierarchical item is to be transformed:
+//
+// 1. In ProjectionExpression: for filtering from a full top-level attribute
+//    only the parts which the user asked for in ProjectionExpression.
+//
+// 2. In UpdateExpression: for taking the previous value of a top-level
+//    attribute, and modifying it based on the instructions the user
+//    wrote in UpdateExpression.
+
+template<typename T>
+class attribute_path_map_node {  // one node in a hierarchy of attribute paths
+public:
+    using data_t = T;
+    // We need the extra unique_ptr<> here because libstdc++ unordered_map
+    // doesn't work with incomplete types :-(
+    using members_t =  std::unordered_map<std::string, std::unique_ptr<attribute_path_map_node<T>>>;
+    // The indexes list is sorted because DynamoDB requires handling writes
+    // beyond the end of a list in index order.
+    using indexes_t = std::map<unsigned, std::unique_ptr<attribute_path_map_node<T>>>;
+    // The prohibition on "overlap" and "conflict" explained above means
+    // that at most one of data, members or indexes is held by a node.
+    std::optional<std::variant<data_t, members_t, indexes_t>> _content;
+
+    bool is_empty() const { return !_content; }  // nothing stored in this node yet
+    bool has_value() const { return _content && std::holds_alternative<data_t>(*_content); }
+    bool has_members() const { return _content && std::holds_alternative<members_t>(*_content); }
+    bool has_indexes() const { return _content && std::holds_alternative<indexes_t>(*_content); }
+    // Each get_*() accessor below assumes the matching has_*() is true.
+    members_t& get_members() { return std::get<members_t>(*_content); }
+    const members_t& get_members() const { return std::get<members_t>(*_content); }
+    indexes_t& get_indexes() { return std::get<indexes_t>(*_content); }
+    const indexes_t& get_indexes() const { return std::get<indexes_t>(*_content); }
+    T& get_value() { return std::get<T>(*_content); }
+    const T& get_value() const { return std::get<T>(*_content); }
+};
+
+template<typename T>
+using attribute_path_map = std::unordered_map<std::string, attribute_path_map_node<T>>;
+
+using attrs_to_get_node = attribute_path_map_node<std::monostate>;
+// attrs_to_get lists which top-level attributes are needed, and possibly also
+// which part of the top-level attribute is really needed (when nested
+// attribute paths appeared in the query).
+// Most code actually uses optional<attrs_to_get>. There, a disengaged
+// optional means we should get all attributes, not specific ones.
+using attrs_to_get = attribute_path_map<std::monostate>;
+
+// hierarchy_filter() takes a JSON value and drops the parts of it which
+// weren't asked to be kept. It modifies the given JSON value in place, or
+// returns false to signify that the entire value should be dropped.
+// Note that the JSON value is assumed to be encoded using the DynamoDB
+// conventions - i.e., it is really a single-member map whose key is a type
+// string and whose value is the real object.
+template<typename T>
+bool hierarchy_filter(rjson::value& val, const attribute_path_map_node<T>& h) {
+    if (!val.IsObject() || val.MemberCount() != 1) {
+        // This shouldn't happen. We shouldn't have stored malformed objects.
+        // But today Alternator does not validate the structure of nested
+        // documents before storing them, so this can happen on read.
+        throw api_error::internal(format("Malformed value object read: {}", val));
+    }
+    const char* type = val.MemberBegin()->name.GetString();  // type tag; 'M' and 'L' are checked below
+    rjson::value& v = val.MemberBegin()->value;  // the payload stored under the type tag
+    if (h.has_members()) {
+        const auto& members = h.get_members();
+        if (type[0] != 'M' || !v.IsObject()) {
+            // If v is not an object (dictionary, map), none of the members
+            // can match.
+            return false;
+        }
+        rjson::value newv = rjson::empty_object();
+        for (auto it = v.MemberBegin(); it != v.MemberEnd(); ++it) {
+            std::string attr = rjson::to_string(it->name);
+            auto x = members.find(attr);
+            if (x != members.end()) {
+                if (x->second) {
+                    // Only a part of this attribute is to be filtered, do it.
+                    if (hierarchy_filter(it->value, *x->second)) {
+                        // because newv started empty and attr are unique
+                        // (keys of v), we can use add() here
+                        rjson::add_with_string_name(newv, attr, std::move(it->value));
+                    }
+                } else {
+                    // The entire attribute is to be kept
+                    rjson::add_with_string_name(newv, attr, std::move(it->value));
+                }
+            }
+        }
+        if (newv.MemberCount() == 0) {  // nothing survived the filter
+            return false;
+        }
+        v = newv;
+    } else if (h.has_indexes()) {
+        const auto& indexes = h.get_indexes();
+        if (type[0] != 'L' || !v.IsArray()) {  // not a list, so no index can match
+            return false;
+        }
+        rjson::value newv = rjson::empty_array();  // kept elements are appended in order
+        const auto& a = v.GetArray();
+        for (unsigned i = 0; i < v.Size(); i++) {
+            auto x = indexes.find(i);
+            if (x != indexes.end()) {
+                if (x->second) {
+                    if (hierarchy_filter(a[i], *x->second)) {
+                        rjson::push_back(newv, std::move(a[i]));
+                    }
+                } else {
+                    // The entire attribute is to be kept
+                    rjson::push_back(newv, std::move(a[i]));
+                }
+            }
+        }
+        if (newv.Size() == 0) {  // nothing survived the filter
+            return false;
+        }
+        v = newv;
+    }
+    return true;  // keep val (trimmed above if h has members or indexes)
+}
+
+// Add path p to map. Throws api_error::validation, naming "source" in the
+// message, if p "overlaps" a path already in the map (one is a sub-path of
+// the other) or "conflicts" with one (both a member and an index requested).
+template<typename T>
+void attribute_path_map_add(const char* source, attribute_path_map<T>& map, const parsed::path& p, T value = {}) {
+   using node = attribute_path_map_node<T>;
+    // The first step is to look for the top-level attribute (p.root()):
+    auto it = map.find(p.root());
+    if (it == map.end()) {
+        if (p.has_operators()) {
+            it = map.emplace(p.root(), node {std::nullopt}).first;  // empty node, filled by the walk below
+        } else {
+            (void) map.emplace(p.root(), node {std::move(value)}).first;
+            // Value inserted for top-level node. We're done.
+            return;
+        }
+    } else if(!p.has_operators()) {
+        // If p is top-level and we already have it or a part of it
+        // in map, it's a forbidden overlapping path.
+        throw api_error::validation(fmt::format(
+            "Invalid {}: two document paths overlap at {}", source, p.root()));
+    } else if (it->second.has_value()) {
+        // If we're here, it != map.end() && p.has_operators() && it->second.has_value().
+        // This means the top-level attribute already has a value, and we're
+        // trying to add a non-top-level value. It's an overlap.
+        throw api_error::validation(fmt::format("Invalid {}: two document paths overlap at {}", source, p.root()));
+    }
+    node* h = &it->second;
+    // The second step is to walk h from the top-level node to the inner node
+    // where we're supposed to insert the value:
+    for (const auto& op : p.operators()) {
+        std::visit(overloaded_functor {
+            [&] (const std::string& member) {
+                if (h->is_empty()) {
+                    *h = node {typename node::members_t()};
+                } else if (h->has_indexes()) {
+                    throw api_error::validation(format("Invalid {}: two document paths conflict at {}", source, p));
+                } else if (h->has_value()) {
+                    throw api_error::validation(format("Invalid {}: two document paths overlap at {}", source, p));
+                }
+                typename node::members_t& members = h->get_members();
+                auto it = members.find(member);
+                if (it == members.end()) {
+                    it = members.insert({member, std::make_unique<node>()}).first;
+                }
+                h = it->second.get();  // descend into the child node for this member
+            },
+            [&] (unsigned index) {
+                if (h->is_empty()) {
+                    *h = node {typename node::indexes_t()};
+                } else if (h->has_members()) {
+                    throw api_error::validation(format("Invalid {}: two document paths conflict at {}", source, p));
+                } else if (h->has_value()) {
+                    throw api_error::validation(format("Invalid {}: two document paths overlap at {}", source, p));
+                }
+                typename node::indexes_t& indexes = h->get_indexes();
+                auto it = indexes.find(index);
+                if (it == indexes.end()) {
+                    it = indexes.insert({index, std::make_unique<node>()}).first;
+                }
+                h = it->second.get();  // descend into the child node for this index
+            }
+        }, op);
+    }
+    // Finally, insert the value in the node h.
+    if (h->is_empty()) {
+        *h = node {std::move(value)};
+    } else {
+        throw api_error::validation(format("Invalid {}: two document paths overlap at {}", source, p));
+    }
+}
+
+// A very simplified version of the above function for the special case of
+// adding only a top-level attribute. It's not only simpler, we also use a
+// different error message, referring to a "duplicate attribute" instead of
+// "overlapping paths". DynamoDB also has this distinction (errors in
+// AttributesToGet refer to duplicates, not overlaps, but errors in
+// ProjectionExpression refer to overlap - even if it's an exact duplicate).
+template<typename T>
+void attribute_path_map_add(const char* source, attribute_path_map<T>& map, const std::string& attr, T value = {}) {
+   using node = attribute_path_map_node<T>;
+    auto it = map.find(attr);
+    if (it == map.end()) {
+        map.emplace(attr, node {std::move(value)});  // first occurrence of attr
+    } else {
+        throw api_error::validation(fmt::format(
+            "Invalid {}: Duplicate attribute: {}", source, attr));
+    }
+}
+
+} // namespace alternator
--- a/alternator/controller.cc
+++ b/alternator/controller.cc
@@ -18,6 +18,7 @@
 #include "service/memory_limiter.hh"
 #include "auth/service.hh"
 #include "service/qos/service_level_controller.hh"
+#include "vector_search/vector_store_client.hh"

 using namespace seastar;

@@ -31,10 +32,12 @@ controller::controller(
        sharded<service::storage_service>& ss,
        sharded<service::migration_manager>& mm,
        sharded<db::system_distributed_keyspace>& sys_dist_ks,
+        sharded<db::system_keyspace>& sys_ks,
        sharded<cdc::generation_service>& cdc_gen_svc,
        sharded<service::memory_limiter>& memory_limiter,
        sharded<auth::service>& auth_service,
        sharded<qos::service_level_controller>& sl_controller,
+        sharded<vector_search::vector_store_client>& vsc,
        const db::config& config,
        seastar::scheduling_group sg)
    : protocol_server(sg)
@@ -43,10 +46,12 @@ controller::controller(
    , _ss(ss)
    , _mm(mm)
    , _sys_dist_ks(sys_dist_ks)
+    , _sys_ks(sys_ks)
    , _cdc_gen_svc(cdc_gen_svc)
    , _memory_limiter(memory_limiter)
    , _auth_service(auth_service)
    , _sl_controller(sl_controller)
+    , _vsc(vsc)
    , _config(config)
 {
 }
@@ -91,8 +96,8 @@ future<> controller::start_server() {
        auto get_timeout_in_ms = [] (const db::config& cfg) -> utils::updateable_value<uint32_t> {
            return cfg.alternator_timeout_in_ms;
        };
-        _executor.start(std::ref(_gossiper), std::ref(_proxy), std::ref(_ss), std::ref(_mm), std::ref(_sys_dist_ks),
-                        sharded_parameter(get_cdc_metadata, std::ref(_cdc_gen_svc)), _ssg.value(),
+        _executor.start(std::ref(_gossiper), std::ref(_proxy), std::ref(_ss), std::ref(_mm), std::ref(_sys_dist_ks), std::ref(_sys_ks),
+                        sharded_parameter(get_cdc_metadata, std::ref(_cdc_gen_svc)), std::ref(_vsc), _ssg.value(),
                        sharded_parameter(get_timeout_in_ms, std::ref(_config))).get();
        _server.start(std::ref(_executor), std::ref(_proxy), std::ref(_gossiper), std::ref(_auth_service), std::ref(_sl_controller)).get();
        // Note: from this point on, if start_server() throws for any reason,
--- a/alternator/controller.hh
+++ b/alternator/controller.hh
@@ -22,6 +22,7 @@ class memory_limiter;

 namespace db {
 class system_distributed_keyspace;
+class system_keyspace;
 class config;
 }

@@ -43,6 +44,10 @@ namespace qos {
 class service_level_controller;
 }

+namespace vector_search {
+class vector_store_client;
+}
+
 namespace alternator {

 // This is the official DynamoDB API version.
@@ -61,10 +66,12 @@ class controller : public protocol_server {
    sharded<service::storage_service>& _ss;
    sharded<service::migration_manager>& _mm;
    sharded<db::system_distributed_keyspace>& _sys_dist_ks;
+    sharded<db::system_keyspace>& _sys_ks;
    sharded<cdc::generation_service>& _cdc_gen_svc;
    sharded<service::memory_limiter>& _memory_limiter;
    sharded<auth::service>& _auth_service;
    sharded<qos::service_level_controller>& _sl_controller;
+    sharded<vector_search::vector_store_client>& _vsc;
    const db::config& _config;

    std::vector<socket_address> _listen_addresses;
@@ -79,10 +86,12 @@ public:
        sharded<service::storage_service>& ss,
        sharded<service::migration_manager>& mm,
        sharded<db::system_distributed_keyspace>& sys_dist_ks,
+        sharded<db::system_keyspace>& sys_ks,
        sharded<cdc::generation_service>& cdc_gen_svc,
        sharded<service::memory_limiter>& memory_limiter,
        sharded<auth::service>& auth_service,
        sharded<qos::service_level_controller>& sl_controller,
+        sharded<vector_search::vector_store_client>& vsc,
        const db::config& config,
        seastar::scheduling_group sg);

--- a/alternator/executor.cc
+++ b/alternator/executor.cc
--- a/alternator/executor.hh
+++ b/alternator/executor.hh
@@ -9,7 +9,9 @@
 #pragma once

 #include <seastar/core/future.hh>
+#include "audit/audit.hh"
 #include "seastarx.hh"
+#include <seastar/core/future.hh>
 #include <seastar/core/sharded.hh>
 #include <seastar/util/noncopyable_function.hh>

@@ -20,15 +22,23 @@
 #include "db/config.hh"

 #include "alternator/error.hh"
-#include "stats.hh"
+#include "alternator/attribute_path.hh"
+#include "alternator/stats.hh"
+#include "alternator/executor_util.hh"
+
 #include "utils/rjson.hh"
 #include "utils/updateable_value.hh"
-#include "utils/simple_value_with_expiry.hh"

 #include "tracing/trace_state.hh"

+
 namespace db {
    class system_distributed_keyspace;
+    class system_keyspace;
+}
+
+namespace audit {
+class audit_info_alternator;
 }

 namespace query {
@@ -46,6 +56,10 @@ namespace service {
    class storage_service;
 }

+namespace vector_search {
+    class vector_store_client;
+}
+
 namespace cdc {
    class metadata;
 }
@@ -58,82 +72,13 @@ class gossiper;

 class schema_builder;

+
 namespace alternator {

 enum class table_status;
 class rmw_operation;
 class put_or_delete_item;

-schema_ptr get_table(service::storage_proxy& proxy, const rjson::value& request);
-bool is_alternator_keyspace(const sstring& ks_name);
-// Wraps the db::get_tags_of_table and throws if the table is missing the tags extension.
-const std::map<sstring, sstring>& get_tags_of_table_or_throw(schema_ptr schema);
-
-// An attribute_path_map object is used to hold data for various attributes
-// paths (parsed::path) in a hierarchy of attribute paths. Each attribute path
-// has a root attribute, and then modified by member and index operators -
-// for example in "a.b[2].c" we have "a" as the root, then ".b" member, then
-// "[2]" index, and finally ".c" member.
-// Data can be added to an attribute_path_map using the add() function, but
-// requires that attributes with data not be *overlapping* or *conflicting*:
-//
-// 1. Two attribute paths which are identical or an ancestor of one another
-//    are considered *overlapping* and not allowed. If a.b.c has data,
-//    we can't add more data in a.b.c or any of its descendants like a.b.c.d.
-//
-// 2. Two attribute paths which need the same parent to have both a member and
-//    an index are considered *conflicting* and not allowed. E.g., if a.b has
-//    data, you can't add a[1]. The meaning of adding both would be that the
-//    attribute a is both a map and an array, which isn't sensible.
-//
-// These two requirements are common to the two places where Alternator uses
-// this abstraction to describe how a hierarchical item is to be transformed:
-//
-// 1. In ProjectExpression: for filtering from a full top-level attribute
-//    only the parts for which user asked in ProjectionExpression.
-//
-// 2. In UpdateExpression: for taking the previous value of a top-level
-//    attribute, and modifying it based on the instructions in the user
-//    wrote in UpdateExpression.
-
-template<typename T>
-class attribute_path_map_node {
-public:
-    using data_t = T;
-    // We need the extra unique_ptr<> here because libstdc++ unordered_map
-    // doesn't work with incomplete types :-(
-    using members_t =  std::unordered_map<std::string, std::unique_ptr<attribute_path_map_node<T>>>;
-    // The indexes list is sorted because DynamoDB requires handling writes
-    // beyond the end of a list in index order.
-    using indexes_t = std::map<unsigned, std::unique_ptr<attribute_path_map_node<T>>>;
-    // The prohibition on "overlap" and "conflict" explained above means
-    // That only one of data, members or indexes is non-empty.
-    std::optional<std::variant<data_t, members_t, indexes_t>> _content;
-
-    bool is_empty() const { return !_content; }
-    bool has_value() const { return _content && std::holds_alternative<data_t>(*_content); }
-    bool has_members() const { return _content && std::holds_alternative<members_t>(*_content); }
-    bool has_indexes() const { return _content && std::holds_alternative<indexes_t>(*_content); }
-    // get_members() assumes that has_members() is true
-    members_t& get_members() { return std::get<members_t>(*_content); }
-    const members_t& get_members() const { return std::get<members_t>(*_content); }
-    indexes_t& get_indexes() { return std::get<indexes_t>(*_content); }
-    const indexes_t& get_indexes() const { return std::get<indexes_t>(*_content); }
-    T& get_value() { return std::get<T>(*_content); }
-    const T& get_value() const { return std::get<T>(*_content); }
-};
-
-template<typename T>
-using attribute_path_map = std::unordered_map<std::string, attribute_path_map_node<T>>;
-
-using attrs_to_get_node = attribute_path_map_node<std::monostate>;
-// attrs_to_get lists which top-level attribute are needed, and possibly also
-// which part of the top-level attribute is really needed (when nested
-// attribute paths appeared in the query).
-// Most code actually uses optional<attrs_to_get>. There, a disengaged
-// optional means we should get all attributes, not specific ones.
-using attrs_to_get = attribute_path_map<std::monostate>;
-
 namespace parsed {
 class expression_cache;
 }
@@ -144,9 +89,12 @@ class executor : public peering_sharded_service<executor> {
    service::storage_proxy& _proxy;
    service::migration_manager& _mm;
    db::system_distributed_keyspace& _sdks;
+    db::system_keyspace& _system_keyspace;
    cdc::metadata& _cdc_metadata;
+    vector_search::vector_store_client& _vsc;
    utils::updateable_value<bool> _enforce_authorization;
    utils::updateable_value<bool> _warn_authorization;
+    seastar::sharded<audit::audit>& _audit;
    // An smp_service_group to be used for limiting the concurrency when
    // forwarding Alternator request between shards - if necessary for LWT.
    smp_service_group _ssg;
@@ -171,7 +119,6 @@ public:
    // is written in chunks to the output_stream. This allows for efficient
    // handling of large responses without needing to allocate a large buffer
    // in memory.
-    using body_writer = noncopyable_function<future<>(output_stream<char>&&)>;
    using request_return_type = std::variant<std::string, body_writer, api_error>;
    stats _stats;
    // The metric_groups object holds this stat object's metrics registered
@@ -186,53 +133,60 @@ public:
             service::storage_service& ss,
             service::migration_manager& mm,
             db::system_distributed_keyspace& sdks,
+             db::system_keyspace& system_keyspace,
             cdc::metadata& cdc_metadata,
+             vector_search::vector_store_client& vsc,
             smp_service_group ssg,
             utils::updateable_value<uint32_t> default_timeout_in_ms);
    ~executor();

-    future<request_return_type> create_table(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
-    future<request_return_type> describe_table(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
-    future<request_return_type> delete_table(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
-    future<request_return_type> update_table(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
-    future<request_return_type> put_item(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
-    future<request_return_type> get_item(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
-    future<request_return_type> delete_item(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
-    future<request_return_type> update_item(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
-    future<request_return_type> list_tables(client_state& client_state, service_permit permit, rjson::value request);
-    future<request_return_type> scan(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
-    future<request_return_type> describe_endpoints(client_state& client_state, service_permit permit, rjson::value request, std::string host_header);
-    future<request_return_type> batch_write_item(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
-    future<request_return_type> batch_get_item(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
-    future<request_return_type> query(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
-    future<request_return_type> tag_resource(client_state& client_state, service_permit permit, rjson::value request);
-    future<request_return_type> untag_resource(client_state& client_state, service_permit permit, rjson::value request);
-    future<request_return_type> list_tags_of_resource(client_state& client_state, service_permit permit, rjson::value request);
-    future<request_return_type> update_time_to_live(client_state& client_state, service_permit permit, rjson::value request);
-    future<request_return_type> describe_time_to_live(client_state& client_state, service_permit permit, rjson::value request);
-    future<request_return_type> list_streams(client_state& client_state, service_permit permit, rjson::value request);
-    future<request_return_type> describe_stream(client_state& client_state, service_permit permit, rjson::value request);
-    future<request_return_type> get_shard_iterator(client_state& client_state, service_permit permit, rjson::value request);
-    future<request_return_type> get_records(client_state& client_state, tracing::trace_state_ptr, service_permit permit, rjson::value request);
-    future<request_return_type> describe_continuous_backups(client_state& client_state, service_permit permit, rjson::value request);
+    future<request_return_type> create_table(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
+    future<request_return_type> describe_table(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
+    future<request_return_type> delete_table(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
+    future<request_return_type> update_table(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
+    future<request_return_type> put_item(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
+    future<request_return_type> get_item(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
+    future<request_return_type> delete_item(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
+    future<request_return_type> update_item(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
+    future<request_return_type> list_tables(client_state& client_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
+    future<request_return_type> scan(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
+    future<request_return_type> describe_endpoints(client_state& client_state, service_permit permit, rjson::value request, std::string host_header, std::unique_ptr<audit::audit_info_alternator>& audit_info);
+    future<request_return_type> batch_write_item(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
+    future<request_return_type> batch_get_item(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
+    future<request_return_type> query(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
+    future<request_return_type> tag_resource(client_state& client_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
+    future<request_return_type> untag_resource(client_state& client_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
+    future<request_return_type> list_tags_of_resource(client_state& client_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
+    future<request_return_type> update_time_to_live(client_state& client_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
+    future<request_return_type> describe_time_to_live(client_state& client_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
+    future<request_return_type> list_streams(client_state& client_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
+    future<request_return_type> describe_stream(client_state& client_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
+    future<request_return_type> get_shard_iterator(client_state& client_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
+    future<request_return_type> get_records(client_state& client_state, tracing::trace_state_ptr, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
+    future<request_return_type> describe_continuous_backups(client_state& client_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);

    future<> start();
    future<> stop();

-    static sstring table_name(const schema&);
    static db::timeout_clock::time_point default_timeout();
 private:
    static thread_local utils::updateable_value<uint32_t> s_default_timeout_in_ms;
-public:
-    static schema_ptr find_table(service::storage_proxy&, std::string_view table_name);
-    static schema_ptr find_table(service::storage_proxy&, const rjson::value& request);
-
-private:
    friend class rmw_operation;

-    static void describe_key_schema(rjson::value& parent, const schema&, std::unordered_map<std::string,std::string> * = nullptr, const std::map<sstring, sstring> *tags = nullptr);
+    // Helper to set up auditing for an Alternator operation. Checks whether
+    // the operation should be audited (via will_log()) and if so, allocates
+    // and populates audit_info. No allocation occurs when auditing is disabled.
+    void maybe_audit(std::unique_ptr<audit::audit_info_alternator>& audit_info,
+                     audit::statement_category category,
+                     std::string_view ks_name,
+                     std::string_view table_name,
+                     std::string_view operation_name,
+                     const rjson::value& request,
+                     std::optional<db::consistency_level> cl = std::nullopt);
+
    future<rjson::value> fill_table_description(schema_ptr schema, table_status tbl_status, service::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit);
-    future<executor::request_return_type> create_table_on_shard0(service::client_state&& client_state, tracing::trace_state_ptr trace_state, rjson::value request, bool enforce_authorization, bool warn_authorization, const db::tablets_mode_t::mode tablets_mode);
+    future<executor::request_return_type> create_table_on_shard0(service::client_state&& client_state, tracing::trace_state_ptr trace_state, rjson::value request, bool enforce_authorization,
+            bool warn_authorization, const db::tablets_mode_t::mode tablets_mode, std::unique_ptr<audit::audit_info_alternator>& audit_info);

    future<> do_batch_write(
        std::vector<std::pair<schema_ptr, put_or_delete_item>> mutation_builders,
@@ -245,60 +199,34 @@ private:
        tracing::trace_state_ptr trace_state, service_permit permit);

 public:
-    static void describe_key_schema(rjson::value& parent, const schema& schema, std::unordered_map<std::string,std::string>&, const std::map<sstring, sstring> *tags = nullptr);
-
-    static std::optional<rjson::value> describe_single_item(schema_ptr,
-        const query::partition_slice&,
-        const cql3::selection::selection&,
-        const query::result&,
-        const std::optional<attrs_to_get>&,
-        uint64_t* = nullptr);
-
-    // Converts a multi-row selection result to JSON compatible with DynamoDB.
-    // For each row, this method calls item_callback, which takes the size of
-    // the item as the parameter.
-    static future<std::vector<rjson::value>> describe_multi_item(schema_ptr schema,
-        const query::partition_slice&& slice,
-        shared_ptr<cql3::selection::selection> selection,
-        foreign_ptr<lw_shared_ptr<query::result>> query_result,
-        shared_ptr<const std::optional<attrs_to_get>> attrs_to_get,
-        noncopyable_function<void(uint64_t)> item_callback = {});
-
-    static void describe_single_item(const cql3::selection::selection&,
-        const std::vector<managed_bytes_opt>&,
-        const std::optional<attrs_to_get>&,
-        rjson::value&,
-        uint64_t* item_length_in_bytes = nullptr,
-        bool = false);
-
    static bool add_stream_options(const rjson::value& stream_spec, schema_builder&, service::storage_proxy& sp);
    static void supplement_table_info(rjson::value& descr, const schema& schema, service::storage_proxy& sp);
    static void supplement_table_stream_info(rjson::value& descr, const schema& schema, const service::storage_proxy& sp);
 };

-// is_big() checks approximately if the given JSON value is "bigger" than
-// the given big_size number of bytes. The goal is to *quickly* detect
-// oversized JSON that, for example, is too large to be serialized to a
-// contiguous string - we don't need an accurate size for that. Moreover,
-// as soon as we detect that the JSON is indeed "big", we can return true
-// and don't need to continue calculating its exact size.
-// For simplicity, we use a recursive implementation. This is fine because
-// Alternator limits the depth of JSONs it reads from inputs, and doesn't
-// add more than a couple of levels in its own output construction.
-bool is_big(const rjson::value& val, int big_size = 100'000);
+// returns table creation time in seconds since epoch for `db_clock`
+double get_table_creation_time(const schema &schema);

-// Check CQL's Role-Based Access Control (RBAC) permission (MODIFY,
-// SELECT, DROP, etc.) on the given table. When permission is denied an
-// appropriate user-readable api_error::access_denied is thrown.
-future<> verify_permission(bool enforce_authorization, bool warn_authorization, const service::client_state&, const schema_ptr&, auth::permission, alternator::stats& stats);
-
-/**
- * Make return type for serializing the object "streamed",
- * i.e. direct to HTTP output stream. Note: only useful for
- * (very) large objects as there are overhead issues with this
- * as well, but for massive lists of return objects this can
- * help avoid large allocations/many re-allocs
- */
-executor::body_writer make_streamed(rjson::value&&);
+// Result of parsing an ARN (Amazon Resource Name).
+// The ARN format is `arn:<partition>:<service>:<region>:<account-id>:<resource-type>/<resource-id>/<postfix>`.
+// The partition, service and account-id components are ignored.
+// resource-type must be the string "table".
+// resource-id is returned as table_name.
+// region is returned as keyspace_name.
+// postfix is the string after resource-id and is returned as is (whole), including the separator.
+// NOTE(review): all three members are non-owning std::string_view; presumably
+// they point into the string that was parsed - confirm lifetimes at call sites.
+struct arn_parts {
+    std::string_view keyspace_name;
+    std::string_view table_name;
+    std::string_view postfix;
+};
+// arn - the ARN to parse
+// arn_field_name - identifier of the ARN, used only in error messages, for example "Incorrect resource identifier `<arn_field_name>`"
+// type_name - used only in error messages, for example "... is not a valid <type_name> ARN ..."
+// expected_postfix - optional filter on the postfix value (the part of the ARN after resource-id, including the separator; see the comments for struct arn_parts).
+//    If it is empty, the postfix value must be empty as well;
+//    if it is not empty, the postfix value must start with expected_postfix, but may be longer.
+arn_parts parse_arn(std::string_view arn, std::string_view arn_field_name, std::string_view type_name, std::string_view expected_postfix);

+// The format is ks1|ks2|ks3... and table1|table2|table3...
+sstring print_names_for_audit(const std::set<sstring>& names);
 }
--- a/alternator/executor_read.cc
+++ b/alternator/executor_read.cc
--- a/alternator/executor_util.cc
+++ b/alternator/executor_util.cc
@@ -0,0 +1,559 @@
+/*
+ * Copyright 2019-present ScyllaDB
+ */
+
+/*
+ * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
+ */
+
+#include "alternator/executor_util.hh"
+#include "alternator/executor.hh"
+#include "alternator/error.hh"
+#include "auth/resource.hh"
+#include "auth/service.hh"
+#include "cdc/log.hh"
+#include "data_dictionary/data_dictionary.hh"
+#include "db/tags/utils.hh"
+#include "replica/database.hh"
+#include "cql3/selection/selection.hh"
+#include "cql3/result_set.hh"
+#include "serialization.hh"
+#include "service/storage_proxy.hh"
+#include "types/map.hh"
+#include <fmt/format.h>
+
+namespace alternator {
+
+extern logging::logger elogger; // from executor.cc
+
+// Reads the attribute `attribute_name` of the JSON value `value` as an int.
+// Returns a disengaged optional when the attribute is absent, and throws
+// api_error::validation when it is present but not an integer. The error
+// message quotes only the offending attribute's value: echoing the whole
+// enclosing `value` would make the message as large as the caller's input.
+std::optional<int> get_int_attribute(const rjson::value& value, std::string_view attribute_name) {
+    const rjson::value* attribute_value = rjson::find(value, attribute_name);
+    if (!attribute_value) {
+        return std::nullopt;
+    }
+    if (!attribute_value->IsInt()) {
+        throw api_error::validation(fmt::format("Expected integer value for attribute {}, got: {}",
+                attribute_name, *attribute_value));
+    }
+    return attribute_value->GetInt();
+}
+
+// Reads the attribute `attribute_name` of the JSON value `value` as a string.
+// Returns `default_return` when the attribute is absent, and throws
+// api_error::validation when it is present but not a string. The error
+// message quotes only the offending attribute's value: echoing the whole
+// enclosing `value` would make the message as large as the caller's input.
+std::string get_string_attribute(const rjson::value& value, std::string_view attribute_name, const char* default_return) {
+    const rjson::value* attribute_value = rjson::find(value, attribute_name);
+    if (!attribute_value) {
+        // default_return is converted to std::string here, so it must not be null.
+        return default_return;
+    }
+    if (!attribute_value->IsString()) {
+        throw api_error::validation(fmt::format("Expected string value for attribute {}, got: {}",
+                attribute_name, *attribute_value));
+    }
+    return rjson::to_string(*attribute_value);
+}
+
+// Reads the attribute `attribute_name` of the JSON value `value` as a bool.
+// Returns `default_return` when the attribute is absent, and throws
+// api_error::validation when it is present but not a boolean. The error
+// message quotes only the offending attribute's value: echoing the whole
+// enclosing `value` would make the message as large as the caller's input.
+bool get_bool_attribute(const rjson::value& value, std::string_view attribute_name, bool default_return) {
+    const rjson::value* attribute_value = rjson::find(value, attribute_name);
+    if (!attribute_value) {
+        return default_return;
+    }
+    if (!attribute_value->IsBool()) {
+        throw api_error::validation(fmt::format("Expected boolean value for attribute {}, got: {}",
+                attribute_name, *attribute_value));
+    }
+    return attribute_value->GetBool();
+}
+
+// Extracts the "TableName" field of `request`.
+// Returns a disengaged optional when the field is missing, and throws
+// api_error::validation when it is present but is not a string.
+std::optional<std::string> find_table_name(const rjson::value& request) {
+    const rjson::value* name_field = rjson::find(request, "TableName");
+    if (name_field == nullptr) {
+        return std::nullopt;
+    }
+    if (name_field->IsString()) {
+        return std::string(rjson::to_string(*name_field));
+    }
+    throw api_error::validation("Non-string TableName field in request");
+}
+
+// Like find_table_name(), but a missing "TableName" field is an error:
+// api_error::validation is thrown instead of returning an empty result.
+std::string get_table_name(const rjson::value& request) {
+    if (auto name = find_table_name(request)) {
+        return *name;
+    }
+    throw api_error::validation("Missing TableName field in request");
+}
+
+// Resolves the table named by the "TableName" field of `request`.
+// Returns a null schema_ptr when the request carries no "TableName" at all;
+// otherwise defers to the find_table() overload taking the table name.
+schema_ptr find_table(service::storage_proxy& proxy, const rjson::value& request) {
+    if (auto table_name = find_table_name(request); table_name) {
+        return find_table(proxy, *table_name);
+    }
+    return nullptr;
+}
+
+// Looks up the schema of table `table_name` in the keyspace whose name is
+// executor::KEYSPACE_NAME_PREFIX followed by `table_name`.
+// When the data dictionary reports no such table, validate_table_name() is
+// called on the name first, and then api_error::resource_not_found is thrown.
+schema_ptr find_table(service::storage_proxy& proxy, std::string_view table_name) {
+    const sstring keyspace = sstring(executor::KEYSPACE_NAME_PREFIX) + sstring(table_name);
+    try {
+        return proxy.data_dictionary().find_schema(keyspace, table_name);
+    } catch (const data_dictionary::no_such_column_family&) {
+        // DynamoDB returns validation error even when table does not exist
+        // and the table name is invalid.
+        validate_table_name(table_name);
+    }
+    throw api_error::resource_not_found(
+            fmt::format("Requested resource not found: Table: {} not found", table_name));
+}
+
+// Resolves the table named in `request` via find_table(). When that yields a
+// null schema, get_table_name() is invoked so that it raises the error for a
+// missing "TableName" field.
+schema_ptr get_table(service::storage_proxy& proxy, const rjson::value& request) {
+    schema_ptr schema = find_table(proxy, request);
+    if (schema) {
+        return schema;
+    }
+    // A null schema means the name was missing: a malformed name or a
+    // non-existent table would already have thrown inside find_table().
+    // This is the slow path - get_table_name() is called only to generate
+    // the exception.
+    get_table_name(request);
+    return schema;
+}
+
+// Returns the map type produced by
+// map_type_impl::get_instance(utf8_type, bytes_type, true). The instance is
+// obtained once per thread and cached in a thread_local.
+// NOTE(review): presumably the type of the column holding non-key
+// attributes - confirm against the callers.
+map_type attrs_type() {
+    static thread_local auto cached_type = map_type_impl::get_instance(utf8_type, bytes_type, true);
+    return cached_type;
+}
+
+// Returns the tags map that db::get_tags_of_table() finds for `schema`.
+// Throws api_error::validation when that lookup yields a null pointer.
+// NOTE(review): the message says "Table" but is formatted with
+// schema->ks_name() - confirm whether the table name was intended.
+const std::map<sstring, sstring>& get_tags_of_table_or_throw(schema_ptr schema) {
+    if (auto tags_ptr = db::get_tags_of_table(schema)) {
+        return *tags_ptr;
+    }
+    throw api_error::validation(format("Table {} does not have valid tagging information", schema->ks_name()));
+}
+
+// Tells whether `ks_name` begins with executor::KEYSPACE_NAME_PREFIX.
+bool is_alternator_keyspace(std::string_view ks_name) {
+    const std::string_view prefix = executor::KEYSPACE_NAME_PREFIX;
+    return ks_name.starts_with(prefix);
+}
+
+// This tag is set on a GSI when the user did not specify a range key, causing
+// Alternator to add the base table's range key as a spurious range key. It is
+// used by describe_key_schema() to suppress reporting that key.
+extern const sstring SPURIOUS_RANGE_KEY_ADDED_TO_GSI_AND_USER_DIDNT_SPECIFY_RANGE_KEY_TAG_KEY;
+
+// Adds a "KeySchema" array to `parent`, listing the partition-key columns of
+// `schema` as "HASH" entries followed by at most one clustering-key column
+// as a "RANGE" entry.
+// When `attribute_types` is non-null, the type string of every reported key
+// column is also stored there, keyed by the column name.
+// When `tags` is non-null and contains
+// SPURIOUS_RANGE_KEY_ADDED_TO_GSI_AND_USER_DIDNT_SPECIFY_RANGE_KEY_TAG_KEY,
+// no "RANGE" entry is emitted at all.
+void describe_key_schema(rjson::value& parent, const schema& schema, std::unordered_map<std::string, std::string>* attribute_types, const std::map<sstring, sstring>* tags) {
+    rjson::value key_schema = rjson::empty_array();
+    const bool ignore_range_keys_as_spurious = tags != nullptr && tags->contains(SPURIOUS_RANGE_KEY_ADDED_TO_GSI_AND_USER_DIDNT_SPECIFY_RANGE_KEY_TAG_KEY);
+
+    // Every partition-key column is reported, as a "HASH" key.
+    for (const column_definition& cdef : schema.partition_key_columns()) {
+        rjson::value key = rjson::empty_object();
+        rjson::add(key, "AttributeName", rjson::from_string(cdef.name_as_text()));
+        rjson::add(key, "KeyType", "HASH");
+        rjson::push_back(key_schema, std::move(key));
+        if (attribute_types) {
+            (*attribute_types)[cdef.name_as_text()] = type_to_string(cdef.type);
+        }
+    }
+    if (!ignore_range_keys_as_spurious) {
+        // NOTE: user requested key (there can be at most one) will always come first.
+        // There might be more keys following it, which were added, but those were
+        // not requested by the user, so we ignore them.
+        for (const column_definition& cdef : schema.clustering_key_columns()) {
+            rjson::value key = rjson::empty_object();
+            rjson::add(key, "AttributeName", rjson::from_string(cdef.name_as_text()));
+            rjson::add(key, "KeyType", "RANGE");
+            rjson::push_back(key_schema, std::move(key));
+            if (attribute_types) {
+                (*attribute_types)[cdef.name_as_text()] = type_to_string(cdef.type);
+            }
+            // Stop after the first clustering column; see the NOTE above.
+            break;
+        }
+    }
+    rjson::add(parent, "KeySchema", std::move(key_schema));
+}
+
+// Returns true when every character of `name` is one that may appear in a
+// table name: a-z, A-Z, 0-9, '_' (underscore), '-' (dash) or '.' (dot). An
+// empty name trivially passes. The length of the name is deliberately not
+// checked here - use validate_table_name() to validate both the characters
+// and the length.
+static bool valid_table_name_chars(std::string_view name) {
+    const auto is_allowed = [] (char ch) {
+        const bool lower = ch >= 'a' && ch <= 'z';
+        const bool upper = ch >= 'A' && ch <= 'Z';
+        const bool digit = ch >= '0' && ch <= '9';
+        return lower || upper || digit || ch == '_' || ch == '-' || ch == '.';
+    };
+    for (const char ch : name) {
+        if (!is_allowed(ch)) {
+            return false;
+        }
+    }
+    return true;
+}
+
+// Builds the combined name used for an index: `table_name`, then `delim`,
+// then `index_name`. Throws api_error::validation when `index_name` is
+// shorter than 3 characters or contains a character rejected by
+// valid_table_name_chars(), and - only when `validate_len` is set - when the
+// combined name is longer than max_auxiliary_table_name_length.
+std::string view_name(std::string_view table_name, std::string_view index_name, const std::string& delim, bool validate_len) {
+    if (index_name.size() < 3) {
+        throw api_error::validation("IndexName must be at least 3 characters long");
+    }
+    if (!valid_table_name_chars(index_name)) {
+        throw api_error::validation(
+                fmt::format("IndexName '{}' must satisfy regular expression pattern: [a-zA-Z0-9_.-]+", index_name));
+    }
+    std::string full_name(table_name);
+    full_name += delim;
+    full_name += index_name;
+    const bool too_long = full_name.size() > max_auxiliary_table_name_length;
+    if (validate_len && too_long) {
+        // The reported limit excludes the delimiter, which the user never typed.
+        throw api_error::validation(
+                fmt::format("The total length of TableName ('{}') and IndexName ('{}') cannot exceed {} characters",
+                        table_name, index_name, max_auxiliary_table_name_length - delim.size()));
+    }
+    return full_name;
+}
+
+// Returns view_name() of `table_name` and `index_name` joined by ":".
+// NOTE(review): presumably the naming scheme for global secondary indexes - confirm.
+std::string gsi_name(std::string_view table_name, std::string_view index_name, bool validate_len) {
+    const std::string delimiter = ":";
+    return view_name(table_name, index_name, delimiter, validate_len);
+}
+
+// Returns view_name() of `table_name` and `index_name` joined by "!:".
+// NOTE(review): presumably the naming scheme for local secondary indexes - confirm.
+std::string lsi_name(std::string_view table_name, std::string_view index_name, bool validate_len) {
+    const std::string delimiter = "!:";
+    return view_name(table_name, index_name, delimiter, validate_len);
+}
+
+// Checks the number of members of the JSON object `key` against `schema`:
+// exactly one member is expected when the schema has no clustering key
+// columns, and exactly two otherwise. Throws api_error::validation on a
+// mismatch. Only the count is checked here, not the member names.
+void check_key(const rjson::value& key, const schema_ptr& schema) {
+    const auto expected_members = schema->clustering_key_size() == 0 ? 1u : 2u;
+    if (key.MemberCount() == expected_members) {
+        return;
+    }
+    throw api_error::validation("Given key attribute not in schema");
+}
+
+// Verifies that every member name of the JSON object `field` appears in
+// `used`. Throws api_error::validation, naming `field_name`, the first
+// unused member and `operation`, as soon as one is found. A null `field` is
+// accepted and nothing is checked.
+void verify_all_are_used(const rjson::value* field,
+        const std::unordered_set<std::string>& used, const char* field_name, const char* operation) {
+    if (field == nullptr) {
+        return;
+    }
+    for (auto member = field->MemberBegin(), last = field->MemberEnd(); member != last; ++member) {
+        if (used.contains(rjson::to_string(member->name))) {
+            continue;
+        }
+        throw api_error::validation(
+            format("{} has spurious '{}', not used in {}",
+                field_name, rjson::to_string_view(member->name), operation));
+    }
+}
+
+// Records one authorization failure in stats.authorization_failures. When
+// warn_authorization is set a warn-level message is logged, and when
+// enforce_authorization is set an api_error::access_denied carrying msg is
+// thrown. With enforce_authorization unset this function returns normally,
+// so a caller that must not proceed after a failed check has to return on
+// its own right after calling it.
+static void authorization_error(stats& stats, bool enforce_authorization, bool warn_authorization, std::string msg) {
+    stats.authorization_failures++;
+    if (warn_authorization) {
+        if (enforce_authorization) {
+            elogger.warn("alternator_warn_authorization=true: {}", msg);
+        } else {
+            elogger.warn("If you set alternator_enforce_authorization=true the following will be enforced: {}", msg);
+        }
+    }
+    if (enforce_authorization) {
+        throw api_error::access_denied(std::move(msg));
+    }
+}
+
+future<> verify_permission(
+    bool enforce_authorization,
+    bool warn_authorization,
+    const service::client_state& client_state,
+    const schema_ptr& schema,
+    auth::permission permission_to_check,
+    stats& stats) {
+    if (!enforce_authorization && !warn_authorization) {
+        co_return; // authorization is neither enforced nor logged, so skip the check entirely
+    }
+    // Unfortunately, the fix for issue #23218 did not modify the function
+    // that we use here - check_has_permissions(). So if we want to allow
+    // writes to internal tables (from try_get_internal_table()) only to a
+    // superuser, we need to explicitly check it here.
+    if (permission_to_check == auth::permission::MODIFY && is_internal_keyspace(schema->ks_name())) {
+        if (!client_state.user() ||
+            !client_state.user()->name ||
+            !co_await client_state.get_auth_service()->underlying_role_manager().is_superuser(*client_state.user()->name)) {
+                sstring username = "<anonymous>";
+                if (client_state.user() && client_state.user()->name) {
+                    username = client_state.user()->name.value();
+                }
+                authorization_error(stats, enforce_authorization, warn_authorization, fmt::format(
+                    "Write access denied on internal table {}.{} to role {} because it is not a superuser",
+                    schema->ks_name(), schema->cf_name(), username));
+                co_return; // only reached when authorization_error() did not throw (enforcement is off)
+        }
+    }
+    auto resource = auth::make_data_resource(schema->ks_name(), schema->cf_name());
+    if (!client_state.user() || !client_state.user()->name ||
+        !co_await client_state.check_has_permission(auth::command_desc(permission_to_check, resource))) {
+        sstring username = "<anonymous>";
+        if (client_state.user() && client_state.user()->name) {
+            username = client_state.user()->name.value();
+        }
+        // Using exceptions for errors makes this function faster in the
+        // success path (when the operation is allowed).
+        authorization_error(stats, enforce_authorization, warn_authorization, fmt::format(
+            "{} access on table {}.{} is denied to role {}, client address {}",
+            auth::permissions::to_string(permission_to_check),
+            schema->ks_name(), schema->cf_name(), username, client_state.get_client_address()));
+    }
+}
+
+// Similar to verify_permission() above, but just for CREATE operations.
+// Those do not operate on any specific table, so require permissions on
+// ALL KEYSPACES instead of any specific table.
+future<> verify_create_permission(bool enforce_authorization, bool warn_authorization, const service::client_state& client_state, stats& stats) {
+    if (!enforce_authorization && !warn_authorization) {
+        co_return; // nothing to enforce or to warn about
+    }
+    auto resource = auth::resource(auth::resource_kind::data); // no keyspace or table given: the ALL KEYSPACES resource
+    if (!co_await client_state.check_has_permission(auth::command_desc(auth::permission::CREATE, resource))) {
+        sstring username = "<anonymous>"; // reported when the client has no named user
+        if (client_state.user() && client_state.user()->name) {
+            username = client_state.user()->name.value();
+        }
+        authorization_error(stats, enforce_authorization, warn_authorization, fmt::format(
+            "CREATE access on ALL KEYSPACES is denied to role {}", username));
+    }
+}
+
+schema_ptr try_get_internal_table(const data_dictionary::database& db, std::string_view table_name) {
+    // Names without the internal-table prefix are not ours; starts_with() avoids scanning the whole name.
+    if (!table_name.starts_with(executor::INTERNAL_TABLE_PREFIX)) {
+        return schema_ptr{};
+    }
+    table_name.remove_prefix(executor::INTERNAL_TABLE_PREFIX.size());
+    size_t delim = table_name.find_first_of('.');
+    if (delim == std::string_view::npos) {
+        return schema_ptr{};
+    }
+    std::string_view ks_name = table_name.substr(0, delim);
+    table_name.remove_prefix(ks_name.size() + 1);
+    // Only internal keyspaces can be accessed to avoid leakage
+    auto ks = db.try_find_keyspace(ks_name);
+    if (!ks || !ks->is_internal()) {
+        return schema_ptr{};
+    }
+    try {
+        return db.find_schema(ks_name, table_name);
+    } catch (data_dictionary::no_such_column_family&) {
+        // DynamoDB returns validation error even when table does not exist
+        // and the table name is invalid.
+        validate_table_name(table_name);
+        throw api_error::resource_not_found(
+            fmt::format("Requested resource not found: Internal table: {}.{} not found", ks_name, table_name));
+    }
+}
+
+schema_ptr get_table_from_batch_request(const service::storage_proxy& proxy, const rjson::value::ConstMemberIterator& batch_request) {
+    sstring table_name = rjson::to_sstring(batch_request->name); // JSON keys are always strings
+    try {
+        const sstring ks_name = sstring(executor::KEYSPACE_NAME_PREFIX) + table_name;
+        return proxy.data_dictionary().find_schema(ks_name, table_name);
+    } catch (data_dictionary::no_such_column_family&) {
+        // Like DynamoDB, report an invalid name as a validation error even though the table is missing.
+        validate_table_name(table_name);
+        throw api_error::resource_not_found(format("Requested resource not found: Table: {} not found", table_name));
+    }
+}
+
+lw_shared_ptr<stats> get_stats_from_schema(service::storage_proxy& sp, const schema& schema) {
+    try {
+        auto& per_table = sp.local_db().find_column_family(schema.id()).get_stats();
+        if (!per_table.alternator_stats) {
+            per_table.alternator_stats = seastar::make_shared<table_stats>(schema.ks_name(), schema.cf_name());
+        }
+        return per_table.alternator_stats->_stats;
+    } catch (std::runtime_error&) {
+        // Reaching this handler means the table was deleted while the operation was still in
+        // progress. A temporary stats object is fine: the table's metrics are deleted with it.
+        return make_lw_shared<stats>();
+    }
+}
+
+// Adds the attributes found in one result row to `item`, optionally summing their size into *item_length_in_bytes.
+void describe_single_item(const cql3::selection::selection& selection,
+    const std::vector<managed_bytes_opt>& result_row,
+    const std::optional<attrs_to_get>& attrs_to_get,
+    rjson::value& item,
+    uint64_t* item_length_in_bytes,
+    bool include_all_embedded_attributes)
+{
+    const auto& columns = selection.get_columns();
+    auto column_it = columns.begin();
+    for (const managed_bytes_opt& cell : result_row) {
+        if (!cell) {
+            ++column_it;
+            continue;
+        }
+        std::string column_name = (*column_it)->name_as_text();
+        if (column_name != executor::ATTRS_COLUMN_NAME) {
+            if (item_length_in_bytes) {
+                (*item_length_in_bytes) += column_name.length() + cell->size();
+            }
+            if (!attrs_to_get || attrs_to_get->contains(column_name)) {
+                // item is expected to start empty and column names are unique, so add() makes sense
+                rjson::add_with_string_name(item, column_name, rjson::empty_object());
+                rjson::value& field = item[column_name.c_str()];
+                cell->with_linearized([&] (bytes_view linearized_cell) {
+                    rjson::add_with_string_name(field, type_to_string((*column_it)->type), json_key_column_value(linearized_cell, **column_it));
+                });
+            }
+        } else {
+            auto deserialized = attrs_type()->deserialize(*cell);
+            auto keys_and_values = value_cast<map_type_impl::native_type>(deserialized);
+            for (const auto& entry : keys_and_values) {
+                std::string attr_name = value_cast<sstring>(entry.first);
+                if (item_length_in_bytes) {
+                    (*item_length_in_bytes) += attr_name.length();
+                }
+                if (include_all_embedded_attributes || !attrs_to_get || attrs_to_get->contains(attr_name)) {
+                    bytes value = value_cast<bytes>(entry.second);
+                    if (item_length_in_bytes && value.length()) {
+                        // ScyllaDB uses one extra byte compared to DynamoDB for the bytes length
+                        (*item_length_in_bytes) += value.length() - 1;
+                    }
+                    rjson::value v = deserialize_item(value);
+                    if (attrs_to_get) {
+                        auto it = attrs_to_get->find(attr_name);
+                        if (it != attrs_to_get->end()) {
+                            // attrs_to_get may have asked for only part of
+                            // this attribute. hierarchy_filter() modifies v,
+                            // and returns false when nothing is to be kept.
+                            if (!hierarchy_filter(v, it->second)) {
+                                continue;
+                            }
+                        }
+                    }
+                    // item is expected to start empty and attribute names are unique, so add() makes sense
+                    rjson::add_with_string_name(item, attr_name, std::move(v));
+                } else if (size_t skipped_len = item_length_in_bytes ? value_cast<bytes>(entry.second).length() : 0) {
+                    // Skipped attributes are counted too; the non-zero test keeps length() - 1 from wrapping around.
+                    (*item_length_in_bytes) += skipped_len - 1;
+                }
+            }
+        }
+        ++column_it;
+    }
+}
+
+std::optional<rjson::value> describe_single_item(schema_ptr schema,
+        const query::partition_slice& slice,
+        const cql3::selection::selection& selection,
+        const query::result& query_result,
+        const std::optional<attrs_to_get>& attrs_to_get,
+        uint64_t* item_length_in_bytes) {
+    rjson::value item = rjson::empty_object();
+
+    cql3::selection::result_set_builder builder(selection, gc_clock::now());
+    query::result_view::consume(query_result, slice, cql3::selection::result_set_builder::visitor(builder, *schema, selection));
+
+    auto result_set = builder.build();
+    if (result_set->empty()) {
+        if (item_length_in_bytes) {
+            // An empty result is counted as having a minimal length of 1 byte.
+            (*item_length_in_bytes) += 1;
+        }
+        // If there is no matching item, we're supposed to return an empty
+        // object without an Item member - not one with an empty Item member
+        return {};
+    }
+    if (result_set->size() > 1) {
+        // If the result set contains multiple rows, the code should have
+        // called describe_multi_item(), not this function.
+        throw std::logic_error("describe_single_item() asked to describe multiple items");
+    }
+    describe_single_item(selection, *result_set->rows().begin(), attrs_to_get, item, item_length_in_bytes); // exactly one row: convert it into item
+    return item;
+}
+
+static void check_big_array(const rjson::value& val, int& size_left);
+static void check_big_object(const rjson::value& val, int& size_left);
+
+// For simplicity, we use a recursive implementation. This is fine because
+// Alternator limits the depth of JSONs it reads from inputs, and doesn't
+// add more than a couple of levels in its own output construction.
+bool is_big(const rjson::value& val, int big_size) {
+    if (val.IsString()) {
+        return ssize_t(val.GetStringLength()) > big_size;
+    } else if (val.IsObject()) {
+        check_big_object(val, big_size); // subtracts the estimated size from big_size in place
+        return big_size < 0; // a negative remainder means the estimate exceeded big_size
+    } else if (val.IsArray()) {
+        check_big_array(val, big_size); // subtracts the estimated size from big_size in place
+        return big_size < 0;
+    }
+    return false; // any other kind of value is never considered big
+}
+
+static void check_big_array(const rjson::value& val, int& size_left) {
+    // Assume a fixed size of 10 bytes for each number, boolean, etc., or
+    // beginning of a sub-object. This doesn't have to be accurate.
+    size_left -= 10 * val.Size();
+    for (const auto& v : val.GetArray()) {
+        if (size_left < 0) {
+            return; // budget already exhausted, the remaining elements need not be examined
+        }
+        // Note that we avoid recursive calls for the leaves (anything except
+        // array or object) because usually those greatly outnumber the trunk.
+        if (v.IsString()) {
+            size_left -= v.GetStringLength();
+        } else if (v.IsObject()) {
+            check_big_object(v, size_left);
+        } else if (v.IsArray()) {
+            check_big_array(v, size_left);
+        }
+    }
+}
+
+static void check_big_object(const rjson::value& val, int& size_left) {
+    size_left -= 10 * val.MemberCount(); // fixed 10-byte estimate per member, not meant to be accurate
+    for (const auto& m : val.GetObject()) {
+        if (size_left < 0) {
+            return; // budget already exhausted, the remaining members need not be examined
+        }
+        size_left -= m.name.GetStringLength(); // member names count towards the size as well
+        if (m.value.IsString()) {
+            size_left -= m.value.GetStringLength();
+        } else if (m.value.IsObject()) {
+            check_big_object(m.value, size_left);
+        } else if (m.value.IsArray()) {
+            check_big_array(m.value, size_left);
+        }
+    }
+}
+
+void validate_table_name(std::string_view name, const char* source) {
+    const bool length_ok = name.length() >= 3 && name.length() <= max_table_name_length;
+    if (!length_ok) {
+        throw api_error::validation(
+                format("{} must be at least 3 characters long and at most {} characters long", source, max_table_name_length));
+    } else if (!valid_table_name_chars(name)) {
+        throw api_error::validation(
+                format("{} must satisfy regular expression pattern: [a-zA-Z0-9_.-]+", source));
+    }
+}
+
+void validate_cdc_log_name_length(std::string_view table_name) {
+    const auto log_name_len = cdc::log_name(table_name).length();
+    if (log_name_len <= max_auxiliary_table_name_length) {
+        return;
+    }
+    // The CDC log table's name is the table name plus cdc_log_suffix ("_scylla_cdc_log"). The
+    // error reports the longest allowed name, assuming cdc::log_name() adds a fixed-length suffix.
+    int suffix_len = log_name_len - table_name.length();
+    throw api_error::validation(fmt::format("Streams or vector search cannot be enabled on a table whose name is longer than {} characters: {}",
+        max_auxiliary_table_name_length - suffix_len, table_name));
+}
+
+body_writer make_streamed(rjson::value&& value) {
+    return [value = std::move(value)](output_stream<char>&& _out) mutable -> future<> {
+        auto out = std::move(_out); // take ownership of the stream for the duration of the write
+        std::exception_ptr ex;
+        try {
+            co_await rjson::print(value, out);
+        } catch (...) {
+            ex = std::current_exception(); // remember the failure; it is re-raised after the cleanup below
+        }
+        co_await out.close(); // the stream is closed whether or not printing succeeded
+        co_await rjson::destroy_gently(std::move(value));
+        if (ex) {
+            co_await coroutine::return_exception_ptr(std::move(ex));
+        }
+    };
+}
+
+} // namespace alternator
--- a/alternator/executor_util.hh
+++ b/alternator/executor_util.hh
@@ -0,0 +1,247 @@
+/*
+ * Copyright 2019-present ScyllaDB
+ */
+
+/*
+ * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
+ */
+
+// This header file, and the implementation file executor_util.cc, contain
+// various utility functions that are reused in many different operations
+// (API requests) across Alternator's code - in files such as executor.cc,
+// executor_read.cc, streams.cc, ttl.cc, and more. These utility functions
+// include things like extracting and validating pieces from a JSON request,
+// checking permissions, constructing auxiliary table names, and more.
+
+#pragma once
+
+#include <map>
+#include <optional>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+#include <unordered_set>
+
+#include <seastar/core/future.hh>
+#include <seastar/util/noncopyable_function.hh>
+
+#include "utils/rjson.hh"
+#include "schema/schema_fwd.hh"
+#include "types/types.hh"
+#include "auth/permission.hh"
+#include "alternator/stats.hh"
+#include "alternator/attribute_path.hh"
+#include "utils/managed_bytes.hh"
+
+namespace query { class partition_slice; class result; }
+namespace cql3::selection { class selection; }
+namespace data_dictionary { class database; }
+namespace service { class storage_proxy; class client_state; }
+
+namespace alternator {
+
+/// The body_writer is used for streaming responses - where the response body
+/// is written in chunks to the output_stream. This allows for efficient
+/// handling of large responses without needing to allocate a large buffer in
+/// memory. It is one of the variants of executor::request_return_type.
+using body_writer = noncopyable_function<future<>(output_stream<char>&&)>;
+
+/// Get the value of an integer attribute, or an empty optional if it is
+/// missing. If the attribute exists, but is not an integer, a descriptive
+/// api_error is thrown.
+std::optional<int> get_int_attribute(const rjson::value& value, std::string_view attribute_name);
+
+/// Get the value of a string attribute, or a default value if it is missing.
+/// If the attribute exists, but is not a string, a descriptive api_error is
+/// thrown.
+std::string get_string_attribute(const rjson::value& value, std::string_view attribute_name, const char* default_return);
+
+/// Get the value of a boolean attribute, or a default value if it is missing.
+/// If the attribute exists, but is not a bool, a descriptive api_error is
+/// thrown.
+bool get_bool_attribute(const rjson::value& value, std::string_view attribute_name, bool default_return);
+
+/// Extract table name from a request.
+/// Most requests expect the table's name to be listed in a "TableName" field.
+/// get_table_name() returns the name or api_error in case the table name is
+/// missing or not a string.
+std::string get_table_name(const rjson::value& request);
+
+/// find_table_name() is like get_table_name() except that it returns an
+/// optional table name - it returns an empty optional when the TableName
+/// is missing from the request, instead of throwing as get_table_name()
+/// does. However, find_table_name() still throws if a TableName exists but
+/// is not a string.
+std::optional<std::string> find_table_name(const rjson::value& request);
+
+/// Extract table schema from a request.
+/// Many requests expect the table's name to be listed in a "TableName" field
+/// and need to look it up as an existing table. The get_table() function
+/// does this, with the appropriate validation and api_error in case the table
+/// name is missing, invalid or the table doesn't exist. If everything is
+/// successful, it returns the table's schema.
+schema_ptr get_table(service::storage_proxy& proxy, const rjson::value& request);
+
+/// This find_table() variant is like get_table() except that it returns a
+/// nullptr instead of throwing if the request does not mention a TableName.
+/// In other cases of errors (i.e., a table is mentioned but doesn't exist)
+/// this function throws too.
+schema_ptr find_table(service::storage_proxy& proxy, const rjson::value& request);
+
+/// This find_table() variant is like the previous one except that it takes
+/// the table name directly instead of a request object. It is used in cases
+/// where we already have the table name extracted from the request.
+schema_ptr find_table(service::storage_proxy& proxy, std::string_view table_name);
+
+// We would have liked to support table names up to 255 bytes, like DynamoDB.
+// But Scylla creates a directory whose name is the table's name plus 33
+// bytes (dash and UUID), and since directory names are limited to 255 bytes,
+// we need to limit table names to 222 bytes, instead of 255. See issue #4480.
+// We actually have two limits here,
+// * max_table_name_length is the limit that Alternator will impose on names
+//   of new Alternator tables.
+// * max_auxiliary_table_name_length is the potentially higher absolute limit
+//   that Scylla imposes on the names of auxiliary tables that Alternator
+//   wants to create internally - i.e. materialized views or CDC log tables.
+// The second limit might mean that it is not possible to add a GSI to an
+// existing table, because the name of the new auxiliary table may go over
+// the limit. The second limit is also one of the reasons why the first limit
+// is set lower than 222 - to have room to enable streams which add the extra
+// suffix "_scylla_cdc_log" to the table name.
+inline constexpr int max_table_name_length = 192;
+inline constexpr int max_auxiliary_table_name_length = 222;
+
+/// validate_table_name() validates the TableName parameter in a request - it
+/// should be called in CreateTable, and in other requests only when noticing
+/// that the named table doesn't exist.
+/// The DynamoDB developer guide, https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/HowItWorks.NamingRulesDataTypes.html#HowItWorks.NamingRules
+/// specifies that table "names must be between 3 and 255 characters long and
+/// can contain only the following characters: a-z, A-Z, 0-9, _ (underscore),
+/// - (dash), . (dot)". However, Alternator only allows max_table_name_length
+/// characters (see above) - not 255.
+/// validate_table_name() throws the appropriate api_error if this validation
+/// fails.
+void validate_table_name(std::string_view name, const char* source = "TableName");
+
+/// Validate that a CDC log table could be created for the base table with a
+/// given table_name, and if not, throw a user-visible api_error::validation.
+/// It is not possible to create a CDC log table if the table name is so long
+/// that adding the 15-character suffix "_scylla_cdc_log" (cdc_log_suffix)
+/// makes it go over max_auxiliary_table_name_length.
+/// Note that if max_table_name_length is set to less than 207 (which is
+/// max_auxiliary_table_name_length-15), then this function will never
+/// fail. However, it's still important to call it in UpdateTable, in case
+/// we have pre-existing tables with names longer than this to avoid #24598.
+void validate_cdc_log_name_length(std::string_view table_name);
+
+/// Checks if a keyspace, given by its name, is an Alternator keyspace.
+/// This just checks if the name begins in executor::KEYSPACE_NAME_PREFIX,
+/// a prefix that all keyspaces created by Alternator's CreateTable use.
+bool is_alternator_keyspace(std::string_view ks_name);
+
+/// Wraps db::get_tags_of_table() and throws api_error::validation if the
+/// table is missing the tags extension.
+const std::map<sstring, sstring>& get_tags_of_table_or_throw(schema_ptr schema);
+
+/// Returns a type object representing the type of the ":attrs" column used
+/// by Alternator to store all non-key attributes. This type is a map from
+/// string (attribute name) to bytes (serialized attribute value).
+map_type attrs_type();
+
+// In DynamoDB index names are local to a table, while in Scylla, materialized
+// view names are global (in a keyspace). So we need to compose a unique name
+// for the view taking into account both the table's name and the index name.
+// We concatenate the table and index name separated by a delim character
+// (a character not allowed by DynamoDB in ordinary table names, default: ":").
+// The downside of this approach is that it limits the sum of the lengths,
+// instead of each component individually as DynamoDB does.
+// The view_name() function assumes the table_name has already been validated
+// but validates the legality of index_name and the combination of both.
+std::string view_name(std::string_view table_name, std::string_view index_name,
+        const std::string& delim = ":", bool validate_len = true);
+std::string gsi_name(std::string_view table_name, std::string_view index_name,
+        bool validate_len = true);
+std::string lsi_name(std::string_view table_name, std::string_view index_name,
+        bool validate_len = true);
+
+/// After calling pk_from_json() and ck_from_json() to extract the pk and ck
+/// components of a key, and if that succeeded, call check_key() to further
+/// check that the key doesn't have any spurious components.
+void check_key(const rjson::value& key, const schema_ptr& schema);
+
+/// Fail with api_error::validation if the expression has unused attribute
+/// names or values. This is how DynamoDB behaves, so we do too.
+void verify_all_are_used(const rjson::value* field,
+        const std::unordered_set<std::string>& used,
+        const char* field_name,
+        const char* operation);
+
+/// Check CQL's Role-Based Access Control (RBAC) permission (MODIFY, SELECT,
+/// DROP, etc.) on the given table. When permission is denied and
+/// enforce_authorization is set, api_error::access_denied is thrown.
+future<> verify_permission(bool enforce_authorization, bool warn_authorization, const service::client_state&, const schema_ptr&, auth::permission, stats& stats);
+
+/// Similar to verify_permission() above, but just for CREATE operations.
+/// Those do not operate on any specific table, so require permissions on
+/// ALL KEYSPACES instead of any specific table.
+future<> verify_create_permission(bool enforce_authorization, bool warn_authorization, const service::client_state&, stats& stats);
+
+// Sets a KeySchema JSON array inside the given parent object describing the
+// key attributes of the given schema as HASH or RANGE keys. Additionally,
+// adds mappings from key attribute names to their DynamoDB type string into
+// attribute_types.
+void describe_key_schema(rjson::value& parent, const schema&, std::unordered_map<std::string, std::string>* attribute_types = nullptr, const std::map<sstring, sstring>* tags = nullptr);
+
+/// is_big() checks approximately if the given JSON value is "bigger" than
+/// the given big_size number of bytes. The goal is to *quickly* detect
+/// oversized JSON that, for example, is too large to be serialized to a
+/// contiguous string - we don't need an accurate size for that. Moreover,
+/// as soon as we detect that the JSON is indeed "big", we can return true
+/// and don't need to continue calculating its exact size.
+bool is_big(const rjson::value& val, int big_size = 100'000);
+
+/// try_get_internal_table() handles the special case that the given table_name
+/// begins with INTERNAL_TABLE_PREFIX (".scylla.alternator."). In that case,
+/// this function assumes that the rest of the name refers to an internal
+/// Scylla table (e.g., system table) and returns the schema of that table -
+/// or an exception if it doesn't exist. Otherwise, if table_name does not
+/// start with INTERNAL_TABLE_PREFIX, this function returns an empty schema_ptr
+/// and the caller should look for a normal Alternator table with that name.
+schema_ptr try_get_internal_table(const data_dictionary::database& db, std::string_view table_name);
+
+/// get_table_from_batch_request() is used by batch write/read operations to
+/// look up the schema for a table named in a batch request, by the JSON member
+/// name (which is the table name in a BatchWriteItem or BatchGetItem request).
+schema_ptr get_table_from_batch_request(const service::storage_proxy& proxy, const rjson::value::ConstMemberIterator& batch_request);
+
+/// Returns (or lazily creates) the per-table stats object for the given schema.
+/// If the table has been deleted, returns a temporary stats object.
+lw_shared_ptr<stats> get_stats_from_schema(service::storage_proxy& sp, const schema& schema);
+
+/// Writes one item's attributes into `item` from the given selection result
+/// row. If include_all_embedded_attributes is true, all attributes from the
+/// ATTRS_COLUMN map column are included regardless of attrs_to_get.
+void describe_single_item(const cql3::selection::selection&,
+    const std::vector<managed_bytes_opt>&,
+    const std::optional<attrs_to_get>&,
+    rjson::value&,
+    uint64_t* item_length_in_bytes = nullptr,
+    bool include_all_embedded_attributes = false);
+
+/// Converts a single result row to a JSON item, or returns an empty optional
+/// if the result is empty.
+std::optional<rjson::value> describe_single_item(schema_ptr,
+    const query::partition_slice&,
+    const cql3::selection::selection&,
+    const query::result&,
+    const std::optional<attrs_to_get>&,
+    uint64_t* item_length_in_bytes = nullptr);
+
+/// Make a body_writer (function that can write output incrementally to the
+/// HTTP stream) from the given JSON object.
+/// Note: only useful for (very) large objects as there are overhead issues
+/// with this as well, but for massive lists of return objects this can
+/// help avoid large allocations/many re-allocs.
+body_writer make_streamed(rjson::value&&);
+
+} // namespace alternator
--- a/alternator/expressions.cc
+++ b/alternator/expressions.cc
@@ -744,7 +744,7 @@ void validate_attr_name_length(std::string_view supplementary_context, size_t at
    constexpr const size_t DYNAMODB_NONKEY_ATTR_NAME_SIZE_MAX = 65535;

    const size_t max_length = is_key ? DYNAMODB_KEY_ATTR_NAME_SIZE_MAX : DYNAMODB_NONKEY_ATTR_NAME_SIZE_MAX;
-    if (attr_name_length > max_length) {
+    if (attr_name_length > max_length || attr_name_length == 0) {
        std::string error_msg;
        if (!error_msg_prefix.empty()) {
            error_msg += error_msg_prefix;
@@ -754,7 +754,11 @@ void validate_attr_name_length(std::string_view supplementary_context, size_t at
            error_msg += supplementary_context;
            error_msg += " - ";
        }
-        error_msg += fmt::format("Attribute name is too large, must be less than {} bytes", std::to_string(max_length + 1));
+        if (attr_name_length == 0) {
+            error_msg += "Empty attribute name";
+        } else {
+            error_msg += fmt::format("Attribute name is too large, must be less than {} bytes", std::to_string(max_length + 1));
+        }
        throw api_error::validation(error_msg);
    }
 }
--- a/alternator/http_compression.cc
+++ b/alternator/http_compression.cc
@@ -264,7 +264,7 @@ private:
    }
 };

-executor::body_writer compress(response_compressor::compression_type ct, const db::config& cfg, executor::body_writer&& bw) {
+body_writer compress(response_compressor::compression_type ct, const db::config& cfg, body_writer&& bw) {
    return [bw = std::move(bw), ct, level = cfg.alternator_response_gzip_compression_level()](output_stream<char>&& out) mutable -> future<> {
        output_stream_options opts;
        opts.trim_to_size = true;
@@ -287,7 +287,7 @@ executor::body_writer compress(response_compressor::compression_type ct, const d
    };
 }

-future<std::unique_ptr<http::reply>> response_compressor::generate_reply(std::unique_ptr<http::reply> rep, sstring accept_encoding, const char* content_type, executor::body_writer&& body_writer) {
+future<std::unique_ptr<http::reply>> response_compressor::generate_reply(std::unique_ptr<http::reply> rep, sstring accept_encoding, const char* content_type, body_writer&& body_writer) {
    response_compressor::compression_type ct = find_compression(accept_encoding, std::numeric_limits<size_t>::max());
    if (ct != response_compressor::compression_type::none) {
        rep->add_header("Content-Encoding", get_encoding_name(ct));
--- a/alternator/http_compression.hh
+++ b/alternator/http_compression.hh
@@ -85,7 +85,7 @@ public:
    future<std::unique_ptr<http::reply>> generate_reply(std::unique_ptr<http::reply> rep,
         sstring accept_encoding, const char* content_type, std::string&& response_body);
    future<std::unique_ptr<http::reply>> generate_reply(std::unique_ptr<http::reply> rep,
-         sstring accept_encoding, const char* content_type, executor::body_writer&& body_writer);
+         sstring accept_encoding, const char* content_type, body_writer&& body_writer);
 };

 }
--- a/alternator/serialization.cc
+++ b/alternator/serialization.cc
@@ -14,12 +14,12 @@
 #include "types/concrete_types.hh"
 #include "types/json_utils.hh"
 #include "mutation/position_in_partition.hh"
+#include "alternator/executor_util.hh"

 static logging::logger slogger("alternator-serialization");

 namespace alternator {

-bool is_alternator_keyspace(const sstring& ks_name);

 type_info type_info_from_string(std::string_view type) {
    static thread_local const std::unordered_map<std::string_view, type_info> type_infos = {
--- a/alternator/server.cc
+++ b/alternator/server.cc
@@ -7,6 +7,8 @@
 */

 #include "alternator/server.hh"
+#include "audit/audit.hh"
+#include "alternator/executor_util.hh"
 #include "gms/application_state.hh"
 #include "utils/log.hh"
 #include <fmt/ranges.h>
@@ -142,7 +144,7 @@ public:
                    return _response_compressor.generate_reply(std::move(rep), std::move(accept_encoding),
                                                               REPLY_CONTENT_TYPE, std::move(str));
                },
-                [&] (executor::body_writer&& body_writer) {
+                [&] (body_writer&& body_writer) {
                    return _response_compressor.generate_reply(std::move(rep), std::move(accept_encoding),
                                                               REPLY_CONTENT_TYPE, std::move(body_writer));
                },
@@ -785,12 +787,25 @@ future<executor::request_return_type> server::handle_api_request(std::unique_ptr
    auto f = [this, content = std::move(content), &callback = callback_it->second,
            client_state = std::move(client_state), trace_state = std::move(trace_state),
            units = std::move(units), req = std::move(req)] () mutable -> future<executor::request_return_type> {
-                rjson::value json_request = co_await _json_parser.parse(std::move(content));
-                if (!json_request.IsObject()) {
-                    co_return api_error::validation("Request content must be an object");
-                }
-                co_return co_await callback(_executor, client_state, trace_state,
-                    make_service_permit(std::move(units)), std::move(json_request), std::move(req));
+        rjson::value json_request = co_await _json_parser.parse(std::move(content));
+        if (!json_request.IsObject()) {
+            co_return api_error::validation("Request content must be an object");
+        }
+        std::unique_ptr<audit::audit_info_alternator> audit_info;
+        std::exception_ptr ex = {};
+        executor::request_return_type ret;
+        try {
+            ret = co_await callback(_executor, client_state, trace_state, make_service_permit(std::move(units)), std::move(json_request), std::move(req), audit_info);
+        } catch (...) {
+            ex = std::current_exception();
+        }
+        if (audit_info) {
+            co_await audit::inspect(*audit_info, client_state, ex != nullptr);
+        }
+        if (ex) {
+            co_return coroutine::exception(std::move(ex));
+        }
+        co_return ret;
    };
    co_return co_await _sl_controller.with_user_service_level(user, std::ref(f));
 }
@@ -834,77 +849,77 @@ server::server(executor& exec, service::storage_proxy& proxy, gms::gossiper& gos
        , _pending_requests("alternator::server::pending_requests")
        , _timeout_config(_proxy.data_dictionary().get_config())
      , _callbacks{
-        {"CreateTable", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
-            return e.create_table(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
+        {"CreateTable", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
+            return e.create_table(client_state, std::move(trace_state), std::move(permit), std::move(json_request), audit_info);
        }},
-        {"DescribeTable", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
-            return e.describe_table(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
+        {"DescribeTable", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
+            return e.describe_table(client_state, std::move(trace_state), std::move(permit), std::move(json_request), audit_info);
        }},
-        {"DeleteTable", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
-            return e.delete_table(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
+        {"DeleteTable", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
+            return e.delete_table(client_state, std::move(trace_state), std::move(permit), std::move(json_request), audit_info);
        }},
-        {"UpdateTable", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
-            return e.update_table(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
+        {"UpdateTable", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
+            return e.update_table(client_state, std::move(trace_state), std::move(permit), std::move(json_request), audit_info);
        }},
-        {"PutItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
-            return e.put_item(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
+        {"PutItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
+            return e.put_item(client_state, std::move(trace_state), std::move(permit), std::move(json_request), audit_info);
        }},
-        {"UpdateItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
-            return e.update_item(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
+        {"UpdateItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
+            return e.update_item(client_state, std::move(trace_state), std::move(permit), std::move(json_request), audit_info);
        }},
-        {"GetItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
-            return e.get_item(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
+        {"GetItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
+            return e.get_item(client_state, std::move(trace_state), std::move(permit), std::move(json_request), audit_info);
        }},
-        {"DeleteItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
-            return e.delete_item(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
+        {"DeleteItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
+            return e.delete_item(client_state, std::move(trace_state), std::move(permit), std::move(json_request), audit_info);
        }},
-        {"ListTables", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
-            return e.list_tables(client_state, std::move(permit), std::move(json_request));
+        {"ListTables", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
+            return e.list_tables(client_state, std::move(permit), std::move(json_request), audit_info);
        }},
-        {"Scan", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
-            return e.scan(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
+        {"Scan", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
+            return e.scan(client_state, std::move(trace_state), std::move(permit), std::move(json_request), audit_info);
        }},
-        {"DescribeEndpoints", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
-            return e.describe_endpoints(client_state, std::move(permit), std::move(json_request), req->get_header("Host"));
+        {"DescribeEndpoints", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
+            return e.describe_endpoints(client_state, std::move(permit), std::move(json_request), req->get_header("Host"), audit_info);
        }},
-        {"BatchWriteItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
-            return e.batch_write_item(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
+        {"BatchWriteItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
+            return e.batch_write_item(client_state, std::move(trace_state), std::move(permit), std::move(json_request), audit_info);
        }},
-        {"BatchGetItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
-            return e.batch_get_item(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
+        {"BatchGetItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
+            return e.batch_get_item(client_state, std::move(trace_state), std::move(permit), std::move(json_request), audit_info);
        }},
-        {"Query", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
-            return e.query(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
+        {"Query", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
+            return e.query(client_state, std::move(trace_state), std::move(permit), std::move(json_request), audit_info);
        }},
-        {"TagResource", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
-            return e.tag_resource(client_state, std::move(permit), std::move(json_request));
+        {"TagResource", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
+            return e.tag_resource(client_state, std::move(permit), std::move(json_request), audit_info);
        }},
-        {"UntagResource", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
-            return e.untag_resource(client_state, std::move(permit), std::move(json_request));
+        {"UntagResource", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
+            return e.untag_resource(client_state, std::move(permit), std::move(json_request), audit_info);
        }},
-        {"ListTagsOfResource", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
-            return e.list_tags_of_resource(client_state, std::move(permit), std::move(json_request));
+        {"ListTagsOfResource", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
+            return e.list_tags_of_resource(client_state, std::move(permit), std::move(json_request), audit_info);
        }},
-        {"UpdateTimeToLive", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
-            return e.update_time_to_live(client_state, std::move(permit), std::move(json_request));
+        {"UpdateTimeToLive", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
+            return e.update_time_to_live(client_state, std::move(permit), std::move(json_request), audit_info);
        }},
-        {"DescribeTimeToLive", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
-            return e.describe_time_to_live(client_state, std::move(permit), std::move(json_request));
+        {"DescribeTimeToLive", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
+            return e.describe_time_to_live(client_state, std::move(permit), std::move(json_request), audit_info);
        }},
-        {"ListStreams", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
-            return e.list_streams(client_state, std::move(permit), std::move(json_request));
+        {"ListStreams", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
+            return e.list_streams(client_state, std::move(permit), std::move(json_request), audit_info);
        }},
-        {"DescribeStream", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
-            return e.describe_stream(client_state, std::move(permit), std::move(json_request));
+        {"DescribeStream", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
+            return e.describe_stream(client_state, std::move(permit), std::move(json_request), audit_info);
        }},
-        {"GetShardIterator", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
-            return e.get_shard_iterator(client_state, std::move(permit), std::move(json_request));
+        {"GetShardIterator", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
+            return e.get_shard_iterator(client_state, std::move(permit), std::move(json_request), audit_info);
        }},
-        {"GetRecords", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
-            return e.get_records(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
+        {"GetRecords", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
+            return e.get_records(client_state, std::move(trace_state), std::move(permit), std::move(json_request), audit_info);
        }},
-        {"DescribeContinuousBackups", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
-            return e.describe_continuous_backups(client_state, std::move(permit), std::move(json_request));
+        {"DescribeContinuousBackups", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
+            return e.describe_continuous_backups(client_state, std::move(permit), std::move(json_request), audit_info);
        }},
    } {
 }
--- a/alternator/server.hh
+++ b/alternator/server.hh
@@ -34,7 +34,7 @@ class server : public peering_sharded_service<server> {
    // DynamoDB also has the same limit set to 16 MB.
    static constexpr size_t request_content_length_limit = 16*MB;
    using alternator_callback = std::function<future<executor::request_return_type>(executor&, executor::client_state&,
-            tracing::trace_state_ptr, service_permit, rjson::value, std::unique_ptr<http::request>)>;
+            tracing::trace_state_ptr, service_permit, rjson::value, std::unique_ptr<http::request>, std::unique_ptr<audit::audit_info_alternator>&)>;
    using alternator_callbacks_map = std::unordered_map<std::string_view, alternator_callback>;

    httpd::http_server _http_server;
--- a/alternator/streams.cc
+++ b/alternator/streams.cc
@@ -7,6 +7,8 @@
 */

 #include <type_traits>
+#include <ranges>
+#include <generator>
 #include <boost/lexical_cast.hpp>
 #include <boost/io/ios_state.hpp>
 #include <boost/multiprecision/cpp_int.hpp>
@@ -24,12 +26,15 @@
 #include "cql3/selection/selection.hh"
 #include "cql3/result_set.hh"
 #include "cql3/column_identifier.hh"
+#include "replica/database.hh"
 #include "schema/schema_builder.hh"
 #include "service/storage_proxy.hh"
 #include "gms/feature.hh"
 #include "gms/feature_service.hh"

 #include "executor.hh"
+#include "streams.hh"
+#include "alternator/executor_util.hh"
 #include "data_dictionary/data_dictionary.hh"
 #include "utils/rjson.hh"

@@ -91,45 +96,117 @@ static sstring stream_label(const schema& log_schema) {
    return seastar::json::formatter::to_json(tm);
 }

-namespace alternator {
+// Debug printer for cdc::stream_id - used only for logging/debugging, not for
+// serialization or user-visible output. We print both the token (signed) and
+// the raw id bytes in hex (unsigned), as we use both.
+template <>
+struct fmt::formatter<cdc::stream_id> : fmt::formatter<string_view> {
+    template <typename FormatContext>
+    auto format(const cdc::stream_id &id, FormatContext& ctx) const {
+        fmt::format_to(ctx.out(), "{} ", id.token());

-// stream arn _has_ to be 37 or more characters long. ugh...
-// see https://docs.aws.amazon.com/amazondynamodb/latest/APIReference/API_streams_DescribeStream.html#API_streams_DescribeStream_RequestSyntax
+        for (auto b : id.to_bytes()) {
+            fmt::format_to(ctx.out(), "{:02x}", (unsigned char)b);
+        }
+        return ctx.out();
+    }
+};
+
+namespace alternator {
+// A stream ARN has a mandated format (see https://docs.aws.amazon.com/IAM/latest/UserGuide/reference-arns.html)
+// and we have to follow it, because Kinesis Client Library validates it.
+// NOTE: the table name held inside is the name of the CDC log table, not of the user table.
+class stream_arn {
+    std::string _arn;
+    size_t _table_name_offset, _table_name_size;
+    size_t _keyspace_name_offset, _keyspace_name_size;
+
+    void _initialize_offsets() {
+        auto parsed = parse_arn(_arn, "StreamArn", "stream", "/stream/");
+        _keyspace_name_offset = parsed.keyspace_name.data() - _arn.data();
+        _keyspace_name_size = parsed.keyspace_name.size();
+        _table_name_offset = parsed.table_name.data() - _arn.data();
+        _table_name_size = parsed.table_name.size();
+    }
+public:
+    // Wraps an existing ARN string, from which the keyspace and table names are extracted
+    stream_arn(std::string arn) : _arn(std::move(arn)) {
+        _initialize_offsets();
+    }
+    // Builds the ARN for a stream. NOTE: `s` must be the schema of a CDC log table, not of a base table, because that's what we encode in the ARN and return to users.
+    // `base_schema` is needed only for the creation time
+    stream_arn(schema_ptr s, schema_ptr base_schema) {
+        const auto created_secs = std::chrono::duration<double>(get_table_creation_time(*base_schema));
+        const auto created_at = std::chrono::system_clock::time_point{ std::chrono::duration_cast<std::chrono::system_clock::duration>(created_secs) };
+
+        // KCL checks for arn / aws / dynamodb and account-id being a number
+        _arn = fmt::format("arn:aws:dynamodb:us-east-1:000000000000:table/{}@{}/stream/{:%FT%T}", s->ks_name(), s->cf_name(), created_at);
+
+        _initialize_offsets();
+    }
+
+    // Accessors: the name views point into _arn
+    std::string_view unparsed() const { return _arn; }
+    std::string_view table_name() const { return unparsed().substr(_table_name_offset, _table_name_size); }
+    std::string_view keyspace_name() const { return unparsed().substr(_keyspace_name_offset, _keyspace_name_size); }
+    friend std::ostream& operator<<(std::ostream& os, const stream_arn& arn) {
+        return os << arn._arn;
+    }
+};
+
+// NOTE: the schema returned here is that of the CDC log table, not of the base table.
+static schema_ptr get_schema_from_arn(service::storage_proxy& proxy, const stream_arn& arn) {
+    const auto log_table = arn.table_name();
+    if (!cdc::is_log_name(log_table)) {
+        throw api_error::resource_not_found(fmt::format("{} as found in ARN {} is not a valid name for a CDC table", log_table, arn.unparsed()));
+    }
+    try {
+        return proxy.data_dictionary().find_schema(arn.keyspace_name(), log_table);
+    } catch (const data_dictionary::no_such_column_family&) {
+        throw api_error::resource_not_found(fmt::format("`{}` is not a valid StreamArn - table {} not found", arn.unparsed(), log_table));
+    }
+}
+
+// ShardId. Must be between 28 and 65 characters inclusive.
 // UUID is 36 bytes as string (including dashes). 
-// Prepend a version/type marker -> 37
-class stream_arn : public utils::UUID {
+// Prepend a version/type marker (`S`) -> 37
+class stream_shard_id : public utils::UUID {
 public:
    using UUID = utils::UUID;
    static constexpr char marker = 'S';

-    stream_arn() = default;
-    stream_arn(const UUID& uuid)
+    stream_shard_id() = default;
+    stream_shard_id(const UUID& uuid)
        : UUID(uuid)
    {}
-    stream_arn(const table_id& tid)
+    stream_shard_id(const table_id& tid)
        : UUID(tid.uuid())
    {}
-    stream_arn(std::string_view v)
+    stream_shard_id(std::string_view v)
        : UUID(v.substr(1))
    {
        if (v[0] != marker) {
            throw std::invalid_argument(std::string(v));
        }
    }
-    friend std::ostream& operator<<(std::ostream& os, const stream_arn& arn) {
+    friend std::ostream& operator<<(std::ostream& os, const stream_shard_id& arn) {
        const UUID& uuid = arn;
        return os << marker << uuid;
    }
-    friend std::istream& operator>>(std::istream& is, stream_arn& arn) {
+    friend std::istream& operator>>(std::istream& is, stream_shard_id& arn) {
        std::string s;
        is >> s;
-        arn = stream_arn(s);
+        arn = stream_shard_id(s);
        return is;
    }
 };

 } // namespace alternator

+template<typename ValueType>
+struct rapidjson::internal::TypeHelper<ValueType, alternator::stream_shard_id>
+    : public from_string_helper<ValueType, alternator::stream_shard_id>
+{};
 template<typename ValueType>
 struct rapidjson::internal::TypeHelper<ValueType, alternator::stream_arn>
    : public from_string_helper<ValueType, alternator::stream_arn>
@@ -137,11 +214,11 @@ struct rapidjson::internal::TypeHelper<ValueType, alternator::stream_arn>

 namespace alternator {

-future<alternator::executor::request_return_type> alternator::executor::list_streams(client_state& client_state, service_permit permit, rjson::value request) {
+future<alternator::executor::request_return_type> alternator::executor::list_streams(client_state& client_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
    _stats.api_operations.list_streams++;

    auto limit = rjson::get_opt<int>(request, "Limit").value_or(100);
-    auto streams_start = rjson::get_opt<stream_arn>(request, "ExclusiveStartStreamArn");
+    auto streams_start = rjson::get_opt<stream_shard_id>(request, "ExclusiveStartStreamArn");
    auto table = find_table(_proxy, request);
    auto db = _proxy.data_dictionary();

@@ -149,6 +226,11 @@ future<alternator::executor::request_return_type> alternator::executor::list_str
        throw api_error::validation("Limit must be 1 or more");
    }

+    // Audit the input table name (if specified), not the output table names.
+    maybe_audit(audit_info, audit::statement_category::QUERY,
+                table ? table->ks_name() : "", table ? table->cf_name() : "",
+                "ListStreams", request);
+
    std::vector<data_dictionary::table> cfs;

    if (table) {
@@ -189,26 +271,23 @@ future<alternator::executor::request_return_type> alternator::executor::list_str

    auto ret = rjson::empty_object();
    auto streams = rjson::empty_array();
-
-    std::optional<stream_arn> last;
+    std::optional<stream_shard_id> last;

    for (;limit > 0 && i != e; ++i) {
        auto s = i->schema();
        auto& ks_name = s->ks_name();
        auto& cf_name = s->cf_name();
-
        if (!is_alternator_keyspace(ks_name)) {
            continue;
        }
        if (cdc::is_log_for_some_table(db.real_database(), ks_name, cf_name)) {
            rjson::value new_entry = rjson::empty_object();
-
            last = i->schema()->id();
-            rjson::add(new_entry, "StreamArn", *last);
+            auto arn = stream_arn{ i->schema(), cdc::get_base_table(db.real_database(), *i->schema()) };
+            rjson::add(new_entry, "StreamArn", arn);
            rjson::add(new_entry, "StreamLabel", rjson::from_string(stream_label(*s)));
-            rjson::add(new_entry, "TableName", rjson::from_string(cdc::base_name(table_name(*s))));
+            rjson::add(new_entry, "TableName", rjson::from_string(cdc::base_name(s->cf_name())));
            rjson::push_back(streams, std::move(new_entry));
-
            --limit;
        }
    }
@@ -218,7 +297,6 @@ future<alternator::executor::request_return_type> alternator::executor::list_str
    if (last) {
        rjson::add(ret, "LastEvaluatedStreamArn", *last);
    }
-
    return make_ready_future<executor::request_return_type>(rjson::print(std::move(ret)));
 }

@@ -430,7 +508,7 @@ using namespace std::chrono_literals;
 // Dynamo docs says no data shall live longer than 24h.
 static constexpr auto dynamodb_streams_max_window = 24h;

-// find the parent shard in previous generation for the given child shard
+// find the parent Streams shard in previous generation for the given child Streams shard
 // takes care of wrap-around case in vnodes
 // prev_streams must be sorted by token
 const cdc::stream_id& find_parent_shard_in_previous_generation(db_clock::time_point prev_timestamp, const utils::chunked_vector<cdc::stream_id> &prev_streams, const cdc::stream_id &child) {
@@ -449,7 +527,305 @@ const cdc::stream_id& find_parent_shard_in_previous_generation(db_clock::time_po
    return *it;
 }

-future<executor::request_return_type> executor::describe_stream(client_state& client_state, service_permit permit, rjson::value request) {
+// Orders stream shard ids by their numeric token, i.e. the end token of the
+// token range belonging to the shard. This is the order used when processing
+// lists of shards internally - especially when looking for child shards -
+// because it makes algorithms like lower_bound() possible. It is NOT the
+// order shards are presented in our output; compare_lexicographically() is.
+static bool compare_by_token(const cdc::stream_id& id1, const cdc::stream_id& id2) {
+    const auto lhs_token = id1.token();
+    const auto rhs_token = id2.token();
+    return lhs_token < rhs_token;
+}
+
+// #7409 - output order of shards: lexicographical by the id's raw bytes. KCL depends on it.
+// The normal bytes compare is string_traits<int8_t>::compare, which puts bytes 0x8000
+// before 0x0000 - so the bytes are compared with an unsigned compare here instead.
+static bool compare_lexicographically(const cdc::stream_id& id1, const cdc::stream_id& id2) {
+    const auto order = compare_unsigned(id1.to_bytes(), id2.to_bytes());
+    return order < 0;
+}
+
+// Single range [lo1, end1) over items; the second range is set to [items.end(), items.end()).
+stream_id_range::stream_id_range(
+        utils::chunked_vector<cdc::stream_id> &items,
+        utils::chunked_vector<cdc::stream_id>::iterator lo1,
+        utils::chunked_vector<cdc::stream_id>::iterator end1) : stream_id_range(items, lo1, end1, items.end(), items.end()) {}
+// Two ranges [lo1, end1) and [lo2, end2) over items; when lo2 != items.end() the first must
+// start at items.begin() and the second must finish at items.end(), and always end1 <= lo2.
+stream_id_range::stream_id_range(
+        utils::chunked_vector<cdc::stream_id> &items,
+        utils::chunked_vector<cdc::stream_id>::iterator lo1,
+        utils::chunked_vector<cdc::stream_id>::iterator end1,
+        utils::chunked_vector<cdc::stream_id>::iterator lo2,
+        utils::chunked_vector<cdc::stream_id>::iterator end2)
+    : _lo1(lo1), _end1(end1), _lo2(lo2), _end2(end2)
+{
+    // the begin()/end() anchoring is only verified when lo2 does not sit at items.end()
+    const bool second_range_set = _lo2 != items.end();
+    if (second_range_set && _lo1 != items.begin()) {
+        on_internal_error(slogger, fmt::format("Invalid stream_id_range: _lo1 != items.begin()"));
+    }
+    if (second_range_set && _end2 != items.end()) {
+        on_internal_error(slogger, fmt::format("Invalid stream_id_range: _end2 != items.end()"));
+    }
+    if (_end1 > _lo2) {
+        on_internal_error(slogger, fmt::format("Invalid stream_id_range: _end1 > _lo2"));
+    }
+}
+
+void stream_id_range::set_starting_position(const cdc::stream_id &update_to) {
+    _skip_to = &update_to; // only the address is kept (no copy) - it is dereferenced later, in prepare_for_iterating()
+}
+
+void stream_id_range::prepare_for_iterating()
+{
+    if (_prepared) {
+        return;
+    }
+    _prepared = true;
+    if (_end1 > _lo2) {
+        on_internal_error(slogger, fmt::format("Invalid stream_id_range: _end1 > _lo2"));
+    }
+
+    // A wrap-around range is kept as two pieces, [_lo1, _end1) and [_lo2, _end2). Compact them
+    // into one contiguous run by swapping the second piece's elements in right behind the first.
+    // With a single range the second piece is empty and this loop does nothing.
+    auto merged_end = _end1;
+    for (auto from = _lo2; from != _end2; ++from) {
+        std::swap(*merged_end, *from);
+        ++merged_end;
+    }
+    std::sort(_lo1, merged_end, compare_lexicographically);
+
+    // Apply the starting position update, if one was set, only now that the run is merged and
+    // sorted. As a sanity check the element found must carry the same token; else we just log.
+    if (_skip_to) {
+        const auto found = std::lower_bound(_lo1, merged_end, *_skip_to, compare_lexicographically);
+        const bool missing = found == merged_end || found->token() != _skip_to->token();
+        if (!missing) {
+            _lo1 = std::next(found);
+        } else {
+            slogger.info("Could not find starting position update shard id {}", *_skip_to);
+        }
+    }
+    _end1 = merged_end;
+}
+
+// Returns a `stream_id_range` that iterates over the children Streams shards of the Streams shard `parent`.
+// A child Streams shard is a Streams shard that touches the token range previously covered by `parent`.
+// A Streams shard carries a token that represents the end of its token range (inclusive);
+// the beginning of the token range is defined by the previous Streams shard's token + 1.
+// NOTE: With vnodes, ranges of Streams' shards wrap, while with tablets the biggest allowed token number is always a range end.
+// NOTE: both streams generation are guaranteed to cover whole range and be non-empty
+// NOTE: it's possible to get more than one stream shard with the same token value (thus some of those stream shards will be empty) -
+// for simplicity we will emit empty stream shards as well.
+//
+// To find children we first find the parent Streams shard in parent_streams by its token, then the previous
+// Streams shard in parent_streams - together they determine the range - and then the children in current_streams.
+// Throws api_error::validation if `parent`'s token is not found in parent_streams (when both lists have > 1 element).
+// NOTE: function sorts / reorders current_streams
+// NOTE: function assumes parent_streams is sorted by compare_by_token and it doesn't modify it
+stream_id_range find_children_range_from_parent_token(
+    const utils::chunked_vector<cdc::stream_id>& parent_streams,
+    utils::chunked_vector<cdc::stream_id>& current_streams,
+    cdc::stream_id parent,
+    bool uses_tablets
+) {
+    // sanity checks for required preconditions
+    if (parent_streams.empty()) {
+        on_internal_error(slogger, fmt::format("parent_streams is empty") );
+    }
+    if (current_streams.empty()) {
+        on_internal_error(slogger, fmt::format("current_streams is empty") );
+    }
+
+    // first let's cover obvious cases
+    // if we have only one parent Streams shard, then all children belong to it
+    if (parent_streams.size() == 1) {
+        return stream_id_range{ current_streams, current_streams.begin(), current_streams.end() };
+    }
+    // if we have only one current Streams shard, then every parent maps to it
+    if (current_streams.size() == 1) {
+        return stream_id_range{ current_streams, current_streams.begin(), current_streams.end() };
+    }
+
+    // find parent Streams shard in parent_streams, it must be present and have exact match
+    auto parent_shard_end_it = std::lower_bound(parent_streams.begin(), parent_streams.end(), parent.token(), [](const cdc::stream_id& id, const dht::token& t) {
+        return id.token() < t;
+    });
+    if (parent_shard_end_it == parent_streams.end() || parent_shard_end_it->token() != parent.token()) {
+        throw api_error::validation(fmt::format("Invalid ShardFilter.ShardId value - shard {} not found", parent));
+    }
+
+    std::sort(current_streams.begin(), current_streams.end(), compare_by_token);
+
+    utils::chunked_vector<cdc::stream_id>::iterator child_shard_begin_it;
+    // upper_bound gives us the first element with token strictly greater than
+    // parent's end token - this is the correct one-past-end for an inclusive
+    // boundary and handles duplicate tokens (multiple children sharing a token)
+    auto child_shard_end_it = std::upper_bound(current_streams.begin(), current_streams.end(), parent_shard_end_it->token(), [](const dht::token& t, const cdc::stream_id& id) {
+            return t < id.token();
+    });
+
+    if (uses_tablets) {
+        // tablets version - tablets don't wrap around and last token is always present
+        // let's assume we've parent (first line) and child generation (second line):
+        // NOTE: token space doesn't wrap around - instead we have a guarantee that last token
+        // will be present as one of the shards
+        // P=|    1    2    3    4|
+        // C=| a  b    c       d e|
+        // we want to find children for each token from parent:
+        // 1 -> a,b
+        // 2 -> c
+        // 3 -> d
+        // 4 -> d, e
+        // first we find token in P that is end of range of parent - parent_shard_end_it
+        // - if parent_shard_end_it - 1 exists
+        //   - we take it as parent_shard_begin_it
+        //   - find the first child with token > parent_shard_begin_it and set it to child_shard_begin_it
+        // - else previous one to parent_shard_end_it does not exist
+        //   - set child_shard_begin_it = C.begin()
+        // - find the first child with token > parent_shard_end_it and set it to child_shard_end_it
+        // - range [child_shard_begin_it, child_shard_end_it) represents children
+
+        // When the parent's end token is not directly present in the children
+        // (merge scenario: several parent shards merged into fewer children),
+        // the child whose range absorbs the parent's end is the first child
+        // with token > parent_end_token.  upper_bound already points there,
+        // so we advance past it to include it in the [begin, end) range.
+        if (child_shard_end_it == current_streams.begin() || std::prev(child_shard_end_it)->token() != parent_shard_end_it->token()) {
+            if (child_shard_end_it == current_streams.end()) {
+                on_internal_error(slogger, fmt::format("parent end token not present in children tokens and no child with greater token exists, for parent shard id {}, got parent shards [{}] and children shards [{}]",
+                    parent, fmt::join(parent_streams, "; "), fmt::join(current_streams, "; ")));
+            }
+            ++child_shard_end_it;
+        }
+
+        // end of parent token is also first token in parent streams - it means beginning of the parent's range
+        // is the beginning of the token space - this means first child stream will be start of the children range
+        if (parent_shard_end_it == parent_streams.begin()) {
+            child_shard_begin_it = current_streams.begin();
+        } else {
+            // normal case - we have previous parent Streams shard that determines beginning of the range (exclusive)
+            // upper_bound skips past all children at the previous parent's token (including duplicates)
+            auto parent_shard_begin_it = std::prev(parent_shard_end_it);
+            child_shard_begin_it = std::upper_bound(current_streams.begin(), current_streams.end(), parent_shard_begin_it->token(), [](const dht::token& t, const cdc::stream_id& id) {
+                return t < id.token();
+            });
+        }
+
+        // simple range
+        return stream_id_range{ current_streams, child_shard_begin_it, child_shard_end_it };
+    } else {
+        // vnodes version - vnodes wrap around
+        // wrapping around makes the whole algorithm extremely confusing, because we wrap around on two levels,
+        // both parent Streams shard might wrap around and children range might wrap around as well
+
+        // helper function to find a range in current_streams based on range from parent_streams, but without wrap around
+        // if lo is not set, it means start from beginning of current_streams
+        // if end is not set, it means go until end of current_streams
+        auto find_range_in_children = [&](std::optional<utils::chunked_vector<cdc::stream_id>::const_iterator> lo, std::optional<utils::chunked_vector<cdc::stream_id>::const_iterator> end) -> std::pair<utils::chunked_vector<cdc::stream_id>::iterator, utils::chunked_vector<cdc::stream_id>::iterator> {
+            utils::chunked_vector<cdc::stream_id>::iterator res_lo, res_end;
+            if (!lo) {
+                // beginning of the range
+                res_lo = current_streams.begin();
+            } else {
+                // we use upper_bound as beginning of the range is exclusive
+                res_lo = std::upper_bound(current_streams.begin(), current_streams.end(), (*lo)->token(), [](const dht::token& t, const cdc::stream_id& id) {
+                    return t < id.token();
+                });
+            }
+            if (!end) {
+                // end of the range
+                res_end = current_streams.end();
+            } else {
+                // end of the range is inclusive, so we use upper_bound to find the first element
+                // with token strictly greater than the end token - this correctly handles the case
+                // where multiple children share the same token (e.g. small vnodes where several
+                // shards fall back to the vnode-end token)
+                res_end = std::upper_bound(current_streams.begin(), current_streams.end(), (*end)->token(), [](const dht::token& t, const cdc::stream_id& id) {
+                    return t < id.token();
+                });
+                // When the parent's end token is not directly present in the
+                // children (merge scenario), the child whose range absorbs the
+                // parent's end is at res_end.  Advance past it so that the
+                // half-open range [res_lo, res_end) includes it.
+                if (res_end != current_streams.end() &&
+                        (res_end == current_streams.begin() || std::prev(res_end)->token() != (*end)->token())) {
+                    ++res_end;
+                }
+            }
+            return { res_lo, res_end };
+        };
+        auto parent_shard_begin_it = parent_shard_end_it;
+        if (parent_shard_begin_it == parent_streams.begin()) {
+            // end of the parent Streams shard is also first token in parent streams - it means wrap around case for parent
+            // beginning of the parent's range is the last token in the parent streams
+            // for example:
+            // P=|         0 10    |
+            // C=| -20 -10         |
+            // searching for parent Streams shard at 0 will get us here - end of the parent is the first parent Streams shard
+            // so beginning of the parent's range is the last parent Streams shard (10)
+            parent_shard_begin_it = std::prev(parent_streams.end());
+
+            // we find two unwrapped ranges here - from beginning of current_streams to the end of the parent's range
+            // (end is inclusive) - in our example it's (-inf, 0]
+            auto [ lo1, end1 ] = find_range_in_children(std::nullopt, parent_shard_end_it);
+            // and from the beginning of the parent's range (exclusive) to the end of current_streams
+            // our example is (10, +inf)
+            auto [ lo2, end2 ] = find_range_in_children(parent_shard_begin_it, std::nullopt);
+
+            // in rare cases those two ranges might overlap - so we check and merge if needed
+            // for example:
+            // P=|     -30 -20      |
+            // C=| -40          -10 |
+            // searching for parent Streams shard at -30 will get us here - end of the parent is -30, beginning is -20
+            // first search will give us (-inf, +inf) with end1 pointing to current_streams.end() (the range needs to
+            // include the -10 position, so the iterator points one past it); second search will give us [-10, +inf)
+            // with lo2 pointing to current_streams[1], which is less than end1 - so we need to merge those two ranges
+            if (lo2 < end1) {
+                if (lo1 > lo2 || end1 > end2) {
+                    on_internal_error(slogger, fmt::format("Invalid overlapping children ranges: lo1 > lo2 or end1 > end2"));
+                }
+                end1 = end2;
+                lo2 = end2 = current_streams.end();
+            }
+            return stream_id_range{ current_streams, lo1, end1, lo2, end2 };
+        } else {
+            // simpler case - parent doesn't wrap around and we have both begin and end in normal order
+            // we search for single unwrapped range and adjust later if needed
+            --parent_shard_begin_it;
+            auto [ lo1, end1 ] = find_range_in_children(parent_shard_begin_it, parent_shard_end_it);
+            auto lo2 = current_streams.end();
+            auto end2 = current_streams.end();
+
+            // it's possible for simple case to still wrap around, when parent range lies after all children Streams shards
+            // for example:
+            // P=|         0 10    |
+            // C=| -20 -10         |
+            // when searching for parent shard at 10, we get parent range (0, 10]
+            // unwrapped search will produce empty range and miss -20 child Streams shard, which is actually
+            // owner of (0, 10] range (and is also a first Streams shard in current generation)
+            // note, that searching for 0 parent will give correct result, but only because the algorithm in that case
+            // detects the wrap around case and takes the other branch of the if above
+            if (parent_shard_end_it->token() > current_streams.back().token() && lo1 != current_streams.begin()) {
+                // wrap around case - children at the beginning of the sorted array
+                // wrap around the ring and cover the parent's range.  Include all
+                // children sharing the first token (duplicate tokens are possible
+                // for small vnodes where multiple shards fall back to the same token)
+                end2 = lo2 = current_streams.begin();
+                while(end2 != current_streams.end() && end2->token() == current_streams.front().token()) {
+                    ++end2;
+                }
+                std::swap(lo1, lo2);
+                std::swap(end1, end2);
+            }
+            return stream_id_range{ current_streams, lo1, end1, lo2, end2 };
+        }
+    }
+}
+
+future<executor::request_return_type> executor::describe_stream(client_state& client_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
    _stats.api_operations.describe_stream++;

    auto limit = rjson::get_opt<int>(request, "Limit").value_or(100); // according to spec
@@ -459,12 +835,11 @@ future<executor::request_return_type> executor::describe_stream(client_state& cl
    // I.e. unparsable arn -> error. 
    auto stream_arn = rjson::get<alternator::stream_arn>(request, "StreamArn");

-    schema_ptr schema, bs;
+    schema_ptr bs;
    auto db = _proxy.data_dictionary();
+    auto schema = get_schema_from_arn(_proxy, stream_arn);

    try {
-        auto cf = db.find_column_family(table_id(stream_arn));
-        schema = cf.schema();
        bs = cdc::get_base_table(db.real_database(), *schema);
    } catch (...) {        
    }
@@ -472,6 +847,12 @@ future<executor::request_return_type> executor::describe_stream(client_state& cl
    if (!schema || !bs || !is_alternator_keyspace(schema->ks_name())) {
        throw api_error::resource_not_found("Invalid StreamArn");
    }
+    auto normal_token_owners = _proxy.get_token_metadata_ptr()->count_normal_token_owners();
+
+    // _sdks.cdc_get_versioned_streams() uses quorum_if_many() underneath, which uses CL=QUORUM for many token owners and CL=ONE otherwise.
+    auto describe_cl = (normal_token_owners > 1) ? db::consistency_level::QUORUM : db::consistency_level::ONE;
+    maybe_audit(audit_info, audit::statement_category::QUERY, schema->ks_name(),
+                bs->cf_name() + "|" + schema->cf_name(), "DescribeStream", request, describe_cl);

    if (limit < 1) {
        throw api_error::validation("Limit must be 1 or more");
@@ -496,6 +877,8 @@ future<executor::request_return_type> executor::describe_stream(client_state& cl
        } else {
            status = "ENABLED";
        }
+    } else if (opts.enable_requested()) {
+        status = "ENABLING";
    }

    auto ttl = std::chrono::seconds(opts.ttl());
@@ -504,9 +887,9 @@ future<executor::request_return_type> executor::describe_stream(client_state& cl

    stream_view_type type = cdc_options_to_steam_view_type(opts);

-    rjson::add(stream_desc, "StreamArn", alternator::stream_arn(schema->id()));
+    rjson::add(stream_desc, "StreamArn", stream_arn);
    rjson::add(stream_desc, "StreamViewType", type);
-    rjson::add(stream_desc, "TableName", rjson::from_string(table_name(*bs)));
+    rjson::add(stream_desc, "TableName", rjson::from_string(bs->cf_name()));

    describe_key_schema(stream_desc, *bs);

@@ -518,13 +901,48 @@ future<executor::request_return_type> executor::describe_stream(client_state& cl
    // TODO: label
    // TODO: creation time

-    auto normal_token_owners = _proxy.get_token_metadata_ptr()->count_normal_token_owners();
+    std::map<db_clock::time_point, cdc::streams_version> topologies;

    // filter out cdc generations older than the table or now() - cdc::ttl (typically dynamodb_streams_max_window - 24h)
-    auto low_ts = std::max(as_timepoint(schema->id()), db_clock::now() - ttl);
+    if (schema->table().uses_tablets()) {
+        // We can't use table creation time here, as tablets might report a
+        // generation timestamp just before table creation. This is safe
+        // because CDC generations are per-table and cannot pre-date the
+        // table, so expanding the window won't pull in unrelated data.
+        auto low_ts = db_clock::now() - ttl;
+        topologies = co_await _system_keyspace.read_cdc_for_tablets_versioned_streams(bs->ks_name(), bs->cf_name(), low_ts);
+    } else {
+        auto normal_token_owners = _proxy.get_token_metadata_ptr()->count_normal_token_owners();
+        auto low_ts = std::max(as_timepoint(schema->id()), db_clock::now() - ttl);
+        topologies = co_await _sdks.cdc_get_versioned_streams(low_ts, { normal_token_owners });
+    }
+
+    const auto e = topologies.end();
+    std::optional<shard_id> shard_filter;
+
+    if (const rjson::value *shard_filter_obj = rjson::find(request, "ShardFilter")) {
+        if (!shard_filter_obj->IsObject()) {
+            throw api_error::validation("Invalid ShardFilter value - must be object");
+        }
+        std::string type;
+        try {
+            type = rjson::get<std::string>(*shard_filter_obj, "Type");
+        } catch (...) {
+            throw api_error::validation("Invalid ShardFilter.Type value - must be string `CHILD_SHARDS`");
+        }
+        if (type != "CHILD_SHARDS") {
+            throw api_error::validation("Invalid ShardFilter.Type value - must be string `CHILD_SHARDS`");
+        }
+        try {
+            shard_filter = rjson::get<shard_id>(*shard_filter_obj, "ShardId");
+        } catch (const std::exception &e) {
+            throw api_error::validation(fmt::format("Invalid ShardFilter.ShardId value - not a valid ShardId: {}", e.what()));
+        }
+        if (topologies.find(shard_filter->time) == topologies.end()) {
+            throw api_error::validation(fmt::format("Invalid ShardFilter.ShardId value - corresponding generation not found: {}", shard_filter->id));
+        }
+    }

-    std::map<db_clock::time_point, cdc::streams_version> topologies = co_await _sdks.cdc_get_versioned_streams(low_ts, { normal_token_owners });
-    auto e = topologies.end();
    auto prev = e;
    auto shards = rjson::empty_array();

@@ -536,25 +954,6 @@ future<executor::request_return_type> executor::describe_stream(client_state& cl
        i = topologies.find(shard_start->time);
    }

-    // for parent-child stuff we need id:s to be sorted by token
-    // (see explanation above) since we want to find closest
-    // token boundary when determining parent.
-    // #7346 - we processed and searched children/parents in
-    // stored order, which is not necessarily token order,
-    // so the finding of "closest" token boundary (using upper bound)
-    // could give somewhat weird results.
-    static auto token_cmp = [](const cdc::stream_id& id1, const cdc::stream_id& id2) {
-        return id1.token() < id2.token();
-    };
-
-    // #7409 - shards must be returned in lexicographical order,
-    // normal bytes compare is string_traits<int8_t>::compare.
-    // thus bytes 0x8000 is less than 0x0000. By doing unsigned
-    // compare instead we inadvertently will sort in string lexical.
-    static auto id_cmp = [](const cdc::stream_id& id1, const cdc::stream_id& id2) {
-        return compare_unsigned(id1.to_bytes(), id2.to_bytes()) < 0;
-    };
-
    // need a prev even if we are skipping stuff
    if (i != topologies.begin()) {
        prev = std::prev(i);
@@ -563,24 +962,18 @@ future<executor::request_return_type> executor::describe_stream(client_state& cl
    for (; limit > 0 && i != e; prev = i, ++i) {
        auto& [ts, sv] = *i;

+        if (shard_filter && (prev == e || prev->first != shard_filter->time)) {
+            shard_start = std::nullopt;
+            continue;
+        }
        last = std::nullopt;

-        auto lo = sv.streams.begin();
-        auto end = sv.streams.end();
-
        // #7409 - shards must be returned in lexicographical order,
-        std::sort(lo, end, id_cmp);
-
-        if (shard_start) {
-            // find next shard position
-            lo = std::upper_bound(lo, end, shard_start->id, id_cmp);
-            shard_start = std::nullopt;
-        }
-
-        if (lo != end && prev != e) {
+        std::sort(sv.streams.begin(), sv.streams.end(), compare_lexicographically);
+        if (prev != e) {
            // We want older stuff sorted in token order so we can find matching
-            // token range when determining parent shard.
-            std::stable_sort(prev->second.streams.begin(), prev->second.streams.end(), token_cmp);
+            // token range when determining parent Streams shard.
+            std::stable_sort(prev->second.streams.begin(), prev->second.streams.end(), compare_by_token);
        }

        auto expired = [&]() -> std::optional<db_clock::time_point> {
@@ -593,9 +986,29 @@ future<executor::request_return_type> executor::describe_stream(client_state& cl
            return j->first + confidence_interval(db);
        }();

-        while (lo != end) {
-            auto& id = *lo++;
+        std::optional<stream_id_range> shard_range;

+        if (shard_filter) {
+            // sanity check - we should never get here as there is if above (`shard_filter && prev == e` => `continue`)
+            if (prev == e) {
+                on_internal_error(slogger, fmt::format("Could not find parent generation for shard id {}, got generations [{}]", shard_filter->id, fmt::join(topologies | std::ranges::views::keys, "; ")));
+            }
+
+            const bool uses_tablets = schema->table().uses_tablets();
+            shard_range = find_children_range_from_parent_token(
+                prev->second.streams,
+                i->second.streams,
+                shard_filter->id,
+                uses_tablets
+            );
+        } else {
+            shard_range = stream_id_range{ i->second.streams, i->second.streams.begin(), i->second.streams.end() };
+        }
+        if (shard_start) {
+            shard_range->set_starting_position(shard_start->id);
+        }
+        shard_range->prepare_for_iterating();
+        for(const auto &id : *shard_range) {
            auto shard = rjson::empty_object();

            if (prev != e) {
@@ -620,6 +1033,7 @@ future<executor::request_return_type> executor::describe_stream(client_state& cl

            last = std::nullopt;
        }
+        shard_start = std::nullopt;
    }

    if (last) {
@@ -720,7 +1134,7 @@ struct rapidjson::internal::TypeHelper<ValueType, alternator::shard_iterator_typ

 namespace alternator {

-future<executor::request_return_type> executor::get_shard_iterator(client_state& client_state, service_permit permit, rjson::value request) {
+future<executor::request_return_type> executor::get_shard_iterator(client_state& client_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
    _stats.api_operations.get_shard_iterator++;

    auto type = rjson::get<shard_iterator_type>(request, "ShardIteratorType");
@@ -736,18 +1150,22 @@ future<executor::request_return_type> executor::get_shard_iterator(client_state&
    auto stream_arn = rjson::get<alternator::stream_arn>(request, "StreamArn");
    auto db = _proxy.data_dictionary();

-    schema_ptr schema = nullptr;
    std::optional<shard_id> sid;
-
+    auto schema = get_schema_from_arn(_proxy, stream_arn);
+    schema_ptr base_schema = nullptr;
    try {
-        auto cf = db.find_column_family(table_id(stream_arn));
-        schema = cf.schema();
+        base_schema = cdc::get_base_table(db.real_database(), *schema);
        sid = rjson::get<shard_id>(request, "ShardId");
    } catch (...) {
    }
-    if (!schema || !cdc::get_base_table(db.real_database(), *schema) || !is_alternator_keyspace(schema->ks_name())) {
+    if (!schema || !base_schema || !is_alternator_keyspace(schema->ks_name())) {
        throw api_error::resource_not_found("Invalid StreamArn");
    }
+
+    // Uses only node-local context (the metadata) to generate response
+    maybe_audit(audit_info, audit::statement_category::QUERY, schema->ks_name(),
+                base_schema->cf_name() + "|" + schema->cf_name(), "GetShardIterator", request);
+
    if (!sid) {
        throw api_error::resource_not_found("Invalid ShardId");
    }
@@ -776,11 +1194,10 @@ future<executor::request_return_type> executor::get_shard_iterator(client_state&
            break;
    }

-    shard_iterator iter(stream_arn, *sid, threshold, inclusive_of_threshold);
+    shard_iterator iter(schema->id().uuid(), *sid, threshold, inclusive_of_threshold);

    auto ret = rjson::empty_object();
    rjson::add(ret, "ShardIterator", iter);
-
    return make_ready_future<executor::request_return_type>(rjson::print(std::move(ret)));
 }

@@ -823,7 +1240,7 @@ namespace alternator {
        };
    }

-future<executor::request_return_type> executor::get_records(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request) {
+future<executor::request_return_type> executor::get_records(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
    _stats.api_operations.get_records++;
    auto start_time = std::chrono::steady_clock::now();

@@ -849,16 +1266,17 @@ future<executor::request_return_type> executor::get_records(client_state& client
    if (!schema || !base || !is_alternator_keyspace(schema->ks_name())) {
        co_return api_error::resource_not_found(fmt::to_string(iter.table));
    }
+    db::consistency_level cl = db::consistency_level::LOCAL_QUORUM;
+
+    maybe_audit(audit_info, audit::statement_category::QUERY, schema->ks_name(),
+                base->cf_name() + "|" + schema->cf_name(), "GetRecords", request, cl);

    tracing::add_table_name(trace_state, schema->ks_name(), schema->cf_name());

    co_await verify_permission(_enforce_authorization, _warn_authorization, client_state, schema, auth::permission::SELECT, _stats);

-    db::consistency_level cl = db::consistency_level::LOCAL_QUORUM;
    partition_key pk = iter.shard.id.to_partition_key(*schema);
-
    dht::partition_range_vector partition_ranges{ dht::partition_range::make_singular(dht::decorate_key(*schema, pk)) };
-
    auto high_ts = db_clock::now() - confidence_interval(db);
    auto high_uuid = utils::UUID_gen::min_time_UUID(high_ts.time_since_epoch());
    auto lo = clustering_key_prefix::from_exploded(*schema, { iter.threshold.serialize() });
@@ -938,17 +1356,17 @@ future<executor::request_return_type> executor::get_records(client_state& client

    auto& metadata = result_set->get_metadata();

-    auto op_index = std::distance(metadata.get_names().begin(), 
+    auto op_index = std::distance(metadata.get_names().begin(),
        std::find_if(metadata.get_names().begin(), metadata.get_names().end(), [](const lw_shared_ptr<cql3::column_specification>& cdef) {
            return cdef->name->name() == op_column_name;
        })
    );
-    auto ts_index = std::distance(metadata.get_names().begin(), 
+    auto ts_index = std::distance(metadata.get_names().begin(),
        std::find_if(metadata.get_names().begin(), metadata.get_names().end(), [](const lw_shared_ptr<cql3::column_specification>& cdef) {
            return cdef->name->name() == timestamp_column_name;
        })
    );
-    auto eor_index = std::distance(metadata.get_names().begin(), 
+    auto eor_index = std::distance(metadata.get_names().begin(),
        std::find_if(metadata.get_names().begin(), metadata.get_names().end(), [](const lw_shared_ptr<cql3::column_specification>& cdef) {
            return cdef->name->name() == eor_column_name;
        })
@@ -993,19 +1411,19 @@ future<executor::request_return_type> executor::get_records(client_state& client
        /**
         * We merge rows with same timestamp into a single event.
         * This is pretty much needed, because a CDC row typically
-         * encodes ~half the info of an alternator write. 
-         * 
+         * encodes ~half the info of an alternator write.
+         *
         * A big, big downside to how alternator records are written
         * (i.e. CQL), is that the distinction between INSERT and UPDATE
-         * is somewhat lost/unmappable to actual eventName. 
+         * is somewhat lost/unmappable to actual eventName.
         * A write (currently) always looks like an insert+modify
-         * regardless whether we wrote existing record or not. 
-         * 
-         * Maybe RMW ops could be done slightly differently so 
+         * regardless whether we wrote existing record or not.
+         *
+         * Maybe RMW ops could be done slightly differently so
         * we can distinguish them here...
-         * 
+         *
         * For now, all writes will become MODIFY.
-         * 
+         *
         * Note: we do not check the current pre/post
         * flags on CDC log, instead we use data to 
         * drive what is returned. This is (afaict)
@@ -1084,9 +1502,15 @@ future<executor::request_return_type> executor::get_records(client_state& client
    }

    // ugh. figure out if we are and end-of-shard
-    auto normal_token_owners = _proxy.get_token_metadata_ptr()->count_normal_token_owners();

-    db_clock::time_point ts = co_await _sdks.cdc_current_generation_timestamp({ normal_token_owners });
+    db_clock::time_point ts;
+    if (schema->table().uses_tablets()) {
+        ts = co_await _system_keyspace.read_cdc_for_tablets_current_generation_timestamp(base->ks_name(), base->cf_name());
+    } else {
+        auto normal_token_owners = _proxy.get_token_metadata_ptr()->count_normal_token_owners();
+        ts = co_await _sdks.cdc_current_generation_timestamp({ normal_token_owners });
+    }
+
    auto& shard = iter.shard;

    if (shard.time < ts && ts < high_ts) {
@@ -1122,6 +1546,7 @@ bool executor::add_stream_options(const rjson::value& stream_specification, sche

        cdc::options opts;
        opts.enabled(true);
+        opts.tablet_merge_blocked(true);
        // cdc::delta_mode is ignored by Alternator, so aim for the least overhead.
        opts.set_delta_mode(cdc::delta_mode::keys);
        opts.ttl(std::chrono::duration_cast<std::chrono::seconds>(dynamodb_streams_max_window).count());
@@ -1156,24 +1581,30 @@ void executor::supplement_table_stream_info(rjson::value& descr, const schema& s
    if (opts.enabled()) {
        auto db = sp.data_dictionary();
        auto cf = db.find_table(schema.ks_name(), cdc::log_name(schema.cf_name()));
-        stream_arn arn(cf.schema()->id());
+        stream_arn arn(cf.schema(), cdc::get_base_table(db.real_database(), *cf.schema()));
        rjson::add(descr, "LatestStreamArn", arn);
        rjson::add(descr, "LatestStreamLabel", rjson::from_string(stream_label(*cf.schema())));
-
-        auto stream_desc = rjson::empty_object();
-        rjson::add(stream_desc, "StreamEnabled", true);
-
-        auto mode = stream_view_type::KEYS_ONLY;
-        if (opts.preimage() && opts.postimage()) {
-            mode = stream_view_type::NEW_AND_OLD_IMAGES;
-        } else if (opts.preimage()) {
-            mode = stream_view_type::OLD_IMAGE;
-        } else if (opts.postimage()) {
-            mode = stream_view_type::NEW_IMAGE;
-        }
-        rjson::add(stream_desc, "StreamViewType", mode);
-        rjson::add(descr, "StreamSpecification", std::move(stream_desc));
+    } else if (!opts.enable_requested()) {
+        return;
    }
+    // For both enabled() and enable_requested():
+    // DynamoDB returns StreamEnabled=true in StreamSpecification even when
+    // the stream status is ENABLING (not yet fully active). We mirror this
+    // behavior: enable_requested means the user asked for streams but CDC
+    // is not yet finalized, so we still report StreamEnabled=true.
+    auto stream_desc = rjson::empty_object();
+    rjson::add(stream_desc, "StreamEnabled", true);
+
+    auto mode = stream_view_type::KEYS_ONLY;
+    if (opts.preimage() && opts.postimage()) {
+        mode = stream_view_type::NEW_AND_OLD_IMAGES;
+    } else if (opts.preimage()) {
+        mode = stream_view_type::OLD_IMAGE;
+    } else if (opts.postimage()) {
+        mode = stream_view_type::NEW_IMAGE;
+    }
+    rjson::add(stream_desc, "StreamViewType", mode);
+    rjson::add(descr, "StreamSpecification", std::move(stream_desc));
 }

 } // namespace alternator
--- a/alternator/streams.hh
+++ b/alternator/streams.hh
@@ -0,0 +1,62 @@
+/*
+ * Copyright 2026-present ScyllaDB
+ */
+
+/*
+ * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
+ */
+
+#pragma once
+
+#include "utils/chunked_vector.hh"
+#include "cdc/generation.hh"
+#include <generator>
+
+namespace cdc {
+    class stream_id;
+}
+
+namespace alternator {
+    class stream_id_range {
+        // Helper for manipulating a (possibly wrapped-around) range of stream_ids.
+        // It holds one or two ranges: [lo1, end1) and [lo2, end2).
+        // If the range does not wrap around, then lo2 == end2 == items.end().
+        // If the range wraps around, then
+        // `lo1 == items.begin() and end2 == items.end()` must hold.
+        // The object does not own `items`, but it does modify it: it reorders the
+        // elements (so that both ranges become adjacent) and sorts them by unsigned comparison.
+        // Usage: construct the object with the required ranges, then call
+        // `prepare_for_iterating()` (which reorders `items` as needed) before using begin() / end().
+        // Note: `items` is only reordered - no elements are added or removed.
+        // The `items` array must stay intact for as long as iteration is in progress.
+        utils::chunked_vector<cdc::stream_id>::iterator _lo1 = {}, _end1 = {}, _lo2 = {}, _end2 = {};
+        const cdc::stream_id* _skip_to = nullptr;
+        bool _prepared = false;
+    public:
+        stream_id_range(
+                utils::chunked_vector<cdc::stream_id> &items,
+                utils::chunked_vector<cdc::stream_id>::iterator lo1,
+                utils::chunked_vector<cdc::stream_id>::iterator end1);
+        stream_id_range(
+                utils::chunked_vector<cdc::stream_id> &items,
+                utils::chunked_vector<cdc::stream_id>::iterator lo1,
+                utils::chunked_vector<cdc::stream_id>::iterator end1,
+                utils::chunked_vector<cdc::stream_id>::iterator lo2,
+                utils::chunked_vector<cdc::stream_id>::iterator end2);
+
+        void set_starting_position(const cdc::stream_id &update_to);
+        // Must be called after construction and after set_starting_position()
+        // (if used), but before iterating with begin()/end().
+        void prepare_for_iterating();
+
+        utils::chunked_vector<cdc::stream_id>::iterator begin() const { return _lo1; }
+        utils::chunked_vector<cdc::stream_id>::iterator end() const { return _end1; }
+    };
+
+    stream_id_range find_children_range_from_parent_token(
+        const utils::chunked_vector<cdc::stream_id>& parent_streams,
+        utils::chunked_vector<cdc::stream_id>& current_streams,
+        cdc::stream_id parent,
+        bool uses_tablets
+    );
+}
--- a/alternator/ttl.cc
+++ b/alternator/ttl.cc
@@ -44,6 +44,7 @@
 #include "cql3/query_options.hh"
 #include "cql3/column_identifier.hh"
 #include "alternator/executor.hh"
+#include "alternator/executor_util.hh"
 #include "alternator/controller.hh"
 #include "alternator/serialization.hh"
 #include "alternator/ttl_tag.hh"
@@ -58,13 +59,17 @@ static logging::logger tlogger("alternator_ttl");

 namespace alternator {

-future<executor::request_return_type> executor::update_time_to_live(client_state& client_state, service_permit permit, rjson::value request) {
+future<executor::request_return_type> executor::update_time_to_live(client_state& client_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
    _stats.api_operations.update_time_to_live++;
    if (!_proxy.features().alternator_ttl) {
        co_return api_error::unknown_operation("UpdateTimeToLive not yet supported. Upgrade all nodes to a version that supports it.");
    }

    schema_ptr schema = get_table(_proxy, request);
+
+    maybe_audit(audit_info, audit::statement_category::DDL,
+                schema->ks_name(), schema->cf_name(), "UpdateTimeToLive", request);
+
    rjson::value* spec = rjson::find(request, "TimeToLiveSpecification");
    if (!spec || !spec->IsObject()) {
        co_return api_error::validation("UpdateTimeToLive missing mandatory TimeToLiveSpecification");
@@ -114,9 +119,13 @@ future<executor::request_return_type> executor::update_time_to_live(client_state
    co_return rjson::print(std::move(response));
 }

-future<executor::request_return_type> executor::describe_time_to_live(client_state& client_state, service_permit permit, rjson::value request) {
+future<executor::request_return_type> executor::describe_time_to_live(client_state& client_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
    _stats.api_operations.describe_time_to_live++;
    schema_ptr schema = get_table(_proxy, request);
+    
+    maybe_audit(audit_info, audit::statement_category::QUERY,
+                schema->ks_name(), schema->cf_name(), "DescribeTimeToLive", request);
+
    std::map<sstring, sstring> tags_map = get_tags_of_table_or_throw(schema);
    rjson::value desc = rjson::empty_object();
    auto i = tags_map.find(TTL_TAG_KEY);
--- a/api/config.cc
+++ b/api/config.cc
@@ -82,15 +82,16 @@ void set_config(std::shared_ptr < api_registry_builder20 > rb, http_context& ctx
        });
    });

-    cs::find_config_id.set(r, [&cfg] (const_req r) {
-        auto id = r.get_path_param("id");
-        for (auto&& cfg_ref : cfg.values()) {
-            auto&& cfg = cfg_ref.get();
-            if (id == cfg.name()) {
-                return cfg.value_as_json();
-            }
+    cs::find_config_id.set(r, [&cfg] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
+        auto id = req->get_path_param("id");
+        auto value = co_await cfg.value_as_json_string_for_name(id);
+        if (!value) {
+            throw bad_param_exception(sstring("No such config entry: ") + id);
        }
-        throw bad_param_exception(sstring("No such config entry: ") + id);
+        // value is already a JSON string
+        json::json_return_type ret{json::json_void()};
+        ret._res = std::move(*value);
+        co_return ret;
    });

    sp::get_rpc_timeout.set(r, [&cfg](const_req req)  {
--- a/api/storage_proxy.cc
+++ b/api/storage_proxy.cc
@@ -123,12 +123,13 @@ static future<json::json_return_type>  sum_estimated_histogram(sharded<service::
    });
 }

-static future<json::json_return_type>  sum_estimated_histogram(sharded<service::storage_proxy>& proxy, utils::estimated_histogram service::storage_proxy_stats::stats::*f) {
+static future<json::json_return_type>  sum_estimated_histogram(sharded<service::storage_proxy>& proxy, service::storage_proxy_stats::cas_contention_histogram service::storage_proxy_stats::stats::*f) {

-    return two_dimensional_map_reduce(proxy, f, utils::estimated_histogram_merge,
-            utils::estimated_histogram()).then([](const utils::estimated_histogram& val) {
+    return two_dimensional_map_reduce(proxy, f, utils::estimated_histogram_with_max_merge<service::storage_proxy_stats::cas_contention_histogram::MAX>,
+            service::storage_proxy_stats::cas_contention_histogram()).then([](const service::storage_proxy_stats::cas_contention_histogram& val) {
        utils_json::estimated_histogram res;
-        res = val;
+        res.bucket_offsets = val.get_buckets_offsets();
+        res.buckets = val.get_buckets_counts();
        return make_ready_future<json::json_return_type>(res);
    });
 }
--- a/api/storage_service.cc
+++ b/api/storage_service.cc
@@ -1743,11 +1743,11 @@ rest_get_vnode_tablet_migration(http_context& ctx, sharded<service::storage_serv
        throw std::runtime_error("vnodes-to-tablets migration requires all nodes to support the VNODES_TO_TABLETS_MIGRATIONS cluster feature");
    }
    auto keyspace = validate_keyspace(ctx, req);
-    auto status = co_await ss.local().get_tablets_migration_status(keyspace);
+    auto status = co_await ss.local().get_tablets_migration_status_with_node_details(keyspace);

    ss::vnode_tablet_migration_status result;
    result.keyspace = status.keyspace;
-    result.status = status.status;
+    result.status = fmt::format("{}", status.status);
    result.nodes._set = true;
    for (const auto& node : status.nodes) {
        ss::vnode_tablet_migration_node_status n;
--- a/audit/audit.cc
+++ b/audit/audit.cc
@@ -126,6 +126,13 @@ static std::map<sstring, std::set<sstring>> parse_audit_tables(const sstring& da
            }
            boost::trim(parts[0]);
            boost::trim(parts[1]);
+            // The real keyspace name of an Alternator table T is
+            // "alternator_T". The audit_tables config flag uses the format
+            // "alternator.T" to refer to such tables, so we expand it here
+            // to the real keyspace name.
+            if (parts[0] == "alternator") {
+                parts[0] = "alternator_" + parts[1];
+            }
            result[parts[0]].insert(std::move(parts[1]));
        }
    }
@@ -228,27 +235,55 @@ future<> audit::shutdown() {
    return make_ready_future<>();
 }

-future<> audit::log(const audit_info* audit_info, service::query_state& query_state, const cql3::query_options& options, bool error) {
-    const service::client_state& client_state = query_state.get_client_state();
-    socket_address node_ip = _token_metadata.get()->get_topology().my_address().addr();
-    db::consistency_level cl = options.get_consistency();
+future<> audit::log(const audit_info& audit_info, const service::client_state& client_state, std::optional<db::consistency_level> cl, bool error) {
    thread_local static sstring no_username("undefined");
    static const sstring anonymous_username("anonymous");
    const sstring& username = client_state.user() ? client_state.user()->name.value_or(anonymous_username) : no_username;
    socket_address client_ip = client_state.get_client_address().addr();
+    socket_address node_ip = _token_metadata.get()->get_topology().my_address().addr();
    if (logger.is_enabled(logging::log_level::debug)) {
        logger.debug("Log written: node_ip {} category {} cl {} error {} keyspace {} query '{}' client_ip {} table {} username {}",
-            node_ip, audit_info->category_string(), cl, error, audit_info->keyspace(),
-            audit_info->query(), client_ip, audit_info->table(), username);
+            node_ip, audit_info.category_string(), cl, error, audit_info.keyspace(),
+            audit_info.query(), client_ip, audit_info.table(), username);
    }
-    return futurize_invoke(std::mem_fn(&storage_helper::write), _storage_helper_ptr, audit_info, node_ip, client_ip, cl, username, error)
+    return futurize_invoke(std::mem_fn(&storage_helper::write), _storage_helper_ptr, &audit_info, node_ip, client_ip, cl, username, error)
        .handle_exception([audit_info, node_ip, client_ip, cl, username, error] (auto ep) {
            logger.error("Unexpected exception when writing log with: node_ip {} category {} cl {} error {} keyspace {} query '{}' client_ip {} table {} username {} exception {}",
-                node_ip, audit_info->category_string(), cl, error, audit_info->keyspace(),
-                audit_info->query(), client_ip, audit_info->table(),username, ep);
+                node_ip, audit_info.category_string(), cl, error, audit_info.keyspace(),
+                audit_info.query(), client_ip, audit_info.table(), username, ep);
    });
 }

+static future<> maybe_log(const audit_info& audit_info, const service::client_state& client_state, std::optional<db::consistency_level> cl, bool error) {
+    if (!audit::audit_instance().local_is_initialized() || !audit::local_audit_instance().should_log(audit_info)) {
+        return make_ready_future<>();
+    }
+    return audit::local_audit_instance().log(audit_info, client_state, cl, error);
+}
+
+static future<> inspect(const audit_info& audit_info, const service::query_state& query_state, const cql3::query_options& options, bool error) {
+    return maybe_log(audit_info, query_state.get_client_state(), options.get_consistency(), error);
+}
+
+future<> inspect(shared_ptr<cql3::cql_statement> statement, const service::query_state& query_state, const cql3::query_options& options, bool error) {
+    const auto info = statement->get_audit_info();
+    if (info == nullptr) {
+        return make_ready_future<>();
+    }
+    if (!info->batch()) {
+        return inspect(*info, query_state, options, error);
+    }
+    // Batch: audit every contained statement individually rather than the batch itself.
+    auto* batch = static_cast<cql3::statements::batch_statement*>(statement.get());
+    return do_for_each(batch->statements().begin(), batch->statements().end(), [&query_state, &options, error] (auto&& m) {
+        return inspect(m.statement, query_state, options, error);
+    });
+}
+
+future<> inspect(const audit_info_alternator& ai, const service::client_state& client_state, bool error) {
+    return maybe_log(ai, client_state, ai.get_cl(), error);
+}
+
 future<> audit::log_login(const sstring& username, socket_address client_ip, bool error) noexcept {
    socket_address node_ip = _token_metadata.get()->get_topology().my_address().addr();
    if (logger.is_enabled(logging::log_level::debug)) {
@@ -262,24 +297,6 @@ future<> audit::log_login(const sstring& username, socket_address client_ip, boo
    });
 }

-future<> inspect(shared_ptr<cql3::cql_statement> statement, service::query_state& query_state, const cql3::query_options& options, bool error) {
-    auto audit_info = statement->get_audit_info();
-    if (!audit_info) {
-        return make_ready_future<>();
-    }
-    if (audit_info->batch()) {
-        cql3::statements::batch_statement* batch = static_cast<cql3::statements::batch_statement*>(statement.get());
-        return do_for_each(batch->statements().begin(), batch->statements().end(), [&query_state, &options, error] (auto&& m) {
-            return inspect(m.statement, query_state, options, error);
-        });
-    } else {
-        if (audit::local_audit_instance().should_log(audit_info)) {
-            return audit::local_audit_instance().log(audit_info, query_state, options, error);
-        }
-        return make_ready_future<>();
-    }
-}
-
 future<> inspect_login(const sstring& username, socket_address client_ip, bool error) {
    if (!audit::audit_instance().local_is_initialized() || !audit::local_audit_instance().should_log_login()) {
        return make_ready_future<>();
@@ -292,13 +309,21 @@ bool audit::should_log_table(const sstring& keyspace, const sstring& name) const
    return keyspace_it != _audited_tables.cend() && keyspace_it->second.find(name) != keyspace_it->second.cend();
 }

-bool audit::should_log(const audit_info* audit_info) const {
-    return _audited_categories.contains(audit_info->category())
-           && (_audited_keyspaces.find(audit_info->keyspace()) != _audited_keyspaces.cend()
-                         || should_log_table(audit_info->keyspace(), audit_info->table())
-                         || audit_info->category() == statement_category::AUTH
-                         || audit_info->category() == statement_category::ADMIN
-                         || audit_info->category() == statement_category::DCL);
+bool audit::should_log(const audit_info& audit_info) const {
+    return will_log(audit_info.category(), audit_info.keyspace(), audit_info.table());
+}
+
+bool audit::will_log(statement_category cat, std::string_view keyspace, std::string_view table) const {
+    if (!_audited_categories.contains(cat)) {
+        return false;
+    }
+    // An empty keyspace (e.g., ListTables, or batch operations spanning multiple
+    // tables) cannot be filtered by keyspace/table, and AUTH/ADMIN/DCL are never
+    // filtered that way, so those are logged whenever the category matches.
+    const bool unfiltered = keyspace.empty() || cat == statement_category::AUTH
+            || cat == statement_category::ADMIN || cat == statement_category::DCL;
+    const sstring ks(keyspace);
+    return unfiltered || _audited_keyspaces.find(ks) != _audited_keyspaces.cend() || should_log_table(ks, sstring(table));
 }

 template<class T>
--- a/audit/audit.hh
+++ b/audit/audit.hh
@@ -10,14 +10,15 @@
 #include "seastarx.hh"
 #include "utils/log.hh"
 #include "utils/observable.hh"
-#include "db/consistency_level.hh"
-#include "locator/token_metadata_fwd.hh"
+#include "service/client_state.hh"
+#include "db/consistency_level_type.hh"
 #include <seastar/core/sharded.hh>
 #include <seastar/util/log.hh>

 #include "enum_set.hh"

 #include <memory>
+#include <optional>

 namespace db {

@@ -70,12 +71,15 @@ using category_set = enum_set<super_enum<statement_category, statement_category:
                                                             statement_category::AUTH,
                                                             statement_category::ADMIN>>;

-class audit_info final {
+// Holds the audit metadata for a single request: the operation category,
+// target keyspace/table, and the query string to be logged.
+class audit_info {
+protected:
    statement_category _category;
    sstring _keyspace;
    sstring _table;
    sstring _query;
-    bool _batch;
+    bool _batch; // used only for unpacking batches in CQL, not relevant for Alternator
 public:
    audit_info(statement_category cat, sstring keyspace, sstring table, bool batch)
        : _category(cat)
@@ -83,8 +87,17 @@ public:
        , _table(std::move(table))
        , _batch(batch)
    { }
-    void set_query_string(const std::string_view& query_string) {
-        _query = sstring(query_string);
+    // Sets the query text stored in this audit_info.
+    // 'operation' is for the cases where the query string does not contain it, like with Alternator;
+    // when it is non-empty, the stored text becomes "<operation>|<query_string>".
+    // Returns *this.
+    audit_info& set_query_string(std::string_view query_string, std::string_view operation = {}) {
+        return set_query_string(sstring(query_string), sstring(operation));
+    }
+    // Same contract as the string_view overload, for arguments that are already sstring.
+    audit_info& set_query_string(const sstring& query_string, const sstring& operation = "") {
+        _query = operation.empty() ? query_string : operation + "|" + query_string;
+        return *this;
     }
    const sstring& keyspace() const { return _keyspace; }
    const sstring& table() const { return _table; }
@@ -96,6 +109,23 @@ public:

 using audit_info_ptr = std::unique_ptr<audit_info>;

+// Audit info for Alternator requests.
+// Unlike CQL, where the consistency level is available from query_options and
+// passed separately to audit::log(), Alternator has no query_options, so we
+// store the CL inside the audit_info object.
+// Consistency level is optional: only data read/write operations (GetItem,
+// PutItem, Query, Scan, etc.) have a meaningful CL. Schema operations and
+// metadata queries pass std::nullopt.
+class audit_info_alternator final : public audit_info {
+    std::optional<db::consistency_level> _cl; // std::nullopt when no consistency level was supplied
+public:
+    audit_info_alternator(statement_category cat, sstring keyspace, sstring table, std::optional<db::consistency_level> cl = std::nullopt)
+        : audit_info(cat, std::move(keyspace), std::move(table), /* batch = */ false), _cl(cl)
+    {}
+
+    std::optional<db::consistency_level> get_cl() const { return _cl; } // the CL given at construction, if any
+};
+
 class storage_helper;

 class audit final : public seastar::async_sharded_service<audit> {
@@ -142,13 +172,15 @@ public:
    future<> start(const db::config& cfg);
    future<> stop();
    future<> shutdown();
-    bool should_log(const audit_info* audit_info) const;
+    bool should_log(const audit_info& audit_info) const;
+    bool will_log(statement_category cat, std::string_view keyspace = {}, std::string_view table = {}) const;
    bool should_log_login() const { return _audited_categories.contains(statement_category::AUTH); }
-    future<> log(const audit_info* audit_info, service::query_state& query_state, const cql3::query_options& options, bool error);
+    future<> log(const audit_info& audit_info, const service::client_state& client_state, std::optional<db::consistency_level> cl, bool error);
    future<> log_login(const sstring& username, socket_address client_ip, bool error) noexcept;
 };

-future<> inspect(shared_ptr<cql3::cql_statement> statement, service::query_state& query_state, const cql3::query_options& options, bool error);
+future<> inspect(const audit_info_alternator& audit_info, const service::client_state& client_state, bool error);
+future<> inspect(shared_ptr<cql3::cql_statement> statement, const service::query_state& query_state, const cql3::query_options& options, bool error);

 future<> inspect_login(const sstring& username, socket_address client_ip, bool error);

--- a/audit/audit_cf_storage_helper.cc
+++ b/audit/audit_cf_storage_helper.cc
@@ -38,7 +38,8 @@ audit_cf_storage_helper::audit_cf_storage_helper(cql3::query_processor& qp, serv
                       "source inet, "
                       "username text, "
                       "error boolean, "
-                       "PRIMARY KEY ((date, node), event_time))",
+                       "PRIMARY KEY ((date, node), event_time))"
+                       " WITH caching = {{'keys': 'NONE', 'rows_per_partition': 'NONE', 'enabled': 'false'}}",
                       KEYSPACE_NAME, TABLE_NAME),
             fmt::format("INSERT INTO {}.{} ("
                       "date,"
@@ -129,7 +130,7 @@ future<> audit_cf_storage_helper::stop() {
 future<> audit_cf_storage_helper::write(const audit_info* audit_info,
                                    socket_address node_ip,
                                    socket_address client_ip,
-                                    db::consistency_level cl,
+                                    std::optional<db::consistency_level> cl,
                                    const sstring& username,
                                    bool error) {
    return _table.insert(_qp, _mm, _dummy_query_state, make_data, audit_info, node_ip, client_ip, cl, username, error);
@@ -145,7 +146,7 @@ future<> audit_cf_storage_helper::write_login(const sstring& username,
 cql3::query_options audit_cf_storage_helper::make_data(const audit_info* audit_info,
                                                       socket_address node_ip,
                                                       socket_address client_ip,
-                                                       db::consistency_level cl,
+                                                       std::optional<db::consistency_level> cl,
                                                       const sstring& username,
                                                       bool error) {
    auto time = std::chrono::system_clock::now();
@@ -154,7 +155,7 @@ cql3::query_options audit_cf_storage_helper::make_data(const audit_info* audit_i
    auto date = millis_since_epoch / ticks_per_day * ticks_per_day;
    thread_local static int64_t last_nanos = 0;
    auto time_id = utils::UUID_gen::get_time_UUID(table_helper::make_monotonic_UUID_tp(last_nanos, time));
-    auto consistency_level = fmt::format("{}", cl);
+    auto consistency_level = cl ? format("{}", *cl) : sstring("");
    std::vector<cql3::raw_value> values {
        cql3::raw_value::make_value(timestamp_type->decompose(date)),
        cql3::raw_value::make_value(inet_addr_type->decompose(node_ip.addr())),
--- a/audit/audit_cf_storage_helper.hh
+++ b/audit/audit_cf_storage_helper.hh
@@ -37,7 +37,7 @@ class audit_cf_storage_helper : public storage_helper {
    static cql3::query_options make_data(const audit_info* audit_info,
                                         socket_address node_ip,
                                         socket_address client_ip,
-                                         db::consistency_level cl,
+                                         std::optional<db::consistency_level> cl,
                                         const sstring& username,
                                         bool error);
    static cql3::query_options make_login_data(socket_address node_ip,
@@ -55,7 +55,7 @@ public:
    virtual future<> write(const audit_info* audit_info,
                           socket_address node_ip,
                           socket_address client_ip,
-                           db::consistency_level cl,
+                           std::optional<db::consistency_level> cl,
                           const sstring& username,
                           bool error) override;
    virtual future<> write_login(const sstring& username,
--- a/audit/audit_composite_storage_helper.cc
+++ b/audit/audit_composite_storage_helper.cc
@@ -42,7 +42,7 @@ future<> audit_composite_storage_helper::stop() {
 future<> audit_composite_storage_helper::write(const audit_info* audit_info,
                                               socket_address node_ip,
                                               socket_address client_ip,
-                                               db::consistency_level cl,
+                                               std::optional<db::consistency_level> cl,
                                               const sstring& username,
                                               bool error) {
    return seastar::parallel_for_each(
--- a/audit/audit_composite_storage_helper.hh
+++ b/audit/audit_composite_storage_helper.hh
@@ -25,7 +25,7 @@ public:
    virtual future<> write(const audit_info* audit_info,
                           socket_address node_ip,
                           socket_address client_ip,
-                           db::consistency_level cl,
+                           std::optional<db::consistency_level> cl,
                           const sstring& username,
                           bool error) override;
    virtual future<> write_login(const sstring& username,
--- a/audit/audit_syslog_storage_helper.cc
+++ b/audit/audit_syslog_storage_helper.cc
@@ -101,18 +101,19 @@ future<> audit_syslog_storage_helper::stop() {
 future<> audit_syslog_storage_helper::write(const audit_info* audit_info,
                                            socket_address node_ip,
                                            socket_address client_ip,
-                                            db::consistency_level cl,
+                                            std::optional<db::consistency_level> cl,
                                            const sstring& username,
                                            bool error) {
    auto now = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
    tm time;
    localtime_r(&now, &time);
+    auto cl_str = cl ? format("{}", *cl) : sstring("");
    sstring msg = seastar::format(R"(<{}>{:%h %e %T} scylla-audit: node="{}", category="{}", cl="{}", error="{}", keyspace="{}", query="{}", client_ip="{}", table="{}", username="{}")",
                                    LOG_NOTICE | LOG_USER,
                                    time,
                                    node_ip,
                                    audit_info->category_string(),
-                                    cl,
+                                    cl_str,
                                    (error ? "true" : "false"),
                                    audit_info->keyspace(),
                                    json_escape(audit_info->query()),
--- a/audit/audit_syslog_storage_helper.hh
+++ b/audit/audit_syslog_storage_helper.hh
@@ -35,7 +35,7 @@ public:
    virtual future<> write(const audit_info* audit_info,
                           socket_address node_ip,
                           socket_address client_ip,
-                           db::consistency_level cl,
+                           std::optional<db::consistency_level> cl,
                           const sstring& username,
                           bool error) override;
    virtual future<> write_login(const sstring& username,
--- a/audit/storage_helper.hh
+++ b/audit/storage_helper.hh
@@ -22,7 +22,7 @@ public:
    virtual future<> write(const audit_info* audit_info,
                           socket_address node_ip,
                           socket_address client_ip,
-                           db::consistency_level cl,
+                           std::optional<db::consistency_level> cl,
                           const sstring& username,
                           bool error) = 0;
    virtual future<> write_login(const sstring& username,
--- a/auth/ldap_role_manager.cc
+++ b/auth/ldap_role_manager.cc
@@ -31,6 +31,8 @@ namespace {

 logger mylog{"ldap_role_manager"}; // `log` is taken by math.

+constexpr std::string_view user_placeholder = "{USER}";
+
 struct url_desc_deleter {
    void operator()(LDAPURLDesc *p) {
        ldap_free_urldesc(p);
@@ -39,9 +41,141 @@ struct url_desc_deleter {

 using url_desc_ptr = std::unique_ptr<LDAPURLDesc, url_desc_deleter>;

-url_desc_ptr parse_url(std::string_view url) {
+/// Escapes an LDAP filter assertion value as described in RFC 4515 Section 3.
+/// Each of the bytes '*', '(', ')', '\' and NUL is rewritten as a backslash
+/// followed by its two-digit hex code, so that untrusted input interpolated
+/// into a filter cannot change the filter's structure.
+sstring escape_filter_value(std::string_view value) {
+    // Two hex digits of the escape for `c`, or nullptr when `c` is copied as-is.
+    const auto escape_digits = [] (unsigned char c) -> const char* {
+        switch (c) {
+        case '*':
+            return "2a";
+        case '(':
+            return "28";
+        case ')':
+            return "29";
+        case '\\':
+            return "5c";
+        case '\0':
+            return "00";
+        default:
+            return nullptr;
+        }
+    };
+
+    // First pass: count the bytes to escape so the result can be sized exactly.
+    size_t to_escape = 0;
+    for (unsigned char c : value) {
+        if (escape_digits(c)) {
+            ++to_escape;
+        }
+    }
+    if (to_escape == 0) {
+        // Nothing to rewrite; hand back a plain copy.
+        return sstring(value);
+    }
+
+    // Every escaped byte grows from one character to three ("\XX").
+    sstring out(value.size() + to_escape * 2, 0);
+    size_t w = 0;
+    for (unsigned char c : value) {
+        if (const char* digits = escape_digits(c)) {
+            out[w++] = '\\';
+            out[w++] = digits[0];
+            out[w++] = digits[1];
+        } else {
+            out[w++] = static_cast<char>(c);
+        }
+    }
+
+    return out;
+}
+
+/// Percent-encodes every byte of \p value that is not an RFC 3986
+/// "unreserved" character (ALPHA / DIGIT / '-' / '.' / '_' / '~').
+///
+/// The character classes are tested with explicit ASCII ranges rather than
+/// std::isalnum(), which is locale-dependent and might let non-ASCII bytes
+/// through unencoded under some locales.
+///
+/// This runs AFTER RFC 4515 filter escaping, when the value is about to be
+/// placed into an LDAP URL, for two reasons:
+///  1. URL metacharacters ('?', '#') can no longer alter the URL structure
+///     seen by ldap_url_parse.
+///  2. ldap_url_parse percent-decodes each component; encoding here stops a
+///     literal "%2a" in the username from decoding to '*' and undoing the
+///     filter escaping.
+sstring percent_encode_for_url(std::string_view value) {
+    static constexpr char hex_digits[] = "0123456789ABCDEF";
+    // RFC 3986 "unreserved" set, spelled out as ASCII ranges.
+    const auto is_unreserved = [] (unsigned char c) {
+        return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')
+                || c == '-' || c == '.' || c == '_' || c == '~';
+    };
+
+    // First pass: count the bytes that will expand from one character to three.
+    size_t to_encode = 0;
+    for (unsigned char c : value) {
+        to_encode += is_unreserved(c) ? 0 : 1;
+    }
+    if (to_encode == 0) {
+        return sstring(value);
+    }
+
+    // Second pass: emit "%XX" for encoded bytes, the byte itself otherwise.
+    sstring out(value.size() + to_encode * 2, 0);
+    size_t w = 0;
+    for (unsigned char c : value) {
+        if (!is_unreserved(c)) {
+            out[w++] = '%';
+            out[w++] = hex_digits[c >> 4];
+            out[w++] = hex_digits[c & 0x0F];
+            continue;
+        }
+        out[w++] = static_cast<char>(c);
+    }
+    return out;
+}
+
+/// Returns true when \p sentinel occurs in a component of the parsed URL
+/// \p desc other than the filter: the host, the DN, any requested attribute
+/// or any extension.
+bool sentinel_outside_filter(const LDAPURLDesc& desc, std::string_view sentinel) {
+    // Null-safe substring test on a single C string.
+    const auto has_sentinel = [sentinel] (const char* s) {
+        return s != nullptr && std::string_view(s).find(sentinel) != std::string_view::npos;
+    };
+    // Same test over a null-terminated array of C strings; the array itself may be null.
+    const auto any_has_sentinel = [&has_sentinel] (char* const* list) {
+        if (list == nullptr) {
+            return false;
+        }
+        for (; *list != nullptr; ++list) {
+            if (has_sentinel(*list)) {
+                return true;
+            }
+        }
+        return false;
+    };
+
+    return has_sentinel(desc.lud_host)
+            || has_sentinel(desc.lud_dn)
+            || any_has_sentinel(desc.lud_attrs)
+            || any_has_sentinel(desc.lud_exts);
+}
+
+/// Parses \p url with ldap_url_parse and returns the resulting descriptor
+/// wrapped in a url_desc_ptr, which frees it with ldap_free_urldesc.
+/// A parse failure is logged, not thrown; callers test the returned pointer
+/// for null.
+/// NOTE(review): the parameter is an sstring so that c_str() can be handed to
+/// ldap_url_parse -- presumably because std::string_view::data() is not
+/// guaranteed to be NUL-terminated; confirm.
+url_desc_ptr parse_url(const sstring& url) {
     LDAPURLDesc *desc = nullptr;
-    if (ldap_url_parse(url.data(), &desc)) {
+    if (ldap_url_parse(url.c_str(), &desc)) {
         mylog.error("error in ldap_url_parse({})", url);
     }
     return url_desc_ptr(desc);
@@ -112,6 +246,7 @@ const resource_set& ldap_role_manager::protected_resources() const {
 }

 future<> ldap_role_manager::start() {
+    validate_query_template();
    if (!parse_url(get_url("dummy-user"))) { // Just need host and port -- any user should do.
        return make_exception_future(
                std::runtime_error(fmt::format("error getting LDAP server address from template {}", _query_template)));
@@ -216,7 +351,7 @@ future<> ldap_role_manager::revoke(std::string_view, std::string_view, ::service
 }

 future<role_set> ldap_role_manager::query_granted(std::string_view grantee_name, recursive_role_query) {
-    const auto url = get_url(grantee_name.data());
+    const auto url = get_url(grantee_name);
    auto desc = parse_url(url);
    if (!desc) {
        return make_exception_future<role_set>(std::runtime_error(format("Error parsing URL {}", url)));
@@ -348,7 +483,46 @@ future<> ldap_role_manager::remove_attribute(std::string_view role_name, std::st
 }

 sstring ldap_role_manager::get_url(std::string_view user) const {
-    return boost::replace_all_copy(_query_template, "{USER}", user);
+    // The user name passes through two encodings before it is substituted:
+    //  - RFC 4515 filter escaping, which neutralizes filter metacharacters (*, (, ), \, NUL);
+    //  - URL percent-encoding, which keeps '?' and '#' from altering the URL structure and
+    //    keeps ldap_url_parse's percent-decoding from undoing the filter escaping (%2a -> *).
+    const sstring filter_escaped = escape_filter_value(user);
+    const sstring url_encoded = percent_encode_for_url(filter_escaped);
+    return boost::replace_all_copy(_query_template, user_placeholder, url_encoded);
+}
+
+void ldap_role_manager::validate_query_template() const {
+    // A template without {USER} has nothing to validate.
+    if (_query_template.find(user_placeholder) == sstring::npos) {
+        return;
+    }
+
+    // Replace {USER} with a marker and parse the result: ldap_url_parse then
+    // shows which URL component(s) the placeholder occupies.  The marker is
+    // purely alphanumeric, so it cannot itself affect URL parsing.
+    static constexpr std::string_view sentinel = "XLDAPSENTINELX";
+    const sstring probe_url = boost::replace_all_copy(_query_template, user_placeholder, sentinel);
+    const auto parsed = parse_url(probe_url);
+    if (!parsed) {
+        throw url_error(format("LDAP URL template is not a valid URL when {{USER}} is substituted: {}", _query_template));
+    }
+
+    // Requirement 1: the marker shows up in the filter component.
+    const char* const filter = parsed->lud_filter;
+    const bool in_filter = filter != nullptr
+            && std::string_view(filter).find(sentinel) != std::string_view::npos;
+    if (!in_filter) {
+        throw url_error(format(
+                "LDAP URL template places {{USER}} outside the filter component. "
+                "RFC 4515 filter escaping only protects the filter; other components "
+                "(e.g. the base DN) require different escaping and are not supported. "
+                "Template: {}", _query_template));
+    }
+    // Requirement 2: the marker shows up in no other component
+    // (host, DN, attributes, extensions).
+    if (sentinel_outside_filter(*parsed, sentinel)) {
+        throw url_error(format(
+                "LDAP URL template places {{USER}} outside the filter component. "
+                "RFC 4515 filter escaping only protects the filter; other components "
+                "(e.g. the host) require different escaping and are not supported. "
+                "Template: {}", _query_template));
+    }
 }

 future<std::vector<cql3::description>> ldap_role_manager::describe_role_grants() {
--- a/auth/ldap_role_manager.hh
+++ b/auth/ldap_role_manager.hh
@@ -115,6 +115,9 @@ class ldap_role_manager : public role_manager {
    /// Macro-expands _query_template, returning the result.
    sstring get_url(std::string_view user) const;

+    /// Validates that {USER}, if present, is used only in the LDAP filter component.
+    void validate_query_template() const;
+
    /// Used to auto-create roles returned by ldap.
    future<> create_role(std::string_view role_name);

--- a/cdc/cdc_options.hh
+++ b/cdc/cdc_options.hh
@@ -35,6 +35,15 @@ enum class image_mode : uint8_t {

 class options final {
    std::optional<bool> _enabled;
+    bool _enable_requested = false;
+    // When CDC is employed for the purpose of Alternator Streams and tablets are used,
+    // tablet merges need to be blocked due to limitations of the DynamoDB Streams API.
+    // DynamoDB Streams allows only a single parent to be specified for a stream.
+    // In ScyllaDB, there is a one-to-one association between streams and tablets,
+    // so merging tablets also means merging streams. A merged stream has two parents, and both
+    // must be read to completion before reading from the newly merged tablet. This cannot
+    // be conveyed through the DynamoDB Streams API, and the result can be reordering of events in Streams.
+    bool _tablet_merge_blocked = false;
    image_mode _preimage = image_mode::off;
    bool _postimage = false;
    delta_mode _delta_mode = delta_mode::full;
@@ -48,6 +57,8 @@ public:

    bool enabled() const { return _enabled.value_or(false); }
    bool is_enabled_set() const { return _enabled.has_value(); }
+    bool enable_requested() const { return _enable_requested; }
+    bool tablet_merge_blocked() const { return _tablet_merge_blocked; }
    bool preimage() const { return _preimage != image_mode::off; }
    bool full_preimage() const { return _preimage == image_mode::full; }
    bool postimage() const { return _postimage; }
@@ -56,6 +67,17 @@ public:
    int ttl() const { return _ttl; }

    void enabled(bool b) { _enabled = b; }
+    // For the cases when enabling cannot be immediately enforced, like with Alternator Streams
+    // which is incompatible with tablet merges, we need to be able to defer actual enablement
+    // until any in-progress tablet merges complete. We expect that finalization happens
+    // promptly: on_update_column_family callback in topology_coordinator.cc wakes up
+    // the topology coordinator to run maybe_finalize_pending_stream_enables shortly
+    // after the DDL. However, there is SCYLLADB-1304
+    void enable_requested(bool b = true) { _enable_requested = b; }
+    // Persistent flag checked by the tablet allocator to suppress new merge
+    // decisions. Always set when Alternator Streams are enabled; inert on
+    // vnode tables.
+    void tablet_merge_blocked(bool b = true) { _tablet_merge_blocked = b; }
    void preimage(bool b) { preimage(b ? image_mode::on : image_mode::off); }
    void preimage(image_mode m) { _preimage = m; }
    void postimage(bool b) { _postimage = b; }
--- a/cdc/generation.cc
+++ b/cdc/generation.cc
@@ -16,8 +16,11 @@
 #include "keys/keys.hh"
 #include "replica/database.hh"
 #include "db/system_keyspace.hh"
+#include "db/schema_tables.hh"
 #include "dht/token-sharding.hh"
 #include "locator/token_metadata.hh"
+#include "locator/tablets.hh"
+#include "schema/schema_builder.hh"
 #include "types/set.hh"
 #include "utils/assert.hh"
 #include "utils/error_injection.hh"
@@ -29,6 +32,7 @@
 #include "cdc/cdc_options.hh"
 #include "cdc/generation_service.hh"
 #include "cdc/log.hh"
+#include "service/migration_listener.hh"

 extern logging::logger cdc_log;

@@ -776,4 +780,59 @@ future<> generation_service::garbage_collect_cdc_streams(utils::chunked_vector<c
    }
 }

+// Walks all tables and, for each one whose CDC options carry enable_requested,
+// builds the schema mutations that turn the request into an actual enablement
+// (enabled=true, enable_requested=false, tablet_merge_blocked=true).
+//
+// A table is skipped when it has no tablet map in `tm`, or when its tablet map
+// reports needs_merge(); such a table keeps its enable_requested flag.
+//
+// `ts` is the timestamp passed to the schema mutation generation and to the
+// before_update_column_family listeners.
+//
+// Returns the collected mutations as canonical_mutation; the vector is empty
+// when no table was finalized.
+future<utils::chunked_vector<canonical_mutation>> generation_service::maybe_finalize_pending_stream_enables(const locator::token_metadata& tm, api::timestamp_type ts) {
+    utils::chunked_vector<canonical_mutation> muts;
+
+    // If the "delay_cdc_stream_finalization" error injection is entered, skip
+    // finalization and return no mutations.
+    if (utils::get_local_injector().enter("delay_cdc_stream_finalization")) {
+        co_return std::move(muts);
+    }
+
+    co_await _db.get_tables_metadata().for_each_table_gently([&] (table_id id, lw_shared_ptr<replica::table> t) -> future<> {
+        auto s = t->schema();
+        // Nothing pending for this table.
+        if (!s->cdc_options().enable_requested()) {
+            co_return;
+        }
+
+        // Only tablet tables can have enable_requested set
+        if (!tm.tablets().has_tablet_map(id)) {
+            co_return;
+        }
+
+        // Leave the request pending while the tablet map still reports a merge.
+        auto& tmap = tm.tablets().get_tablet_map(id);
+        if (tmap.needs_merge()) {
+            cdc_log.debug("Table {}.{}: deferring stream enablement, tablet merge still in progress", s->ks_name(), s->cf_name());
+            co_return;
+        }
+
+        cdc_log.info("Table {}.{}: finalizing deferred stream enablement (no in-progress merges)", s->ks_name(), s->cf_name());
+
+        // Build a new schema with enabled=true, enable_requested=false and
+        // tablet_merge_blocked=true; all other CDC options are copied from the
+        // current schema.
+        schema_builder builder(s);
+        cdc::options new_opts = s->cdc_options();
+        new_opts.enabled(true);
+        new_opts.enable_requested(false);
+        new_opts.tablet_merge_blocked(true);
+        builder.with_cdc_options(new_opts);
+        auto new_schema = builder.build();
+
+        // Generate the schema mutation (table metadata update only, no columns/indices changed)
+        utils::chunked_vector<mutation> schema_muts;
+        db::schema_tables::add_table_or_view_to_schema_mutation(new_schema, ts, false, schema_muts);
+
+        // Trigger the CDC migration listener hook which creates the CDC log table.
+        // This runs on_before_update_column_family listeners (including CDC's own
+        // listener that creates/updates the log table schema).
+        // The listeners receive schema_muts, so this must run before the
+        // conversion loop below.
+        co_await seastar::async([&] {
+            _db.get_notifier().before_update_column_family(*new_schema, *s, schema_muts, ts);
+        });
+
+        // Append this table's mutations to the result, yielding between
+        // elements.
+        for (auto& m : schema_muts) {
+            muts.emplace_back(canonical_mutation(m));
+            co_await coroutine::maybe_yield();
+        }
+    });
+
+    co_return std::move(muts);
+}
+
 } // namespace cdc
--- a/cdc/generation_service.hh
+++ b/cdc/generation_service.hh
@@ -18,6 +18,7 @@ class system_keyspace;

 namespace locator {
 class tablet_map;
+class token_metadata;
 }

 namespace cdc {
@@ -64,6 +65,12 @@ public:

    future<> generate_tablet_resize_update(utils::chunked_vector<canonical_mutation>& muts, table_id table, const locator::tablet_map& new_tablet_map, api::timestamp_type ts);

+    // Check for tables with enable_requested CDC option and finalize their
+    // stream enablement if no in-progress tablet merges remain.
+    // Returns schema mutations that transition enable_requested -> enabled,
+    // including CDC log table creation side effects.
+    future<utils::chunked_vector<canonical_mutation>> maybe_finalize_pending_stream_enables(const locator::token_metadata& tm, api::timestamp_type ts);
+
    future<utils::chunked_vector<mutation>> garbage_collect_cdc_streams_for_table(table_id table, std::optional<std::chrono::seconds> ttl, api::timestamp_type ts);
    future<> garbage_collect_cdc_streams(utils::chunked_vector<canonical_mutation>& muts, api::timestamp_type ts);

--- a/cdc/log.cc
+++ b/cdc/log.cc
@@ -8,7 +8,7 @@

 #include <utility>
 #include <algorithm>
-
+#include <unordered_set>
 #include <boost/range/irange.hpp>
 #include <seastar/core/thread.hh>
 #include <seastar/core/metrics.hh>
@@ -47,6 +47,7 @@
 #include "tracing/trace_state.hh"
 #include "stats.hh"
 #include "utils/labels.hh"
+#include "alternator/executor.hh"

 namespace std {

@@ -195,7 +196,7 @@ public:
        for (auto sp : cfms) {
            const auto& schema = *sp;

-            if (!schema.cdc_options().enabled()) {
+            if (!cdc_enabled(schema)) {
                continue;
            }

@@ -464,6 +465,18 @@ cdc::options::options(const std::map<sstring, sstring>& map) {
            if (_ttl < 0) {
                throw exceptions::configuration_exception("Invalid CDC option: ttl must be >= 0");
            }
+        } else if (key == "enable_requested") {
+            if (is_true || is_false) {
+                _enable_requested = is_true;
+            } else {
+                throw exceptions::configuration_exception("Invalid value for CDC option \"enable_requested\": " + p.second);
+            }
+        } else if (key == "tablet_merge_blocked") {
+            if (is_true || is_false) {
+                _tablet_merge_blocked = is_true;
+            } else {
+                throw exceptions::configuration_exception("Invalid value for CDC option \"tablet_merge_blocked\": " + p.second);
+            }
        } else {
            throw exceptions::configuration_exception("Invalid CDC option: " + p.first);
        }
@@ -471,7 +484,7 @@ cdc::options::options(const std::map<sstring, sstring>& map) {
 }

 std::map<sstring, sstring> cdc::options::to_map() const {
-    if (!is_enabled_set()) {
+    if (!is_enabled_set() && !_enable_requested) {
        return {};
    }

@@ -481,6 +494,8 @@ std::map<sstring, sstring> cdc::options::to_map() const {
        { "postimage", _postimage ? "true" : "false" },
        { "delta", fmt::format("{}", _delta_mode) },
        { "ttl", std::to_string(_ttl) },
+        { "enable_requested", enable_requested() ? "true" : "false" },
+        { "tablet_merge_blocked", _tablet_merge_blocked ? "true" : "false" },
    };
 }

@@ -489,7 +504,9 @@ sstring cdc::options::to_sstring() const {
 }

 bool cdc::options::operator==(const options& o) const {
-    return enabled() == o.enabled() && _preimage == o._preimage && _postimage == o._postimage && _ttl == o._ttl
+    // enabled() is _enabled.value_or(false), so an unset _enabled compares equal
+    // to an explicit false. The enable_requested and tablet_merge_blocked flags
+    // take part in the comparison as well.
+    return enabled() == o.enabled() && enable_requested() == o.enable_requested()
+            && _tablet_merge_blocked == o._tablet_merge_blocked
+            && _preimage == o._preimage && _postimage == o._postimage && _ttl == o._ttl
             && _delta_mode == o._delta_mode;
 }

@@ -1068,6 +1085,14 @@ public:
        return create_ck(_batch_no - 1);
    }

+    // Returns the timestamp (_ts) held by this builder, so that code outside
+    // the builder can write cells with the same timestamp.
+    api::timestamp_type get_timestamp() const {
+        return _ts;
+    }
+
+    // Returns the TTL (_ttl) held by this builder; see get_timestamp().
+    ttl_opt get_ttl() const {
+        return _ttl;
+    }
+
    // A common pattern is to allocate a row and then immediately set its `cdc$operation` column.
    clustering_key allocate_new_log_row(operation op) {
        auto log_ck = allocate_new_log_row();
@@ -1209,15 +1234,25 @@ struct process_row_visitor {
    row_states_map& _clustering_row_states;

    const bool _generate_delta_values = true; 
+    
+    // true if we are processing changes that were produced by Alternator
+    const bool _alternator;

+    // will be set to true, if any kind of change in row will be detected. Used only, when processing Alternator's changes.
+    bool _alternator_any_value_changed = false;
+
+    // will be set to true, if Alternator's collection column (:attrs) will be modified only by removing elements
+    // Used only, when processing Alternator's changes.
+    bool _alternator_only_deletes = false;
+    
    process_row_visitor(
            const clustering_key& log_ck, stats::part_type_set& touched_parts, log_mutation_builder& builder,
            bool enable_updating_state, const clustering_key* base_ck, cell_map* row_state,
-            row_states_map& clustering_row_states, bool generate_delta_values)
+            row_states_map& clustering_row_states, bool generate_delta_values, bool alternator = false)
        : _log_ck(log_ck), _touched_parts(touched_parts), _builder(builder),
          _enable_updating_state(enable_updating_state), _base_ck(base_ck), _row_state(row_state),
          _clustering_row_states(clustering_row_states),
-          _generate_delta_values(generate_delta_values)
+          _generate_delta_values(generate_delta_values), _alternator(alternator)
    {}

    void update_row_state(const column_definition& cdef, managed_bytes_opt value) {
@@ -1227,7 +1262,17 @@ struct process_row_visitor {
            auto [it, _] = _clustering_row_states.try_emplace(*_base_ck);
            _row_state = &it->second;
        }
-        (*_row_state)[&cdef] = std::move(value);
+        auto [ it, inserted ] = _row_state->insert({ &cdef, std::nullopt });
+
+        // we ignore `_alternator_any_value_changed` for non-alternator changes.
+        // we don't filter if `_enable_updating_state` is false, as on top of needing pre image
+        // we also need cdc to build post image for us
+        // we add check for `_alternator` here for performance reasons - no point in byte compare objects
+        // if the return value will be ignored
+        if (_alternator && _enable_updating_state) {
+            _alternator_any_value_changed = _alternator_any_value_changed || it->second != value;
+        }
+        it->second = std::move(value);
    }

    void live_atomic_cell(const column_definition& cdef, const atomic_cell_view& cell) {
@@ -1377,6 +1422,8 @@ struct process_row_visitor {
        auto&& deleted_keys = std::get<1>(result);
        auto&& added_cells = std::get<2>(result);

+        _alternator_only_deletes = cdef.name_as_text() == alternator::executor::ATTRS_COLUMN_NAME && !deleted_keys.empty() && !added_cells.has_value();
+
        // FIXME: we're doing redundant work: first we serialize the set of deleted keys into a blob,
        // then we deserialize again when merging images below
        managed_bytes_opt deleted_elements = std::nullopt;
@@ -1434,12 +1481,31 @@ struct process_change_visitor {
    const bool _enable_updating_state = false;

    row_states_map& _clustering_row_states;
+
+    // clustering keys' as bytes of rows that should be ignored, when writing cdc log changes
+    // filtering will be done in `clean_up_noop_rows` function. Used only, when processing Alternator's changes.
+    // Since Alternator clustering key is always at most single column, we store unpacked clustering key.
+    // If Alternator table is without clustering key, that means partition has at most one row, any value present
+    // in _alternator_clustering_keys_to_ignore will make us ignore that single row -
+    // we will use an empty bytes object.
+    std::unordered_set<bytes>& _alternator_clustering_keys_to_ignore;
+
    cell_map& _static_row_state;

+    const bool _alternator_schema_has_no_clustering_key = false;
+
    const bool _is_update = false;

    const bool _generate_delta_values = true;

+    // Records `ckey` as a row to be dropped from the CDC log output.
+    // Must only be called while processing an Alternator change.
+    // The key is stored as its first exploded component, or as an empty
+    // bytes object when the key has no components.
+    void alternator_add_ckey_to_rows_to_ignore(const clustering_key& ckey) {
+        throwing_assert(_request_options.alternator);
+        auto components = ckey.explode();
+        if (components.empty()) {
+            _alternator_clustering_keys_to_ignore.insert(bytes{});
+        } else {
+            _alternator_clustering_keys_to_ignore.insert(components[0]);
+        }
+    }
+
    void static_row_cells(auto&& visit_row_cells) {
        _touched_parts.set<stats::part_type::STATIC_ROW>();

@@ -1471,16 +1537,29 @@ struct process_change_visitor {
            }
        };

+        auto row_state = get_row_state(_clustering_row_states, ckey);
        clustering_row_cells_visitor v(
                log_ck, _touched_parts, _builder,
-                _enable_updating_state, &ckey, get_row_state(_clustering_row_states, ckey),
-                _clustering_row_states, _generate_delta_values);
+                _enable_updating_state, &ckey, row_state,
+                _clustering_row_states, _generate_delta_values, _request_options.alternator);
        if (_is_update && _request_options.alternator) {
-            v._marker_op = operation::update;
+            v._marker_op = row_state ? operation::update : operation::insert;
        }
        visit_row_cells(v);

        if (_enable_updating_state) {
+            if (_request_options.alternator && !v._alternator_any_value_changed) {
+                // we need additional checks here:
+                // - without `row_state != nullptr` inserting new key without additional fields (so only partition / clustering key) would be
+                //   treated as no-change, because without additional fields given by the user `v` visitor won't visit any cells
+                //   and _alternator_any_value_changed will be false (thus item will be skipped),
+                // - without `row_state == nullptr && v._alternator_only_deletes` check we won't properly ignore
+                //   column deletes for existing items, but without the column we want to delete -
+                //   item exists (so row_state != nullptr), but we delete non-existing column, so no-op
+                if (row_state != nullptr || (row_state == nullptr && v._alternator_only_deletes)) {
+                    alternator_add_ckey_to_rows_to_ignore(ckey);
+                }
+            }
            // #7716: if there are no regular columns, our visitor would not have visited any cells,
            // hence it would not have created a row_state for this row. In effect, postimage wouldn't be produced.
            // Ensure that the row state exists.
@@ -1497,8 +1576,12 @@ struct process_change_visitor {
        auto log_ck = _builder.allocate_new_log_row(_row_delete_op);
        _builder.set_clustering_columns(log_ck, ckey);

-        if (_enable_updating_state && get_row_state(_clustering_row_states, ckey)) {
-            _clustering_row_states.erase(ckey);
+        if (_enable_updating_state) {
+            if (get_row_state(_clustering_row_states, ckey)) {
+                _clustering_row_states.erase(ckey);
+            } else if (_request_options.alternator) {
+                alternator_add_ckey_to_rows_to_ignore(ckey);
+            }
        }
    }

@@ -1540,6 +1623,22 @@ struct process_change_visitor {
        _touched_parts.set<stats::part_type::PARTITION_DELETE>();
        auto log_ck = _builder.allocate_new_log_row(_partition_delete_op);
        if (_enable_updating_state) {
+            if (_request_options.alternator && _alternator_schema_has_no_clustering_key && _clustering_row_states.empty()) {
+                // Alternator's table can be with or without clustering key. If the clustering key exists,
+                // delete request will be `clustered_row_delete` and will be handled there.
+                // If the clustering key doesn't exist, delete request will be `partition_delete` and will be handled here.
+                // The no-clustering-key case is slightly tricky, because insert of such item is handled by `clustered_row_cells`
+                // and has some value as clustering_key (the value currently seems to be empty bytes object).
+                // We don't want to rely on knowing the value exactly, instead we rely on the fact that
+                // there will be at most one item in a partition. So if `_clustering_row_states` is empty,
+                // we know the delete is for a non-existing item and we should ignore it.
+                // If `_clustering_row_states` is not empty, then we know the delete is for an existing item
+                // we should log it and clear `_clustering_row_states`.
+                // The same logic applies to `alternator_add_ckey_to_rows_to_ignore` call in `clustered_row_delete`
+                // we need to insert "anything" for no-clustering-key case, so further logic will check
+                // if map is empty or not and will know if it should ignore the single partition item and keep it.
+                alternator_add_ckey_to_rows_to_ignore({});
+            }
            _clustering_row_states.clear();
        }
    }
@@ -1647,6 +1746,47 @@ private:

    stats::part_type_set _touched_parts;

+    std::unordered_set<bytes> _alternator_clustering_keys_to_ignore;
+    const column_definition* _alternator_clustering_key_column = nullptr;
+
+    // Builds and returns a new log mutation (same partition key as `mut`, on
+    // _log_schema) that contains the rows of `mut` except those whose
+    // clustering-key column value is in _alternator_clustering_keys_to_ignore.
+    // Surviving rows are moved out of `mut` and re-keyed with consecutive
+    // clustering keys (cdc$batch_seq_no) produced by _builder->create_ck().
+    // This is used for Alternator's changes only.
+    // NOTE: `_alternator_clustering_keys_to_ignore` must not be empty.
+    mutation clean_up_noop_rows(mutation mut) {
+        throwing_assert(!_alternator_clustering_keys_to_ignore.empty());
+        auto after_mut = mutation(_log_schema, mut.key());
+        if (!_alternator_clustering_key_column) {
+            // no clustering key - only a single row per partition;
+            // since _alternator_clustering_keys_to_ignore is not empty we need to drop that single row,
+            // so we just return an empty mutation instead
+            return after_mut;
+        }
+        // Next sequence number to assign; also the count of rows kept so far.
+        int batch_seq = 0;
+        for (rows_entry &row : mut.partition().mutable_non_dummy_rows()) {
+            auto cell = row.row().cells().find_cell(_alternator_clustering_key_column->id);
+            // A row without the clustering-key cell is never matched against the ignore set and is kept.
+            if (cell) {
+                auto val = cell->as_atomic_cell(*_alternator_clustering_key_column).value().linearize();
+
+                if (_alternator_clustering_keys_to_ignore.contains(val)) {
+                    continue;
+                }
+            }
+            // Keep the row under a fresh, gap-free clustering key; its contents are moved, not copied.
+            auto new_key = _builder->create_ck(batch_seq++);
+            after_mut.partition().clustered_row(*_log_schema, std::move(new_key)) = std::move(row.row());
+        }
+
+        if (batch_seq > 0) {
+            // update the end_of_batch marker:
+            // we don't need to clear the previous one, as we only removed rows;
+            // we need to set it on the last row, because the original last row might have been deleted.
+            // batch_seq == 0 -> no rows, after_mut is empty, all entries were dropped and there's nothing to write to the cdc log
+            auto last_key = _builder->create_ck(batch_seq - 1);
+            after_mut.set_cell(last_key, log_meta_column_name_bytes("end_of_batch"), data_value(true), _builder->get_timestamp(), _builder->get_ttl());
+        }
+
+        return after_mut;
+    }
 public:
    transformer(db_context ctx, schema_ptr s, dht::decorated_key dk, const per_request_options& options)
        : _ctx(ctx)
@@ -1656,7 +1796,20 @@ public:
        , _options(options)
        , _clustering_row_states(0, clustering_key::hashing(*_schema), clustering_key::equality(*_schema))
        , _uses_tablets(ctx._proxy.get_db().local().find_keyspace(_schema->ks_name()).uses_tablets())
+        , _alternator_clustering_keys_to_ignore()
    {
+        if (_options.alternator) {
+            auto cks = _schema->clustering_key_columns();
+            const column_definition *ck_def = nullptr;
+            if (!cks.empty()) {
+                auto it = _log_schema->columns_by_name().find(cks.front().name());
+                if (it == _log_schema->columns_by_name().end()) {
+                    on_internal_error(cdc_log, fmt::format("failed to find clustering key `{}` in cdc log table `{}`", cks.front().name(), _log_schema->id()));
+                }
+                ck_def = it->second;
+            }
+            _alternator_clustering_key_column = ck_def;
+        }
    }

    // DON'T move the transformer after this
@@ -1664,7 +1817,10 @@ public:
        const auto stream_id = _uses_tablets ? _ctx._cdc_metadata.get_tablet_stream(_log_schema->id(), ts, _dk.token()) : _ctx._cdc_metadata.get_vnode_stream(ts, _dk.token());
        _result_mutations.emplace_back(_log_schema, stream_id.to_partition_key(*_log_schema));
        _builder.emplace(_result_mutations.back(), ts, _dk.key(), *_schema);
-        _enable_updating_state = _schema->cdc_options().postimage() || (!is_last && _schema->cdc_options().preimage());
+        // When alternator_streams_increased_compatibility is enabled the preimage is read anyway, but we also
+        // need to set _enable_updating_state so that changes are tracked and correct pre/post images are
+        // produced even if the upper layer didn't request them explicitly.
+        _enable_updating_state = _schema->cdc_options().postimage() || (!is_last && _schema->cdc_options().preimage()) || (_options.alternator && _options.alternator_streams_increased_compatibility);
    }

    void produce_preimage(const clustering_key* ck, const one_kind_column_set& columns_to_include) override {
@@ -1761,7 +1917,9 @@ public:
            ._builder = *_builder,
            ._enable_updating_state = _enable_updating_state,
            ._clustering_row_states = _clustering_row_states,
+            ._alternator_clustering_keys_to_ignore = _alternator_clustering_keys_to_ignore,
            ._static_row_state = _static_row_state,
+            ._alternator_schema_has_no_clustering_key = (_alternator_clustering_key_column == nullptr),
            ._is_update = _is_update,
            ._generate_delta_values = generate_delta_values(_builder->base_schema())
        };
@@ -1771,10 +1929,19 @@ public:
    void end_record() override {
        SCYLLA_ASSERT(_builder);
        _builder->end_record();
-    }

-    const row_states_map& clustering_row_states() const override {
-        return _clustering_row_states;
+        if (_options.alternator && !_alternator_clustering_keys_to_ignore.empty()) {
+            // We filter mutations for Alternator's changes here.
+            // We do it per mutation object (the user might submit a batch of those in one go
+            // and some might be split because of different timestamps);
+            // the set of keys to ignore is cleared afterwards.
+            // If a single mutation object contained two separate changes to the same row
+            // and at least one of them were ignored, all of them would be ignored.
+            // This is not possible in Alternator - the Alternator spec forbids reusing
+            // a primary key in a single batch.
+            _result_mutations.back() = clean_up_noop_rows(std::move(_result_mutations.back()));
+            _alternator_clustering_keys_to_ignore.clear();
+        }
    }

    // Takes and returns generated cdc log mutations and associated statistics about parts touched during transformer's lifetime.
@@ -2013,7 +2180,7 @@ cdc::cdc_service::impl::augment_mutation_call(lowres_clock::time_point timeout,
                tracing::trace(tr_state, "CDC: Preimage not enabled for the table, not querying current value of {}", m.decorated_key());
            }

-            return f.then([alternator_increased_compatibility, trans = std::move(trans), &mutations, idx, tr_state, &details, &options] (lw_shared_ptr<cql3::untyped_result_set> rs) mutable {
+            return f.then([trans = std::move(trans), &mutations, idx, tr_state, &details, &options] (lw_shared_ptr<cql3::untyped_result_set> rs) mutable {
                auto& m = mutations[idx];
                auto& s = m.schema();

@@ -2031,10 +2198,10 @@ cdc::cdc_service::impl::augment_mutation_call(lowres_clock::time_point timeout,
                if (should_split(m, options)) {
                    tracing::trace(tr_state, "CDC: Splitting {}", m.decorated_key());
                    details.was_split = true;
-                    process_changes_with_splitting(m, trans, preimage, postimage, alternator_increased_compatibility);
+                    process_changes_with_splitting(m, trans, preimage, postimage);
                } else {
                    tracing::trace(tr_state, "CDC: No need to split {}", m.decorated_key());
-                    process_changes_without_splitting(m, trans, preimage, postimage, alternator_increased_compatibility);
+                    process_changes_without_splitting(m, trans, preimage, postimage);
                }
                auto [log_mut, touched_parts] = std::move(trans).finish();
                const int generated_count = log_mut.size();
--- a/cdc/split.cc
+++ b/cdc/split.cc
@@ -6,26 +6,15 @@
 * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
 */

-#include "bytes.hh"
-#include "bytes_fwd.hh"
-#include "mutation/atomic_cell.hh"
-#include "mutation/atomic_cell_or_collection.hh"
-#include "mutation/collection_mutation.hh"
 #include "mutation/mutation.hh"
-#include "mutation/tombstone.hh"
 #include "schema/schema.hh"

-#include <seastar/core/sstring.hh>
 #include "types/concrete_types.hh"
-#include "types/types.hh"
 #include "types/user.hh"

 #include "split.hh"
 #include "log.hh"
 #include "change_visitor.hh"
-#include "utils/managed_bytes.hh"
-#include <string_view>
-#include <unordered_map>

 extern logging::logger cdc_log;

@@ -610,109 +599,8 @@ bool should_split(const mutation& m, const per_request_options& options) {
        || v._ts == api::missing_timestamp;
 }

-// Returns true if the row state and the atomic and nonatomic entries represent
-// an equivalent item.
-static bool entries_match_row_state(const schema_ptr& base_schema, const cell_map& row_state, const std::vector<atomic_column_update>& atomic_entries,
-        std::vector<nonatomic_column_update>& nonatomic_entries) {
-    for (const auto& update : atomic_entries) {
-        const column_definition& cdef = base_schema->column_at(column_kind::regular_column, update.id);
-        const auto it = row_state.find(&cdef);
-        if (it == row_state.end()) {
-            return false;
-        }
-        if (to_managed_bytes_opt(update.cell.value().linearize()) != it->second) {
-            return false;
-        }
-    }
-    if (nonatomic_entries.empty()) {
-        return true;
-    }
-
-    for (const auto& update : nonatomic_entries) {
-        const column_definition& cdef = base_schema->column_at(column_kind::regular_column, update.id);
-        const auto it = row_state.find(&cdef);
-        if (it == row_state.end()) {
-            return false;
-        }
-
-        // The only collection used by Alternator is a non-frozen map.
-        auto current_raw_map = cdef.type->deserialize(*it->second);
-        map_type_impl::native_type current_values = value_cast<map_type_impl::native_type>(current_raw_map);
-
-        if (current_values.size() != update.cells.size()) {
-            return false;
-        }
-        
-        std::unordered_map<sstring_view, bytes> current_values_map;
-        for (const auto& entry : current_values) {
-            const auto attr_name = std::string_view(value_cast<sstring>(entry.first));
-            current_values_map[attr_name] = value_cast<bytes>(entry.second);
-        }
-
-        for (const auto& [key, value] : update.cells) {
-            const auto key_str = to_string_view(key);
-            if (!value.is_live()) {
-                if (current_values_map.contains(key_str)) {
-                    return false;
-                }
-            } else if (current_values_map[key_str] != value.value().linearize()) {
-                return false;
-            }
-        }
-    }
-    return true;
-}
-
-bool should_skip(batch& changes, const mutation& base_mutation, change_processor& processor) {
-    const schema_ptr& base_schema = base_mutation.schema();
-    // Alternator doesn't use static updates and clustered range deletions.
-    if (!changes.static_updates.empty() || !changes.clustered_range_deletions.empty()) {
-        return false;
-    }
-
-    for (clustered_row_insert& u : changes.clustered_inserts) {
-        const cell_map* row_state = get_row_state(processor.clustering_row_states(), u.key);
-        if (!row_state) {
-            return false;
-        }
-        if (!entries_match_row_state(base_schema, *row_state, u.atomic_entries, u.nonatomic_entries)) {
-            return false;
-        }
-    }
-
-    for (clustered_row_update& u : changes.clustered_updates) {
-        const cell_map* row_state = get_row_state(processor.clustering_row_states(), u.key);
-        if (!row_state) {
-            return false;
-        }
-        if (!entries_match_row_state(base_schema, *row_state, u.atomic_entries, u.nonatomic_entries)) {
-            return false;
-        }
-    }
-
-    // Skip only if the row being deleted does not exist (i.e. the deletion is a no-op).
-    for (const auto& row_deletion : changes.clustered_row_deletions) {
-        if (processor.clustering_row_states().contains(row_deletion.key)) {
-            return false;
-        }
-    }
-
-    // Don't skip if the item exists.
-    //
-    // Increased DynamoDB Streams compatibility guarantees that single-item
-    // operations will read the item and store it in the clustering row states.
-    // If it is not found there, we may skip CDC. This is safe as long as the
-    // assumptions of this operation's write isolation are not violated.
-    if (changes.partition_deletions && processor.clustering_row_states().contains(clustering_key::make_empty())) {
-        return false;
-    }
-
-    cdc_log.trace("Skipping CDC log for mutation {}", base_mutation);
-    return true;
-}
-
 void process_changes_with_splitting(const mutation& base_mutation, change_processor& processor,
-        bool enable_preimage, bool enable_postimage, bool alternator_strict_compatibility) {
+        bool enable_preimage, bool enable_postimage) {
    const auto base_schema = base_mutation.schema();
    auto changes = extract_changes(base_mutation);
    auto pk = base_mutation.key();
@@ -732,10 +620,6 @@ void process_changes_with_splitting(const mutation& base_mutation, change_proces
            affected_clustered_columns_per_row = btch.get_affected_clustered_columns_per_row(*base_mutation.schema());
        }

-        if (alternator_strict_compatibility && should_skip(btch, base_mutation, processor)) {
-            continue;
-        }
-
        const bool is_last = change_ts == last_timestamp;
        processor.begin_timestamp(change_ts, is_last);
        if (enable_preimage) {
@@ -825,13 +709,7 @@ void process_changes_with_splitting(const mutation& base_mutation, change_proces
 }

 void process_changes_without_splitting(const mutation& base_mutation, change_processor& processor,
-        bool enable_preimage, bool enable_postimage, bool alternator_strict_compatibility) {
-    if (alternator_strict_compatibility) {
-        auto changes = extract_changes(base_mutation);
-        if (should_skip(changes.begin()->second, base_mutation, processor)) {
-            return;
-        }
-    }
+        bool enable_preimage, bool enable_postimage) {
    auto ts = find_timestamp(base_mutation);
    processor.begin_timestamp(ts, true);

--- a/cdc/split.hh
+++ b/cdc/split.hh
@@ -66,14 +66,12 @@ public:
    // Tells processor we have reached end of record - last part
    // of a given timestamp batch
    virtual void end_record() = 0;
-
-    virtual const row_states_map& clustering_row_states() const = 0;
 };

 bool should_split(const mutation& base_mutation, const per_request_options& options);
 void process_changes_with_splitting(const mutation& base_mutation, change_processor& processor,
-        bool enable_preimage, bool enable_postimage, bool alternator_strict_compatibility);
+        bool enable_preimage, bool enable_postimage);
 void process_changes_without_splitting(const mutation& base_mutation, change_processor& processor,
-        bool enable_preimage, bool enable_postimage, bool alternator_strict_compatibility);
+        bool enable_preimage, bool enable_postimage);

 }
--- a/compaction/compaction.cc
+++ b/compaction/compaction.cc
@@ -1355,6 +1355,35 @@ private:
            _sstables.erase(exhausted, _sstables.end());
            dynamic_cast<compaction_read_monitor_generator&>(unwrap_monitor_generator()).remove_exhausted_sstables(exhausted_ssts);
        }
+
+        // Release exhausted garbage collected sstables.
+        // A GC sstable is exhausted when it doesn't overlap with any remaining input sstable.
+        // GC sstables serve as safeguards against data resurrection: their tombstones may shadow
+        // data in not-yet-exhausted input sstables. So a GC sstable can only be released once
+        // all overlapping input sstables have been exhausted.
+        auto gc_not_exhausted = [this] (const sstables::shared_sstable& gc_sst) {
+            auto gc_range = ::wrapping_interval<dht::token>::make(
+                gc_sst->get_first_decorated_key()._token,
+                gc_sst->get_last_decorated_key()._token);
+            for (const auto& input_sst : _sstables) {
+                auto input_range = ::wrapping_interval<dht::token>::make(
+                    input_sst->get_first_decorated_key()._token,
+                    input_sst->get_last_decorated_key()._token);
+                if (gc_range.overlaps(input_range, dht::token_comparator())) {
+                    return true; // overlaps with a remaining input sstable, not exhausted yet
+                }
+            }
+            return false; // no overlap with any remaining input sstable, can be released
+        };
+        exhausted = std::partition(_used_garbage_collected_sstables.begin(), _used_garbage_collected_sstables.end(), gc_not_exhausted);
+        if (exhausted != _used_garbage_collected_sstables.end()) {
+            auto exhausted_gc_ssts = std::vector<sstables::shared_sstable>(exhausted, _used_garbage_collected_sstables.end());
+            log_debug("Releasing {} exhausted GC sstable(s) earlier: [{}]",
+                exhausted_gc_ssts.size(),
+                fmt::join(exhausted_gc_ssts | std::views::transform([] (auto sst) { return to_string(sst, true); }), ","));
+            _replacer(get_compaction_completion_desc(std::move(exhausted_gc_ssts), {}));
+            _used_garbage_collected_sstables.erase(exhausted, _used_garbage_collected_sstables.end());
+        }
    }

    void replace_remaining_exhausted_sstables() {
--- a/compaction/compaction_manager.cc
+++ b/compaction/compaction_manager.cc
@@ -1106,7 +1106,8 @@ void compaction_manager::enable() {

    _compaction_submission_timer.cancel();
    _compaction_submission_timer.arm_periodic(periodic_compaction_submission_interval());
-    _waiting_reevalution = postponed_compactions_reevaluation();
+    throwing_assert(!_waiting_reevaluation);
+    _waiting_reevaluation.emplace(postponed_compactions_reevaluation());
    cmlog.info("Enabled");
 }

@@ -1154,6 +1155,16 @@ void compaction_manager::reevaluate_postponed_compactions() noexcept {
    _postponed_reevaluation.signal();
 }

+future<> compaction_manager::stop_postponed_compactions() noexcept {
+    auto waiting_reevaluation = std::exchange(_waiting_reevaluation, std::nullopt);
+    if (!waiting_reevaluation) {
+        return make_ready_future();
+    }
+    // Signal _postponed_reevaluation so that the postponed_compactions_reevaluation() fiber can exit properly; its future is returned for the caller to await.
+    reevaluate_postponed_compactions();
+    return std::move(*waiting_reevaluation);
+}
+
 void compaction_manager::postpone_compaction_for_table(compaction_group_view* t) {
    _postponed.insert(t);
 }
@@ -1237,8 +1248,7 @@ future<> compaction_manager::drain() {
    _compaction_submission_timer.cancel();
    // Stop ongoing compactions, if the request has not been sent already and wait for them to stop.
    co_await stop_ongoing_compactions("drain");
-    // Trigger a signal to properly exit from postponed_compactions_reevaluation() fiber
-    reevaluate_postponed_compactions();
+    co_await stop_postponed_compactions();
    cmlog.info("Drained");
 }

@@ -1282,8 +1292,7 @@ future<> compaction_manager::really_do_stop() noexcept {
    if (!_tasks.empty()) {
        on_fatal_internal_error(cmlog, format("{} tasks still exist after being stopped", _tasks.size()));
    }
-    reevaluate_postponed_compactions();
-    co_await std::move(_waiting_reevalution);
+    co_await stop_postponed_compactions();
    co_await _sys_ks.close();
    _weight_tracker.clear();
    _compaction_submission_timer.cancel();
--- a/compaction/compaction_manager.hh
+++ b/compaction/compaction_manager.hh
@@ -128,7 +128,7 @@ private:
    // a sstable from being compacted twice.
    std::unordered_set<sstables::shared_sstable> _compacting_sstables;

-    future<> _waiting_reevalution = make_ready_future<>();
+    std::optional<future<>> _waiting_reevaluation;
    condition_variable _postponed_reevaluation;
    // tables that wait for compaction but had its submission postponed due to ongoing compaction.
    std::unordered_set<compaction::compaction_group_view*> _postponed;
@@ -231,6 +231,7 @@ private:

    future<> postponed_compactions_reevaluation();
    void reevaluate_postponed_compactions() noexcept;
+    future<> stop_postponed_compactions() noexcept;
    // Postpone compaction for a table that couldn't be executed due to ongoing
    // similar-sized compaction.
    void postpone_compaction_for_table(compaction::compaction_group_view* t);
--- a/compaction/task_manager_module.hh
+++ b/compaction/task_manager_module.hh
@@ -698,12 +698,13 @@ public:
    table_resharding_compaction_task_impl(tasks::task_manager::module_ptr module,
            std::string keyspace,
            std::string table,
+            tasks::task_id parent_id,
            sharded<sstables::sstable_directory>& dir,
            sharded<replica::database>& db,
            compaction_sstable_creator_fn creator,
            compaction::owned_ranges_ptr owned_ranges_ptr,
            bool vnodes_resharding) noexcept
-        : resharding_compaction_task_impl(module, tasks::task_id::create_random_id(), module->new_sequence_number(), "table", std::move(keyspace), std::move(table), "", tasks::task_id::create_null_id())
+        : resharding_compaction_task_impl(module, tasks::task_id::create_random_id(), parent_id ? 0 : module->new_sequence_number(), "table", std::move(keyspace), std::move(table), "", parent_id)
        , _dir(dir)
        , _db(db)
        , _creator(std::move(creator))
--- a/configure.py
+++ b/configure.py
@@ -1438,6 +1438,8 @@ alternator = [
       'alternator/controller.cc',
       'alternator/server.cc',
       'alternator/executor.cc',
+       'alternator/executor_read.cc',
+       'alternator/executor_util.cc',
       'alternator/stats.cc',
       'alternator/serialization.cc',
       'alternator/expressions.cc',
@@ -1723,6 +1725,7 @@ deps['test/boost/combined_tests'] += [
    'test/boost/view_schema_test.cc',
    'test/boost/virtual_reader_test.cc',
    'test/boost/virtual_table_test.cc',
+    'test/boost/vnodes_to_tablets_migration_test.cc',
    'tools/schema_loader.cc',
    'tools/read_mutation.cc',
    'test/lib/expr_test_utils.cc',
--- a/cql3/CMakeLists.txt
+++ b/cql3/CMakeLists.txt
@@ -23,7 +23,7 @@ set_property(
      $<$<CONFIG:${unoptimized_modes}>:-O1>
      # use-after-scope sanitizer also uses large amount of stack space
      # and overflows the stack of CqlParser
-      $<$<CONFIG:${sanitized_modes}>:-fsanitize-address-use-after-scope>)
+      $<$<CONFIG:${sanitized_modes}>:-fno-sanitize-address-use-after-scope>)

 add_library(cql3 STATIC)
 target_sources(cql3
--- a/cql3/Cql.g
+++ b/cql3/Cql.g
@@ -429,10 +429,10 @@ unaliasedSelector returns [uexpression tmp]
    :  ( c=cident                                  { tmp = unresolved_identifier{std::move(c)}; }
       | v=value                                   { tmp = std::move(v); }
       | K_COUNT '(' countArgument ')'             { tmp = make_count_rows_function_expression(); }
-       | K_WRITETIME '(' c=cident ')'              { tmp = column_mutation_attribute{column_mutation_attribute::attribute_kind::writetime,
-                                                                                              unresolved_identifier{std::move(c)}}; }
-       | K_TTL       '(' c=cident ')'              { tmp = column_mutation_attribute{column_mutation_attribute::attribute_kind::ttl,
-                                                                                              unresolved_identifier{std::move(c)}}; }
+       | K_WRITETIME '(' a=subscriptExpr ')'       { tmp = column_mutation_attribute{column_mutation_attribute::attribute_kind::writetime,
+                                                                                              std::move(a)}; }
+       | K_TTL       '(' a=subscriptExpr ')'       { tmp = column_mutation_attribute{column_mutation_attribute::attribute_kind::ttl,
+                                                                                              std::move(a)}; }
       | f=functionName args=selectionFunctionArgs { tmp = function_call{std::move(f), std::move(args)}; }
       | K_CAST      '(' arg=unaliasedSelector K_AS t=native_type ')'  { tmp = cast{.style = cast::cast_style::sql, .arg = std::move(arg), .type = std::move(t)}; }
       )
@@ -1794,7 +1794,9 @@ columnRefExpr returns [uexpression e]

 subscriptExpr returns [uexpression e]
    : col=columnRefExpr { e = std::move(col); }
-        ( '[' sub=term ']'  { e = subscript{std::move(e), std::move(sub)}; } )?
+        ( '[' sub=term ']'  { e = subscript{std::move(e), std::move(sub)}; }
+        | '.' fi=cident     { e = field_selection{std::move(e), std::move(fi)}; }
+        )?
    ;

 singleColumnInValuesOrMarkerExpr returns [uexpression e]
--- a/cql3/column_specification.cc
+++ b/cql3/column_specification.cc
@@ -10,6 +10,7 @@

 #include "utils/assert.hh"
 #include "cql3/column_specification.hh"
+#include "cql3/column_identifier.hh"

 namespace cql3 {

@@ -31,4 +32,12 @@ bool column_specification::all_in_same_table(const std::vector<lw_shared_ptr<col
    });
 }

+lw_shared_ptr<column_specification> make_column_spec(std::string_view ks_name, std::string_view cf_name, sstring name, data_type type) {
+    return make_lw_shared<column_specification>(
+            ks_name,
+            cf_name,
+            ::make_shared<column_identifier>(std::move(name), true),
+            std::move(type));
+}
+
 }
--- a/cql3/column_specification.hh
+++ b/cql3/column_specification.hh
@@ -42,4 +42,6 @@ public:
    static bool all_in_same_table(const std::vector<lw_shared_ptr<column_specification>>& names);
 };

+lw_shared_ptr<column_specification> make_column_spec(std::string_view ks_name, std::string_view cf_name, sstring name, data_type type);
+
 }
--- a/cql3/cql_config.hh
+++ b/cql3/cql_config.hh
@@ -11,6 +11,11 @@
 #pragma once

 #include "restrictions/restrictions_config.hh"
+#include "cql3/restrictions/replication_restrictions.hh"
+#include "cql3/restrictions/twcs_restrictions.hh"
+#include "cql3/restrictions/view_restrictions.hh"
+#include "db/tri_mode_restriction.hh"
+#include "utils/updateable_value.hh"

 namespace db { class config; }

@@ -18,9 +23,44 @@ namespace cql3 {

 struct cql_config {
    restrictions::restrictions_config restrictions;
-    explicit cql_config(const db::config& cfg) : restrictions(cfg) {}
+    replication_restrictions replication_restrictions;
+    twcs_restrictions twcs_restrictions;
+    view_restrictions view_restrictions;
+    utils::updateable_value<uint32_t> select_internal_page_size;
+    utils::updateable_value<db::tri_mode_restriction> strict_allow_filtering;
+    utils::updateable_value<bool> enable_parallelized_aggregation;
+    utils::updateable_value<uint32_t> batch_size_warn_threshold_in_kb;
+    utils::updateable_value<uint32_t> batch_size_fail_threshold_in_kb;
+    utils::updateable_value<bool> restrict_future_timestamp;
+    utils::updateable_value<bool> enable_create_table_with_compact_storage;
+
+    explicit cql_config(const db::config& cfg)
+        : restrictions(cfg)
+        , replication_restrictions(cfg)
+        , twcs_restrictions(cfg)
+        , view_restrictions(cfg)
+        , select_internal_page_size(cfg.select_internal_page_size)
+        , strict_allow_filtering(cfg.strict_allow_filtering)
+        , enable_parallelized_aggregation(cfg.enable_parallelized_aggregation)
+        , batch_size_warn_threshold_in_kb(cfg.batch_size_warn_threshold_in_kb)
+        , batch_size_fail_threshold_in_kb(cfg.batch_size_fail_threshold_in_kb)
+        , restrict_future_timestamp(cfg.restrict_future_timestamp)
+        , enable_create_table_with_compact_storage(cfg.enable_create_table_with_compact_storage)
+    {}
    struct default_tag{};
-    cql_config(default_tag) : restrictions(restrictions::restrictions_config::default_tag{}) {}
+    cql_config(default_tag)
+        : restrictions(restrictions::restrictions_config::default_tag{})
+        , replication_restrictions(replication_restrictions::default_tag{})
+        , twcs_restrictions(twcs_restrictions::default_tag{})
+        , view_restrictions(view_restrictions::default_tag{})
+        , select_internal_page_size(10000)
+        , strict_allow_filtering(db::tri_mode_restriction(db::tri_mode_restriction_t::mode::WARN))
+        , enable_parallelized_aggregation(true)
+        , batch_size_warn_threshold_in_kb(128)
+        , batch_size_fail_threshold_in_kb(1024)
+        , restrict_future_timestamp(true)
+        , enable_create_table_with_compact_storage(false)
+    {}
 };

 extern const cql_config default_cql_config;
--- a/cql3/expr/collection_cell_metadata.hh
+++ b/cql3/expr/collection_cell_metadata.hh
@@ -0,0 +1,21 @@
+// Copyright (C) 2026-present ScyllaDB
+// SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
+
+#pragma once
+
+#include <map>
+
+#include "bytes.hh"
+#include "mutation/timestamp.hh"
+
+namespace cql3::expr {
+
+// Per-element timestamps and TTLs for a cell of a map, set or UDT (populated
+// when a WRITETIME() or TTL() of col[key] or col.field appears in the query).
+// Keys are the raw serialized element keys or the serialized field index.
+struct collection_cell_metadata {
+    std::map<bytes, api::timestamp_type> timestamps;
+    std::map<bytes, int32_t> ttls; // remaining TTL in seconds (-1 if no TTL)
+};
+
+} // namespace cql3::expr
--- a/cql3/expr/evaluate.hh
+++ b/cql3/expr/evaluate.hh
@@ -3,6 +3,7 @@

 #pragma once

+#include "collection_cell_metadata.hh"
 #include "expression.hh"

 #include "bytes.hh"
@@ -27,6 +28,7 @@ struct evaluation_inputs {
    std::span<const api::timestamp_type> static_and_regular_timestamps;  // indexes match `selection` member
    std::span<const int32_t> static_and_regular_ttls;  // indexes match `selection` member
    std::span<const cql3::raw_value> temporaries; // indexes match temporary::index
+    std::span<const collection_cell_metadata> collection_element_metadata; // indexes match `selection` member
 };

 // Takes a prepared expression and calculates its value.
--- a/cql3/expr/expression.cc
+++ b/cql3/expr/expression.cc
@@ -1031,7 +1031,7 @@ expression search_and_replace(const expression& e,
                    return cast{c.style, recurse(c.arg), c.type};
                },
                [&] (const field_selection& fs) -> expression {
-                    return field_selection{recurse(fs.structure), fs.field};
+                    return field_selection{recurse(fs.structure), fs.field, fs.field_idx, fs.type};
                },
                [&] (const subscript& s) -> expression {
                    return subscript {
@@ -1206,6 +1206,58 @@ cql3::raw_value do_evaluate(const field_selection& field_select, const evaluatio
 static
 cql3::raw_value
 do_evaluate(const column_mutation_attribute& cma, const evaluation_inputs& inputs) {
+    // Helper for WRITETIME/TTL on a collection element or UDT field: given the
+    // inner column and the serialized element key, validate the index and look
+    // up the per-element timestamp or TTL in collection_element_metadata.
+    auto lookup_element_attribute = [&](const column_value* inner_col, std::string_view context, bytes key) -> cql3::raw_value {
+        int32_t index = inputs.selection->index_of(*inner_col->col);
+        if (inputs.collection_element_metadata.empty() || index < 0 || size_t(index) >= inputs.collection_element_metadata.size()) {
+            on_internal_error(expr_logger, fmt::format("evaluating column_mutation_attribute {}: column {} is not in selection",
+                context, inner_col->col->name_as_text()));
+        }
+        const auto& meta = inputs.collection_element_metadata[index];
+        switch (cma.kind) {
+        case column_mutation_attribute::attribute_kind::writetime: {
+            const auto it = meta.timestamps.find(key);
+            if (it == meta.timestamps.end()) {
+                return cql3::raw_value::make_null();
+            }
+            return raw_value::make_value(data_value(it->second).serialize());
+        }
+        case column_mutation_attribute::attribute_kind::ttl: {
+            const auto it = meta.ttls.find(key);
+            // The test it->second <= 0 (rather than < 0) matches the
+            // single-TTL check ttl_v <= 0 below.
+            if (it == meta.ttls.end() || it->second <= 0) {
+                return cql3::raw_value::make_null();
+            }
+            return raw_value::make_value(data_value(it->second).serialize());
+        }
+        }
+        on_internal_error(expr_logger, fmt::format("evaluating column_mutation_attribute {} with unexpected kind", context));
+    };
+    // Handle WRITETIME(x.field) / TTL(x.field) on a UDT field
+    if (auto fs = expr::as_if<field_selection>(&cma.column)) {
+        auto inner_col = expr::as_if<column_value>(&fs->structure);
+        if (!inner_col) {
+            on_internal_error(expr_logger, fmt::format("evaluating column_mutation_attribute field_selection: inner expression is not a column: {}", fs->structure));
+        }
+        return lookup_element_attribute(inner_col, "field_selection", serialize_field_index(fs->field_idx));
+    }
+    // Handle WRITETIME(m[key]) / TTL(m[key]) on a map element
+    if (auto sub = expr::as_if<subscript>(&cma.column)) {
+        auto inner_col = expr::as_if<column_value>(&sub->val);
+        if (!inner_col) {
+            on_internal_error(expr_logger, fmt::format("evaluating column_mutation_attribute subscript: inner expression is not a column: {}", sub->val));
+        }
+        auto evaluated_key = evaluate(sub->sub, inputs);
+        if (evaluated_key.is_null()) {
+            return cql3::raw_value::make_null();
+        }
+        return evaluated_key.view().with_linearized([&] (bytes_view key_bv) {
+            return lookup_element_attribute(inner_col, "subscript", bytes(key_bv));
+        });
+    }
    auto col = expr::as_if<column_value>(&cma.column);
    if (!col) {
        on_internal_error(expr_logger, fmt::format("evaluating column_mutation_attribute of non-column {}", cma.column));
--- a/cql3/expr/prepare_expr.cc
+++ b/cql3/expr/prepare_expr.cc
@@ -1259,6 +1259,40 @@ prepare_column_mutation_attribute(
                    receiver->type->name(), receiver->name->text()));
    }
    auto column = prepare_expression(cma.column, db, keyspace, schema_opt, nullptr);
+    // Helper for the subscript and field-selection cases below. Throws
+    // invalid_request_exception unless inner_expr is a non-primary-key column whose
+    // type satisfies type_allowed and the cluster feature flag is on.
+    auto validate_and_return =
+            [&](const expression& inner_expr, std::string_view context,
+                auto type_allowed, std::string_view type_allowed_str) -> std::optional<expression> { // context and type_allowed_str only feed the error messages
+        auto inner_cval = expr::as_if<column_value>(&inner_expr);
+        if (!inner_cval) {
+            throw exceptions::invalid_request_exception(fmt::format("{} on a {} expects a column, got {}", cma.kind, context, inner_expr));
+        }
+        if (inner_cval->col->is_primary_key()) {
+            throw exceptions::invalid_request_exception(fmt::format("{} is not legal on primary key component {}", cma.kind, inner_cval->col->name_as_text()));
+        }
+        if (!type_allowed(inner_cval->col->type)) {
+            throw exceptions::invalid_request_exception(fmt::format("{} on a {} is only valid for {}", cma.kind, context, type_allowed_str));
+        }
+        if (!db.features().writetime_ttl_individual_element) {
+            throw exceptions::invalid_request_exception(fmt::format(
+                "{} on a {} is not supported until all nodes in the cluster are upgraded", cma.kind, context));
+        }
+        return column_mutation_attribute{.kind = cma.kind, .column = std::move(column)}; // moves the enclosing `column` into the result
+    };
+    // Handle WRITETIME(m[key]) / TTL(m[key]) - a subscript into a non-frozen map or set column
+    if (auto sub = expr::as_if<subscript>(&column)) {
+        return validate_and_return(sub->val, "subscript",
+            [](const data_type& t) { return (t->is_map() || t->is_set()) && t->is_multi_cell(); },
+            "non-frozen map or set columns");
+    }
+    // Handle WRITETIME(x.field) / TTL(x.field) - a field selection into a non-frozen UDT column
+    if (auto fs = expr::as_if<field_selection>(&column)) {
+        return validate_and_return(fs->structure, "field selection",
+            [](const data_type& t) { return t->is_user_type() && t->is_multi_cell(); },
+            "non-frozen UDT columns");
+    }
    auto cval = expr::as_if<column_value>(&column);
    if (!cval) {
        throw exceptions::invalid_request_exception(fmt::format("{} expects a column, but {} is a general expression", cma.kind, column));
@@ -1654,6 +1688,12 @@ static lw_shared_ptr<column_specification> get_lhs_receiver(const expression& pr
                return list_value_spec_of(*sub_col.col->column_specification);
            }
        },
+        [&](const field_selection& fs) -> lw_shared_ptr<column_specification> {
+            return make_lw_shared<column_specification>(
+                schema.ks_name(), schema.cf_name(),
+                ::make_shared<column_identifier>(fs.field->text(), true),
+                fs.type);
+        },
        [&](const tuple_constructor& tup) -> lw_shared_ptr<column_specification> {
            std::ostringstream tuple_name;
            tuple_name << "(";
--- a/cql3/query_processor.cc
+++ b/cql3/query_processor.cc
@@ -560,6 +560,11 @@ query_processor::acquire_strongly_consistent_coordinator() {
    return {remote_.get().sc_coordinator, std::move(holder)};
 }

+service::storage_service& query_processor::storage_service() {
+    auto [remote_state, guard] = remote(); // NOTE(review): guard is released at return, so it does not protect the returned reference - confirm callers are safe
+    return remote_state.get().ss;
+}
+
 void query_processor::start_remote(service::migration_manager& mm, service::mapreduce_service& mapreducer,
                                   service::storage_service& ss, service::raft_group0_client& group0_client,
                                   service::strong_consistency::coordinator& sc_coordinator) {
@@ -786,7 +791,7 @@ query_processor::get_statement(const std::string_view& query, const service::cli
        cf_stmt->prepare_keyspace(client_state);
    }
    ++_stats.prepare_invocations;
-    auto p = statement->prepare(_db, _cql_stats);
+    auto p = statement->prepare(_db, _cql_stats, _cql_config);
    p->statement->raw_cql_statement = sstring(query);
    auto audit_info = p->statement->get_audit_info();
    if (audit_info) {
@@ -901,7 +906,7 @@ query_options query_processor::make_internal_options(
 statements::prepared_statement::checked_weak_ptr query_processor::prepare_internal(const sstring& query_string) {
    auto& p = _internal_statements[query_string];
    if (p == nullptr) {
-        auto np = parse_statement(query_string, internal_dialect())->prepare(_db, _cql_stats);
+        auto np = parse_statement(query_string, internal_dialect())->prepare(_db, _cql_stats, _cql_config);
        np->statement->raw_cql_statement = query_string;
        p = std::move(np); // inserts it into map
    }
@@ -1012,7 +1017,7 @@ query_processor::execute_internal(
        return execute_with_params(std::move(p), cl, query_state, values);
    } else {
        // For internal queries, we want the default dialect, not the user provided one
-        auto p = parse_statement(query_string, dialect{})->prepare(_db, _cql_stats);
+        auto p = parse_statement(query_string, dialect{})->prepare(_db, _cql_stats, _cql_config);
        p->statement->raw_cql_statement = query_string;
        auto checked_weak_ptr = p->checked_weak_from_this();
        return execute_with_params(std::move(checked_weak_ptr), cl, query_state, values).finally([p = std::move(p)] {});
@@ -1071,6 +1076,11 @@ query_processor::execute_batch_without_checking_exception_message(
        query_options& options,
        std::unordered_map<prepared_cache_key_type, authorized_prepared_statements_cache::value_type> pending_authorization_entries) {
    auto access_future = co_await coroutine::as_future(batch->check_access(*this, query_state.get_client_state()));
+    bool failed = access_future.failed();
+    co_await audit::inspect(batch, query_state, options, failed);
+    if (failed) {
+        std::rethrow_exception(access_future.get_exception());
+    }
    co_await coroutine::parallel_for_each(pending_authorization_entries, [this, &query_state] (auto& e) -> future<> {
            try {
                co_await _authorized_prepared_cache.insert(*query_state.get_client_state().user(), e.first, std::move(e.second));
@@ -1078,11 +1088,6 @@ query_processor::execute_batch_without_checking_exception_message(
                log.error("failed to cache the entry: {}", std::current_exception());
            }
        });
-    bool failed = access_future.failed();
-    co_await audit::inspect(batch, query_state, options, failed);
-    if (access_future.failed()) {
-        std::rethrow_exception(access_future.get_exception());
-    }
    batch->validate();
    batch->validate(*this, query_state.get_client_state());
    _stats.queries_by_cl[size_t(options.get_consistency())] += batch->get_statements().size();
--- a/cql3/query_processor.hh
+++ b/cql3/query_processor.hh
@@ -209,6 +209,8 @@ public:
        return _proxy;
    }

+    service::storage_service& storage_service();
+
    std::pair<std::reference_wrapper<service::strong_consistency::coordinator>, gate::holder>
    acquire_strongly_consistent_coordinator();

--- a/cql3/restrictions/replication_restrictions.hh
+++ b/cql3/restrictions/replication_restrictions.hh
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2026-present ScyllaDB
+ */
+
+/*
+ * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
+ */
+
+#pragma once
+
+#include "db/config.hh"
+#include "utils/updateable_value.hh"
+
+namespace cql3 {
+
+struct replication_restrictions { // replication-related settings, each initialized from the same-named db::config entry
+    utils::updateable_value<db::tri_mode_restriction> restrict_replication_simplestrategy;
+    utils::updateable_value<std::vector<enum_option<db::replication_strategy_restriction_t>>> replication_strategy_warn_list;
+    utils::updateable_value<std::vector<enum_option<db::replication_strategy_restriction_t>>> replication_strategy_fail_list;
+    utils::updateable_value<int> minimum_replication_factor_fail_threshold;
+    utils::updateable_value<int> minimum_replication_factor_warn_threshold;
+    utils::updateable_value<int> maximum_replication_factor_fail_threshold;
+    utils::updateable_value<int> maximum_replication_factor_warn_threshold;
+
+    explicit replication_restrictions(const db::config& cfg) // initializes every member from the matching cfg entry
+        : restrict_replication_simplestrategy(cfg.restrict_replication_simplestrategy)
+        , replication_strategy_warn_list(cfg.replication_strategy_warn_list)
+        , replication_strategy_fail_list(cfg.replication_strategy_fail_list)
+        , minimum_replication_factor_fail_threshold(cfg.minimum_replication_factor_fail_threshold)
+        , minimum_replication_factor_warn_threshold(cfg.minimum_replication_factor_warn_threshold)
+        , maximum_replication_factor_fail_threshold(cfg.maximum_replication_factor_fail_threshold)
+        , maximum_replication_factor_warn_threshold(cfg.maximum_replication_factor_warn_threshold)
+    {}
+
+    struct default_tag{}; // tag type selecting the constructor below, which needs no db::config
+    replication_restrictions(default_tag) // hard-coded values; NOTE(review): presumably mirror the db::config defaults - confirm
+        : restrict_replication_simplestrategy(db::tri_mode_restriction(db::tri_mode_restriction_t::mode::FALSE))
+        , replication_strategy_warn_list(std::vector<enum_option<db::replication_strategy_restriction_t>>{})
+        , replication_strategy_fail_list(std::vector<enum_option<db::replication_strategy_restriction_t>>{})
+        , minimum_replication_factor_fail_threshold(-1)
+        , minimum_replication_factor_warn_threshold(3)
+        , maximum_replication_factor_fail_threshold(-1)
+        , maximum_replication_factor_warn_threshold(-1)
+    {}
+};
+
+} // namespace cql3
--- a/cql3/restrictions/twcs_restrictions.hh
+++ b/cql3/restrictions/twcs_restrictions.hh
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2026-present ScyllaDB
+ */
+
+/*
+ * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
+ */
+
+#pragma once
+
+#include "db/config.hh"
+#include "utils/updateable_value.hh"
+
+namespace cql3 {
+
+struct twcs_restrictions { // TWCS-related settings, each initialized from the same-named db::config entry
+    utils::updateable_value<uint32_t> twcs_max_window_count;
+    utils::updateable_value<db::tri_mode_restriction> restrict_twcs_without_default_ttl;
+
+    explicit twcs_restrictions(const db::config& cfg) // initializes both members from the matching cfg entry
+        : twcs_max_window_count(cfg.twcs_max_window_count)
+        , restrict_twcs_without_default_ttl(cfg.restrict_twcs_without_default_ttl)
+    {}
+
+    struct default_tag{}; // tag type selecting the constructor below, which needs no db::config
+    twcs_restrictions(default_tag) // hard-coded values; NOTE(review): presumably mirror the db::config defaults - confirm
+        : twcs_max_window_count(10000)
+        , restrict_twcs_without_default_ttl(db::tri_mode_restriction(db::tri_mode_restriction_t::mode::WARN))
+    {}
+};
+
+} // namespace cql3
--- a/cql3/restrictions/view_restrictions.hh
+++ b/cql3/restrictions/view_restrictions.hh
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2019-present ScyllaDB
+ */
+
+/*
+ * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
+ */
+
+#pragma once
+
+#include "db/config.hh"
+#include "db/tri_mode_restriction.hh"
+#include "utils/updateable_value.hh"
+
+namespace db { class config; }
+
+namespace cql3 {
+
+struct view_restrictions { // view-related setting, initialized from the same-named db::config entry
+    utils::updateable_value<db::tri_mode_restriction> strict_is_not_null_in_views;
+
+    explicit view_restrictions(const db::config& cfg) // initializes the member from the matching cfg entry
+        : strict_is_not_null_in_views(cfg.strict_is_not_null_in_views)
+    {}
+
+    struct default_tag{}; // tag type selecting the constructor below, which needs no db::config
+    view_restrictions(default_tag) // hard-coded value; NOTE(review): presumably mirrors the db::config default - confirm
+        : strict_is_not_null_in_views(db::tri_mode_restriction(db::tri_mode_restriction_t::mode::WARN))
+    {}
+};
+
+}
--- a/cql3/selection/selection.cc
+++ b/cql3/selection/selection.cc
@@ -17,6 +17,7 @@
 #include "cql3/expr/expr-utils.hh"
 #include "cql3/functions/first_function.hh"
 #include "cql3/functions/aggregate_fcts.hh"
+#include "types/types.hh"

 #include <ranges>

@@ -31,12 +32,14 @@ selection::selection(schema_ptr schema,
    std::vector<lw_shared_ptr<column_specification>> metadata_,
    bool collect_timestamps,
    bool collect_TTLs,
+    bool collect_collection_timestamps,
    trivial is_trivial)
        : _schema(std::move(schema))
        , _columns(std::move(columns))
        , _metadata(::make_shared<metadata>(std::move(metadata_)))
        , _collect_timestamps(collect_timestamps)
        , _collect_TTLs(collect_TTLs)
+        , _collect_collection_timestamps(collect_collection_timestamps)
        , _contains_static_columns(std::any_of(_columns.begin(), _columns.end(), std::mem_fn(&column_definition::is_static)))
        , _is_trivial(is_trivial)
 { }
@@ -46,6 +49,7 @@ query::partition_slice::option_set selection::get_query_options() {

    opts.set_if<query::partition_slice::option::send_timestamp>(_collect_timestamps);
    opts.set_if<query::partition_slice::option::send_expiry>(_collect_TTLs);
+    opts.set_if<query::partition_slice::option::send_collection_timestamps>(_collect_collection_timestamps);

    opts.set_if<query::partition_slice::option::send_partition_key>(
        std::any_of(_columns.begin(), _columns.end(),
@@ -114,7 +118,7 @@ public:
     */
    simple_selection(schema_ptr schema, std::vector<const column_definition*> columns,
        std::vector<lw_shared_ptr<column_specification>> metadata, bool is_wildcard)
-            : selection(schema, std::move(columns), std::move(metadata), false, false, trivial::yes)
+            : selection(schema, std::move(columns), std::move(metadata), false, false, false, trivial::yes)
            , _is_wildcard(is_wildcard)
    { }

@@ -178,6 +182,12 @@ contains_column_mutation_attribute(expr::column_mutation_attribute::attribute_ki
    });
 }

+static bool contains_collection_mutation_attribute(const expr::expression& e) {
+    // Looks for a column_mutation_attribute in `e` whose target is a subscript or a field selection.
+    auto targets_element = [](const expr::column_mutation_attribute& attr) { return expr::is<expr::subscript>(attr.column) || expr::is<expr::field_selection>(attr.column); };
+    return expr::find_in_expression<expr::column_mutation_attribute>(e, targets_element);
+}
+
 static
 bool
 contains_writetime(const expr::expression& e) {
@@ -202,7 +212,8 @@ public:
            std::vector<expr::expression> selectors)
        : selection(schema, std::move(columns), std::move(metadata),
            contains_writetime(expr::tuple_constructor{selectors}),
-            contains_ttl(expr::tuple_constructor{selectors}))
+            contains_ttl(expr::tuple_constructor{selectors}),
+            contains_collection_mutation_attribute(expr::tuple_constructor{selectors}))
        , _selectors(std::move(selectors))
    {
        auto agg_split = expr::split_aggregation(_selectors);
@@ -391,6 +402,7 @@ protected:
                    .static_and_regular_timestamps = rs._timestamps,
                    .static_and_regular_ttls = rs._ttls,
                    .temporaries = {},
+                    .collection_element_metadata = rs._collection_element_metadata,
            };
            for (auto&& e : _sel._selectors) {
                auto out = expr::evaluate(e, inputs);
@@ -429,6 +441,7 @@ protected:
                    .static_and_regular_timestamps = rs._timestamps,
                    .static_and_regular_ttls = rs._ttls,
                    .temporaries = _temporaries,
+                    .collection_element_metadata = rs._collection_element_metadata,
            };
            for (size_t i = 0; i != _sel._inner_loop.size(); ++i) {
                _temporaries[i] = expr::evaluate(_sel._inner_loop[i], inputs);
@@ -553,6 +566,9 @@ result_set_builder::result_set_builder(const selection& s, gc_clock::time_point
    if (s._collect_TTLs) {
        _ttls.resize(s._columns.size(), 0);
    }
+    if (s._collect_collection_timestamps) {
+        _collection_element_metadata.resize(s._columns.size());
+    }
 }

 void result_set_builder::add_empty() {
@@ -563,6 +579,9 @@ void result_set_builder::add_empty() {
    if (!_ttls.empty()) {
        _ttls[current.size() - 1] = -1;
    }
+    if (!_collection_element_metadata.empty()) {
+        _collection_element_metadata[current.size() - 1] = {};
+    }
 }

 void result_set_builder::add(bytes_opt value) {
@@ -585,8 +604,45 @@ void result_set_builder::add(const column_definition& def, const query::result_a
 }

 void result_set_builder::add_collection(const column_definition& def, bytes_view c) {
+    size_t col_idx = current.size(); // index the value appended below will occupy in `current`
+    if (!_collection_element_metadata.empty()) {
+        // Extended format produced by serialize_for_cql_with_timestamps():
+        // [uint32 cql_len][cql bytes][int32 entry_count]
+        // followed by entry_count entries, each:
+        //   [int32 key_len][key bytes][int64 timestamp][int64 expiry_raw]
+        // where expiry_raw is -1 if the element does not expire, otherwise
+        // it is the serialized gc_clock time used to derive the remaining
+        // TTL. The flag _collect_collection_timestamps = true determines
+        // whether this extended format is used (instead of a plain CQL
+        // collection blob), and it is only enabled when a feature flag
+        // guarantees both reader and writer support it.
+        uint32_t cql_len = read_simple<uint32_t>(c);
+        bytes_view cql_bytes = read_simple_bytes(c, cql_len);
+        current.emplace_back(to_bytes(cql_bytes));
+
+        auto& meta = _collection_element_metadata[col_idx];
+        meta = {};  // clear stale data from previous row
+        int32_t entry_count = read_simple<int32_t>(c);
+        for (int32_t i = 0; i < entry_count; ++i) {
+            int32_t key_len = read_simple<int32_t>(c);
+            bytes key = to_bytes(read_simple_bytes(c, key_len));
+            int64_t ts = read_simple<int64_t>(c);
+            int64_t expiry_raw = read_simple<int64_t>(c);
+            meta.timestamps[key] = ts;
+            if (expiry_raw != -1) {
+                auto expiry = gc_clock::time_point(gc_clock::duration(expiry_raw));
+                auto ttl_left = expiry - _now;
+                int32_t ttl = int32_t(ttl_left.count()); // remaining TTL in gc_clock ticks, narrowed to int32
+                if (ttl > 0) {
+                    meta.ttls[key] = ttl;
+                }
+                // Already-expired elements (ttl <= 0), like elements without an expiry,
+                // get no map entry - a missing key is treated as null by the evaluator.
+            }
+        }
+        return;
+    }
     current.emplace_back(to_bytes(c));
-    // timestamps, ttls meaningless for collections
 }

 void result_set_builder::update_last_group() {
--- a/cql3/selection/selection.hh
+++ b/cql3/selection/selection.hh
@@ -12,6 +12,7 @@

 #include "utils/assert.hh"
 #include "bytes.hh"
+#include "cql3/expr/collection_cell_metadata.hh"
 #include "schema/schema_fwd.hh"
 #include "query/query-result-reader.hh"
 #include "selector.hh"
@@ -69,6 +70,7 @@ private:
    ::shared_ptr<metadata> _metadata;
    const bool _collect_timestamps;
    const bool _collect_TTLs;
+    const bool _collect_collection_timestamps;
    const bool _contains_static_columns;
    bool _is_trivial;
 protected:
@@ -78,7 +80,9 @@ protected:
        std::vector<const column_definition*> columns,
        std::vector<lw_shared_ptr<column_specification>> metadata_,
        bool collect_timestamps,
-        bool collect_TTLs, trivial is_trivial = trivial::no);
+        bool collect_TTLs,
+        bool collect_collection_timestamps,
+        trivial is_trivial = trivial::no);

    virtual ~selection() {}
 public:
@@ -197,6 +201,7 @@ public:
    std::vector<bytes> current_clustering_key;
    std::vector<api::timestamp_type> _timestamps;
    std::vector<int32_t> _ttls;
+    std::vector<cql3::expr::collection_cell_metadata> _collection_element_metadata;
    const query_options* _options;
 private:
    const gc_clock::time_point _now;
--- a/cql3/statements/alter_keyspace_statement.cc
+++ b/cql3/statements/alter_keyspace_statement.cc
@@ -27,6 +27,7 @@
 #include "data_dictionary/data_dictionary.hh"
 #include "data_dictionary/keyspace_metadata.hh"
 #include "cql3/query_processor.hh"
+#include "cql3/cql_config.hh"
 #include "cql3/statements/ks_prop_defs.hh"
 #include "create_keyspace_statement.hh"
 #include "gms/feature_service.hh"
@@ -260,14 +261,14 @@ cql3::statements::alter_keyspace_statement::prepare_schema_mutations(query_proce
 }

 std::unique_ptr<cql3::statements::prepared_statement>
-cql3::statements::alter_keyspace_statement::prepare(data_dictionary::database db, cql_stats& stats) {
+cql3::statements::alter_keyspace_statement::prepare(data_dictionary::database db, cql_stats& stats, const cql_config& cfg) {
    return std::make_unique<prepared_statement>(audit_info(), make_shared<alter_keyspace_statement>(*this));
 }


 future<::shared_ptr<cql_transport::messages::result_message>>
 cql3::statements::alter_keyspace_statement::execute(query_processor& qp, service::query_state& state, const query_options& options, std::optional<service::group0_guard> guard) const {
-    std::vector<sstring> warnings = check_against_restricted_replication_strategies(qp, keyspace(), *_attrs, qp.get_cql_stats());
+    std::vector<sstring> warnings = check_against_restricted_replication_strategies(qp, keyspace(), *_attrs, qp.get_cql_stats(), qp.get_cql_config().replication_restrictions);
    return schema_altering_statement::execute(qp, state, options, std::move(guard)).then([warnings = std::move(warnings)] (::shared_ptr<messages::result_message> msg) {
        for (const auto& warning : warnings) {
            msg->add_warning(warning);
--- a/cql3/statements/alter_keyspace_statement.hh
+++ b/cql3/statements/alter_keyspace_statement.hh
@@ -37,7 +37,7 @@ public:
    future<> check_access(query_processor& qp, const service::client_state& state) const override;
    void validate(query_processor& qp, const service::client_state& state) const override;
    virtual future<std::tuple<::shared_ptr<event_t>, cql3::cql_warnings_vec>> prepare_schema_mutations(query_processor& qp, service::query_state& state, const query_options& options, service::group0_batch& mc) const override;
-    virtual std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats) override;
+    virtual std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats, const cql_config& cfg) override;
    virtual future<::shared_ptr<messages::result_message>> execute(query_processor& qp, service::query_state& state, const query_options& options, std::optional<service::group0_guard> guard) const override;
    bool changes_tablets(query_processor& qp) const;
 };
--- a/cql3/statements/alter_role_statement.hh
+++ b/cql3/statements/alter_role_statement.hh
@@ -33,7 +33,7 @@ public:
                , _options(std::move(options)) {
    }

-    std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats) override;
+    std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats, const cql_config& cfg) override;

    virtual future<> check_access(query_processor& qp, const service::client_state&) const override;

--- a/cql3/statements/alter_service_level_statement.cc
+++ b/cql3/statements/alter_service_level_statement.cc
@@ -25,7 +25,7 @@ alter_service_level_statement::alter_service_level_statement(sstring service_lev

 std::unique_ptr<cql3::statements::prepared_statement>
 cql3::statements::alter_service_level_statement::prepare(
-        data_dictionary::database db, cql_stats &stats) {
+        data_dictionary::database db, cql_stats &stats, const cql_config& cfg) {
    return std::make_unique<prepared_statement>(audit_info(), ::make_shared<alter_service_level_statement>(*this));
 }

--- a/cql3/statements/alter_service_level_statement.hh
+++ b/cql3/statements/alter_service_level_statement.hh
@@ -23,7 +23,7 @@ class alter_service_level_statement final : public service_level_statement {

 public:
    alter_service_level_statement(sstring service_level, shared_ptr<sl_prop_defs> attrs);
-    std::unique_ptr<cql3::statements::prepared_statement> prepare(data_dictionary::database db, cql_stats &stats) override;
+    std::unique_ptr<cql3::statements::prepared_statement> prepare(data_dictionary::database db, cql_stats &stats, const cql_config& cfg) override;
    virtual future<> check_access(query_processor& qp, const service::client_state&) const override;
    virtual future<::shared_ptr<cql_transport::messages::result_message>>
    execute(query_processor&, service::query_state&, const query_options&, std::optional<service::group0_guard> guard) const override;
--- a/cql3/statements/alter_table_statement.cc
+++ b/cql3/statements/alter_table_statement.cc
@@ -14,6 +14,7 @@
 #include "utils/assert.hh"
 #include <seastar/core/coroutine.hh>
 #include "cql3/query_options.hh"
+#include "cql3/cql_config.hh"
 #include "cql3/statements/alter_table_statement.hh"
 #include "cql3/statements/alter_type_statement.hh"
 #include "exceptions/exceptions.hh"
@@ -560,7 +561,7 @@ alter_table_statement::prepare_schema_mutations(query_processor& qp, const query
 }

 std::unique_ptr<cql3::statements::prepared_statement>
-cql3::statements::alter_table_statement::prepare(data_dictionary::database db, cql_stats& stats) {
+cql3::statements::alter_table_statement::prepare(data_dictionary::database db, cql_stats& stats, const cql_config& cfg) {
    // Cannot happen; alter_table_statement is never instantiated as a raw statement
    // (instead we instantiate alter_table_statement::raw_statement)
    utils::on_internal_error("alter_table_statement cannot be prepared. Use alter_table_statement::raw_statement instead");
@@ -589,10 +590,10 @@ alter_table_statement::raw_statement::raw_statement(cf_name name,
    {}

 std::unique_ptr<cql3::statements::prepared_statement>
-alter_table_statement::raw_statement::prepare(data_dictionary::database db, cql_stats& stats) {
+alter_table_statement::raw_statement::prepare(data_dictionary::database db, cql_stats& stats, const cql_config& cfg) {
    auto t = db.try_find_table(keyspace(), column_family());
    std::optional<schema_ptr> s = t ? std::make_optional(t->schema()) : std::nullopt;
-    std::optional<sstring> warning = check_restricted_table_properties(db, s, keyspace(), column_family(), *_properties);
+    std::optional<sstring> warning = check_restricted_table_properties(s, keyspace(), column_family(), *_properties, cfg.twcs_restrictions);
    if (warning) {
        // FIXME: should this warning be returned to the caller?
        // See https://github.com/scylladb/scylladb/issues/20945
--- a/cql3/statements/alter_table_statement.hh
+++ b/cql3/statements/alter_table_statement.hh
@@ -64,7 +64,7 @@ public:

    virtual uint32_t get_bound_terms() const override;
    virtual future<> check_access(query_processor& qp, const service::client_state& state) const override;
-    virtual std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats) override;
+    virtual std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats, const cql_config& cfg) override;
    virtual future<::shared_ptr<messages::result_message>> execute(query_processor& qp, service::query_state& state, const query_options& options, std::optional<service::group0_guard> guard) const override;

    future<std::tuple<::shared_ptr<cql_transport::event::schema_change>, utils::chunked_vector<mutation>, cql3::cql_warnings_vec>> prepare_schema_mutations(query_processor& qp, const query_options& options, api::timestamp_type) const override;
@@ -92,7 +92,7 @@ public:
                  std::unique_ptr<attributes::raw> attrs,
                  shared_ptr<column_identifier::raw> ttl_change);
    
-    virtual std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats) override;
+    virtual std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats, const cql_config& cfg) override;

    virtual audit::statement_category category() const override { return audit::statement_category::DDL; }
 };
--- a/cql3/statements/alter_type_statement.cc
+++ b/cql3/statements/alter_type_statement.cc
@@ -209,12 +209,12 @@ user_type alter_type_statement::renames::make_updated_type(data_dictionary::data
 }

 std::unique_ptr<cql3::statements::prepared_statement>
-alter_type_statement::add_or_alter::prepare(data_dictionary::database db, cql_stats& stats) {
+alter_type_statement::add_or_alter::prepare(data_dictionary::database db, cql_stats& stats, const cql_config& cfg) {
    return std::make_unique<prepared_statement>(audit_info(), make_shared<alter_type_statement::add_or_alter>(*this));
 }

 std::unique_ptr<cql3::statements::prepared_statement>
-alter_type_statement::renames::prepare(data_dictionary::database db, cql_stats& stats) {
+alter_type_statement::renames::prepare(data_dictionary::database db, cql_stats& stats, const cql_config& cfg) {
    return std::make_unique<prepared_statement>(audit_info(), make_shared<alter_type_statement::renames>(*this));
 }

--- a/cql3/statements/alter_type_statement.hh
+++ b/cql3/statements/alter_type_statement.hh
@@ -54,7 +54,7 @@ public:
                 const shared_ptr<column_identifier> field_name,
                 const shared_ptr<cql3_type::raw> field_type);
    virtual user_type make_updated_type(data_dictionary::database db, user_type to_update) const override;
-    virtual std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats) override;
+    virtual std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats, const cql_config& cfg) override;
 private:
    user_type do_add(data_dictionary::database db, user_type to_update) const;
    user_type do_alter(data_dictionary::database db, user_type to_update) const;
@@ -71,7 +71,7 @@ public:
    void add_rename(shared_ptr<column_identifier> previous_name, shared_ptr<column_identifier> new_name);

    virtual user_type make_updated_type(data_dictionary::database db, user_type to_update) const override;
-    virtual std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats) override;
+    virtual std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats, const cql_config& cfg) override;
 };

 }
--- a/cql3/statements/alter_view_statement.cc
+++ b/cql3/statements/alter_view_statement.cc
@@ -98,7 +98,7 @@ future<std::tuple<::shared_ptr<cql_transport::event::schema_change>, utils::chun
 }

 std::unique_ptr<cql3::statements::prepared_statement>
-alter_view_statement::prepare(data_dictionary::database db, cql_stats& stats) {
+alter_view_statement::prepare(data_dictionary::database db, cql_stats& stats, const cql_config& cfg) {
    return std::make_unique<prepared_statement>(audit_info(), make_shared<alter_view_statement>(*this));
 }

--- a/cql3/statements/alter_view_statement.hh
+++ b/cql3/statements/alter_view_statement.hh
@@ -35,7 +35,7 @@ public:

    future<std::tuple<::shared_ptr<cql_transport::event::schema_change>, utils::chunked_vector<mutation>, cql3::cql_warnings_vec>> prepare_schema_mutations(query_processor& qp, const query_options& options, api::timestamp_type) const override;

-    virtual std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats) override;
+    virtual std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats, const cql_config& cfg) override;
 };

 }
--- a/cql3/statements/attach_service_level_statement.cc
+++ b/cql3/statements/attach_service_level_statement.cc
@@ -30,7 +30,7 @@ bool attach_service_level_statement::needs_guard(query_processor& qp, service::q

 std::unique_ptr<cql3::statements::prepared_statement>
 cql3::statements::attach_service_level_statement::prepare(
-        data_dictionary::database db, cql_stats &stats) {
+        data_dictionary::database db, cql_stats &stats, const cql_config& cfg) {
    return std::make_unique<prepared_statement>(audit_info(), ::make_shared<attach_service_level_statement>(*this));
 }

--- a/cql3/statements/attach_service_level_statement.hh
+++ b/cql3/statements/attach_service_level_statement.hh
@@ -22,7 +22,7 @@ class attach_service_level_statement final : public service_level_statement {
 public:
    attach_service_level_statement(sstring service_level, sstring role_name);
    virtual bool needs_guard(query_processor& qp, service::query_state&) const override;
-    std::unique_ptr<cql3::statements::prepared_statement> prepare(data_dictionary::database db, cql_stats &stats) override;
+    std::unique_ptr<cql3::statements::prepared_statement> prepare(data_dictionary::database db, cql_stats &stats, const cql_config& cfg) override;
    virtual future<> check_access(query_processor& qp, const service::client_state&) const override;
    virtual future<::shared_ptr<cql_transport::messages::result_message>>
    execute(query_processor&, service::query_state&, const query_options&, std::optional<service::group0_guard> guard) const override;
--- a/cql3/statements/batch_statement.cc
+++ b/cql3/statements/batch_statement.cc
@@ -10,7 +10,7 @@
 #include "batch_statement.hh"
 #include "cql3/util.hh"
 #include "raw/batch_statement.hh"
-#include "db/config.hh"
+#include "cql3/cql_config.hh"
 #include "db/consistency_level_validations.hh"
 #include "data_dictionary/data_dictionary.hh"
 #include <seastar/core/execution_stage.hh>
@@ -195,8 +195,8 @@ void batch_statement::verify_batch_size(query_processor& qp, const utils::chunke
        return;     // We only warn for batch spanning multiple mutations
    }

-    size_t warn_threshold = qp.db().get_config().batch_size_warn_threshold_in_kb() * 1024;
-    size_t fail_threshold = qp.db().get_config().batch_size_fail_threshold_in_kb() * 1024;
+    size_t warn_threshold = qp.get_cql_config().batch_size_warn_threshold_in_kb() * 1024;
+    size_t fail_threshold = qp.get_cql_config().batch_size_fail_threshold_in_kb() * 1024;

    size_t size = 0;
    for (auto&m : mutations) {
@@ -242,7 +242,7 @@ future<shared_ptr<cql_transport::messages::result_message>> batch_statement::exe

 future<shared_ptr<cql_transport::messages::result_message>> batch_statement::execute_without_checking_exception_message(
        query_processor& qp, service::query_state& state, const query_options& options, std::optional<service::group0_guard> guard) const {
-    cql3::util::validate_timestamp(qp.db().get_config(), options, _attrs);
+    cql3::util::validate_timestamp(qp.get_cql_config(), options, _attrs);
    return batch_stage(this, seastar::ref(qp), seastar::ref(state),
                       seastar::cref(options), false, options.get_timestamp(state));
 }
@@ -441,7 +441,7 @@ void batch_statement::build_cas_result_set_metadata() {
 namespace raw {

 std::unique_ptr<prepared_statement>
-batch_statement::prepare(data_dictionary::database db, cql_stats& stats) {
+batch_statement::prepare(data_dictionary::database db, cql_stats& stats, const cql_config& cfg) {
    auto&& meta = get_prepare_context();

    std::optional<sstring> first_ks;
--- a/cql3/statements/cf_prop_defs.cc
+++ b/cql3/statements/cf_prop_defs.cc
@@ -197,7 +197,7 @@ void cf_prop_defs::validate(const data_dictionary::database db, sstring ks_name,
        if (!db.features().tablet_options) {
            throw exceptions::configuration_exception("tablet options cannot be used until all nodes in the cluster enable this feature");
        }
-        db::tablet_options::validate(*tablet_options_map);
+        db::tablet_options::validate(*tablet_options_map, db.features());
    }

    if (has_property(KW_STORAGE_ENGINE)) {
@@ -206,9 +206,6 @@ void cf_prop_defs::validate(const data_dictionary::database db, sstring ks_name,
            if (!db.features().logstor) {
                throw exceptions::configuration_exception(format("The experimental feature 'logstor' must be enabled in order to use the 'logstor' storage engine."));
            }
-            if (!db.get_config().enable_logstor()) {
-                throw exceptions::configuration_exception(format("The configuration option 'enable_logstor' must be set to true in the configuration in order to use the 'logstor' storage engine."));
-            }
        } else {
            throw exceptions::configuration_exception(format("Illegal value for '{}'", KW_STORAGE_ENGINE));
        }
--- a/cql3/statements/create_aggregate_statement.cc
+++ b/cql3/statements/create_aggregate_statement.cc
@@ -78,7 +78,7 @@ seastar::future<shared_ptr<db::functions::function>> create_aggregate_statement:
    co_return ::make_shared<functions::user_aggregate>(_name, initcond, std::move(state_func), std::move(reduce_func), std::move(final_func));
 }

-std::unique_ptr<prepared_statement> create_aggregate_statement::prepare(data_dictionary::database db, cql_stats& stats) {
+std::unique_ptr<prepared_statement> create_aggregate_statement::prepare(data_dictionary::database db, cql_stats& stats, const cql_config& cfg) {
    return std::make_unique<prepared_statement>(audit_info(), make_shared<create_aggregate_statement>(*this));
 }

--- a/cql3/statements/create_aggregate_statement.hh
+++ b/cql3/statements/create_aggregate_statement.hh
@@ -24,7 +24,7 @@ namespace functions {
 namespace statements {

 class create_aggregate_statement final : public create_function_statement_base {
-    virtual std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats) override;
+    virtual std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats, const cql_config& cfg) override;
    future<std::tuple<::shared_ptr<cql_transport::event::schema_change>, utils::chunked_vector<mutation>, cql3::cql_warnings_vec>> prepare_schema_mutations(query_processor& qp, const query_options& options, api::timestamp_type) const override;
    virtual future<> check_access(query_processor& qp, const service::client_state& state) const override;

--- a/cql3/statements/create_function_statement.cc
+++ b/cql3/statements/create_function_statement.cc
@@ -54,7 +54,7 @@ create_function_statement::audit_info() const {
    return audit::audit::create_audit_info(category(), sstring(), sstring());
 }

-std::unique_ptr<prepared_statement> create_function_statement::prepare(data_dictionary::database db, cql_stats& stats) {
+std::unique_ptr<prepared_statement> create_function_statement::prepare(data_dictionary::database db, cql_stats& stats, const cql_config& cfg) {
    return std::make_unique<prepared_statement>(audit_info(), make_shared<create_function_statement>(*this));
 }

--- a/Show More
+++ b/Show More