diff --git a/alternator/CMakeLists.txt b/alternator/CMakeLists.txt
index 4cbe0691e4..80edc23bc1 100644
--- a/alternator/CMakeLists.txt
+++ b/alternator/CMakeLists.txt
@@ -9,6 +9,7 @@ target_sources(alternator
     controller.cc
     server.cc
     executor.cc
+    executor_read.cc
     stats.cc
     serialization.cc
     expressions.cc
diff --git a/alternator/attribute_path.hh b/alternator/attribute_path.hh
new file mode 100644
index 0000000000..8ae753c6ad
--- /dev/null
+++ b/alternator/attribute_path.hh
@@ -0,0 +1,253 @@
+/*
+ * Copyright 2019-present ScyllaDB
+ */
+
+/*
+ * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
+ */
+
+#pragma once
+
+#include <map>
+#include <memory>
+#include <optional>
+#include <string>
+#include <unordered_map>
+#include <variant>
+
+#include "utils/rjson.hh"
+#include "utils/overloaded_functor.hh"
+#include "alternator/error.hh"
+#include "alternator/expressions_types.hh"
+
+namespace alternator {
+
+// An attribute_path_map object is used to hold data for various attributes
+// paths (parsed::path) in a hierarchy of attribute paths. Each attribute path
+// has a root attribute, which is then modified by member and index operators -
+// for example in "a.b[2].c" we have "a" as the root, then ".b" member, then
+// "[2]" index, and finally ".c" member.
+// Data can be added to an attribute_path_map using attribute_path_map_add(), but
+// requires that attributes with data not be *overlapping* or *conflicting*:
+//
+// 1. Two attribute paths which are identical or an ancestor of one another
+//    are considered *overlapping* and not allowed. If a.b.c has data,
+//    we can't add more data in a.b.c or any of its descendants like a.b.c.d.
+//
+// 2. Two attribute paths which need the same parent to have both a member and
+//    an index are considered *conflicting* and not allowed. E.g., if a.b has
+//    data, you can't add a[1]. The meaning of adding both would be that the
+//    attribute a is both a map and an array, which isn't sensible.
+//
+// These two requirements are common to the two places where Alternator uses
+// this abstraction to describe how a hierarchical item is to be transformed:
+//
+// 1. In ProjectionExpression: for keeping, out of a full top-level attribute,
+//    only the parts which the user asked for in ProjectionExpression.
+//
+// 2. In UpdateExpression: for taking the previous value of a top-level
+//    attribute, and modifying it based on the instructions the user
+//    wrote in UpdateExpression.
+
+template<typename T>
+class attribute_path_map_node {
+public:
+    using data_t = T;
+    // We need the extra unique_ptr<> here because libstdc++ unordered_map
+    // doesn't work with incomplete types :-(
+    using members_t =  std::unordered_map<std::string, std::unique_ptr<attribute_path_map_node<T>>>;
+    // The indexes list is sorted because DynamoDB requires handling writes
+    // beyond the end of a list in index order.
+    using indexes_t = std::map<unsigned, std::unique_ptr<attribute_path_map_node<T>>>;
+    // The prohibition on "overlap" and "conflict" explained above means
+    // that a node holds at most one of data, members or indexes (or nothing yet).
+    std::optional<std::variant<data_t, members_t, indexes_t>> _content;
+
+    bool is_empty() const { return !_content; }
+    bool has_value() const { return _content && std::holds_alternative<data_t>(*_content); }
+    bool has_members() const { return _content && std::holds_alternative<members_t>(*_content); }
+    bool has_indexes() const { return _content && std::holds_alternative<indexes_t>(*_content); }
+    // Each get_*() accessor below assumes that the matching has_*() is true.
+    members_t& get_members() { return std::get<members_t>(*_content); }
+    const members_t& get_members() const { return std::get<members_t>(*_content); }
+    indexes_t& get_indexes() { return std::get<indexes_t>(*_content); }
+    const indexes_t& get_indexes() const { return std::get<indexes_t>(*_content); }
+    T& get_value() { return std::get<T>(*_content); }
+    const T& get_value() const { return std::get<T>(*_content); }
+};
+
+template<typename T>
+using attribute_path_map = std::unordered_map<std::string, attribute_path_map_node<T>>;
+
+using attrs_to_get_node = attribute_path_map_node<std::monostate>;
+// attrs_to_get lists which top-level attributes are needed, and possibly also
+// which part of the top-level attribute is really needed (when nested
+// attribute paths appeared in the query).
+// Most code actually uses optional<attrs_to_get>. There, a disengaged
+// optional means we should get all attributes, not specific ones.
+using attrs_to_get = attribute_path_map<std::monostate>;
+
+// hierarchy_filter() takes a given JSON value and drops the parts of it which
+// the hierarchy h did not ask to keep. It modifies the given JSON value in
+// place, or returns false to signify that the entire object should be dropped.
+// Note that the JSON value is assumed to be encoded using the DynamoDB
+// conventions - i.e., it is a map whose single key is a type string and whose value is the real object.
+// Throws api_error::internal if val is not an object with exactly one member.
+template<typename T>
+bool hierarchy_filter(rjson::value& val, const attribute_path_map_node<T>& h) {
+    if (!val.IsObject() || val.MemberCount() != 1) {
+        // This shouldn't happen. We shouldn't have stored malformed objects.
+        // But today Alternator does not validate the structure of nested
+        // documents before storing them, so this can happen on read.
+        throw api_error::internal(format("Malformed value object read: {}", val));
+    }
+    const char* type = val.MemberBegin()->name.GetString();
+    rjson::value& v = val.MemberBegin()->value;
+    if (h.has_members()) {
+        const auto& members = h.get_members();
+        if (type[0] != 'M' || !v.IsObject()) {
+            // If v is not an object (dictionary, map), none of the members
+            // can match.
+            return false;
+        }
+        rjson::value newv = rjson::empty_object();
+        for (auto it = v.MemberBegin(); it != v.MemberEnd(); ++it) {
+            std::string attr = rjson::to_string(it->name);
+            auto x = members.find(attr);
+            if (x != members.end()) {
+                if (x->second) {
+                    // Only a part of this attribute is to be filtered, do it.
+                    if (hierarchy_filter(it->value, *x->second)) {
+                        // Because newv started empty and the attr names are unique
+                        // (keys of v), we can use add_with_string_name() here
+                        rjson::add_with_string_name(newv, attr, std::move(it->value));
+                    }
+                } else {
+                    // The entire attribute is to be kept
+                    rjson::add_with_string_name(newv, attr, std::move(it->value));
+                }
+            }
+        }
+        if (newv.MemberCount() == 0) {
+            return false;
+        }
+        v = newv;
+    } else if (h.has_indexes()) {
+        const auto& indexes = h.get_indexes();
+        if (type[0] != 'L' || !v.IsArray()) {
+            return false;
+        }
+        rjson::value newv = rjson::empty_array();
+        const auto& a = v.GetArray();
+        for (unsigned i = 0; i < v.Size(); i++) {
+            auto x = indexes.find(i);
+            if (x != indexes.end()) {
+                if (x->second) {
+                    if (hierarchy_filter(a[i], *x->second)) {
+                        rjson::push_back(newv, std::move(a[i]));
+                    }
+                } else {
+                    // The entire list element is to be kept
+                    rjson::push_back(newv, std::move(a[i]));
+                }
+            }
+        }
+        if (newv.Size() == 0) {
+            return false;
+        }
+        v = newv;
+    }
+    return true;
+}
+
+// Add a path to an attribute_path_map. Throws a validation error (whose message
+// names "source") if the path "overlaps" with one already in the map (one is a
+// sub-path of the other) or "conflicts" with it (both a member and an index are requested).
+template<typename T>
+void attribute_path_map_add(const char* source, attribute_path_map<T>& map, const parsed::path& p, T value = {}) {
+   using node = attribute_path_map_node<T>;
+    // The first step is to look for the top-level attribute (p.root()):
+    auto it = map.find(p.root());
+    if (it == map.end()) {
+        if (p.has_operators()) {
+            it = map.emplace(p.root(), node {std::nullopt}).first;
+        } else {
+            (void) map.emplace(p.root(), node {std::move(value)}).first;
+            // Value inserted for top-level node. We're done.
+            return;
+        }
+    } else if(!p.has_operators()) {
+        // If p is top-level and we already have it or a part of it
+        // in map, it's a forbidden overlapping path.
+        throw api_error::validation(fmt::format(
+            "Invalid {}: two document paths overlap at {}", source, p.root()));
+    } else if (it->second.has_value()) {
+        // If we're here, it != map.end() && p.has_operators() && it->second.has_value().
+        // This means the top-level attribute already has a value, and we're
+        // trying to add a non-top-level value. It's an overlap.
+        throw api_error::validation(fmt::format("Invalid {}: two document paths overlap at {}", source, p.root()));
+    }
+    node* h = &it->second;
+    // The second step is to walk h from the top-level node to the inner node
+    // where we're supposed to insert the value, creating missing nodes on the way:
+    for (const auto& op : p.operators()) {
+        std::visit(overloaded_functor {
+            [&] (const std::string& member) {
+                if (h->is_empty()) {
+                    *h = node {typename node::members_t()};
+                } else if (h->has_indexes()) {
+                    throw api_error::validation(format("Invalid {}: two document paths conflict at {}", source, p));
+                } else if (h->has_value()) {
+                    throw api_error::validation(format("Invalid {}: two document paths overlap at {}", source, p));
+                }
+                typename node::members_t& members = h->get_members();
+                auto it = members.find(member);
+                if (it == members.end()) {
+                    it = members.insert({member, std::make_unique<node>()}).first;
+                }
+                h = it->second.get();
+            },
+            [&] (unsigned index) {
+                if (h->is_empty()) {
+                    *h = node {typename node::indexes_t()};
+                } else if (h->has_members()) {
+                    throw api_error::validation(format("Invalid {}: two document paths conflict at {}", source, p));
+                } else if (h->has_value()) {
+                    throw api_error::validation(format("Invalid {}: two document paths overlap at {}", source, p));
+                }
+                typename node::indexes_t& indexes = h->get_indexes();
+                auto it = indexes.find(index);
+                if (it == indexes.end()) {
+                    it = indexes.insert({index, std::make_unique<node>()}).first;
+                }
+                h = it->second.get();
+            }
+        }, op);
+    }
+    // Finally, insert the value in the node h. A non-empty h is an overlap.
+    if (h->is_empty()) {
+        *h = node {std::move(value)};
+    } else {
+        throw api_error::validation(format("Invalid {}: two document paths overlap at {}", source, p));
+    }
+}
+
+// A very simplified version of the above function for the special case of
+// adding only a top-level attribute. It's not only simpler, we also use a
+// different error message, referring to a "duplicate attribute" instead of
+// "overlapping paths". DynamoDB also has this distinction (errors in
+// AttributesToGet refer to duplicates, not overlaps, but errors in
+// ProjectionExpression refer to overlap - even if it's an exact duplicate). Throws a validation error if attr is already in map.
+template<typename T>
+void attribute_path_map_add(const char* source, attribute_path_map<T>& map, const std::string& attr, T value = {}) {
+   using node = attribute_path_map_node<T>;
+    auto it = map.find(attr);
+    if (it == map.end()) {
+        map.emplace(attr, node {std::move(value)});
+    } else {
+        throw api_error::validation(fmt::format(
+            "Invalid {}: Duplicate attribute: {}", source, attr));
+    }
+}
+
+} // namespace alternator
diff --git a/alternator/executor.cc b/alternator/executor.cc
index 95de4b7232..b4c151b13f 100644
--- a/alternator/executor.cc
+++ b/alternator/executor.cc
@@ -71,8 +71,6 @@
 
 using namespace std::chrono_literals;
 
-logging::logger elogger("alternator-executor");
-
 namespace std {
     template <> struct hash<std::pair<sstring, sstring>> {
         size_t operator () (const std::pair<sstring, sstring>& p) const {
@@ -83,6 +81,8 @@ namespace std {
 
 namespace alternator {
 
+logging::logger elogger("alternator-executor");
+
 // Alternator-specific table properties stored as hidden table tags:
 //
 // Alternator doesn't keep its own records of which Alternator tables exist
@@ -180,7 +180,7 @@ void executor::maybe_audit(
 static lw_shared_ptr<keyspace_metadata> create_keyspace_metadata(std::string_view keyspace_name, service::storage_proxy& sp, gms::gossiper& gossiper, api::timestamp_type,
         const std::map<sstring, sstring>& tags_map, const gms::feature_service& feat, const db::tablets_mode_t::mode tablets_mode);
 
-static map_type attrs_type() {
+map_type attrs_type() {
     static thread_local auto t = map_type_impl::get_instance(utf8_type, bytes_type, true);
     return t;
 }
@@ -192,7 +192,7 @@ static const column_definition& attrs_column(const schema& schema) {
 }
 
 
-static lw_shared_ptr<stats> get_stats_from_schema(service::storage_proxy& sp, const schema& schema) {
+lw_shared_ptr<stats> get_stats_from_schema(service::storage_proxy& sp, const schema& schema) {
     try {
         replica::table& table = sp.local_db().find_column_family(schema.id());
         if (!table.get_stats().alternator_stats) {
@@ -223,35 +223,6 @@ executor::body_writer make_streamed(rjson::value&& value) {
     };
 }
 
-// make_streamed_with_extra_array() is variant of make_streamed() above, which
-// builds a streaming response (a function writing to an output stream) from a
-// JSON object (rjson::value) but adds to it at the end an additional array.
-// The extra array is given a separate chunked_vector to avoid putting it
-// inside the rjson::value - because RapidJSON does contiguous allocations for
-// arrays which we want to avoid for potentially long arrays in Query/Scan
-// responses (see #23535).
-// If we ever fix RapidJSON to avoid contiguous allocations for arrays, or
-// replace it entirely (#24458), we can remove this function and the function
-// rjson::print_with_extra_array() which it calls.
-executor::body_writer make_streamed_with_extra_array(rjson::value&& value,
-    std::string array_name, utils::chunked_vector<rjson::value>&& array) {
-    return [value = std::move(value), array_name = std::move(array_name), array = std::move(array)](output_stream<char>&& _out) mutable -> future<> {
-        auto out = std::move(_out);
-        std::exception_ptr ex;
-        try {
-            co_await rjson::print_with_extra_array(value, array_name, array, out);
-        } catch (...) {
-            ex = std::current_exception();
-        }
-        co_await out.close();
-        co_await rjson::destroy_gently(std::move(value));
-        // TODO: can/should we also destroy the array gently?
-        if (ex) {
-            co_await coroutine::return_exception_ptr(std::move(ex));
-        }
-    };
-}
-
 // This function throws api_error::validation if input value is not an object.
 static void validate_is_object(const rjson::value& value, const char* caller) {
     if (!value.IsObject()) {
@@ -473,7 +444,7 @@ static void validate_cdc_log_name_length(std::string_view table_name) {
 // instead of each component individually as DynamoDB does.
 // The view_name() function assumes the table_name has already been validated
 // but validates the legality of index_name and the combination of both.
-static std::string view_name(std::string_view table_name, std::string_view index_name, const std::string& delim = ":", bool validate_len = true) {
+std::string view_name(std::string_view table_name, std::string_view index_name, const std::string& delim, bool validate_len) {
     if (index_name.length() < 3) {
         throw api_error::validation("IndexName must be at least 3 characters long");
     }
@@ -490,11 +461,11 @@ static std::string view_name(std::string_view table_name, std::string_view index
     return ret;
 }
 
-static std::string gsi_name(std::string_view table_name, std::string_view index_name, bool validate_len = true) {
+std::string gsi_name(std::string_view table_name, std::string_view index_name, bool validate_len) {
     return view_name(table_name, index_name, ":", validate_len);
 }
 
-static std::string lsi_name(std::string_view table_name, std::string_view index_name, bool validate_len = true) {
+std::string lsi_name(std::string_view table_name, std::string_view index_name, bool validate_len) {
     return view_name(table_name, index_name, "!:", validate_len);
 }
 
@@ -515,7 +486,7 @@ static std::optional<std::string> find_table_name(const rjson::value& request) {
     return table_name;
 }
 
-static std::string get_table_name(const rjson::value& request) {
+std::string get_table_name(const rjson::value& request) {
     auto name = find_table_name(request);
     if (!name) {
         throw api_error::validation("Missing TableName field in request");
@@ -568,7 +539,7 @@ schema_ptr get_table(service::storage_proxy& proxy, const rjson::value& request)
 // or an exception if it doesn't exist. Otherwise, if table_name does not
 // start with INTERNAL_TABLE_PREFIX, this function returns an empty schema_ptr
 // and the caller should look for a normal Alternator table with that name.
-static schema_ptr try_get_internal_table(data_dictionary::database db, std::string_view table_name) {
+schema_ptr try_get_internal_table(data_dictionary::database db, std::string_view table_name) {
     size_t it = table_name.find(executor::INTERNAL_TABLE_PREFIX);
     if (it != 0) {
         return schema_ptr{};
@@ -596,64 +567,11 @@ static schema_ptr try_get_internal_table(data_dictionary::database db, std::stri
         }
 }
 
-// get_table_or_view() is similar to to get_table(), except it returns either
-// a table or a materialized view from which to read, based on the TableName
-// and optional IndexName in the request. Only requests like Query and Scan
-// which allow IndexName should use this function.
-enum class table_or_view_type { base, lsi, gsi, vector_index };
-static std::pair<schema_ptr, table_or_view_type>
-get_table_or_view(service::storage_proxy& proxy, const rjson::value& request) {
-    table_or_view_type type = table_or_view_type::base;
-    std::string table_name = get_table_name(request);
-
-    if (schema_ptr s = try_get_internal_table(proxy.data_dictionary(), table_name)) {
-        return {s, type};
-    }
-
-    std::string keyspace_name = executor::KEYSPACE_NAME_PREFIX + table_name;
-    const rjson::value* index_name = rjson::find(request, "IndexName");
-    std::string orig_table_name;
-    if (index_name) {
-        if (index_name->IsString()) {
-            orig_table_name = std::move(table_name);
-            table_name = view_name(orig_table_name, rjson::to_string_view(*index_name));
-            type = table_or_view_type::gsi;
-        } else {
-            throw api_error::validation(
-                    fmt::format("Non-string IndexName '{}'", rjson::to_string_view(*index_name)));
-        }
-        // If no tables for global indexes were found, the index may be local
-        if (!proxy.data_dictionary().has_schema(keyspace_name, table_name)) {
-            type = table_or_view_type::lsi;
-            table_name = lsi_name(orig_table_name, rjson::to_string_view(*index_name));
-        }
-    }
-
-    try {
-        return { proxy.data_dictionary().find_schema(keyspace_name, table_name), type };
-    } catch(data_dictionary::no_such_column_family&) {
-        if (index_name) {
-            // DynamoDB returns a different error depending on whether the
-            // base table doesn't exist (ResourceNotFoundException) or it
-            // does exist but the index does not (ValidationException).
-            if (proxy.data_dictionary().has_schema(keyspace_name, orig_table_name)) {
-                throw api_error::validation(
-                    fmt::format("Requested resource not found: Index '{}' for table '{}'", rjson::to_string_view(*index_name), orig_table_name));
-            } else {
-                throw api_error::resource_not_found(
-                    fmt::format("Requested resource not found: Table: {} not found", orig_table_name));
-            }
-        } else {
-            throw api_error::resource_not_found(
-                fmt::format("Requested resource not found: Table: {} not found", table_name));
-        }
-    }
-}
-
 // get_table_for_write() is similar to get_table(), but additionally, if the
 // configuration allows this, may also allow writing to system table with
-// prefix INTERNAL_TABLE_PREFIX. This is analogous to the function
-// get_table_or_view() above which allows *reading* internal tables.
+// prefix INTERNAL_TABLE_PREFIX. See also get_table_or_view() in
+// executor_read.cc which allows *reading* internal tables by the Query
+// operation.
 static schema_ptr get_table_for_write(service::storage_proxy& proxy, const rjson::value& request) {
     std::string table_name = get_table_name(request);
     if (schema_ptr s = try_get_internal_table(proxy.data_dictionary(), table_name)) {
@@ -685,7 +603,7 @@ static std::string get_string_attribute(const rjson::value& value, std::string_v
 // Convenience function for getting the value of a boolean attribute, or a
 // default value if it is missing. If the attribute exists, but is not a
 // bool, a descriptive api_error is thrown.
-static bool get_bool_attribute(const rjson::value& value, std::string_view attribute_name, bool default_return) {
+bool get_bool_attribute(const rjson::value& value, std::string_view attribute_name, bool default_return) {
     const rjson::value* attribute_value = rjson::find(value, attribute_name);
     if (!attribute_value) {
         return default_return;
@@ -700,7 +618,7 @@ static bool get_bool_attribute(const rjson::value& value, std::string_view attri
 // Convenience function for getting the value of an integer attribute, or
 // an empty optional if it is missing. If the attribute exists, but is not
 // an integer, a descriptive api_error is thrown.
-static std::optional<int> get_int_attribute(const rjson::value& value, std::string_view attribute_name) {
+std::optional<int> get_int_attribute(const rjson::value& value, std::string_view attribute_name) {
     const rjson::value* attribute_value = rjson::find(value, attribute_name);
     if (!attribute_value)
         return {};
@@ -2730,7 +2648,7 @@ public:
 // After calling pk_from_json() and ck_from_json() to extract the pk and ck
 // components of a key, and if that succeeded, call check_key() to further
 // check that the key doesn't have any spurious components.
-static void check_key(const rjson::value& key, const schema_ptr& schema) {
+void check_key(const rjson::value& key, const schema_ptr& schema) {
     if (key.MemberCount() != (schema->clustering_key_size() == 0 ? 1 : 2)) {
         throw api_error::validation("Given key attribute not in schema");
     }
@@ -3397,7 +3315,7 @@ static bool check_needs_read_before_write(const parsed::condition_expression& co
 
 // Fail the expression if it has unused attribute names or values. This is
 // how DynamoDB behaves, so we do too.
-static void verify_all_are_used(const rjson::value* field,
+void verify_all_are_used(const rjson::value* field,
         const std::unordered_set<std::string>& used, const char* field_name, const char* operation) {
     if (!field) {
         return;
@@ -3639,7 +3557,7 @@ future<executor::request_return_type> executor::delete_item(client_state& client
     co_return res;
 }
 
-static schema_ptr get_table_from_batch_request(const service::storage_proxy& proxy, const rjson::value::ConstMemberIterator& batch_request) {
+schema_ptr get_table_from_batch_request(const service::storage_proxy& proxy, const rjson::value::ConstMemberIterator& batch_request) {
     sstring table_name = rjson::to_sstring(batch_request->name); // JSON keys are always strings
     try {
         return proxy.data_dictionary().find_schema(sstring(executor::KEYSPACE_NAME_PREFIX) + table_name, table_name);
@@ -4024,280 +3942,6 @@ static const std::string_view get_item_type_string(const rjson::value& v) {
     return rjson::to_string_view(mem.name);
 }
 
-// attrs_to_get saves for each top-level attribute an attrs_to_get_node,
-// a hierarchy of subparts that need to be kept. The following function
-// takes a given JSON value and drops its parts which weren't asked to be
-// kept. It modifies the given JSON value, or returns false to signify that
-// the entire object should be dropped.
-// Note that The JSON value is assumed to be encoded using the DynamoDB
-// conventions - i.e., it is really a map whose key has a type string,
-// and the value is the real object.
-template<typename T>
-static bool hierarchy_filter(rjson::value& val, const attribute_path_map_node<T>& h) {
-    if (!val.IsObject() || val.MemberCount() != 1) {
-        // This shouldn't happen. We shouldn't have stored malformed objects.
-        // But today Alternator does not validate the structure of nested
-        // documents before storing them, so this can happen on read.
-        throw api_error::internal(format("Malformed value object read: {}", val));
-    }
-    const char* type = val.MemberBegin()->name.GetString();
-    rjson::value& v = val.MemberBegin()->value;
-    if (h.has_members()) {
-        const auto& members = h.get_members();
-        if (type[0] != 'M' || !v.IsObject()) {
-            // If v is not an object (dictionary, map), none of the members
-            // can match.
-            return false;
-        }
-        rjson::value newv = rjson::empty_object();
-        for (auto it = v.MemberBegin(); it != v.MemberEnd(); ++it) {
-            std::string attr = rjson::to_string(it->name);
-            auto x = members.find(attr);
-            if (x != members.end()) {
-                if (x->second) {
-                    // Only a part of this attribute is to be filtered, do it.
-                    if (hierarchy_filter(it->value, *x->second)) {
-                        // because newv started empty and attr are unique
-                        // (keys of v), we can use add() here
-                        rjson::add_with_string_name(newv, attr, std::move(it->value));
-                    }
-                } else {
-                    // The entire attribute is to be kept
-                    rjson::add_with_string_name(newv, attr, std::move(it->value));
-                }
-            }
-        }
-        if (newv.MemberCount() == 0) {
-            return false;
-        }
-        v = newv;
-    } else if (h.has_indexes()) {
-        const auto& indexes = h.get_indexes();
-        if (type[0] != 'L' || !v.IsArray()) {
-            return false;
-        }
-        rjson::value newv = rjson::empty_array();
-        const auto& a = v.GetArray();
-        for (unsigned i = 0; i < v.Size(); i++) {
-            auto x = indexes.find(i);
-            if (x != indexes.end()) {
-                if (x->second) {
-                    if (hierarchy_filter(a[i], *x->second)) {
-                        rjson::push_back(newv, std::move(a[i]));
-                    }
-                } else {
-                    // The entire attribute is to be kept
-                    rjson::push_back(newv, std::move(a[i]));
-                }
-            }
-        }
-        if (newv.Size() == 0) {
-            return false;
-        }
-        v = newv;
-    }
-    return true;
-}
-
-// Add a path to an attribute_path_map. Throws a validation error if the path
-// "overlaps" with one already in the filter (one is a sub-path of the other)
-// or "conflicts" with it (both a member and index is requested).
-template<typename T>
-void attribute_path_map_add(const char* source, attribute_path_map<T>& map, const parsed::path& p, T value = {}) {
-   using node = attribute_path_map_node<T>;
-    // The first step is to look for the top-level attribute (p.root()):
-    auto it = map.find(p.root());
-    if (it == map.end()) {
-        if (p.has_operators()) {
-            it = map.emplace(p.root(), node {std::nullopt}).first;
-        } else {
-            (void) map.emplace(p.root(), node {std::move(value)}).first;
-            // Value inserted for top-level node. We're done.
-            return;
-        }
-    } else if(!p.has_operators()) {
-        // If p is top-level and we already have it or a part of it
-        // in map, it's a forbidden overlapping path.
-        throw api_error::validation(fmt::format(
-            "Invalid {}: two document paths overlap at {}", source, p.root()));
-    } else if (it->second.has_value()) {
-        // If we're here, it != map.end() && p.has_operators && it->second.has_value().
-        // This means the top-level attribute already has a value, and we're
-        // trying to add a non-top-level value. It's an overlap.
-        throw api_error::validation(fmt::format("Invalid {}: two document paths overlap at {}", source, p.root()));
-    }
-    node* h = &it->second;
-    // The second step is to walk h from the top-level node to the inner node
-    // where we're supposed to insert the value:
-    for (const auto& op : p.operators()) {
-        std::visit(overloaded_functor {
-            [&] (const std::string& member) {
-                if (h->is_empty()) {
-                    *h = node {typename node::members_t()};
-                } else if (h->has_indexes()) {
-                    throw api_error::validation(format("Invalid {}: two document paths conflict at {}", source, p));
-                } else if (h->has_value()) {
-                    throw api_error::validation(format("Invalid {}: two document paths overlap at {}", source, p));
-                }
-                typename node::members_t& members = h->get_members();
-                auto it = members.find(member);
-                if (it == members.end()) {
-                    it = members.insert({member, std::make_unique<node>()}).first;
-                }
-                h = it->second.get();
-            },
-            [&] (unsigned index) {
-                if (h->is_empty()) {
-                    *h = node {typename node::indexes_t()};
-                } else if (h->has_members()) {
-                    throw api_error::validation(format("Invalid {}: two document paths conflict at {}", source, p));
-                } else if (h->has_value()) {
-                    throw api_error::validation(format("Invalid {}: two document paths overlap at {}", source, p));
-                }
-                typename node::indexes_t& indexes = h->get_indexes();
-                auto it = indexes.find(index);
-                if (it == indexes.end()) {
-                    it = indexes.insert({index, std::make_unique<node>()}).first;
-                }
-                h = it->second.get();
-            }
-        }, op);
-    }
-    // Finally, insert the value in the node h.
-    if (h->is_empty()) {
-        *h = node {std::move(value)};
-    } else {
-        throw api_error::validation(format("Invalid {}: two document paths overlap at {}", source, p));
-    }
-}
-
-// A very simplified version of the above function for the special case of
-// adding only top-level attribute. It's not only simpler, we also use a
-// different error message, referring to a "duplicate attribute"instead of
-// "overlapping paths". DynamoDB also has this distinction (errors in
-// AttributesToGet refer to duplicates, not overlaps, but errors in
-// ProjectionExpression refer to overlap - even if it's an exact duplicate).
-template<typename T>
-void attribute_path_map_add(const char* source, attribute_path_map<T>& map, const std::string& attr, T value = {}) {
-   using node = attribute_path_map_node<T>;
-    auto it = map.find(attr);
-    if (it == map.end()) {
-        map.emplace(attr, node {std::move(value)});
-    } else {
-        throw api_error::validation(fmt::format(
-            "Invalid {}: Duplicate attribute: {}", source, attr));
-    }
-}
-
-// Parse the "Select" parameter of a Scan or Query operation, throwing a
-// ValidationException in various forbidden combinations of options and
-// finally returning one of three options:
-// 1. regular - the default scan behavior of returning all or specific
-//    attributes ("ALL_ATTRIBUTES" or "SPECIFIC_ATTRIBUTES").
-// 2. count - just count the items ("COUNT")
-// 3. projection - return projected attributes ("ALL_PROJECTED_ATTRIBUTES")
-// An ValidationException is thrown when recognizing an invalid combination
-// of options - such as ALL_PROJECTED_ATTRIBUTES for a base table, or
-// SPECIFIC_ATTRIBUTES without ProjectionExpression or AttributesToGet.
-enum class select_type { regular, count, projection };
-static select_type parse_select(const rjson::value& request, table_or_view_type table_type) {
-    const rjson::value* select_value = rjson::find(request, "Select");
-    if (!select_value) {
-        // If "Select" is not specified, it defaults to ALL_ATTRIBUTES
-        // on a base table or vector index, or ALL_PROJECTED_ATTRIBUTES on GSI/LSI.
-        return (table_type == table_or_view_type::base || table_type == table_or_view_type::vector_index) ?
-            select_type::regular : select_type::projection;
-    }
-    if (!select_value->IsString()) {
-        throw api_error::validation("Select parameter must be a string");
-    }
-    std::string_view select = rjson::to_string_view(*select_value);
-    const bool has_attributes_to_get = request.HasMember("AttributesToGet");
-    const bool has_projection_expression = request.HasMember("ProjectionExpression");
-    if (select == "SPECIFIC_ATTRIBUTES") {
-        if (has_projection_expression || has_attributes_to_get) {
-            return select_type::regular;
-        }
-        throw api_error::validation("Select=SPECIFIC_ATTRIBUTES requires AttributesToGet or ProjectionExpression");
-    }
-    if (has_projection_expression || has_attributes_to_get) {
-        throw api_error::validation("AttributesToGet or ProjectionExpression require Select to be either SPECIFIC_ATTRIBUTES or missing");
-    }
-    if (select == "COUNT") {
-        return select_type::count;
-    }
-    if (select == "ALL_ATTRIBUTES") {
-        // FIXME: when we support projections (#5036), if this is a GSI and
-        // not all attributes are projected to it, we should throw.
-        return select_type::regular;
-    }
-    if (select == "ALL_PROJECTED_ATTRIBUTES") {
-        if (table_type == table_or_view_type::base) {
-            throw api_error::validation("ALL_PROJECTED_ATTRIBUTES only allowed for indexes");
-        }
-        return select_type::projection;
-    }
-    throw api_error::validation(fmt::format("Unknown Select value '{}'. Allowed choices: ALL_ATTRIBUTES, SPECIFIC_ATTRIBUTES, ALL_PROJECTED_ATTRIBUTES, COUNT",
-        select));
-}
-
-// calculate_attrs_to_get() takes either AttributesToGet or
-// ProjectionExpression parameters (having both is *not* allowed),
-// and returns the list of cells we need to read, or a disengaged optional
-// when *all* attributes are to be returned.
-// However, in our current implementation, only top-level attributes are
-// stored as separate cells - a nested document is stored serialized together
-// (as JSON) in the same cell. So this function return a map - each key is the
-// top-level attribute we will need need to read, and the value for each
-// top-level attribute is the partial hierarchy (struct hierarchy_filter)
-// that we will need to extract from that serialized JSON.
-// For example, if ProjectionExpression lists a.b and a.c[2], we
-// return one top-level attribute name, "a", with the value "{b, c[2]}".
-
-static std::optional<attrs_to_get> calculate_attrs_to_get(const rjson::value& req, parsed::expression_cache& parsed_expression_cache, std::unordered_set<std::string>& used_attribute_names, select_type select = select_type::regular) {
-    if (select == select_type::count) {
-        // An empty map asks to retrieve no attributes. Note that this is
-        // different from a disengaged optional which means retrieve all.
-        return attrs_to_get();
-    }
-    // FIXME: also need to handle select_type::projection
-    const bool has_attributes_to_get = req.HasMember("AttributesToGet");
-    const bool has_projection_expression = req.HasMember("ProjectionExpression");
-    if (has_attributes_to_get && has_projection_expression) {
-        throw api_error::validation(
-                format("GetItem does not allow both ProjectionExpression and AttributesToGet to be given together"));
-    }
-    if (has_attributes_to_get) {
-        const rjson::value& attributes_to_get = req["AttributesToGet"];
-        attrs_to_get ret;
-        for (auto it = attributes_to_get.Begin(); it != attributes_to_get.End(); ++it) {
-            attribute_path_map_add("AttributesToGet", ret, rjson::to_string(*it));
-            validate_attr_name_length("AttributesToGet", it->GetStringLength(), false);
-        }
-        if (ret.empty()) {
-            throw api_error::validation("Empty AttributesToGet is not allowed. Consider using Select=COUNT instead.");
-        }
-        return ret;
-    } else if (has_projection_expression) {
-        const rjson::value& projection_expression = req["ProjectionExpression"];
-        const rjson::value* expression_attribute_names = rjson::find(req, "ExpressionAttributeNames");
-        std::vector<parsed::path> paths_to_get;
-        try {
-            paths_to_get = parsed_expression_cache.parse_projection_expression(rjson::to_string_view(projection_expression));
-        } catch(expressions_syntax_error& e) {
-            throw api_error::validation(e.what());
-        }
-        resolve_projection_expression(paths_to_get, expression_attribute_names, used_attribute_names);
-        attrs_to_get ret;
-        for (const parsed::path& p : paths_to_get) {
-            attribute_path_map_add("ProjectionExpression", ret, p);
-        }
-        return ret;
-    }
-    // An disengaged optional asks to read everything
-    return std::nullopt;
-}
-
 /**
  * Helper routine to extract data when we already have
  * row, etc etc.
@@ -4409,29 +4053,6 @@ std::optional<rjson::value> executor::describe_single_item(schema_ptr schema,
     return item;
 }
 
-future<std::vector<rjson::value>> executor::describe_multi_item(schema_ptr schema,
-        const query::partition_slice&& slice,
-        shared_ptr<cql3::selection::selection> selection,
-        foreign_ptr<lw_shared_ptr<query::result>> query_result,
-        shared_ptr<const std::optional<attrs_to_get>> attrs_to_get,
-        noncopyable_function<void(uint64_t)> item_callback) {
-    cql3::selection::result_set_builder builder(*selection, gc_clock::now());
-    query::result_view::consume(*query_result, slice, cql3::selection::result_set_builder::visitor(builder, *schema, *selection));
-    auto result_set = builder.build();
-    std::vector<rjson::value> ret;
-    for (auto& result_row : result_set->rows()) {
-        rjson::value item = rjson::empty_object();
-        uint64_t item_length_in_bytes = 0;
-        describe_single_item(*selection, result_row, *attrs_to_get, item, &item_length_in_bytes);
-        if (item_callback) {
-            item_callback(item_length_in_bytes);
-        }
-        ret.push_back(std::move(item));
-        co_await coroutine::maybe_yield();
-    }
-    co_return ret;
-}
-
 static bool check_needs_read_before_write(const parsed::value& v) {
     return std::visit(overloaded_functor {
         [&] (const parsed::constant& c) -> bool {
@@ -5219,106 +4840,6 @@ future<executor::request_return_type> executor::update_item(client_state& client
     co_return res;
 }
 
-// Check according to the request's "ConsistentRead" field, which consistency
-// level we need to use for the read. The field can be True for strongly
-// consistent reads, or False for eventually consistent reads, or if this
-// field is absence, we default to eventually consistent reads.
-// In Scylla, eventually-consistent reads are implemented as consistency
-// level LOCAL_ONE, and strongly-consistent reads as LOCAL_QUORUM.
-static db::consistency_level get_read_consistency(const rjson::value& request) {
-    const rjson::value* consistent_read_value = rjson::find(request, "ConsistentRead");
-    bool consistent_read = false;
-    if (consistent_read_value && !consistent_read_value->IsNull()) {
-        if (consistent_read_value->IsBool()) {
-            consistent_read = consistent_read_value->GetBool();
-        } else {
-            throw api_error::validation("ConsistentRead flag must be a boolean");
-        }
-    }
-    return consistent_read ? db::consistency_level::LOCAL_QUORUM : db::consistency_level::LOCAL_ONE;
-}
-
-// describe_item() wraps the result of describe_single_item() by a map
-// as needed by the GetItem request. It should not be used for other purposes,
-// use describe_single_item() instead.
-static rjson::value describe_item(schema_ptr schema,
-        const query::partition_slice& slice,
-        const cql3::selection::selection& selection,
-        const query::result& query_result,
-        const std::optional<attrs_to_get>& attrs_to_get,
-        consumed_capacity_counter& consumed_capacity,
-        uint64_t& metric) {
-    std::optional<rjson::value> opt_item = executor::describe_single_item(std::move(schema), slice, selection, std::move(query_result), attrs_to_get, &consumed_capacity._total_bytes);
-    rjson::value item_descr = rjson::empty_object();
-    if (opt_item) {
-        rjson::add(item_descr, "Item", std::move(*opt_item));
-    }
-    consumed_capacity.add_consumed_capacity_to_response_if_needed(item_descr);
-    metric += consumed_capacity.get_half_units();
-    return item_descr;
-}
-
-future<executor::request_return_type> executor::get_item(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
-    _stats.api_operations.get_item++;
-    auto start_time = std::chrono::steady_clock::now();
-    elogger.trace("Getting item {}", request);
-
-    schema_ptr schema = get_table(_proxy, request);
-    lw_shared_ptr<stats> per_table_stats = get_stats_from_schema(_proxy, *schema);
-    per_table_stats->api_operations.get_item++;
-    tracing::add_alternator_table_name(trace_state, schema->cf_name());
-
-    rjson::value& query_key = request["Key"];
-    db::consistency_level cl = get_read_consistency(request);
-
-    maybe_audit(audit_info, audit::statement_category::QUERY, schema->ks_name(), schema->cf_name(), "GetItem", request, cl);
-
-    partition_key pk = pk_from_json(query_key, schema);
-    dht::partition_range_vector partition_ranges{dht::partition_range(dht::decorate_key(*schema, pk))};
-
-    std::vector<query::clustering_range> bounds;
-    if (schema->clustering_key_size() == 0) {
-        bounds.push_back(query::clustering_range::make_open_ended_both_sides());
-    } else {
-        clustering_key ck = ck_from_json(query_key, schema);
-        bounds.push_back(query::clustering_range::make_singular(std::move(ck)));
-    }
-    check_key(query_key, schema);
-
-    //TODO(sarna): It would be better to fetch only some attributes of the map, not all
-    auto regular_columns =
-            schema->regular_columns() | std::views::transform(&column_definition::id)
-            | std::ranges::to<query::column_id_vector>();
-
-    auto selection = cql3::selection::selection::wildcard(schema);
-
-    auto partition_slice = query::partition_slice(std::move(bounds), {}, std::move(regular_columns), selection->get_query_options());
-    auto command = ::make_lw_shared<query::read_command>(schema->id(), schema->version(), partition_slice, _proxy.get_max_result_size(partition_slice),
-            query::tombstone_limit(_proxy.get_tombstone_limit()));
-
-    std::unordered_set<std::string> used_attribute_names;
-    auto attrs_to_get = calculate_attrs_to_get(request, *_parsed_expression_cache, used_attribute_names);
-    const rjson::value* expression_attribute_names = rjson::find(request, "ExpressionAttributeNames");
-    verify_all_are_used(expression_attribute_names, used_attribute_names, "ExpressionAttributeNames", "GetItem");
-    rcu_consumed_capacity_counter add_capacity(request, cl == db::consistency_level::LOCAL_QUORUM);
-    co_await verify_permission(_enforce_authorization, _warn_authorization, client_state, schema, auth::permission::SELECT, _stats);
-    service::storage_proxy::coordinator_query_result qr =
-        co_await _proxy.query(
-            schema, std::move(command), std::move(partition_ranges), cl,
-            service::storage_proxy::coordinator_query_options(executor::default_timeout(), std::move(permit), client_state, trace_state));
-    per_table_stats->api_operations.get_item_latency.mark(std::chrono::steady_clock::now() - start_time);
-    _stats.api_operations.get_item_latency.mark(std::chrono::steady_clock::now() - start_time);
-    uint64_t rcu_half_units = 0;
-    rjson::value res = describe_item(schema, partition_slice, *selection, *qr.query_result, std::move(attrs_to_get), add_capacity, rcu_half_units);
-    per_table_stats->rcu_half_units_total += rcu_half_units;
-    _stats.rcu_half_units_total += rcu_half_units;
-    // Update item size metrics only if we found an item.
-    if (qr.query_result->row_count().value_or(0) > 0) {
-        per_table_stats->operation_sizes.get_item_op_size_kb.add(bytes_to_kb_ceil(add_capacity._total_bytes));
-    }
-    co_return rjson::print(std::move(res));
-}
-
 static void check_big_object(const rjson::value& val, int& size_left);
 static void check_big_array(const rjson::value& val, int& size_left);
 
@@ -5372,1630 +4893,6 @@ static void check_big_object(const rjson::value& val, int& size_left) {
     }
 }
 
-future<executor::request_return_type> executor::batch_get_item(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
-    // FIXME: In this implementation, an unbounded batch size can cause
-    // unbounded response JSON object to be buffered in memory, unbounded
-    // parallelism of the requests, and unbounded amount of non-preemptable
-    // work in the following loops. So we should limit the batch size, and/or
-    // the response size, as DynamoDB does.
-    _stats.api_operations.batch_get_item++;
-    rjson::value& request_items = request["RequestItems"];
-    auto start_time = std::chrono::steady_clock::now();
-    // We need to validate all the parameters before starting any asynchronous
-    // query, and fail the entire request on any parse error. So we parse all
-    // the input into our own vector "requests", each element a table_requests
-    // listing all the request aimed at a single table. For efficiency, inside
-    // each table_requests we further group together all reads going to the
-    // same partition, so we can later send them together.
-    bool should_add_rcu = rcu_consumed_capacity_counter::should_add_capacity(request);
-    struct table_requests {
-        schema_ptr schema;
-        db::consistency_level cl;
-        ::shared_ptr<const std::optional<alternator::attrs_to_get>> attrs_to_get;
-        // clustering_keys keeps a sorted set of clustering keys. It must
-        // be sorted for the read below (see #10827). Additionally each
-        // clustering key is mapped to the original rjson::value "Key".
-        using clustering_keys = std::map<clustering_key, rjson::value*, clustering_key::less_compare>;
-        std::unordered_map<partition_key, clustering_keys, partition_key::hashing, partition_key::equality> requests;
-        table_requests(schema_ptr s)
-            : schema(std::move(s))
-            , requests(8, partition_key::hashing(*schema), partition_key::equality(*schema))
-        {}
-        void add(rjson::value& key) {
-            auto pk = pk_from_json(key, schema);
-            auto it = requests.find(pk);
-            if (it == requests.end()) {
-                it = requests.emplace(pk, clustering_key::less_compare(*schema)).first;
-            }
-            auto ck = ck_from_json(key, schema);
-            if (auto [_, inserted] = it->second.emplace(ck, &key); !inserted) {
-                throw api_error::validation("Provided list of item keys contains duplicates");
-            }
-        }
-    };
-    std::vector<table_requests> requests;
-    uint batch_size = 0;
-    for (auto it = request_items.MemberBegin(); it != request_items.MemberEnd(); ++it) {
-        table_requests rs(get_table_from_batch_request(_proxy, it));
-        tracing::add_alternator_table_name(trace_state, rs.schema->cf_name());
-        rs.cl = get_read_consistency(it->value);
-        std::unordered_set<std::string> used_attribute_names;
-        rs.attrs_to_get = ::make_shared<const std::optional<attrs_to_get>>(calculate_attrs_to_get(it->value, *_parsed_expression_cache, used_attribute_names));
-        const rjson::value* expression_attribute_names = rjson::find(it->value, "ExpressionAttributeNames");
-        verify_all_are_used(expression_attribute_names, used_attribute_names,"ExpressionAttributeNames", "GetItem");
-        auto& keys = (it->value)["Keys"];
-        for (rjson::value& key : keys.GetArray()) {
-            rs.add(key);
-            check_key(key, rs.schema);
-        }
-        batch_size += rs.requests.size();
-        requests.emplace_back(std::move(rs));
-    }
-
-    for (const table_requests& tr : requests) {
-        co_await verify_permission(_enforce_authorization, _warn_authorization, client_state, tr.schema, auth::permission::SELECT, _stats);
-    }
-
-    _stats.api_operations.batch_get_item_batch_total += batch_size;
-    _stats.api_operations.batch_get_item_histogram.add(batch_size);
-    // If we got here, all "requests" are valid, so let's start the
-    // requests for the different partitions all in parallel.
-    std::vector<future<std::vector<rjson::value>>> response_futures;
-    std::vector<uint64_t> consumed_rcu_half_units_per_table(requests.size());
-    for (size_t i = 0; i < requests.size(); i++) {
-        const table_requests& rs = requests[i];
-        bool is_quorum = rs.cl == db::consistency_level::LOCAL_QUORUM;
-        lw_shared_ptr<stats> per_table_stats = get_stats_from_schema(_proxy, *rs.schema);
-        per_table_stats->api_operations.batch_get_item_histogram.add(rs.requests.size());
-        for (const auto& [pk, cks] : rs.requests) {
-            dht::partition_range_vector partition_ranges{dht::partition_range(dht::decorate_key(*rs.schema, pk))};
-            std::vector<query::clustering_range> bounds;
-            if (rs.schema->clustering_key_size() == 0) {
-                bounds.push_back(query::clustering_range::make_open_ended_both_sides());
-            } else {
-                for (auto& ck : cks) {
-                    bounds.push_back(query::clustering_range::make_singular(ck.first));
-                }
-            }
-            auto regular_columns =
-                    rs.schema->regular_columns() | std::views::transform(&column_definition::id)
-                    | std::ranges::to<query::column_id_vector>();
-            auto selection = cql3::selection::selection::wildcard(rs.schema);
-            auto partition_slice = query::partition_slice(std::move(bounds), {}, std::move(regular_columns), selection->get_query_options());
-            auto command = ::make_lw_shared<query::read_command>(rs.schema->id(), rs.schema->version(), partition_slice, _proxy.get_max_result_size(partition_slice),
-                    query::tombstone_limit(_proxy.get_tombstone_limit()));
-            command->allow_limit = db::allow_per_partition_rate_limit::yes;
-            const auto item_callback = [is_quorum, per_table_stats, &rcus_per_table = consumed_rcu_half_units_per_table[i]](uint64_t size) {
-                rcus_per_table += rcu_consumed_capacity_counter::get_half_units(size, is_quorum);
-                // Update item size only if the item exists.
-                if (size > 0) {
-                    per_table_stats->operation_sizes.batch_get_item_op_size_kb.add(bytes_to_kb_ceil(size));
-                }
-            };
-            future<std::vector<rjson::value>> f = _proxy.query(rs.schema, std::move(command), std::move(partition_ranges), rs.cl,
-                    service::storage_proxy::coordinator_query_options(executor::default_timeout(), permit, client_state, trace_state)).then(
-                    [schema = rs.schema, partition_slice = std::move(partition_slice), selection = std::move(selection), attrs_to_get = rs.attrs_to_get, item_callback = std::move(item_callback)] (service::storage_proxy::coordinator_query_result qr) mutable {
-                utils::get_local_injector().inject("alternator_batch_get_item", [] { throw std::runtime_error("batch_get_item injection"); });
-                return describe_multi_item(std::move(schema), std::move(partition_slice), std::move(selection), std::move(qr.query_result), std::move(attrs_to_get), std::move(item_callback));
-            });
-            response_futures.push_back(std::move(f));
-        }
-    }
-
-    // Wait for all requests to complete, and then return the response.
-    // In case of full failure (no reads succeeded), an arbitrary error
-    // from one of the operations will be returned.
-    bool some_succeeded = false;
-    std::exception_ptr eptr;
-    std::set<sstring> table_names; // for auditing
-    // FIXME: will_log() here doesn't pass keyspace/table, so keyspace-level audit
-    // filtering is bypassed — a batch spanning multiple tables is audited as a whole.
-    bool should_audit = _audit.local_is_initialized() && _audit.local().will_log(audit::statement_category::QUERY);
-    rjson::value response = rjson::empty_object();
-    rjson::add(response, "Responses", rjson::empty_object());
-    rjson::add(response, "UnprocessedKeys", rjson::empty_object());
-    auto fut_it = response_futures.begin();
-    rjson::value consumed_capacity = rjson::empty_array();
-    for (size_t i = 0; i < requests.size(); i++) {
-        const table_requests& rs = requests[i];
-        std::string table = table_name(*rs.schema);
-        if (should_audit) {
-            table_names.insert(table);
-        }
-        for (const auto& [_, cks] : rs.requests) {
-            auto& fut = *fut_it;
-            ++fut_it;
-            try {
-                std::vector<rjson::value> results = co_await std::move(fut);
-                some_succeeded = true;
-                if (!response["Responses"].HasMember(table)) {
-                    rjson::add_with_string_name(response["Responses"], table, rjson::empty_array());
-                }
-                for (rjson::value& json : results) {
-                    rjson::push_back(response["Responses"][table], std::move(json));
-                }
-            } catch(...) {
-                eptr = std::current_exception();
-                // This read of potentially several rows in one partition,
-                // failed. We need to add the row key(s) to UnprocessedKeys.
-                if (!response["UnprocessedKeys"].HasMember(table)) {
-                    // Add the table's entry in UnprocessedKeys. Need to copy
-                    // all the table's parameters from the request except the
-                    // Keys field, which we start empty and then build below.
-                    rjson::add_with_string_name(response["UnprocessedKeys"], table, rjson::empty_object());
-                    rjson::value& unprocessed_item = response["UnprocessedKeys"][table];
-                    rjson::value& request_item = request_items[table];
-                    for (auto it = request_item.MemberBegin(); it != request_item.MemberEnd(); ++it) {
-                        if (it->name != "Keys") {
-                            rjson::add_with_string_name(unprocessed_item,
-                                rjson::to_string_view(it->name), rjson::copy(it->value));
-                        }
-                    }
-                    rjson::add_with_string_name(unprocessed_item, "Keys", rjson::empty_array());
-                }
-                for (auto& ck : cks) {
-                    rjson::push_back(response["UnprocessedKeys"][table]["Keys"], std::move(*ck.second));
-                }
-            }
-        }
-        uint64_t rcu_half_units = consumed_rcu_half_units_per_table[i];
-        _stats.rcu_half_units_total += rcu_half_units;
-        lw_shared_ptr<stats> per_table_stats = get_stats_from_schema(_proxy, *rs.schema);
-        per_table_stats->rcu_half_units_total += rcu_half_units;
-        if (should_add_rcu) {
-            rjson::value entry = rjson::empty_object();
-            rjson::add(entry, "TableName", table);
-            rjson::add(entry, "CapacityUnits", rcu_half_units*0.5);
-            rjson::push_back(consumed_capacity, std::move(entry));
-        }
-    }
-
-    if (should_add_rcu) {
-        rjson::add(response, "ConsumedCapacity", std::move(consumed_capacity));
-    }
-    elogger.trace("Unprocessed keys: {}", response["UnprocessedKeys"]);
-    // NOTE: Each table in the batch has its own CL (set by get_read_consistency()),
-    // but the audit entry records a single CL for the whole batch. We use ANY as a
-    // placeholder to indicate "mixed / not applicable".
-    // FIXME: Auditing is executed only for a complete success
-    maybe_audit(audit_info, audit::statement_category::QUERY, "",
-                print_names_for_audit(table_names), "BatchGetItem", request, db::consistency_level::ANY);
-    if (!some_succeeded && eptr) {
-        co_await coroutine::return_exception_ptr(std::move(eptr));
-    }
-    auto duration = std::chrono::steady_clock::now() - start_time;
-    _stats.api_operations.batch_get_item_latency.mark(duration);
-    for (const table_requests& rs : requests) {
-        lw_shared_ptr<stats> per_table_stats = get_stats_from_schema(_proxy, *rs.schema);
-        per_table_stats->api_operations.batch_get_item_latency.mark(duration);
-    }
-    if (is_big(response)) {
-        co_return make_streamed(std::move(response));
-    } else {
-        co_return rjson::print(std::move(response));
-    }
-}
-
-// "filter" represents a condition that can be applied to individual items
-// read by a Query or Scan operation, to decide whether to keep the item.
-// A filter is constructed from a Query or Scan request. This uses the
-// relevant fields in the query (FilterExpression or QueryFilter/ScanFilter +
-// ConditionalOperator). These fields are pre-checked and pre-parsed as much
-// as possible, to ensure that later checking of many items is efficient.
-class filter {
-private:
-    // Holding QueryFilter/ScanFilter + ConditionalOperator:
-    struct conditions_filter {
-        bool require_all;
-        rjson::value conditions;
-    };
-    // Holding a parsed FilterExpression:
-    struct expression_filter {
-        parsed::condition_expression expression;
-    };
-    std::optional<std::variant<conditions_filter, expression_filter>> _imp;
-public:
-    // Filtering for Scan and Query are very similar, but there are some
-    // small differences, especially the names of the request attributes.
-    enum class request_type { SCAN, QUERY };
-    // Note that a filter does not store pointers to the query used to
-    // construct it.
-    filter(parsed::expression_cache& parsed_expression_cache, const rjson::value& request, request_type rt,
-            std::unordered_set<std::string>& used_attribute_names,
-            std::unordered_set<std::string>& used_attribute_values);
-    bool check(const rjson::value& item) const;
-    bool filters_on(std::string_view attribute) const;
-    // for_filters_on() runs the given function on the attributes that the
-    // filter works on. It may run for the same attribute more than once if
-    // used more than once in the filter.
-    void for_filters_on(const noncopyable_function<void(std::string_view)>& func) const;
-    operator bool() const { return bool(_imp); }
-};
-
-filter::filter(parsed::expression_cache& parsed_expression_cache, const rjson::value& request, request_type rt,
-        std::unordered_set<std::string>& used_attribute_names,
-        std::unordered_set<std::string>& used_attribute_values) {
-    const rjson::value* expression = rjson::find(request, "FilterExpression");
-    const char* conditions_attribute = (rt == request_type::SCAN) ? "ScanFilter" : "QueryFilter";
-    const rjson::value* conditions = rjson::find(request, conditions_attribute);
-    auto conditional_operator = get_conditional_operator(request);
-    if (conditional_operator != conditional_operator_type::MISSING &&
-        (!conditions || (conditions->IsObject() && conditions->GetObject().ObjectEmpty()))) {
-            throw api_error::validation(
-                    format("'ConditionalOperator' parameter cannot be specified for missing or empty {}",
-                            conditions_attribute));
-    }
-    if (expression && conditions) {
-        throw api_error::validation(
-                format("FilterExpression and {} are not allowed together", conditions_attribute));
-    }
-    if (expression) {
-        if (!expression->IsString()) {
-            throw api_error::validation("FilterExpression must be a string");
-        }
-        if (expression->GetStringLength() == 0) {
-            throw api_error::validation("FilterExpression must not be empty");
-        }
-        if (rjson::find(request, "AttributesToGet")) {
-            throw api_error::validation("Cannot use both old-style and new-style parameters in same request: FilterExpression and AttributesToGet");
-        }
-        try {
-            auto parsed = parsed_expression_cache.parse_condition_expression(rjson::to_string_view(*expression), "FilterExpression");
-            const rjson::value* expression_attribute_names = rjson::find(request, "ExpressionAttributeNames");
-            const rjson::value* expression_attribute_values = rjson::find(request, "ExpressionAttributeValues");
-            resolve_condition_expression(parsed,
-                    expression_attribute_names, expression_attribute_values,
-                    used_attribute_names, used_attribute_values);
-            _imp = expression_filter { std::move(parsed) };
-        } catch(expressions_syntax_error& e) {
-            throw api_error::validation(e.what());
-        }
-    }
-    if (conditions) {
-        if (rjson::find(request, "ProjectionExpression")) {
-            throw api_error::validation(format("Cannot use both old-style and new-style parameters in same request: {} and ProjectionExpression", conditions_attribute));
-        }
-        bool require_all = conditional_operator != conditional_operator_type::OR;
-        _imp = conditions_filter { require_all, rjson::copy(*conditions) };
-    }
-}
-
-bool filter::check(const rjson::value& item) const {
-    if (!_imp) {
-        return true;
-    }
-    return std::visit(overloaded_functor {
-        [&] (const conditions_filter& f) -> bool {
-            return verify_condition(f.conditions, f.require_all, &item);
-        },
-        [&] (const expression_filter& f) -> bool {
-            return verify_condition_expression(f.expression, &item);
-        }
-    }, *_imp);
-}
-
-bool filter::filters_on(std::string_view attribute) const {
-    if (!_imp) {
-        return false;
-    }
-    return std::visit(overloaded_functor {
-        [&] (const conditions_filter& f) -> bool {
-            for (auto it = f.conditions.MemberBegin(); it != f.conditions.MemberEnd(); ++it) {
-                if (rjson::to_string_view(it->name) == attribute) {
-                    return true;
-                }
-            }
-            return false;
-        },
-        [&] (const expression_filter& f) -> bool {
-            return condition_expression_on(f.expression, attribute);
-        }
-    }, *_imp);
-}
-
-void filter::for_filters_on(const noncopyable_function<void(std::string_view)>& func) const {
-    if (_imp) {
-        std::visit(overloaded_functor {
-            [&] (const conditions_filter& f) -> void {
-                for (auto it = f.conditions.MemberBegin(); it != f.conditions.MemberEnd(); ++it) {
-                    func(rjson::to_string_view(it->name));
-                }
-            },
-            [&] (const expression_filter& f) -> void {
-                return for_condition_expression_on(f.expression, func);
-            }
-        }, *_imp);
-    }
-}
-
-class describe_items_visitor {
-    typedef std::vector<const column_definition*> columns_t;
-    const columns_t& _columns;
-    const std::optional<attrs_to_get>& _attrs_to_get;
-    std::unordered_set<std::string> _extra_filter_attrs;
-    const filter& _filter;
-    typename columns_t::const_iterator _column_it;
-    rjson::value _item;
-    // _items is a chunked_vector<rjson::value> instead of a RapidJson array
-    // (rjson::value) because unfortunately RapidJson arrays are stored
-    // contiguously in memory, and cause large allocations when a Query/Scan
-    // returns a long list of short items (issue #23535).
-    utils::chunked_vector<rjson::value> _items;
-    size_t _scanned_count;
-
-public:
-    describe_items_visitor(const columns_t& columns, const std::optional<attrs_to_get>& attrs_to_get, filter& filter)
-            : _columns(columns)
-            , _attrs_to_get(attrs_to_get)
-            , _filter(filter)
-            , _column_it(columns.begin())
-            , _item(rjson::empty_object())
-            , _scanned_count(0)
-    {
-        // _filter.check() may need additional attributes not listed in
-        // _attrs_to_get (i.e., not requested as part of the output).
-        // We list those in _extra_filter_attrs. We will include them in
-        // the JSON but take them out before finally returning the JSON.
-        if (_attrs_to_get) {
-            _filter.for_filters_on([&] (std::string_view attr) {
-                std::string a(attr); // no heterogeneous maps searches :-(
-                if (!_attrs_to_get->contains(a)) {
-                    _extra_filter_attrs.emplace(std::move(a));
-                }
-            });
-        }
-    }
-
-    void start_row() {
-        _column_it = _columns.begin();
-    }
-
-    void accept_value(managed_bytes_view_opt result_bytes_view) {
-        if (!result_bytes_view) {
-            ++_column_it;
-            return;
-        }
-        result_bytes_view->with_linearized([this] (bytes_view bv) {
-            std::string column_name = (*_column_it)->name_as_text();
-            if (column_name != executor::ATTRS_COLUMN_NAME) {
-                if (!_attrs_to_get || _attrs_to_get->contains(column_name) || _extra_filter_attrs.contains(column_name)) {
-                    if (!_item.HasMember(column_name.c_str())) {
-                        rjson::add_with_string_name(_item, column_name, rjson::empty_object());
-                    }
-                    rjson::value& field = _item[column_name.c_str()];
-                    rjson::add_with_string_name(field, type_to_string((*_column_it)->type), json_key_column_value(bv, **_column_it));
-                }
-            } else {
-                auto deserialized = attrs_type()->deserialize(bv);
-                auto keys_and_values = value_cast<map_type_impl::native_type>(deserialized);
-                for (auto entry : keys_and_values) {
-                    std::string attr_name = value_cast<sstring>(entry.first);
-                    if (!_attrs_to_get || _attrs_to_get->contains(attr_name) || _extra_filter_attrs.contains(attr_name)) {
-                        bytes value = value_cast<bytes>(entry.second);
-                        // Even if _attrs_to_get asked to keep only a part of a
-                        // top-level attribute, we keep the entire attribute
-                        // at this stage, because the item filter might still
-                        // need the other parts (it was easier for us to keep
-                        // extra_filter_attrs at top-level granularity). We'll
-                        // filter the unneeded parts after item filtering.
-                        rjson::add_with_string_name(_item, attr_name, deserialize_item(value));
-                    }
-                }
-            }
-        });
-        ++_column_it;
-    }
-
-    void end_row() {
-        if (_filter.check(_item)) {
-            // As noted above, we kept entire top-level attributes listed in
-            // _attrs_to_get. We may need to only keep parts of them.
-            if (_attrs_to_get) {
-                for (const auto& attr: *_attrs_to_get) {
-                    // If !attr.has_value() it means we were asked not to keep
-                    // attr entirely, but just parts of it.
-                    if (!attr.second.has_value()) {
-                        rjson::value* toplevel= rjson::find(_item, attr.first);
-                        if (toplevel && !hierarchy_filter(*toplevel, attr.second)) {
-                            rjson::remove_member(_item, attr.first);
-                        }
-                    }
-                }
-            }
-            // Remove the extra attributes _extra_filter_attrs which we had
-            // to add just for the filter, and not requested to be returned:
-            for (const auto& attr : _extra_filter_attrs) {
-                rjson::remove_member(_item, attr);
-            }
-
-            _items.push_back(std::move(_item));
-        }
-        _item = rjson::empty_object();
-        ++_scanned_count;
-    }
-
-    utils::chunked_vector<rjson::value> get_items() && {
-        return std::move(_items);
-    }
-
-    size_t get_scanned_count() {
-        return _scanned_count;
-    }
-};
-
-// describe_items() returns a JSON object that includes members "Count"
-// and "ScannedCount", but *not* "Items" - that is returned separately
-// as a chunked_vector to avoid large contiguous allocations which
-// RapidJSON does of its array. The caller should add "Items" to the
-// returned JSON object if needed, or print it separately.
-// The returned chunked_vector (the items) is std::optional<>, because
-// the user may have requested only to count items, and not return any
-// items - which is different from returning an empty list of items.
-static future<std::tuple<rjson::value, std::optional<utils::chunked_vector<rjson::value>>, size_t>> describe_items(
-        const cql3::selection::selection& selection,
-        std::unique_ptr<cql3::result_set> result_set,
-        std::optional<attrs_to_get>&& attrs_to_get,
-        filter&& filter) {
-    describe_items_visitor visitor(selection.get_columns(), attrs_to_get, filter);
-    co_await result_set->visit_gently(visitor);
-    auto scanned_count = visitor.get_scanned_count();
-    utils::chunked_vector<rjson::value> items = std::move(visitor).get_items();
-    rjson::value items_descr = rjson::empty_object();
-    auto size = items.size();
-    rjson::add(items_descr, "Count", rjson::value(size));
-    rjson::add(items_descr, "ScannedCount", rjson::value(scanned_count));
-    // If attrs_to_get && attrs_to_get->empty(), this means the user asked not
-    // to get any attributes (i.e., a Scan or Query with Select=COUNT) and we
-    // shouldn't return "Items" at all.
-    // TODO: consider optimizing the case of Select=COUNT without a filter.
-    // In that case, we currently build a list of empty items and here drop
-    // it. We could just count the items and not bother with the empty items.
-    // (However, remember that when we do have a filter, we need the items).
-    std::optional<utils::chunked_vector<rjson::value>> opt_items;
-    if (!attrs_to_get || !attrs_to_get->empty()) {
-        opt_items = std::move(items);
-    }
-    co_return std::tuple(std::move(items_descr), std::move(opt_items), size);
-}
-
-static rjson::value encode_paging_state(const schema& schema, const service::pager::paging_state& paging_state) {
-    rjson::value last_evaluated_key = rjson::empty_object();
-    std::vector<bytes> exploded_pk = paging_state.get_partition_key().explode();
-    auto exploded_pk_it = exploded_pk.begin();
-    for (const column_definition& cdef : schema.partition_key_columns()) {
-        rjson::add_with_string_name(last_evaluated_key, std::string_view(cdef.name_as_text()), rjson::empty_object());
-        rjson::value& key_entry = last_evaluated_key[cdef.name_as_text()];
-        rjson::add_with_string_name(key_entry, type_to_string(cdef.type), json_key_column_value(*exploded_pk_it, cdef));
-        ++exploded_pk_it;
-    }
-    auto pos = paging_state.get_position_in_partition();
-    if (pos.has_key()) {
-        // Alternator itself allows at most one column in clustering key, but 
-        // user can use Alternator api to access system tables which might have
-        // multiple clustering key columns. So we need to handle that case here.
-        auto cdef_it = schema.clustering_key_columns().begin();        
-        for(const auto &exploded_ck : pos.key().explode()) {
-            rjson::add_with_string_name(last_evaluated_key, std::string_view(cdef_it->name_as_text()), rjson::empty_object());
-            rjson::value& key_entry = last_evaluated_key[cdef_it->name_as_text()];
-            rjson::add_with_string_name(key_entry, type_to_string(cdef_it->type), json_key_column_value(exploded_ck, *cdef_it));
-            ++cdef_it;
-        }
-    }
-    // To avoid possible conflicts (and thus having to reserve these names) we
-    // avoid adding the weight and region fields of the position to the paging
-    // state. Alternator will never need these as it doesn't have range
-    // tombstones (the only thing that can generate a position other than at(row)).
-    // We conditionally include these fields when reading CQL tables through alternator.
-    if (!is_alternator_keyspace(schema.ks_name()) && (!pos.has_key() || pos.get_bound_weight() != bound_weight::equal)) {
-        rjson::add_with_string_name(last_evaluated_key, scylla_paging_region, rjson::empty_object());
-        rjson::add(last_evaluated_key[scylla_paging_region.data()], "S", rjson::from_string(fmt::to_string(pos.region())));
-        rjson::add_with_string_name(last_evaluated_key, scylla_paging_weight, rjson::empty_object());
-        rjson::add(last_evaluated_key[scylla_paging_weight.data()], "N", static_cast<int>(pos.get_bound_weight()));
-    }
-    return last_evaluated_key;
-}
-
-// RapidJSON allocates arrays contiguously in memory, so we want to avoid
-// returning a large number of items as a single rapidjson array, and use
-// a chunked_vector instead. The following constant is an arbitrary cutoff
-// point for when to switch from a rapidjson array to a chunked_vector.
-static constexpr int max_items_for_rapidjson_array = 256;
-
-static future<executor::request_return_type> do_query(service::storage_proxy& proxy,
-        schema_ptr table_schema,
-        const rjson::value* exclusive_start_key,
-        dht::partition_range_vector partition_ranges,
-        std::vector<query::clustering_range> ck_bounds,
-        std::optional<attrs_to_get> attrs_to_get,
-        uint32_t limit,
-        db::consistency_level cl,
-        filter filter,
-        query::partition_slice::option_set custom_opts,
-        service::client_state& client_state,
-        alternator::stats& stats,
-        tracing::trace_state_ptr trace_state,
-        service_permit permit,
-        bool enforce_authorization,
-        bool warn_authorization) {
-    lw_shared_ptr<service::pager::paging_state> old_paging_state = nullptr;
-
-    tracing::trace(trace_state, "Performing a database query");
-
-    // Reverse the schema and the clustering bounds as the underlying code expects
-    // reversed queries in the native reversed format.
-    auto query_schema = table_schema;
-    const bool reversed = custom_opts.contains<query::partition_slice::option::reversed>();
-    if (reversed) {
-        query_schema = table_schema->get_reversed();
-
-        std::reverse(ck_bounds.begin(), ck_bounds.end());
-        for (auto& bound : ck_bounds) {
-            bound = query::reverse(bound);
-        }
-    }
-
-    if (exclusive_start_key) {
-        partition_key pk = pk_from_json(*exclusive_start_key, table_schema);
-        auto pos = position_in_partition::for_partition_start();
-        if (table_schema->clustering_key_size() > 0) {
-            pos = pos_from_json(*exclusive_start_key, table_schema);
-        }
-        old_paging_state = make_lw_shared<service::pager::paging_state>(pk, pos, query::max_partitions, query_id::create_null_id(), service::pager::paging_state::replicas_per_token_range{}, std::nullopt, 0);
-    }
-
-    co_await verify_permission(enforce_authorization, warn_authorization, client_state, table_schema, auth::permission::SELECT, stats);
-
-    auto regular_columns =
-            table_schema->regular_columns() | std::views::transform(&column_definition::id)
-            | std::ranges::to<query::column_id_vector>();
-    auto static_columns =
-            table_schema->static_columns() | std::views::transform(&column_definition::id)
-            | std::ranges::to<query::column_id_vector>();
-    auto selection = cql3::selection::selection::wildcard(table_schema);
-    query::partition_slice::option_set opts = selection->get_query_options();
-    opts.add(custom_opts);
-    auto partition_slice = query::partition_slice(std::move(ck_bounds), std::move(static_columns), std::move(regular_columns), opts);
-    auto command = ::make_lw_shared<query::read_command>(query_schema->id(), query_schema->version(), partition_slice, proxy.get_max_result_size(partition_slice),
-        query::tombstone_limit(proxy.get_tombstone_limit()));
-
-    elogger.trace("Executing read query (reversed {}): table schema {}, query schema {}", partition_slice.is_reversed(), table_schema->version(), query_schema->version());
-
-    auto query_state_ptr = std::make_unique<service::query_state>(client_state, trace_state, std::move(permit));
-
-    // FIXME: should be moved above, set on opts, so get_max_result_size knows it?
-    command->slice.options.set<query::partition_slice::option::allow_short_read>();
-    auto query_options = std::make_unique<cql3::query_options>(cl, std::vector<cql3::raw_value>{});
-    query_options = std::make_unique<cql3::query_options>(std::move(query_options), std::move(old_paging_state));
-    auto p = service::pager::query_pagers::pager(proxy, query_schema, selection, *query_state_ptr, *query_options, command, std::move(partition_ranges), nullptr);
-
-    std::unique_ptr<cql3::result_set> rs = co_await p->fetch_page(limit, gc_clock::now(), executor::default_timeout());
-    if (!p->is_exhausted()) {
-        rs->get_metadata().set_paging_state(p->state());
-    }
-    auto paging_state = rs->get_metadata().paging_state();
-    bool has_filter = filter;
-    auto [items_descr, opt_items, size] = co_await describe_items(*selection, std::move(rs), std::move(attrs_to_get), std::move(filter));
-    if (paging_state) {
-        rjson::add(items_descr, "LastEvaluatedKey", encode_paging_state(*table_schema, *paging_state));
-    }
-    if (has_filter) {
-        stats.cql_stats.filtered_rows_read_total += p->stats().rows_read_total;
-        // update our "filtered_row_matched_total" for all the rows matched, despited the filter
-        stats.cql_stats.filtered_rows_matched_total += size;
-    }
-    if (opt_items) {
-        if (opt_items->size() >= max_items_for_rapidjson_array) {
-            // There are many items, better print the JSON and the array of
-            // items (opt_items) separately to avoid RapidJSON's contiguous
-            // allocation of arrays.
-            co_return make_streamed_with_extra_array(std::move(items_descr), "Items", std::move(*opt_items));
-        }
-        // There aren't many items in the chunked vector opt_items,
-        // let's just insert them into the JSON object and print the
-        // full JSON normally.
-        rjson::value items_json = rjson::empty_array();
-        for (auto& item : *opt_items) {
-            rjson::push_back(items_json, std::move(item));
-        }
-        rjson::add(items_descr, "Items", std::move(items_json));
-    }
-    if (is_big(items_descr)) {
-        co_return make_streamed(std::move(items_descr));
-    }
-    co_return rjson::print(std::move(items_descr));
-}
-
-static dht::token token_for_segment(int segment, int total_segments) {
-    throwing_assert(total_segments > 1 && segment >= 0 && segment < total_segments);
-    uint64_t delta = std::numeric_limits<uint64_t>::max() / total_segments;
-    return dht::token::from_int64(std::numeric_limits<int64_t>::min() + delta * segment);
-}
-
-static dht::partition_range get_range_for_segment(int segment, int total_segments) {
-    if (total_segments == 1) {
-        return dht::partition_range::make_open_ended_both_sides();
-    }
-    if (segment == 0) {
-        dht::token ending_token = token_for_segment(1, total_segments);
-        return dht::partition_range::make_ending_with(
-                dht::partition_range::bound(dht::ring_position::ending_at(ending_token), false));
-    } else if (segment == total_segments - 1) {
-        dht::token starting_token = token_for_segment(segment, total_segments);
-        return dht::partition_range::make_starting_with(
-                dht::partition_range::bound(dht::ring_position::starting_at(starting_token)));
-    } else {
-        dht::token starting_token = token_for_segment(segment, total_segments);
-        dht::token ending_token = token_for_segment(segment + 1, total_segments);
-        return dht::partition_range::make(
-            dht::partition_range::bound(dht::ring_position::starting_at(starting_token)),
-            dht::partition_range::bound(dht::ring_position::ending_at(ending_token), false)
-        );
-    }
-}
-
-future<executor::request_return_type> executor::scan(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
-    _stats.api_operations.scan++;
-    elogger.trace("Scanning {}", request);
-
-    auto [schema, table_type] = get_table_or_view(_proxy, request);
-    db::consistency_level cl = get_read_consistency(request);
-
-    maybe_audit(audit_info, audit::statement_category::QUERY, schema->ks_name(), schema->cf_name(), "Scan", request, cl);
-
-    tracing::add_alternator_table_name(trace_state, schema->cf_name());
-    get_stats_from_schema(_proxy, *schema)->api_operations.scan++;
-    auto segment = get_int_attribute(request, "Segment");
-    auto total_segments = get_int_attribute(request, "TotalSegments");
-    if (segment || total_segments) {
-        if (!segment || !total_segments) {
-            return make_ready_future<request_return_type>(api_error::validation(
-                    "Both Segment and TotalSegments attributes need to be present for a parallel scan"));
-        }
-        if (*segment < 0 || *segment >= *total_segments) {
-            return make_ready_future<request_return_type>(api_error::validation(
-                    "Segment must be non-negative and less than TotalSegments"));
-        }
-        if (*total_segments < 0 || *total_segments > 1000000) {
-            return make_ready_future<request_return_type>(api_error::validation(
-                    "TotalSegments must be non-negative and less or equal to 1000000"));
-        }
-    }
-
-    rjson::value* exclusive_start_key = rjson::find(request, "ExclusiveStartKey");
-
-    if (table_type == table_or_view_type::gsi && cl != db::consistency_level::LOCAL_ONE) {
-        return make_ready_future<request_return_type>(api_error::validation(
-                "Consistent reads are not allowed on global indexes (GSI)"));
-    }
-    rjson::value* limit_json = rjson::find(request, "Limit");
-    uint32_t limit = limit_json ? limit_json->GetUint64() : std::numeric_limits<uint32_t>::max();
-    if (limit <= 0) {
-        return make_ready_future<request_return_type>(api_error::validation("Limit must be greater than 0"));
-    }
-
-    select_type select = parse_select(request, table_type);
-
-    std::unordered_set<std::string> used_attribute_names;
-    std::unordered_set<std::string> used_attribute_values;
-    auto attrs_to_get = calculate_attrs_to_get(request, *_parsed_expression_cache, used_attribute_names, select);
-
-    dht::partition_range_vector partition_ranges;
-    if (segment) {
-        auto range = get_range_for_segment(*segment, *total_segments);
-        if (exclusive_start_key) {
-            auto ring_pos = dht::ring_position{dht::decorate_key(*schema, pk_from_json(*exclusive_start_key, schema))};
-            if (!range.contains(ring_pos, dht::ring_position_comparator(*schema))) {
-                return make_ready_future<request_return_type>(api_error::validation(
-                    format("The provided starting key is invalid: Invalid ExclusiveStartKey. Please use ExclusiveStartKey "
-                           "with correct Segment. TotalSegments: {} Segment: {}", *total_segments, *segment)));
-            }
-        }
-        partition_ranges.push_back(range);
-    } else {
-        partition_ranges.push_back(dht::partition_range::make_open_ended_both_sides());
-    }
-    std::vector<query::clustering_range> ck_bounds{query::clustering_range::make_open_ended_both_sides()};
-
-    filter filter(*_parsed_expression_cache, request, filter::request_type::SCAN, used_attribute_names, used_attribute_values);
-    // Note: Unlike Query, Scan does allow a filter on the key attributes.
-    // For some *specific* cases of key filtering, such an equality test on
-    // partition key or comparison operator for the sort key, we could have
-    // optimized the filtering by modifying partition_ranges and/or
-    // ck_bounds. We haven't done this optimization yet.
-
-    const rjson::value* expression_attribute_names = rjson::find(request, "ExpressionAttributeNames");
-    const rjson::value* expression_attribute_values = rjson::find(request, "ExpressionAttributeValues");
-    verify_all_are_used(expression_attribute_names, used_attribute_names, "ExpressionAttributeNames", "Scan");
-    verify_all_are_used(expression_attribute_values, used_attribute_values, "ExpressionAttributeValues", "Scan");
-
-    return do_query(_proxy, schema, exclusive_start_key, std::move(partition_ranges), std::move(ck_bounds), std::move(attrs_to_get), limit, cl,
-            std::move(filter), query::partition_slice::option_set(), client_state, _stats, trace_state, std::move(permit), _enforce_authorization, _warn_authorization);
-}
-
-static dht::partition_range calculate_pk_bound(schema_ptr schema, const column_definition& pk_cdef, const rjson::value& comp_definition, const rjson::value& attrs) {
-    auto op = get_comparison_operator(comp_definition);
-    if (op != comparison_operator_type::EQ) {
-        throw api_error::validation(format("Hash key can only be restricted with equality operator (EQ). {} not supported.", comp_definition));
-    }
-    if (attrs.Size() != 1) {
-        throw api_error::validation(format("A single attribute is required for a hash key EQ restriction: {}", attrs));
-    }
-    bytes raw_value = get_key_from_typed_value(attrs[0], pk_cdef);
-    partition_key pk = partition_key::from_singular_bytes(*schema, std::move(raw_value));
-    auto decorated_key = dht::decorate_key(*schema, pk);
-    return dht::partition_range(decorated_key);
-}
-
-static query::clustering_range get_clustering_range_for_begins_with(bytes&& target, const clustering_key& ck, schema_ptr schema, data_type t) {
-    auto it = boost::range::find_end(target, bytes("\xFF"), std::not_equal_to<bytes::value_type>());
-    if (it != target.end()) {
-        ++*it;
-        target.resize(std::distance(target.begin(), it) + 1);
-        clustering_key upper_limit = clustering_key::from_single_value(*schema, target);
-        return query::clustering_range::make(query::clustering_range::bound(ck), query::clustering_range::bound(upper_limit, false));
-    }
-    return query::clustering_range::make_starting_with(query::clustering_range::bound(ck));
-}
-
-static query::clustering_range calculate_ck_bound(schema_ptr schema, const column_definition& ck_cdef, const rjson::value& comp_definition, const rjson::value& attrs) {
-    auto op = get_comparison_operator(comp_definition);
-    const size_t expected_attrs_size = (op == comparison_operator_type::BETWEEN) ? 2 : 1;
-    if (attrs.Size() != expected_attrs_size) {
-        throw api_error::validation(format("{} arguments expected for a sort key restriction: {}", expected_attrs_size, attrs));
-    }
-    bytes raw_value = get_key_from_typed_value(attrs[0], ck_cdef);
-    clustering_key ck = clustering_key::from_single_value(*schema, raw_value);
-    switch (op) {
-    case comparison_operator_type::EQ:
-        return query::clustering_range(ck);
-    case comparison_operator_type::LE:
-        return query::clustering_range::make_ending_with(query::clustering_range::bound(ck));
-    case comparison_operator_type::LT:
-        return query::clustering_range::make_ending_with(query::clustering_range::bound(ck, false));
-    case comparison_operator_type::GE:
-        return query::clustering_range::make_starting_with(query::clustering_range::bound(ck));
-    case comparison_operator_type::GT:
-        return query::clustering_range::make_starting_with(query::clustering_range::bound(ck, false));
-    case comparison_operator_type::BETWEEN: {
-        bytes raw_upper_limit = get_key_from_typed_value(attrs[1], ck_cdef);
-        clustering_key upper_limit = clustering_key::from_single_value(*schema, raw_upper_limit);
-        return query::clustering_range::make(query::clustering_range::bound(ck), query::clustering_range::bound(upper_limit));
-    }
-    case comparison_operator_type::BEGINS_WITH: {
-        if (raw_value.empty()) {
-            return query::clustering_range::make_open_ended_both_sides();
-        }
-        // NOTICE(sarna): A range starting with given prefix and ending (non-inclusively) with a string "incremented" by a single
-        // character at the end. Throws for NUMBER instances.
-        if (!ck_cdef.type->is_compatible_with(*utf8_type)) {
-            throw api_error::validation(fmt::format("BEGINS_WITH operator cannot be applied to type {}", type_to_string(ck_cdef.type)));
-        }
-        return get_clustering_range_for_begins_with(std::move(raw_value), ck, schema, ck_cdef.type);
-    }
-    default:
-        throw api_error::validation(format("Operator {} not supported for sort key", comp_definition));
-    }
-}
-
-// Calculates primary key bounds from KeyConditions
-static std::pair<dht::partition_range_vector, std::vector<query::clustering_range>>
-calculate_bounds_conditions(schema_ptr schema, const rjson::value& conditions) {
-    dht::partition_range_vector partition_ranges;
-    std::vector<query::clustering_range> ck_bounds;
-
-    for (auto it = conditions.MemberBegin(); it != conditions.MemberEnd(); ++it) {
-        sstring key = rjson::to_sstring(it->name);
-        const rjson::value& condition = it->value;
-
-        const rjson::value& comp_definition = rjson::get(condition, "ComparisonOperator");
-        const rjson::value& attr_list = rjson::get(condition, "AttributeValueList");
-
-        const column_definition& pk_cdef = schema->partition_key_columns().front();
-        const column_definition* ck_cdef = schema->clustering_key_size() > 0 ? &schema->clustering_key_columns().front() : nullptr;
-        if (key == pk_cdef.name_as_text()) {
-            if (!partition_ranges.empty()) {
-                throw api_error::validation("Currently only a single restriction per key is allowed");
-            }
-            partition_ranges.push_back(calculate_pk_bound(schema, pk_cdef, comp_definition, attr_list));
-        }
-        if (ck_cdef && key == ck_cdef->name_as_text()) {
-            if (!ck_bounds.empty()) {
-                throw api_error::validation("Currently only a single restriction per key is allowed");
-            }
-            ck_bounds.push_back(calculate_ck_bound(schema, *ck_cdef, comp_definition, attr_list));
-        }
-    }
-
-    // Validate that a query's conditions must be on the hash key, and
-    // optionally also on the sort key if it exists.
-    if (partition_ranges.empty()) {
-        throw api_error::validation(format("Query missing condition on hash key '{}'", schema->partition_key_columns().front().name_as_text()));
-    }
-    if (schema->clustering_key_size() == 0) {
-        if (conditions.MemberCount() != 1) {
-            throw api_error::validation("Only one condition allowed in table with only hash key");
-        }
-    } else {
-        if (conditions.MemberCount() == 2 && ck_bounds.empty()) {
-            throw api_error::validation(format("Query missing condition on sort key '{}'", schema->clustering_key_columns().front().name_as_text()));
-        } else if (conditions.MemberCount() > 2) {
-            throw api_error::validation("Only one or two conditions allowed in table with hash key and sort key");
-        }
-    }
-
-    if (ck_bounds.empty()) {
-        ck_bounds.push_back(query::clustering_range::make_open_ended_both_sides());
-    }
-
-    return {std::move(partition_ranges), std::move(ck_bounds)};
-}
-
-// Extract the top-level column name specified in a KeyConditionExpression.
-// If a nested attribute path is given, a ValidationException is generated.
-// If the column name is a #reference to ExpressionAttributeNames, the
-// reference is resolved.
-// Note this function returns a string_view, which may refer to data in the
-// given parsed::value or expression_attribute_names.
-static std::string_view get_toplevel(const parsed::value& v,
-        const rjson::value* expression_attribute_names,
-        std::unordered_set<std::string>& used_attribute_names)
-{
-    const parsed::path& path = std::get<parsed::path>(v._value);
-    if (path.has_operators()) {
-        throw api_error::validation("KeyConditionExpression does not support nested attributes");
-    }
-    std::string_view column_name = path.root();
-    if (column_name.size() > 0 && column_name[0] == '#') {
-        used_attribute_names.emplace(column_name);
-        if (!expression_attribute_names) {
-            throw api_error::validation(
-                    fmt::format("ExpressionAttributeNames missing, entry '{}' required by KeyConditionExpression",
-                            column_name));
-        }
-        const rjson::value* value = rjson::find(*expression_attribute_names, column_name);
-        if (!value || !value->IsString()) {
-            throw api_error::validation(
-                    fmt::format("ExpressionAttributeNames missing entry '{}' required by KeyConditionExpression",
-                            column_name));
-        }
-        column_name = rjson::to_string_view(*value);
-    }
-    return column_name;
-}
-
-// Extract a constant value specified in a KeyConditionExpression.
-// This constant was originally parsed as a reference (:name) to a member of
-// ExpressionAttributeValues, but at this point, after resolve_value(), it
-// was already converted into a JSON value.
-// This function decodes the value (using its given expected type) into bytes
-// which Scylla uses as the actual key value. If the value has the wrong type,
-// or the input had other problems, a ValidationException is thrown.
-static bytes get_constant_value(const parsed::value& v,
-        const column_definition& column)
-{
-    const parsed::constant& constant = std::get<parsed::constant>(v._value);
-    const parsed::constant::literal& lit = std::get<parsed::constant::literal>(constant._value);
-    return get_key_from_typed_value(*lit, column);
-}
-
-// condition_expression_and_list extracts a list of ANDed primitive conditions
-// from a condition_expression. This is useful for KeyConditionExpression,
-// which may not use OR or NOT. If the given condition_expression does use
-// OR or NOT, this function throws a ValidationException.
-static void condition_expression_and_list(
-        const parsed::condition_expression& condition_expression,
-        std::vector<const parsed::primitive_condition*>& conditions)
-{
-    if (condition_expression._negated) {
-        throw api_error::validation("KeyConditionExpression cannot use NOT");
-    }
-    std::visit(overloaded_functor {
-        [&] (const parsed::primitive_condition& cond) {
-            conditions.push_back(&cond);
-        },
-        [&] (const parsed::condition_expression::condition_list& list) {
-            if (list.op == '|' && list.conditions.size() > 1) {
-                throw api_error::validation("KeyConditionExpression cannot use OR");
-            }
-            for (const parsed::condition_expression& cond : list.conditions) {
-                condition_expression_and_list(cond, conditions);
-            }
-        }
-    }, condition_expression._expression);
-}
-
-// Calculates primary key bounds from KeyConditionExpression
-static std::pair<dht::partition_range_vector, std::vector<query::clustering_range>>
-calculate_bounds_condition_expression(schema_ptr schema,
-        const rjson::value& expression,
-        const rjson::value* expression_attribute_values,
-        std::unordered_set<std::string>& used_attribute_values,
-        const rjson::value* expression_attribute_names,
-        std::unordered_set<std::string>& used_attribute_names,
-        parsed::expression_cache& parsed_expression_cache)
-{
-    if (!expression.IsString()) {
-        throw api_error::validation("KeyConditionExpression must be a string");
-    }
-    if (expression.GetStringLength() == 0) {
-        throw api_error::validation("KeyConditionExpression must not be empty");
-    }
-    // We parse the KeyConditionExpression with the same parser we use for
-    // ConditionExpression. But KeyConditionExpression only supports a subset
-    // of the ConditionExpression features, so we have many additional
-    // verifications below that the key condition is legal. Briefly, a valid
-    // key condition must contain a single partition key and a single
-    // sort-key range.
-    parsed::condition_expression p;
-    try {
-        p = parsed_expression_cache.parse_condition_expression(rjson::to_string_view(expression), "KeyConditionExpression");
-    } catch(expressions_syntax_error& e) {
-        throw api_error::validation(e.what());
-    }
-    resolve_condition_expression(p,
-            expression_attribute_names, expression_attribute_values,
-            used_attribute_names, used_attribute_values);
-    std::vector<const parsed::primitive_condition*> conditions;
-    condition_expression_and_list(p, conditions);
-
-    if (conditions.size() < 1 || conditions.size() > 2) {
-        throw api_error::validation(
-                "KeyConditionExpression syntax error: must have 1 or 2 conditions");
-    }
-    // Scylla allows us to have an (equality) constraint on the partition key
-    // pk_cdef, and a range constraint on the *first* clustering key ck_cdef.
-    // Note that this is also good enough for our GSI implementation - the
-    // GSI's user-specified sort key will be the first clustering key.
-    // FIXME: In the case described in issue #5320 (base and GSI both have
-    // just hash key - but different ones), this may allow the user to Query
-    // using the base key which isn't officially part of the GSI.
-    const column_definition& pk_cdef = schema->partition_key_columns().front();
-    const column_definition* ck_cdef = schema->clustering_key_size() > 0 ?
-            &schema->clustering_key_columns().front() : nullptr;
-
-    dht::partition_range_vector partition_ranges;
-    std::vector<query::clustering_range> ck_bounds;
-    for (const parsed::primitive_condition* condp : conditions) {
-        const parsed::primitive_condition& cond = *condp;
-        // In all comparison operators, one operand must be a column name,
-        // the other is a constant (value reference). We remember which is
-        // which in toplevel_ind, and also the column name in key (not just
-        // for comparison operators).
-        std::string_view key;
-        int toplevel_ind;
-        switch (cond._values.size()) {
-        case 1: {
-            // The only legal single-value condition is a begin_with() function,
-            // and it must have two parameters - a top-level attribute and a
-            // value reference..
-            const parsed::value::function_call *f = std::get_if<parsed::value::function_call>(&cond._values[0]._value);
-            if (!f) {
-                throw api_error::validation("KeyConditionExpression cannot be just a value");
-            }
-            if (f->_function_name != "begins_with") {
-                throw api_error::validation(
-                        fmt::format("KeyConditionExpression function '{}' not supported",f->_function_name));
-            }
-            if (f->_parameters.size() != 2 || !f->_parameters[0].is_path() ||
-                    !f->_parameters[1].is_constant()) {
-                throw api_error::validation(
-                        "KeyConditionExpression begins_with() takes attribute and value");
-            }
-            key = get_toplevel(f->_parameters[0], expression_attribute_names, used_attribute_names);
-            toplevel_ind = -1;
-            break;
-        }
-        case 2:
-            if (cond._values[0].is_path() && cond._values[1].is_constant()) {
-                toplevel_ind = 0;
-            } else if (cond._values[1].is_path() && cond._values[0].is_constant()) {
-                toplevel_ind = 1;
-            } else {
-                throw api_error::validation("KeyConditionExpression must compare attribute with constant");
-            }
-            key = get_toplevel(cond._values[toplevel_ind],  expression_attribute_names, used_attribute_names);
-            break;
-        case 3:
-            // Only BETWEEN has three operands. First must be a column name,
-            // two other must be value references (constants):
-            if (cond._op != parsed::primitive_condition::type::BETWEEN) {
-                // Shouldn't happen unless we have a bug in the parser
-                throw std::logic_error(format("Wrong number of values {} in primitive_condition", cond._values.size()));
-            }
-            if (cond._values[0].is_path() && cond._values[1].is_constant() && cond._values[2].is_constant()) {
-                toplevel_ind = 0;
-                key = get_toplevel(cond._values[0], expression_attribute_names, used_attribute_names);
-            } else {
-                throw api_error::validation("KeyConditionExpression must compare attribute with constants");
-            }
-            break;
-        default:
-            // Shouldn't happen unless we have a bug in the parser
-            throw std::logic_error(format("Wrong number of values {} in primitive_condition", cond._values.size()));
-        }
-        if (cond._op == parsed::primitive_condition::type::IN) {
-            throw api_error::validation("KeyConditionExpression does not support IN operator");
-        } else if (cond._op == parsed::primitive_condition::type::NE) {
-            throw api_error::validation("KeyConditionExpression does not support NE operator");
-        } else if (cond._op == parsed::primitive_condition::type::EQ) {
-            // the EQ operator (=) is the only one which can be used for both
-            // the partition key and sort key:
-            if (sstring(key) == pk_cdef.name_as_text()) {
-                if (!partition_ranges.empty()) {
-                    throw api_error::validation(
-                            "KeyConditionExpression allows only one condition for each key");
-                }
-                bytes raw_value = get_constant_value(cond._values[!toplevel_ind], pk_cdef);
-                partition_key pk = partition_key::from_singular_bytes(*schema, std::move(raw_value));
-                auto decorated_key = dht::decorate_key(*schema, pk);
-                partition_ranges.push_back(dht::partition_range(decorated_key));
-            } else if (ck_cdef && sstring(key) == ck_cdef->name_as_text()) {
-                if (!ck_bounds.empty()) {
-                    throw api_error::validation(
-                            "KeyConditionExpression allows only one condition for each key");
-                }
-                bytes raw_value = get_constant_value(cond._values[!toplevel_ind], *ck_cdef);
-                clustering_key ck = clustering_key::from_single_value(*schema, raw_value);
-                ck_bounds.push_back(query::clustering_range(ck));
-            } else {
-                throw api_error::validation(
-                        fmt::format("KeyConditionExpression condition on non-key attribute {}", key));
-            }
-            continue;
-        }
-        // If we're still here, it's any other operator besides EQ, and these
-        // are allowed *only* on the clustering key:
-        if (sstring(key) == pk_cdef.name_as_text()) {
-            throw api_error::validation(
-                    fmt::format("KeyConditionExpression only '=' condition is supported on partition key {}", key));
-        } else if (!ck_cdef || sstring(key) != ck_cdef->name_as_text()) {
-            throw api_error::validation(
-                    fmt::format("KeyConditionExpression condition on non-key attribute {}", key));
-        }
-        if (!ck_bounds.empty()) {
-            throw api_error::validation(
-                    "KeyConditionExpression allows only one condition for each key");
-        }
-        if (cond._op == parsed::primitive_condition::type::BETWEEN) {
-            clustering_key ck1 = clustering_key::from_single_value(*schema,
-                    get_constant_value(cond._values[1], *ck_cdef));
-            clustering_key ck2 = clustering_key::from_single_value(*schema,
-                    get_constant_value(cond._values[2], *ck_cdef));
-            ck_bounds.push_back(query::clustering_range::make(
-                    query::clustering_range::bound(ck1), query::clustering_range::bound(ck2)));
-            continue;
-        } else if (cond._values.size() == 1) {
-            // We already verified above, that this case this can only be a
-            // function call to begins_with(), with the first parameter the
-            // key, the second the value reference.
-            bytes raw_value = get_constant_value(
-                    std::get<parsed::value::function_call>(cond._values[0]._value)._parameters[1], *ck_cdef);
-            if (!ck_cdef->type->is_compatible_with(*utf8_type)) {
-                // begins_with() supported on bytes and strings (both stored
-                // in the database as strings) but not on numbers.
-                throw api_error::validation(
-                        fmt::format("KeyConditionExpression begins_with() not supported on type {}",
-                                type_to_string(ck_cdef->type)));
-            } else if (raw_value.empty()) {
-                ck_bounds.push_back(query::clustering_range::make_open_ended_both_sides());
-            } else {
-                clustering_key ck = clustering_key::from_single_value(*schema, raw_value);
-                ck_bounds.push_back(get_clustering_range_for_begins_with(std::move(raw_value), ck, schema, ck_cdef->type));
-            }
-            continue;
-        }
-
-        // All remaining operator have one value reference parameter in index
-        // !toplevel_ind. Note how toplevel_ind==1 reverses the direction of
-        // an inequality.
-        bytes raw_value = get_constant_value(cond._values[!toplevel_ind], *ck_cdef);
-        clustering_key ck = clustering_key::from_single_value(*schema, raw_value);
-        if ((cond._op == parsed::primitive_condition::type::LT && toplevel_ind == 0) ||
-            (cond._op == parsed::primitive_condition::type::GT && toplevel_ind == 1)) {
-            ck_bounds.push_back(query::clustering_range::make_ending_with(query::clustering_range::bound(ck, false)));
-        } else if ((cond._op == parsed::primitive_condition::type::GT && toplevel_ind == 0) ||
-                   (cond._op == parsed::primitive_condition::type::LT && toplevel_ind == 1)) {
-            ck_bounds.push_back(query::clustering_range::make_starting_with(query::clustering_range::bound(ck, false)));
-        } else if ((cond._op == parsed::primitive_condition::type::LE && toplevel_ind == 0) ||
-                   (cond._op == parsed::primitive_condition::type::GE && toplevel_ind == 1)) {
-            ck_bounds.push_back(query::clustering_range::make_ending_with(query::clustering_range::bound(ck)));
-        } else if ((cond._op == parsed::primitive_condition::type::GE && toplevel_ind == 0) ||
-                   (cond._op == parsed::primitive_condition::type::LE && toplevel_ind == 1)) {
-            ck_bounds.push_back(query::clustering_range::make_starting_with(query::clustering_range::bound(ck)));
-        }
-    }
-
-    if (partition_ranges.empty()) {
-        throw api_error::validation(
-                format("KeyConditionExpression requires a condition on partition key {}", pk_cdef.name_as_text()));
-    }
-    if (ck_bounds.empty()) {
-        ck_bounds.push_back(query::clustering_range::make_open_ended_both_sides());
-    }
-    return {std::move(partition_ranges), std::move(ck_bounds)};
-}
-
-static future<executor::request_return_type> query_vector(
-        service::storage_proxy& proxy,
-        vector_search::vector_store_client& vsc,
-        rjson::value request,
-        service::client_state& client_state,
-        tracing::trace_state_ptr trace_state,
-        service_permit permit,
-        bool enforce_authorization,
-        bool warn_authorization,
-        alternator::stats& stats,
-        parsed::expression_cache& parsed_expr_cache) {
-    // If vector search is requested, IndexName must be given and must
-    // refer to a vector index - not to a GSI or LSI.
-    const rjson::value* index_name_v = rjson::find(request, "IndexName");
-    if (!index_name_v || !index_name_v->IsString()) {
-        co_return api_error::validation(
-            "VectorSearch requires IndexName referring to a vector index");
-    }
-    std::string_view index_name = rjson::to_string_view(*index_name_v);
-    schema_ptr base_schema = get_table(proxy, request);
-    bool is_vector = std::ranges::any_of(base_schema->indices(), [&](const index_metadata& im) {
-        const auto& opts = im.options();
-        auto it = opts.find(db::index::secondary_index::custom_class_option_name);
-        return im.name() == index_name && it != opts.end() && it->second == "vector_index";
-    });
-    if (!is_vector) {
-        co_return api_error::validation(
-            format("VectorSearch IndexName '{}' is not a vector index.", index_name));
-    }
-    // QueryVector is required inside VectorSearch.
-    const rjson::value* vector_search = rjson::find(request, "VectorSearch");
-    if (!vector_search || !vector_search->IsObject()) {
-        co_return api_error::validation(
-            "VectorSearch requires a VectorSearch parameter");
-    }
-    const rjson::value* query_vector = rjson::find(*vector_search, "QueryVector");
-    if (!query_vector || !query_vector->IsObject()) {
-        co_return api_error::validation(
-            "VectorSearch requires a QueryVector parameter");
-    }
-    // QueryVector should be is a DynamoDB value, which must be of type "L"
-    // (a list), containing only elements of type "N" (numbers). The number
-    // of these elements must be exactly the "dimensions" defined for this
-    // vector index. We'll now validate all these assumptions and parse
-    // all the numbers in the vector into an std::vector<float> query_vec -
-    // the type that ann() wants.
-    int dimensions = 0;
-    for (const index_metadata& im : base_schema->indices()) {
-        if (im.name() == index_name) {
-            auto dims_it = im.options().find("dimensions");
-            if (dims_it != im.options().end()) {
-                try {
-                    dimensions = std::stoi(dims_it->second);
-                } catch (...) {}
-            }
-            break;
-        }
-    }
-    throwing_assert(dimensions > 0);
-    const rjson::value* qv_list = rjson::find(*query_vector, "L");
-    if (!qv_list || !qv_list->IsArray()) {
-        co_return api_error::validation(
-            "VectorSearch QueryVector must be a list of numbers");
-    }
-    const auto& arr = qv_list->GetArray();
-    if ((int)arr.Size() != dimensions) {
-        co_return api_error::validation(
-            format("VectorSearch QueryVector length {} does not match index Dimensions {}",
-                arr.Size(), dimensions));
-    }
-    std::vector<float> query_vec;
-    query_vec.reserve(arr.Size());
-    for (const rjson::value& elem : arr) {
-        if (!elem.IsObject()) {
-            co_return api_error::validation(
-                "VectorSearch QueryVector must contain only numbers");
-        }
-        const rjson::value* n_val = rjson::find(elem, "N");
-        if (!n_val || !n_val->IsString()) {
-            co_return api_error::validation(
-                "VectorSearch QueryVector must contain only numbers");
-        }
-        std::string_view num_str = rjson::to_string_view(*n_val);
-        float f;
-        auto [ptr, ec] = std::from_chars(num_str.data(), num_str.data() + num_str.size(), f);
-        if (ec != std::errc{} || ptr != num_str.data() + num_str.size()) {
-            co_return api_error::validation(
-                format("VectorSearch QueryVector element '{}' is not a valid number", num_str));
-        }
-        query_vec.push_back(f);
-    }
-
-    // Limit is mandatory for vector search: it defines k, the number of
-    // nearest neighbors to return.
-    const rjson::value* limit_json = rjson::find(request, "Limit");
-    if (!limit_json || !limit_json->IsUint()) {
-        co_return api_error::validation("VectorSearch requires a positive integer Limit parameter");
-    }
-    uint32_t limit = limit_json->GetUint();
-    if (limit == 0) {
-        co_return api_error::validation("Limit must be greater than 0");
-    }
-
-    // Consistent reads are not supported for vector search, just like GSI.
-    if (get_read_consistency(request) != db::consistency_level::LOCAL_ONE) {
-        co_return api_error::validation(
-            "Consistent reads are not allowed on vector indexes");
-    }
-
-    // Pagination (ExclusiveStartKey) is not supported for vector search.
-    if (rjson::find(request, "ExclusiveStartKey")) {
-        co_return api_error::validation(
-            "VectorSearch does not support pagination (ExclusiveStartKey)");
-    }
-
-    // ScanIndexForward is not supported for vector search: the ordering of
-    // results is determined by vector distance, not by the sort key.
-    if (rjson::find(request, "ScanIndexForward")) {
-        co_return api_error::validation(
-            "VectorSearch does not support ScanIndexForward");
-    }
-
-    std::unordered_set<std::string> used_attribute_names;
-    std::unordered_set<std::string> used_attribute_values;
-    // Parse the Select parameter and determine which attributes to return.
-    // For a vector index, the default Select is ALL_ATTRIBUTES (full items).
-    // ALL_PROJECTED_ATTRIBUTES is significantly more efficent because it
-    // returns what the vector store returned without looking up additional
-    // base-table data. Currently only the primary key attributes are projected
-    // but in the future we'll implement projecting additional attributes into
-    // the vector index - these additional attributes will also be usable for
-    // filtering). COUNT returns only the count without items.
-    select_type select = parse_select(request, table_or_view_type::vector_index);
-    std::optional<alternator::attrs_to_get> attrs_to_get_opt;
-    if (select == select_type::projection) {
-        // ALL_PROJECTED_ATTRIBUTES for a vector index: return only key attributes.
-        alternator::attrs_to_get key_attrs;
-        for (const column_definition& cdef : base_schema->partition_key_columns()) {
-            attribute_path_map_add("Select", key_attrs, cdef.name_as_text());
-        }
-        for (const column_definition& cdef : base_schema->clustering_key_columns()) {
-            attribute_path_map_add("Select", key_attrs, cdef.name_as_text());
-        }
-        attrs_to_get_opt = std::move(key_attrs);
-    } else {
-        attrs_to_get_opt = calculate_attrs_to_get(request, parsed_expr_cache, used_attribute_names, select);
-    }
-    // QueryFilter (the old-style API) is not supported for vector search Queries.
-    if (rjson::find(request, "QueryFilter")) {
-        co_return api_error::validation(
-            "VectorSearch does not support QueryFilter; use FilterExpression instead");
-    }
-    // FilterExpression: post-filter the vector search results by any attribute.
-    filter flt(parsed_expr_cache, request, filter::request_type::QUERY,
-               used_attribute_names, used_attribute_values);
-    const rjson::value* expression_attribute_names = rjson::find(request, "ExpressionAttributeNames");
-    verify_all_are_used(expression_attribute_names, used_attribute_names, "ExpressionAttributeNames", "Query");
-    const rjson::value* expression_attribute_values = rjson::find(request, "ExpressionAttributeValues");
-    verify_all_are_used(expression_attribute_values, used_attribute_values, "ExpressionAttributeValues", "Query");
-
-    // Verify the user has SELECT permission on the base table, as we
-    // do for every type of read operation after validating the input
-    // parameters.
-    co_await verify_permission(enforce_authorization, warn_authorization,
-            client_state, base_schema, auth::permission::SELECT, stats);
-
-    // Query the vector store for the approximate nearest neighbors.
-    auto timeout = executor::default_timeout();
-    abort_on_expiry aoe(timeout);
-    rjson::value pre_filter = rjson::empty_object(); // TODO, implement
-    auto pkeys_result = co_await vsc.ann(
-            base_schema->ks_name(), std::string(index_name), base_schema,
-            std::move(query_vec), limit, pre_filter, aoe.abort_source());
-    if (!pkeys_result.has_value()) {
-        const sstring error_msg = std::visit(vector_search::error_visitor{}, pkeys_result.error());
-        co_return api_error::validation(error_msg);
-    }
-    const std::vector<vector_search::primary_key>& pkeys = pkeys_result.value();
-
-    // For SELECT=COUNT with no filter: skip fetching from the base table and
-    // just return the count of candidates returned by the vector store.
-    // If a filter is present, fall through to the base-table fetch to apply it.
-    if (select == select_type::count && !flt) {
-        rjson::value response = rjson::empty_object();
-        rjson::add(response, "Count", rjson::value(static_cast<int>(pkeys.size())));
-        rjson::add(response, "ScannedCount", rjson::value(static_cast<int>(pkeys.size())));
-        co_return rjson::print(std::move(response));
-    }
-
-    // For SELECT=ALL_PROJECTED_ATTRIBUTES with no filter: skip fetching from
-    // the base table and build items directly from the key columns returned by
-    // the vector store. If a filter is present, fall through to the base-table
-    // fetch to apply it.
-    if (select == select_type::projection && !flt) {
-        rjson::value items_json = rjson::empty_array();
-        for (const auto& pkey : pkeys) {
-            rjson::value item = rjson::empty_object();
-            std::vector<bytes> exploded_pk = pkey.partition.key().explode();
-            auto exploded_pk_it = exploded_pk.begin();
-            for (const column_definition& cdef : base_schema->partition_key_columns()) {
-                rjson::value key_val = rjson::empty_object();
-                rjson::add_with_string_name(key_val, type_to_string(cdef.type), json_key_column_value(*exploded_pk_it, cdef));
-                rjson::add_with_string_name(item, std::string_view(cdef.name_as_text()), std::move(key_val));
-                ++exploded_pk_it;
-            }
-            if (base_schema->clustering_key_size() > 0) {
-                std::vector<bytes> exploded_ck = pkey.clustering.explode();
-                auto exploded_ck_it = exploded_ck.begin();
-                for (const column_definition& cdef : base_schema->clustering_key_columns()) {
-                    rjson::value key_val = rjson::empty_object();
-                    rjson::add_with_string_name(key_val, type_to_string(cdef.type), json_key_column_value(*exploded_ck_it, cdef));
-                    rjson::add_with_string_name(item, std::string_view(cdef.name_as_text()), std::move(key_val));
-                    ++exploded_ck_it;
-                }
-            }
-            rjson::push_back(items_json, std::move(item));
-        }
-        rjson::value response = rjson::empty_object();
-        rjson::add(response, "Count", rjson::value(static_cast<int>(items_json.Size())));
-        rjson::add(response, "ScannedCount", rjson::value(static_cast<int>(pkeys.size())));
-        rjson::add(response, "Items", std::move(items_json));
-        co_return rjson::print(std::move(response));
-    }
-
-    // TODO: For SELECT=SPECIFIC_ATTRIBUTES, if they are part of the projected
-    // attributes, we should use the above optimized code path - not fall through
-    // to the read from the base table as below as we need to do if the specific
-    // attributes contain non-projected columns.
-
-    // Fetch the matching items from the base table and build the response.
-    // When a filter is present, we always fetch the full item so that all
-    // attributes are available for filter evaluation, regardless of the
-    // projection required for the final response.
-    auto selection = cql3::selection::selection::wildcard(base_schema);
-    auto regular_columns = base_schema->regular_columns()
-            | std::views::transform(&column_definition::id)
-            | std::ranges::to<query::column_id_vector>();
-    auto attrs_to_get = ::make_shared<const std::optional<alternator::attrs_to_get>>(
-        flt ? std::nullopt : std::move(attrs_to_get_opt));
-
-    rjson::value items_json = rjson::empty_array();
-    int matched_count = 0;
-
-    if (base_schema->clustering_key_size() == 0) {
-        // Hash-only table: query each partition individually, in the order
-        // returned by the vector store, to preserve vector-distance ordering
-        // in the response. A multi-partition batch read would return items in
-        // token order instead, which would be wrong.
-        // FIXME: do this more efficiently with a batched read that preserves
-        // ordering.
-        for (const auto& pkey : pkeys) {
-            std::vector<query::clustering_range> bounds{
-                    query::clustering_range::make_open_ended_both_sides()};
-            auto partition_slice = query::partition_slice(std::move(bounds), {},
-                    regular_columns, selection->get_query_options());
-            auto command = ::make_lw_shared<query::read_command>(
-                    base_schema->id(), base_schema->version(), partition_slice,
-                    proxy.get_max_result_size(partition_slice),
-                    query::tombstone_limit(proxy.get_tombstone_limit()));
-            service::storage_proxy::coordinator_query_result qr =
-                    co_await proxy.query(base_schema, command,
-                            {dht::partition_range(pkey.partition)},
-                            db::consistency_level::LOCAL_ONE,
-                            service::storage_proxy::coordinator_query_options(
-                                    timeout, permit, client_state, trace_state));
-            auto opt_item = executor::describe_single_item(base_schema, partition_slice,
-                    *selection, *qr.query_result, *attrs_to_get);
-            if (opt_item && (!flt || flt.check(*opt_item))) {
-                ++matched_count;
-                if (select != select_type::count) {
-                    if (select == select_type::projection) {
-                        // A filter caused us to fall through here instead of
-                        // taking the projection early-exit above. Reconstruct
-                        // the key-only item from the full item we fetched.
-                        rjson::value key_item = rjson::empty_object();
-                        for (const column_definition& cdef : base_schema->partition_key_columns()) {
-                            if (const rjson::value* v = rjson::find(*opt_item, cdef.name_as_text())) {
-                                rjson::add_with_string_name(key_item, cdef.name_as_text(), rjson::copy(*v));
-                            }
-                        }
-                        rjson::push_back(items_json, std::move(key_item));
-                    } else {
-                        // When a filter caused us to fetch the full item, apply the
-                        // requested projection (attrs_to_get_opt) before returning it.
-                        // This mirrors describe_items_visitor::end_row() which removes
-                        // extra filter attributes from the returned item.
-                        if (flt && attrs_to_get_opt) {
-                            for (const auto& [attr_name, subpath] : *attrs_to_get_opt) {
-                                if (!subpath.has_value()) {
-                                    if (rjson::value* toplevel = rjson::find(*opt_item, attr_name)) {
-                                        if (!hierarchy_filter(*toplevel, subpath)) {
-                                            rjson::remove_member(*opt_item, attr_name);
-                                        }
-                                    }
-                                }
-                            }
-                            std::vector<std::string> to_remove;
-                            for (auto it = opt_item->MemberBegin(); it != opt_item->MemberEnd(); ++it) {
-                                std::string key(it->name.GetString(), it->name.GetStringLength());
-                                if (!attrs_to_get_opt->contains(key)) {
-                                    to_remove.push_back(std::move(key));
-                                }
-                            }
-                            for (const auto& key : to_remove) {
-                                rjson::remove_member(*opt_item, key);
-                            }
-                        }
-                        rjson::push_back(items_json, std::move(*opt_item));
-                    }
-                }
-            }
-        }
-    } else {
-        // Hash+range table: query each (partition, clustering) pair individually.
-        // FIXME: do this more efficiently!!!
-        for (const auto& pkey : pkeys) {
-            std::vector<query::clustering_range> bounds{
-                    query::clustering_range::make_singular(pkey.clustering)};
-            auto partition_slice = query::partition_slice(std::move(bounds), {},
-                    regular_columns, selection->get_query_options());
-            auto command = ::make_lw_shared<query::read_command>(
-                    base_schema->id(), base_schema->version(), partition_slice,
-                    proxy.get_max_result_size(partition_slice),
-                    query::tombstone_limit(proxy.get_tombstone_limit()));
-            service::storage_proxy::coordinator_query_result qr =
-                    co_await proxy.query(base_schema, command,
-                            {dht::partition_range(pkey.partition)},
-                            db::consistency_level::LOCAL_ONE,
-                            service::storage_proxy::coordinator_query_options(
-                                    timeout, permit, client_state, trace_state));
-            auto opt_item = executor::describe_single_item(base_schema, partition_slice,
-                    *selection, *qr.query_result, *attrs_to_get);
-            if (opt_item && (!flt || flt.check(*opt_item))) {
-                ++matched_count;
-                if (select != select_type::count) {
-                    if (select == select_type::projection) {
-                        // A filter caused us to fall through here; project to keys.
-                        rjson::value key_item = rjson::empty_object();
-                        for (const column_definition& cdef : base_schema->partition_key_columns()) {
-                            if (const rjson::value* v = rjson::find(*opt_item, cdef.name_as_text())) {
-                                rjson::add_with_string_name(key_item, cdef.name_as_text(), rjson::copy(*v));
-                            }
-                        }
-                        for (const column_definition& cdef : base_schema->clustering_key_columns()) {
-                            if (const rjson::value* v = rjson::find(*opt_item, cdef.name_as_text())) {
-                                rjson::add_with_string_name(key_item, cdef.name_as_text(), rjson::copy(*v));
-                            }
-                        }
-                        rjson::push_back(items_json, std::move(key_item));
-                    } else {
-                        // When a filter caused us to fetch the full item, apply the
-                        // requested projection (attrs_to_get_opt) before returning it.
-                        // This mirrors describe_items_visitor::end_row() which removes
-                        // extra filter attributes from the returned item.
-                        if (flt && attrs_to_get_opt) {
-                            for (const auto& [attr_name, subpath] : *attrs_to_get_opt) {
-                                if (!subpath.has_value()) {
-                                    if (rjson::value* toplevel = rjson::find(*opt_item, attr_name)) {
-                                        if (!hierarchy_filter(*toplevel, subpath)) {
-                                            rjson::remove_member(*opt_item, attr_name);
-                                        }
-                                    }
-                                }
-                            }
-                            std::vector<std::string> to_remove;
-                            for (auto it = opt_item->MemberBegin(); it != opt_item->MemberEnd(); ++it) {
-                                std::string key(it->name.GetString(), it->name.GetStringLength());
-                                if (!attrs_to_get_opt->contains(key)) {
-                                    to_remove.push_back(std::move(key));
-                                }
-                            }
-                            for (const auto& key : to_remove) {
-                                rjson::remove_member(*opt_item, key);
-                            }
-                        }
-                        rjson::push_back(items_json, std::move(*opt_item));
-                    }
-                }
-            }
-        }
-    }
-
-    rjson::value response = rjson::empty_object();
-    if (select == select_type::count) {
-        rjson::add(response, "Count", rjson::value(matched_count));
-    } else {
-        rjson::add(response, "Count", rjson::value(static_cast<int>(items_json.Size())));
-        rjson::add(response, "Items", std::move(items_json));
-    }
-    rjson::add(response, "ScannedCount", rjson::value(static_cast<int>(pkeys.size())));
-    co_return rjson::print(std::move(response));
-}
-
-future<executor::request_return_type> executor::query(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
-    _stats.api_operations.query++;
-    elogger.trace("Querying {}", request);
-
-    if (rjson::find(request, "VectorSearch")) {
-        // If vector search is requested, we have a separate code path.
-        // IndexName must be given and must refer to a vector index - not
-        // to a GSI or LSI as the code below assumes.
-        return query_vector(_proxy, _vsc, std::move(request), client_state, trace_state, std::move(permit),
-                _enforce_authorization, _warn_authorization, _stats, *_parsed_expression_cache);
-    }
-
-    auto [schema, table_type] = get_table_or_view(_proxy, request);
-    db::consistency_level cl = get_read_consistency(request);
-
-    maybe_audit(audit_info, audit::statement_category::QUERY, schema->ks_name(), schema->cf_name(), "Query", request, cl);
-
-    get_stats_from_schema(_proxy, *schema)->api_operations.query++;
-    tracing::add_alternator_table_name(trace_state, schema->cf_name());
-
-    rjson::value* exclusive_start_key = rjson::find(request, "ExclusiveStartKey");
-    if (table_type == table_or_view_type::gsi && cl != db::consistency_level::LOCAL_ONE) {
-        return make_ready_future<request_return_type>(api_error::validation(
-                "Consistent reads are not allowed on global indexes (GSI)"));
-    }
-    rjson::value* limit_json = rjson::find(request, "Limit");
-    uint32_t limit = limit_json ? limit_json->GetUint64() : std::numeric_limits<uint32_t>::max();
-    if (limit <= 0) {
-        return make_ready_future<request_return_type>(api_error::validation("Limit must be greater than 0"));
-    }
-
-    const bool forward = get_bool_attribute(request, "ScanIndexForward", true);
-
-    rjson::value* key_conditions = rjson::find(request, "KeyConditions");
-    rjson::value* key_condition_expression = rjson::find(request, "KeyConditionExpression");
-    std::unordered_set<std::string> used_attribute_values;
-    std::unordered_set<std::string> used_attribute_names;
-    if (key_conditions && key_condition_expression) {
-        throw api_error::validation("Query does not allow both "
-                "KeyConditions and KeyConditionExpression to be given together");
-    } else if (!key_conditions && !key_condition_expression) {
-        throw api_error::validation("Query must have one of "
-                "KeyConditions or KeyConditionExpression");
-    }
-
-    const rjson::value* expression_attribute_names = rjson::find(request, "ExpressionAttributeNames");
-    const rjson::value* expression_attribute_values = rjson::find(request, "ExpressionAttributeValues");
-
-    // exactly one of key_conditions or key_condition_expression
-    auto [partition_ranges, ck_bounds] = key_conditions
-                ? calculate_bounds_conditions(schema, *key_conditions)
-                : calculate_bounds_condition_expression(schema, *key_condition_expression,
-                        expression_attribute_values,
-                        used_attribute_values,
-                        expression_attribute_names,
-                        used_attribute_names, *_parsed_expression_cache);
-
-    filter filter(*_parsed_expression_cache, request, filter::request_type::QUERY,
-            used_attribute_names, used_attribute_values);
-
-    // A query is not allowed to filter on the partition key or the sort key.
-    for (const column_definition& cdef : schema->partition_key_columns()) { // just one
-        if (filter.filters_on(cdef.name_as_text())) {
-            return make_ready_future<request_return_type>(api_error::validation(
-                    format("QueryFilter can only contain non-primary key attributes: Partition key attribute: {}", cdef.name_as_text())));
-        }
-    }
-    for (const column_definition& cdef : schema->clustering_key_columns()) {
-        if (filter.filters_on(cdef.name_as_text())) {
-            return make_ready_future<request_return_type>(api_error::validation(
-                    format("QueryFilter can only contain non-primary key attributes: Sort key attribute: {}", cdef.name_as_text())));
-        }
-        // FIXME: this "break" can avoid listing some clustering key columns
-        // we added for GSIs just because they existed in the base table -
-        // but not in all cases. We still have issue #5320.
-        break;
-    }
-
-    select_type select = parse_select(request, table_type);
-
-    auto attrs_to_get = calculate_attrs_to_get(request, *_parsed_expression_cache, used_attribute_names, select);
-    verify_all_are_used(expression_attribute_names, used_attribute_names, "ExpressionAttributeNames", "Query");
-    verify_all_are_used(expression_attribute_values, used_attribute_values, "ExpressionAttributeValues", "Query");
-    query::partition_slice::option_set opts;
-    opts.set_if<query::partition_slice::option::reversed>(!forward);
-    return do_query(_proxy, schema, exclusive_start_key, std::move(partition_ranges), std::move(ck_bounds), std::move(attrs_to_get), limit, cl,
-            std::move(filter), opts, client_state, _stats, std::move(trace_state), std::move(permit), _enforce_authorization, _warn_authorization);
-}
 
 future<executor::request_return_type> executor::list_tables(client_state& client_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
     _stats.api_operations.list_tables++;
diff --git a/alternator/executor.hh b/alternator/executor.hh
index ad880418e7..524354d1f8 100644
--- a/alternator/executor.hh
+++ b/alternator/executor.hh
@@ -67,6 +67,8 @@ class gossiper;
 
 class schema_builder;
 
+#include "alternator/attribute_path.hh"
+
 namespace alternator {
 
 enum class table_status;
@@ -78,71 +80,6 @@ bool is_alternator_keyspace(const sstring& ks_name);
 // Wraps the db::get_tags_of_table and throws if the table is missing the tags extension.
 const std::map<sstring, sstring>& get_tags_of_table_or_throw(schema_ptr schema);
 
-// An attribute_path_map object is used to hold data for various attributes
-// paths (parsed::path) in a hierarchy of attribute paths. Each attribute path
-// has a root attribute, and then modified by member and index operators -
-// for example in "a.b[2].c" we have "a" as the root, then ".b" member, then
-// "[2]" index, and finally ".c" member.
-// Data can be added to an attribute_path_map using the add() function, but
-// requires that attributes with data not be *overlapping* or *conflicting*:
-//
-// 1. Two attribute paths which are identical or an ancestor of one another
-//    are considered *overlapping* and not allowed. If a.b.c has data,
-//    we can't add more data in a.b.c or any of its descendants like a.b.c.d.
-//
-// 2. Two attribute paths which need the same parent to have both a member and
-//    an index are considered *conflicting* and not allowed. E.g., if a.b has
-//    data, you can't add a[1]. The meaning of adding both would be that the
-//    attribute a is both a map and an array, which isn't sensible.
-//
-// These two requirements are common to the two places where Alternator uses
-// this abstraction to describe how a hierarchical item is to be transformed:
-//
-// 1. In ProjectExpression: for filtering from a full top-level attribute
-//    only the parts for which user asked in ProjectionExpression.
-//
-// 2. In UpdateExpression: for taking the previous value of a top-level
-//    attribute, and modifying it based on the instructions in the user
-//    wrote in UpdateExpression.
-
-template<typename T>
-class attribute_path_map_node {
-public:
-    using data_t = T;
-    // We need the extra unique_ptr<> here because libstdc++ unordered_map
-    // doesn't work with incomplete types :-(
-    using members_t =  std::unordered_map<std::string, std::unique_ptr<attribute_path_map_node<T>>>;
-    // The indexes list is sorted because DynamoDB requires handling writes
-    // beyond the end of a list in index order.
-    using indexes_t = std::map<unsigned, std::unique_ptr<attribute_path_map_node<T>>>;
-    // The prohibition on "overlap" and "conflict" explained above means
-    // That only one of data, members or indexes is non-empty.
-    std::optional<std::variant<data_t, members_t, indexes_t>> _content;
-
-    bool is_empty() const { return !_content; }
-    bool has_value() const { return _content && std::holds_alternative<data_t>(*_content); }
-    bool has_members() const { return _content && std::holds_alternative<members_t>(*_content); }
-    bool has_indexes() const { return _content && std::holds_alternative<indexes_t>(*_content); }
-    // get_members() assumes that has_members() is true
-    members_t& get_members() { return std::get<members_t>(*_content); }
-    const members_t& get_members() const { return std::get<members_t>(*_content); }
-    indexes_t& get_indexes() { return std::get<indexes_t>(*_content); }
-    const indexes_t& get_indexes() const { return std::get<indexes_t>(*_content); }
-    T& get_value() { return std::get<T>(*_content); }
-    const T& get_value() const { return std::get<T>(*_content); }
-};
-
-template<typename T>
-using attribute_path_map = std::unordered_map<std::string, attribute_path_map_node<T>>;
-
-using attrs_to_get_node = attribute_path_map_node<std::monostate>;
-// attrs_to_get lists which top-level attribute are needed, and possibly also
-// which part of the top-level attribute is really needed (when nested
-// attribute paths appeared in the query).
-// Most code actually uses optional<attrs_to_get>. There, a disengaged
-// optional means we should get all attributes, not specific ones.
-using attrs_to_get = attribute_path_map<std::monostate>;
-
 namespace parsed {
 class expression_cache;
 }
@@ -351,4 +288,24 @@ arn_parts parse_arn(std::string_view arn, std::string_view arn_field_name, std::
 // The format is ks1|ks2|ks3... and table1|table2|table3...
 sstring print_names_for_audit(const std::set<sstring>& names);
 
+map_type attrs_type();
+lw_shared_ptr<stats> get_stats_from_schema(service::storage_proxy& sp, const schema& schema);
+std::string view_name(std::string_view table_name, std::string_view index_name,
+        const std::string& delim = ":", bool validate_len = true);
+std::string gsi_name(std::string_view table_name, std::string_view index_name,
+        bool validate_len = true);
+std::string lsi_name(std::string_view table_name, std::string_view index_name,
+        bool validate_len = true);
+std::string get_table_name(const rjson::value& request);
+schema_ptr try_get_internal_table(data_dictionary::database db, std::string_view table_name);
+std::optional<int> get_int_attribute(const rjson::value& value, std::string_view attribute_name);
+bool get_bool_attribute(const rjson::value& value, std::string_view attribute_name, bool default_return);
+void check_key(const rjson::value& key, const schema_ptr& schema);
+schema_ptr get_table_from_batch_request(const service::storage_proxy& proxy, const rjson::value::ConstMemberIterator& batch_request);
+void verify_all_are_used(
+        const rjson::value* field,
+        const std::unordered_set<std::string>& used,
+        const char* field_name,
+        const char* operation);
+
 }
diff --git a/alternator/executor_read.cc b/alternator/executor_read.cc
new file mode 100644
index 0000000000..05bc63a2b7
--- /dev/null
+++ b/alternator/executor_read.cc
@@ -0,0 +1,1997 @@
+/*
+ * Copyright 2019-present ScyllaDB
+ */
+
+/*
+ * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
+ */
+
+// This file implements the Alternator read operations: GetItem, BatchGetItem,
+// Query (including vector search) and Scan.
+// Public entry points:
+//   * executor::get_item()
+//   * executor::batch_get_item()
+//   * executor::scan()
+//   * executor::query()
+// Major internal functions:
+//   * do_query(): the common code for Query and Scan, except vector search.
+//   * query_vector(): the vector-search code path for Query with VectorSearch.
+// and a number of helper functions for parsing common parameters of read
+// requests such as TableName, IndexName, Select, FilterExpression,
+// ConsistentRead, ProjectionExpression, and more.
+
+#include "alternator/executor.hh"
+#include "alternator/conditions.hh"
+#include "alternator/expressions.hh"
+#include "alternator/consumed_capacity.hh"
+#include "alternator/serialization.hh"
+#include "alternator/attribute_path.hh"
+#include "auth/permission.hh"
+#include "cql3/selection/selection.hh"
+#include "cql3/result_set.hh"
+#include "query/query-request.hh"
+#include "schema/schema.hh"
+#include "service/client_state.hh"
+#include "service/pager/query_pagers.hh"
+#include "service/storage_proxy.hh"
+#include "index/secondary_index.hh"
+#include "utils/assert.hh"
+#include "utils/overloaded_functor.hh"
+#include "utils/error_injection.hh"
+#include "vector_search/vector_store_client.hh"
+#include <seastar/core/abort_on_expiry.hh>
+#include <seastar/core/coroutine.hh>
+#include <seastar/coroutine/maybe_yield.hh>
+#include <boost/range/algorithm/find_end.hpp>
+#include <charconv>
+
+using namespace std::chrono_literals;
+
+namespace alternator {
+
+extern logging::logger elogger; // from executor.cc
+
+// make_streamed_with_extra_array() is a variant of make_streamed() (defined elsewhere), which
+// builds a streaming response (a function writing to an output stream) from a
+// JSON object (rjson::value) but adds to it at the end an additional array.
+// The extra array is given a separate chunked_vector to avoid putting it
+// inside the rjson::value - because RapidJSON does contiguous allocations for
+// arrays which we want to avoid for potentially long arrays in Query/Scan
+// responses (see #23535).
+// If we ever fix RapidJSON to avoid contiguous allocations for arrays, or
+// replace it entirely (#24458), we can remove this function and the function
+// rjson::print_with_extra_array() which it calls.
+static executor::body_writer make_streamed_with_extra_array(rjson::value&& value,
+        std::string array_name, utils::chunked_vector<rjson::value>&& array) {
+    return [value = std::move(value), array_name = std::move(array_name), array = std::move(array)](output_stream<char>&& _out) mutable -> future<> {
+        auto out = std::move(_out);
+        std::exception_ptr ex;
+        try {
+            co_await rjson::print_with_extra_array(value, array_name, array, out);
+        } catch (...) {
+            ex = std::current_exception();
+        }
+        co_await out.close();
+        co_await rjson::destroy_gently(std::move(value));
+        // TODO: can/should we also destroy the array gently?
+        if (ex) {
+            co_await coroutine::return_exception_ptr(std::move(ex));
+        }
+    };
+}
+
+// select_type represents how the Select parameter of Query/Scan selects what
+// to return (see parse_select() for how request values map to these options).
+// calculate_attrs_to_get() also uses it: for "count" it returns no attributes.
+enum class select_type { regular, count, projection };
+
+// Check according to the request's "ConsistentRead" field, which consistency
+// level we need to use for the read. The field can be True for strongly
+// consistent reads, or False for eventually consistent reads, or if this
+// field is absent (or null), we default to eventually consistent reads.
+// In Scylla, eventually-consistent reads are implemented as consistency
+// level LOCAL_ONE, and strongly-consistent reads as LOCAL_QUORUM.
+static db::consistency_level get_read_consistency(const rjson::value& request) {
+    const rjson::value* consistent_read_value = rjson::find(request, "ConsistentRead");
+    bool consistent_read = false;
+    if (consistent_read_value && !consistent_read_value->IsNull()) {
+        if (consistent_read_value->IsBool()) {
+            consistent_read = consistent_read_value->GetBool();
+        } else {
+            throw api_error::validation("ConsistentRead flag must be a boolean");
+        }
+    }
+    return consistent_read ? db::consistency_level::LOCAL_QUORUM : db::consistency_level::LOCAL_ONE;
+}
+
+// attrs_to_get saves for each top-level attribute an attrs_to_get_node,
+// a hierarchy of subparts that need to be kept. The following function
+// calculate_attrs_to_get() takes either AttributesToGet or
+// ProjectionExpression parameters (having both is *not* allowed),
+// and returns the list of cells we need to read, or a disengaged optional
+// when *all* attributes are to be returned.
+// However, in our current implementation, only top-level attributes are
+// stored as separate cells - a nested document is stored serialized together
+// (as JSON) in the same cell. So this function returns a map - each key is the
+// top-level attribute we will need to read, and the value for each
+// top-level attribute is the partial hierarchy (struct hierarchy_filter)
+// that we will need to extract from that serialized JSON.
+// For example, if ProjectionExpression lists a.b and a.c[2], we
+// return one top-level attribute name, "a", with the value "{b, c[2]}".
+static std::optional<attrs_to_get> calculate_attrs_to_get(const rjson::value& req, parsed::expression_cache& parsed_expression_cache, std::unordered_set<std::string>& used_attribute_names, select_type select = select_type::regular) {
+    if (select == select_type::count) {
+        // An empty map asks to retrieve no attributes. Note that this is
+        // different from a disengaged optional which means retrieve all.
+        return attrs_to_get();
+    }
+    // FIXME: also need to handle select_type::projection
+    const bool has_attributes_to_get = req.HasMember("AttributesToGet");
+    const bool has_projection_expression = req.HasMember("ProjectionExpression");
+    if (has_attributes_to_get && has_projection_expression) {
+        throw api_error::validation(
+                format("GetItem does not allow both ProjectionExpression and AttributesToGet to be given together"));
+    }
+    if (has_attributes_to_get) {
+        const rjson::value& attributes_to_get = req["AttributesToGet"];
+        attrs_to_get ret;
+        for (auto it = attributes_to_get.Begin(); it != attributes_to_get.End(); ++it) {
+            attribute_path_map_add("AttributesToGet", ret, rjson::to_string(*it));
+            validate_attr_name_length("AttributesToGet", it->GetStringLength(), false);
+        }
+        if (ret.empty()) {
+            throw api_error::validation("Empty AttributesToGet is not allowed. Consider using Select=COUNT instead.");
+        }
+        return ret;
+    } else if (has_projection_expression) {
+        const rjson::value& projection_expression = req["ProjectionExpression"];
+        const rjson::value* expression_attribute_names = rjson::find(req, "ExpressionAttributeNames");
+        std::vector<parsed::path> paths_to_get;
+        try {
+            paths_to_get = parsed_expression_cache.parse_projection_expression(rjson::to_string_view(projection_expression));
+        } catch(expressions_syntax_error& e) {
+            throw api_error::validation(e.what());
+        }
+        resolve_projection_expression(paths_to_get, expression_attribute_names, used_attribute_names);
+        attrs_to_get ret;
+        for (const parsed::path& p : paths_to_get) {
+            attribute_path_map_add("ProjectionExpression", ret, p);
+        }
+        return ret;
+    }
+    // A disengaged optional asks to read everything
+    return std::nullopt;
+}
+
+// get_table_or_view() is similar to get_table(), except it returns either
+// a table or a materialized view from which to read, based on the TableName
+// and optional IndexName in the request. Only requests like Query and Scan
+// which allow IndexName should use this function.
+enum class table_or_view_type { base, lsi, gsi, vector_index };
+static std::pair<schema_ptr, table_or_view_type>
+get_table_or_view(service::storage_proxy& proxy, const rjson::value& request) {
+    table_or_view_type type = table_or_view_type::base;
+    std::string table_name = get_table_name(request);
+
+    if (schema_ptr s = try_get_internal_table(proxy.data_dictionary(), table_name)) {
+        return {s, type};
+    }
+
+    std::string keyspace_name = executor::KEYSPACE_NAME_PREFIX + table_name;
+    const rjson::value* index_name = rjson::find(request, "IndexName");
+    std::string orig_table_name;
+    if (index_name) {
+        if (index_name->IsString()) {
+            orig_table_name = std::move(table_name);
+            table_name = view_name(orig_table_name, rjson::to_string_view(*index_name));
+            type = table_or_view_type::gsi;
+        } else { // NOTE(review): to_string_view() below is applied to a non-string value - confirm this is safe
+            throw api_error::validation(
+                    fmt::format("Non-string IndexName '{}'", rjson::to_string_view(*index_name)));
+        }
+        // If no tables for global indexes were found, the index may be local
+        if (!proxy.data_dictionary().has_schema(keyspace_name, table_name)) {
+            type = table_or_view_type::lsi;
+            table_name = lsi_name(orig_table_name, rjson::to_string_view(*index_name));
+        }
+    }
+
+    try {
+        return { proxy.data_dictionary().find_schema(keyspace_name, table_name), type };
+    } catch(data_dictionary::no_such_column_family&) {
+        if (index_name) {
+            // DynamoDB returns a different error depending on whether the
+            // base table doesn't exist (ResourceNotFoundException) or it
+            // does exist but the index does not (ValidationException).
+            if (proxy.data_dictionary().has_schema(keyspace_name, orig_table_name)) {
+                throw api_error::validation(
+                    fmt::format("Requested resource not found: Index '{}' for table '{}'", rjson::to_string_view(*index_name), orig_table_name));
+            } else {
+                throw api_error::resource_not_found(
+                    fmt::format("Requested resource not found: Table: {} not found", orig_table_name));
+            }
+        } else {
+            throw api_error::resource_not_found(
+                fmt::format("Requested resource not found: Table: {} not found", table_name));
+        }
+    }
+}
+
+
+// Parse the "Select" parameter of a Scan or Query operation, throwing a
+// ValidationException in various forbidden combinations of options and
+// finally returning one of three options:
+// 1. regular - the default scan behavior of returning all or specific
+//    attributes ("ALL_ATTRIBUTES" or "SPECIFIC_ATTRIBUTES").
+// 2. count - just count the items ("COUNT")
+// 3. projection - return projected attributes ("ALL_PROJECTED_ATTRIBUTES")
+// A ValidationException is thrown when recognizing an invalid combination
+// of options - such as ALL_PROJECTED_ATTRIBUTES for a base table, or
+// SPECIFIC_ATTRIBUTES without ProjectionExpression or AttributesToGet.
+static select_type parse_select(const rjson::value& request, table_or_view_type table_type) {
+    const rjson::value* select_value = rjson::find(request, "Select");
+    if (!select_value) {
+        // If "Select" is not specified, it defaults to ALL_ATTRIBUTES
+        // on a base table or vector index, or ALL_PROJECTED_ATTRIBUTES on GSI/LSI.
+        return (table_type == table_or_view_type::base || table_type == table_or_view_type::vector_index) ?
+            select_type::regular : select_type::projection;
+    }
+    if (!select_value->IsString()) {
+        throw api_error::validation("Select parameter must be a string");
+    }
+    std::string_view select = rjson::to_string_view(*select_value);
+    const bool has_attributes_to_get = request.HasMember("AttributesToGet");
+    const bool has_projection_expression = request.HasMember("ProjectionExpression");
+    if (select == "SPECIFIC_ATTRIBUTES") {
+        if (has_projection_expression || has_attributes_to_get) {
+            return select_type::regular;
+        }
+        throw api_error::validation("Select=SPECIFIC_ATTRIBUTES requires AttributesToGet or ProjectionExpression");
+    }
+    if (has_projection_expression || has_attributes_to_get) {
+        throw api_error::validation("AttributesToGet or ProjectionExpression require Select to be either SPECIFIC_ATTRIBUTES or missing");
+    }
+    if (select == "COUNT") {
+        return select_type::count;
+    }
+    if (select == "ALL_ATTRIBUTES") {
+        // FIXME: when we support projections (#5036), if this is a GSI and
+        // not all attributes are projected to it, we should throw.
+        return select_type::regular;
+    }
+    if (select == "ALL_PROJECTED_ATTRIBUTES") {
+        if (table_type == table_or_view_type::base) {
+            throw api_error::validation("ALL_PROJECTED_ATTRIBUTES only allowed for indexes");
+        }
+        return select_type::projection;
+    }
+    throw api_error::validation(fmt::format("Unknown Select value '{}'. Allowed choices: ALL_ATTRIBUTES, SPECIFIC_ATTRIBUTES, ALL_PROJECTED_ATTRIBUTES, COUNT",
+        select));
+}
+
+// "filter" represents a condition that can be applied to individual items
+// read by a Query or Scan operation, to decide whether to keep the item.
+// A filter is constructed from a Query or Scan request. This uses the
+// relevant fields in the query (FilterExpression or QueryFilter/ScanFilter +
+// ConditionalOperator). These fields are pre-checked and pre-parsed as much
+// as possible, to ensure that later checking of many items is efficient.
+class filter {
+private:
+    // Old-style filter: holds QueryFilter/ScanFilter + ConditionalOperator:
+    struct conditions_filter {
+        bool require_all;
+        rjson::value conditions;
+    };
+    // New-style filter: holds a parsed FilterExpression:
+    struct expression_filter {
+        parsed::condition_expression expression;
+    };
+    std::optional<std::variant<conditions_filter, expression_filter>> _imp;
+public:
+    // Filtering for Scan and Query are very similar, but there are some
+    // small differences, especially the names of the request attributes.
+    enum class request_type { SCAN, QUERY };
+    // Note that a filter does not store pointers to the query used to
+    // construct it.
+    filter(parsed::expression_cache& parsed_expression_cache, const rjson::value& request, request_type rt,
+            std::unordered_set<std::string>& used_attribute_names,
+            std::unordered_set<std::string>& used_attribute_values);
+    bool check(const rjson::value& item) const;
+    bool filters_on(std::string_view attribute) const;
+    // for_filters_on() runs the given function on the attributes that the
+    // filter works on. It may run for the same attribute more than once if
+    // used more than once in the filter.
+    void for_filters_on(const noncopyable_function<void(std::string_view)>& func) const;
+    operator bool() const { return bool(_imp); } // true iff _imp is engaged
+};
+
+// Builds the filter from the filter-related fields of the request, and
+// throws api_error::validation if these fields are invalid or combined in
+// an unsupported way. used_attribute_names and used_attribute_values are
+// handed to resolve_condition_expression() when a FilterExpression is given.
+filter::filter(parsed::expression_cache& parsed_expression_cache, const rjson::value& request, request_type rt,
+        std::unordered_set<std::string>& used_attribute_names,
+        std::unordered_set<std::string>& used_attribute_values) {
+    const char* const legacy_attribute = (rt == request_type::SCAN) ? "ScanFilter" : "QueryFilter";
+    const rjson::value* const filter_expression = rjson::find(request, "FilterExpression");
+    const rjson::value* const legacy_conditions = rjson::find(request, legacy_attribute);
+    const auto operator_type = get_conditional_operator(request);
+
+    // ConditionalOperator is only meaningful together with a non-empty
+    // QueryFilter/ScanFilter.
+    const bool conditions_missing_or_empty = !legacy_conditions
+            || (legacy_conditions->IsObject() && legacy_conditions->GetObject().ObjectEmpty());
+    if (operator_type != conditional_operator_type::MISSING && conditions_missing_or_empty) {
+        throw api_error::validation(
+                format("'ConditionalOperator' parameter cannot be specified for missing or empty {}",
+                        legacy_attribute));
+    }
+    if (filter_expression && legacy_conditions) {
+        throw api_error::validation(
+                format("FilterExpression and {} are not allowed together", legacy_attribute));
+    }
+
+    // At this point at most one of the two filter styles is present.
+    if (filter_expression) {
+        if (!filter_expression->IsString()) {
+            throw api_error::validation("FilterExpression must be a string");
+        }
+        if (filter_expression->GetStringLength() == 0) {
+            throw api_error::validation("FilterExpression must not be empty");
+        }
+        if (rjson::find(request, "AttributesToGet")) {
+            throw api_error::validation("Cannot use both old-style and new-style parameters in same request: FilterExpression and AttributesToGet");
+        }
+        try {
+            auto parsed_expression = parsed_expression_cache.parse_condition_expression(
+                    rjson::to_string_view(*filter_expression), "FilterExpression");
+            const rjson::value* names = rjson::find(request, "ExpressionAttributeNames");
+            const rjson::value* values = rjson::find(request, "ExpressionAttributeValues");
+            resolve_condition_expression(parsed_expression,
+                    names, values,
+                    used_attribute_names, used_attribute_values);
+            _imp = expression_filter { std::move(parsed_expression) };
+        } catch (const expressions_syntax_error& e) {
+            // Syntax errors are reported to the user as validation errors.
+            throw api_error::validation(e.what());
+        }
+    } else if (legacy_conditions) {
+        if (rjson::find(request, "ProjectionExpression")) {
+            throw api_error::validation(format("Cannot use both old-style and new-style parameters in same request: {} and ProjectionExpression", legacy_attribute));
+        }
+        // A missing ConditionalOperator behaves like AND.
+        const bool require_all = operator_type != conditional_operator_type::OR;
+        // Copy the conditions, so the filter doesn't point into the request.
+        _imp = conditions_filter { require_all, rjson::copy(*legacy_conditions) };
+    }
+}
+
+// Tells whether the given item passes the filter. When the request had no
+// filter, every item passes.
+bool filter::check(const rjson::value& item) const {
+    if (!_imp) {
+        return true;
+    }
+    if (const auto* legacy = std::get_if<conditions_filter>(&*_imp)) {
+        return verify_condition(legacy->conditions, legacy->require_all, &item);
+    }
+    return verify_condition_expression(std::get<expression_filter>(*_imp).expression, &item);
+}
+
+// Tells whether the given attribute is one that the filter works on.
+// Always false when the request had no filter.
+bool filter::filters_on(std::string_view attribute) const {
+    if (!_imp) {
+        return false;
+    }
+    if (const auto* legacy = std::get_if<conditions_filter>(&*_imp)) {
+        // In the old-style filter, the member names of the conditions
+        // object are the filtered attributes.
+        return std::any_of(legacy->conditions.MemberBegin(), legacy->conditions.MemberEnd(),
+                [attribute] (const auto& member) {
+                    return rjson::to_string_view(member.name) == attribute;
+                });
+    }
+    return condition_expression_on(std::get<expression_filter>(*_imp).expression, attribute);
+}
+
+// Calls func for each attribute the filter works on. Does nothing when the
+// request had no filter.
+void filter::for_filters_on(const noncopyable_function<void(std::string_view)>& func) const {
+    if (!_imp) {
+        return;
+    }
+    if (const auto* legacy = std::get_if<conditions_filter>(&*_imp)) {
+        // Old-style filter: report the name of each member of the
+        // conditions object.
+        const auto last = legacy->conditions.MemberEnd();
+        for (auto member = legacy->conditions.MemberBegin(); member != last; ++member) {
+            func(rjson::to_string_view(member->name));
+        }
+        return;
+    }
+    for_condition_expression_on(std::get<expression_filter>(*_imp).expression, func);
+}
+
+// describe_items_visitor is a visitor over the rows of a result set. For
+// each row it gets a start_row() call, one accept_value() call per selected
+// column, and an end_row() call. It converts each row into a JSON item,
+// keeps the item only if it passes the filter, and counts all the rows seen.
+// The visitor holds references to the columns, attrs_to_get and filter it
+// was constructed with, so they must outlive it.
+class describe_items_visitor {
+    using columns_t = std::vector<const column_definition*>;
+    const columns_t& _columns;
+    const std::optional<attrs_to_get>& _attrs_to_get;
+    // Top-level attributes which the filter needs but were not requested
+    // in _attrs_to_get. See the constructor.
+    std::unordered_set<std::string> _extra_filter_attrs;
+    const filter& _filter;
+    // The column which the next accept_value() call refers to.
+    columns_t::const_iterator _column_it;
+    // The item being built from the current row.
+    rjson::value _item;
+    // _items is a chunked_vector<rjson::value> instead of a RapidJson array
+    // (rjson::value) because unfortunately RapidJson arrays are stored
+    // contiguously in memory, and cause large allocations when a Query/Scan
+    // returns a long list of short items (issue #23535).
+    utils::chunked_vector<rjson::value> _items;
+    // Number of rows seen, whether or not they passed the filter.
+    size_t _scanned_count;
+
+public:
+    describe_items_visitor(const columns_t& columns, const std::optional<attrs_to_get>& attrs_to_get, const filter& filter)
+            : _columns(columns)
+            , _attrs_to_get(attrs_to_get)
+            , _filter(filter)
+            , _column_it(columns.begin())
+            , _item(rjson::empty_object())
+            , _scanned_count(0)
+    {
+        // _filter.check() may need additional attributes not listed in
+        // _attrs_to_get (i.e., not requested as part of the output).
+        // We list those in _extra_filter_attrs. We will include them in
+        // the JSON but take them out before finally returning the JSON.
+        if (_attrs_to_get) {
+            _filter.for_filters_on([&] (std::string_view attr) {
+                std::string a(attr); // no heterogeneous maps searches :-(
+                if (!_attrs_to_get->contains(a)) {
+                    _extra_filter_attrs.emplace(std::move(a));
+                }
+            });
+        }
+    }
+
+    // Called before the values of each row.
+    void start_row() {
+        _column_it = _columns.begin();
+    }
+
+    // Called with the value of the next column of the current row; the
+    // value is disengaged if the row has no value for this column.
+    void accept_value(managed_bytes_view_opt result_bytes_view) {
+        if (!result_bytes_view) {
+            ++_column_it;
+            return;
+        }
+        result_bytes_view->with_linearized([this] (bytes_view bv) {
+            std::string column_name = (*_column_it)->name_as_text();
+            if (column_name != executor::ATTRS_COLUMN_NAME) {
+                // A real column: output it as "name": {"type": value}.
+                if (!_attrs_to_get || _attrs_to_get->contains(column_name) || _extra_filter_attrs.contains(column_name)) {
+                    if (!_item.HasMember(column_name.c_str())) {
+                        rjson::add_with_string_name(_item, column_name, rjson::empty_object());
+                    }
+                    rjson::value& field = _item[column_name.c_str()];
+                    rjson::add_with_string_name(field, type_to_string((*_column_it)->type), json_key_column_value(bv, **_column_it));
+                }
+            } else {
+                // The attrs map column: each entry is one more attribute.
+                auto deserialized = attrs_type()->deserialize(bv);
+                // Refer to the map, its entries and their values in place
+                // (they live in "deserialized") instead of copying them for
+                // every row.
+                const auto& keys_and_values = value_cast<map_type_impl::native_type>(deserialized);
+                for (const auto& entry : keys_and_values) {
+                    std::string attr_name = value_cast<sstring>(entry.first);
+                    if (!_attrs_to_get || _attrs_to_get->contains(attr_name) || _extra_filter_attrs.contains(attr_name)) {
+                        const bytes& value = value_cast<bytes>(entry.second);
+                        // Even if _attrs_to_get asked to keep only a part of a
+                        // top-level attribute, we keep the entire attribute
+                        // at this stage, because the item filter might still
+                        // need the other parts (it was easier for us to keep
+                        // extra_filter_attrs at top-level granularity). We'll
+                        // filter the unneeded parts after item filtering.
+                        rjson::add_with_string_name(_item, attr_name, deserialize_item(value));
+                    }
+                }
+            }
+        });
+        ++_column_it;
+    }
+
+    // Called after the values of each row: keeps the finished item if it
+    // passes the filter, and starts a new empty item.
+    void end_row() {
+        if (_filter.check(_item)) {
+            // As noted above, we kept entire top-level attributes listed in
+            // _attrs_to_get. We may need to only keep parts of them.
+            if (_attrs_to_get) {
+                for (const auto& attr: *_attrs_to_get) {
+                    // If !attr.second.has_value() it means we were asked not
+                    // to keep attr entirely, but just parts of it.
+                    if (!attr.second.has_value()) {
+                        rjson::value* toplevel= rjson::find(_item, attr.first);
+                        if (toplevel && !hierarchy_filter(*toplevel, attr.second)) {
+                            rjson::remove_member(_item, attr.first);
+                        }
+                    }
+                }
+            }
+            // Remove the extra attributes _extra_filter_attrs which we had
+            // to add just for the filter, and not requested to be returned:
+            for (const auto& attr : _extra_filter_attrs) {
+                rjson::remove_member(_item, attr);
+            }
+
+            _items.push_back(std::move(_item));
+        }
+        _item = rjson::empty_object();
+        ++_scanned_count;
+    }
+
+    // Hands over the items which passed the filter. Can only be called on
+    // an rvalue visitor, as it leaves the visitor without its items.
+    utils::chunked_vector<rjson::value> get_items() && {
+        return std::move(_items);
+    }
+
+    // The number of rows visited so far, including rows which did not pass
+    // the filter.
+    size_t get_scanned_count() const {
+        return _scanned_count;
+    }
+};
+
+// describe_items() converts a result set into items, and returns a tuple of
+// three things:
+// 1. A JSON object with the members "Count" and "ScannedCount" - but *not*
+//    "Items". The caller should add "Items" to this object if needed, or
+//    print it separately.
+// 2. The items themselves, as a chunked_vector and not as a RapidJSON
+//    array, because RapidJSON allocates its arrays contiguously and we
+//    want to avoid large contiguous allocations. This is a std::optional<>
+//    because the user may have requested only to count items and not to
+//    return any - which is different from returning an empty list of items.
+// 3. The number of items which passed the filter (the same as "Count").
+static future<std::tuple<rjson::value, std::optional<utils::chunked_vector<rjson::value>>, size_t>> describe_items(
+        const cql3::selection::selection& selection,
+        std::unique_ptr<cql3::result_set> result_set,
+        std::optional<attrs_to_get>&& attrs_to_get,
+        filter&& filter) {
+    describe_items_visitor visitor(selection.get_columns(), attrs_to_get, filter);
+    co_await result_set->visit_gently(visitor);
+
+    auto scanned = visitor.get_scanned_count();
+    utils::chunked_vector<rjson::value> matching_items = std::move(visitor).get_items();
+    auto matched = matching_items.size();
+
+    rjson::value summary = rjson::empty_object();
+    rjson::add(summary, "Count", rjson::value(matched));
+    rjson::add(summary, "ScannedCount", rjson::value(scanned));
+
+    // An engaged but empty attrs_to_get means the user asked not to get any
+    // attributes (i.e., a Scan or Query with Select=COUNT), and then we
+    // shouldn't return "Items" at all.
+    // TODO: consider optimizing the case of Select=COUNT without a filter.
+    // In that case, we currently build a list of empty items and here drop
+    // it. We could just count the items and not bother with the empty items.
+    // (However, remember that when we do have a filter, we need the items).
+    const bool items_wanted = !attrs_to_get || !attrs_to_get->empty();
+    std::optional<utils::chunked_vector<rjson::value>> returned_items;
+    if (items_wanted) {
+        returned_items = std::move(matching_items);
+    }
+    co_return std::tuple(std::move(summary), std::move(returned_items), matched);
+}
+
+// Converts a paging state into a JSON object (do_query() returns it to the
+// user as "LastEvaluatedKey"). The object has one member per key column,
+// each of the form "column name": {"type": value}.
+static rjson::value encode_paging_state(const schema& schema, const service::pager::paging_state& paging_state) {
+    rjson::value last_evaluated_key = rjson::empty_object();
+    // Adds the member "column name": {"type": value} for one key column.
+    auto add_key_column = [&last_evaluated_key] (const column_definition& cdef, const auto& raw_value) {
+        const auto& name = cdef.name_as_text();
+        rjson::add_with_string_name(last_evaluated_key, std::string_view(name), rjson::empty_object());
+        rjson::value& typed_value = last_evaluated_key[name];
+        rjson::add_with_string_name(typed_value, type_to_string(cdef.type), json_key_column_value(raw_value, cdef));
+    };
+
+    // First the partition key columns:
+    std::vector<bytes> pk_components = paging_state.get_partition_key().explode();
+    auto pk_component = pk_components.begin();
+    for (const column_definition& cdef : schema.partition_key_columns()) {
+        add_key_column(cdef, *pk_component);
+        ++pk_component;
+    }
+
+    // Then the clustering key columns, if the position has a key:
+    auto pos = paging_state.get_position_in_partition();
+    if (pos.has_key()) {
+        // Alternator itself allows at most one column in clustering key, but
+        // user can use Alternator api to access system tables which might have
+        // multiple clustering key columns. So we need to handle that case here.
+        auto ck_column = schema.clustering_key_columns().begin();
+        for (const auto& ck_component : pos.key().explode()) {
+            add_key_column(*ck_column, ck_component);
+            ++ck_column;
+        }
+    }
+
+    // To avoid possible conflicts (and thus having to reserve these names) we
+    // avoid adding the weight and region fields of the position to the paging
+    // state. Alternator will never need these as it doesn't have range
+    // tombstones (the only thing that can generate a position other than at(row)).
+    // We conditionally include these fields when reading CQL tables through alternator.
+    const bool needs_region_and_weight = !is_alternator_keyspace(schema.ks_name())
+            && (!pos.has_key() || pos.get_bound_weight() != bound_weight::equal);
+    if (needs_region_and_weight) {
+        rjson::add_with_string_name(last_evaluated_key, scylla_paging_region, rjson::empty_object());
+        rjson::add(last_evaluated_key[scylla_paging_region.data()], "S", rjson::from_string(fmt::to_string(pos.region())));
+        rjson::add_with_string_name(last_evaluated_key, scylla_paging_weight, rjson::empty_object());
+        rjson::add(last_evaluated_key[scylla_paging_weight.data()], "N", static_cast<int>(pos.get_bound_weight()));
+    }
+    return last_evaluated_key;
+}
+
+// RapidJSON allocates arrays contiguously in memory, so we want to avoid
+// returning a large number of items as a single rapidjson array, and use
+// a chunked_vector instead. The following constant is an arbitrary cutoff
+// point for when to switch from a rapidjson array to a chunked_vector:
+// do_query() streams the items separately from the rest of the response
+// when their number is equal to or larger than this constant.
+static constexpr int max_items_for_rapidjson_array = 256;
+
+// do_query() is the common implementation of reading one page of results
+// for a request: it fetches a page from the given partition ranges and
+// clustering bounds of the table, converts the rows to items which pass the
+// given filter, and returns the response with "Count", "ScannedCount",
+// "Items" (unless the items are not wanted - see describe_items()) and
+// "LastEvaluatedKey" (if the read did not reach the end).
+// Parameters worth noting:
+//  * exclusive_start_key - if not null, the key from which to continue a
+//    previous read. It is converted to a paging state.
+//  * limit - passed to the pager's fetch_page() as the page size.
+//  * custom_opts - additional partition slice options. If they include
+//    "reversed", the schema and the clustering bounds are reversed.
+//  * stats - updated with filtering statistics if there is a filter, and
+//    also passed to verify_permission().
+static future<executor::request_return_type> do_query(service::storage_proxy& proxy,
+        schema_ptr table_schema,
+        const rjson::value* exclusive_start_key,
+        dht::partition_range_vector partition_ranges,
+        std::vector<query::clustering_range> ck_bounds,
+        std::optional<attrs_to_get> attrs_to_get,
+        uint32_t limit,
+        db::consistency_level cl,
+        filter filter,
+        query::partition_slice::option_set custom_opts,
+        service::client_state& client_state,
+        alternator::stats& stats,
+        tracing::trace_state_ptr trace_state,
+        service_permit permit,
+        bool enforce_authorization,
+        bool warn_authorization) {
+    lw_shared_ptr<service::pager::paging_state> old_paging_state = nullptr;
+
+    tracing::trace(trace_state, "Performing a database query");
+
+    // Reverse the schema and the clustering bounds as the underlying code expects
+    // reversed queries in the native reversed format.
+    auto query_schema = table_schema;
+    const bool reversed = custom_opts.contains<query::partition_slice::option::reversed>();
+    if (reversed) {
+        query_schema = table_schema->get_reversed();
+
+        std::reverse(ck_bounds.begin(), ck_bounds.end());
+        for (auto& bound : ck_bounds) {
+            bound = query::reverse(bound);
+        }
+    }
+
+    // Convert the user's start key into a paging state for the pager. The
+    // position is the partition start unless the table has a clustering key.
+    if (exclusive_start_key) {
+        partition_key pk = pk_from_json(*exclusive_start_key, table_schema);
+        auto pos = position_in_partition::for_partition_start();
+        if (table_schema->clustering_key_size() > 0) {
+            pos = pos_from_json(*exclusive_start_key, table_schema);
+        }
+        old_paging_state = make_lw_shared<service::pager::paging_state>(pk, pos, query::max_partitions, query_id::create_null_id(), service::pager::paging_state::replicas_per_token_range{}, std::nullopt, 0);
+    }
+
+    // The SELECT permission on the table is verified before anything is read.
+    co_await verify_permission(enforce_authorization, warn_authorization, client_state, table_schema, auth::permission::SELECT, stats);
+
+    // Read all the regular and static columns of the table. Choosing which
+    // attributes to return is done later, in describe_items().
+    auto regular_columns =
+            table_schema->regular_columns() | std::views::transform(&column_definition::id)
+            | std::ranges::to<query::column_id_vector>();
+    auto static_columns =
+            table_schema->static_columns() | std::views::transform(&column_definition::id)
+            | std::ranges::to<query::column_id_vector>();
+    auto selection = cql3::selection::selection::wildcard(table_schema);
+    query::partition_slice::option_set opts = selection->get_query_options();
+    opts.add(custom_opts);
+    auto partition_slice = query::partition_slice(std::move(ck_bounds), std::move(static_columns), std::move(regular_columns), opts);
+    auto command = ::make_lw_shared<query::read_command>(query_schema->id(), query_schema->version(), partition_slice, proxy.get_max_result_size(partition_slice),
+        query::tombstone_limit(proxy.get_tombstone_limit()));
+
+    elogger.trace("Executing read query (reversed {}): table schema {}, query schema {}", partition_slice.is_reversed(), table_schema->version(), query_schema->version());
+
+    auto query_state_ptr = std::make_unique<service::query_state>(client_state, trace_state, std::move(permit));
+
+    // FIXME: should be moved above, set on opts, so get_max_result_size knows it?
+    command->slice.options.set<query::partition_slice::option::allow_short_read>();
+    // The query options are built in two steps: first with the consistency
+    // level, and then wrapped together with the paging state built above.
+    auto query_options = std::make_unique<cql3::query_options>(cl, std::vector<cql3::raw_value>{});
+    query_options = std::make_unique<cql3::query_options>(std::move(query_options), std::move(old_paging_state));
+    auto p = service::pager::query_pagers::pager(proxy, query_schema, selection, *query_state_ptr, *query_options, command, std::move(partition_ranges), nullptr);
+
+    std::unique_ptr<cql3::result_set> rs = co_await p->fetch_page(limit, gc_clock::now(), executor::default_timeout());
+    // If there is more to read, remember where to continue from.
+    if (!p->is_exhausted()) {
+        rs->get_metadata().set_paging_state(p->state());
+    }
+    auto paging_state = rs->get_metadata().paging_state();
+    // Remember whether we have a filter before the filter is moved from.
+    bool has_filter = filter;
+    auto [items_descr, opt_items, size] = co_await describe_items(*selection, std::move(rs), std::move(attrs_to_get), std::move(filter));
+    if (paging_state) {
+        rjson::add(items_descr, "LastEvaluatedKey", encode_paging_state(*table_schema, *paging_state));
+    }
+    if (has_filter) {
+        stats.cql_stats.filtered_rows_read_total += p->stats().rows_read_total;
+        // update "filtered_rows_matched_total" with the number of items
+        // which passed the filter
+        stats.cql_stats.filtered_rows_matched_total += size;
+    }
+    if (opt_items) {
+        if (opt_items->size() >= max_items_for_rapidjson_array) {
+            // There are many items, better print the JSON and the array of
+            // items (opt_items) separately to avoid RapidJSON's contiguous
+            // allocation of arrays.
+            co_return make_streamed_with_extra_array(std::move(items_descr), "Items", std::move(*opt_items));
+        }
+        // There aren't many items in the chunked vector opt_items,
+        // let's just insert them into the JSON object and print the
+        // full JSON normally.
+        rjson::value items_json = rjson::empty_array();
+        for (auto& item : *opt_items) {
+            rjson::push_back(items_json, std::move(item));
+        }
+        rjson::add(items_descr, "Items", std::move(items_json));
+    }
+    // Even with few items (or none), the response itself may be big, in
+    // which case it is streamed.
+    if (is_big(items_descr)) {
+        co_return make_streamed(std::move(items_descr));
+    }
+    co_return rjson::print(std::move(items_descr));
+}
+
+// Returns the token at which the given segment begins, when the range of
+// int64 tokens is divided into total_segments segments of equal width.
+// Requires more than one segment, and 0 <= segment < total_segments.
+static dht::token token_for_segment(int segment, int total_segments) {
+    throwing_assert(total_segments > 1 && segment >= 0 && segment < total_segments);
+    const uint64_t segment_width = std::numeric_limits<uint64_t>::max() / total_segments;
+    // The offset of the segment's first token from the minimal token.
+    const uint64_t offset = segment_width * segment;
+    return dht::token::from_int64(std::numeric_limits<int64_t>::min() + offset);
+}
+
+// Returns the partition range covered by the given segment out of
+// total_segments segments. A single segment covers everything. Otherwise,
+// the first segment has no lower bound, the last segment has no upper
+// bound, and every segment ends just before the token where the next
+// segment begins.
+static dht::partition_range get_range_for_segment(int segment, int total_segments) {
+    if (total_segments == 1) {
+        return dht::partition_range::make_open_ended_both_sides();
+    }
+    // Inclusive bound at the beginning of this segment
+    const auto lower_bound = [&] {
+        dht::token starting_token = token_for_segment(segment, total_segments);
+        return dht::partition_range::bound(dht::ring_position::starting_at(starting_token));
+    };
+    // Exclusive bound at the beginning of the next segment
+    const auto upper_bound = [&] {
+        dht::token ending_token = token_for_segment(segment + 1, total_segments);
+        return dht::partition_range::bound(dht::ring_position::ending_at(ending_token), false);
+    };
+    if (segment == 0) {
+        return dht::partition_range::make_ending_with(upper_bound());
+    }
+    if (segment == total_segments - 1) {
+        return dht::partition_range::make_starting_with(lower_bound());
+    }
+    return dht::partition_range::make(lower_bound(), upper_bound());
+}
+
+// Implements the Scan operation: validates the request's parameters
+// (Segment/TotalSegments, Limit, the consistency level on a GSI, the filter
+// and the expression attribute names and values), calculates the partition
+// range to read - everything, or one segment of a parallel scan - and lets
+// do_query() read one page of results.
+// Some validation failures are returned as a ready future holding an
+// api_error, while the helper functions called here may throw instead.
+future<executor::request_return_type> executor::scan(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
+    _stats.api_operations.scan++;
+    elogger.trace("Scanning {}", request);
+
+    auto [schema, table_type] = get_table_or_view(_proxy, request);
+    db::consistency_level cl = get_read_consistency(request);
+    maybe_audit(audit_info, audit::statement_category::QUERY, schema->ks_name(), schema->cf_name(), "Scan", request, cl);
+    tracing::add_alternator_table_name(trace_state, schema->cf_name());
+    // The operation is counted in the table's statistics too.
+    get_stats_from_schema(_proxy, *schema)->api_operations.scan++;
+    // Segment and TotalSegments must come together, and select one part of
+    // a parallel scan.
+    auto segment = get_int_attribute(request, "Segment");
+    auto total_segments = get_int_attribute(request, "TotalSegments");
+    if (segment || total_segments) {
+        if (!segment || !total_segments) {
+            return make_ready_future<request_return_type>(api_error::validation(
+                    "Both Segment and TotalSegments attributes need to be present for a parallel scan"));
+        }
+        if (*segment < 0 || *segment >= *total_segments) {
+            return make_ready_future<request_return_type>(api_error::validation(
+                    "Segment must be non-negative and less than TotalSegments"));
+        }
+        // NOTE(review): when we get here, 0 <= *segment < *total_segments
+        // already holds, so *total_segments < 0 cannot be true and only the
+        // upper limit is really checked here.
+        if (*total_segments < 0 || *total_segments > 1000000) {
+            return make_ready_future<request_return_type>(api_error::validation(
+                    "TotalSegments must be non-negative and less or equal to 1000000"));
+        }
+    }
+
+    rjson::value* exclusive_start_key = rjson::find(request, "ExclusiveStartKey");
+
+    if (table_type == table_or_view_type::gsi && cl != db::consistency_level::LOCAL_ONE) {
+        return make_ready_future<request_return_type>(api_error::validation(
+                "Consistent reads are not allowed on global indexes (GSI)"));
+    }
+    // Without a Limit, the largest possible limit is used.
+    // NOTE(review): the 64-bit result of GetUint64() is narrowed to
+    // uint32_t here, and GetUint64() is called without first checking the
+    // type of Limit - confirm that both are handled as intended.
+    rjson::value* limit_json = rjson::find(request, "Limit");
+    uint32_t limit = limit_json ? limit_json->GetUint64() : std::numeric_limits<uint32_t>::max();
+    if (limit <= 0) {
+        return make_ready_future<request_return_type>(api_error::validation("Limit must be greater than 0"));
+    }
+
+    select_type select = parse_select(request, table_type);
+
+    // These sets collect the entries of ExpressionAttributeNames and
+    // ExpressionAttributeValues which are used, so that verify_all_are_used()
+    // below can check them.
+    std::unordered_set<std::string> used_attribute_names;
+    std::unordered_set<std::string> used_attribute_values;
+    auto attrs_to_get = calculate_attrs_to_get(request, *_parsed_expression_cache, used_attribute_names, select);
+
+    dht::partition_range_vector partition_ranges;
+    if (segment) {
+        auto range = get_range_for_segment(*segment, *total_segments);
+        // When continuing a parallel scan, the start key must belong to the
+        // requested segment.
+        if (exclusive_start_key) {
+            auto ring_pos = dht::ring_position{dht::decorate_key(*schema, pk_from_json(*exclusive_start_key, schema))};
+            if (!range.contains(ring_pos, dht::ring_position_comparator(*schema))) {
+                return make_ready_future<request_return_type>(api_error::validation(
+                    format("The provided starting key is invalid: Invalid ExclusiveStartKey. Please use ExclusiveStartKey "
+                           "with correct Segment. TotalSegments: {} Segment: {}", *total_segments, *segment)));
+            }
+        }
+        partition_ranges.push_back(range);
+    } else {
+        partition_ranges.push_back(dht::partition_range::make_open_ended_both_sides());
+    }
+    // A scan does not restrict the clustering key.
+    std::vector<query::clustering_range> ck_bounds{query::clustering_range::make_open_ended_both_sides()};
+
+    filter filter(*_parsed_expression_cache, request, filter::request_type::SCAN, used_attribute_names, used_attribute_values);
+    // Note: Unlike Query, Scan does allow a filter on the key attributes.
+    // For some *specific* cases of key filtering, such an equality test on
+    // partition key or comparison operator for the sort key, we could have
+    // optimized the filtering by modifying partition_ranges and/or
+    // ck_bounds. We haven't done this optimization yet.
+
+    const rjson::value* expression_attribute_names = rjson::find(request, "ExpressionAttributeNames");
+    const rjson::value* expression_attribute_values = rjson::find(request, "ExpressionAttributeValues");
+    verify_all_are_used(expression_attribute_names, used_attribute_names, "ExpressionAttributeNames", "Scan");
+    verify_all_are_used(expression_attribute_values, used_attribute_values, "ExpressionAttributeValues", "Scan");
+
+    return do_query(_proxy, schema, exclusive_start_key, std::move(partition_ranges), std::move(ck_bounds), std::move(attrs_to_get), limit, cl,
+            std::move(filter), query::partition_slice::option_set(), client_state, _stats, trace_state, std::move(permit), _enforce_authorization, _warn_authorization);
+}
+
+// Calculates the partition range selected by a condition on the hash key.
+// The condition must be an equality (EQ) with exactly one value, otherwise
+// api_error::validation is thrown. The result is the single partition
+// whose key is that value.
+static dht::partition_range calculate_pk_bound(schema_ptr schema, const column_definition& pk_cdef, const rjson::value& comp_definition, const rjson::value& attrs) {
+    if (get_comparison_operator(comp_definition) != comparison_operator_type::EQ) {
+        throw api_error::validation(format("Hash key can only be restricted with equality operator (EQ). {} not supported.", comp_definition));
+    }
+    if (attrs.Size() != 1) {
+        throw api_error::validation(format("A single attribute is required for a hash key EQ restriction: {}", attrs));
+    }
+    partition_key pk = partition_key::from_singular_bytes(*schema, get_key_from_typed_value(attrs[0], pk_cdef));
+    auto dk = dht::decorate_key(*schema, pk);
+    return dht::partition_range(dk);
+}
+
+// Calculates the clustering range of keys which begin with the given prefix.
+// "target" holds the prefix's bytes and "ck" is the clustering key made from
+// the same prefix. The range starts (inclusively) at ck. Its end is found by
+// taking the prefix up to its last byte which isn't 0xFF, and incrementing
+// that byte - the result is an exclusive upper bound. If all the bytes are
+// 0xFF no such bound exists, and the range has no end.
+static query::clustering_range get_clustering_range_for_begins_with(bytes&& target, const clustering_key& ck, schema_ptr schema, data_type t) {
+    const auto last_incrementable = boost::range::find_end(target, bytes("\xFF"), std::not_equal_to<bytes::value_type>());
+    if (last_incrementable == target.end()) {
+        return query::clustering_range::make_starting_with(query::clustering_range::bound(ck));
+    }
+    ++*last_incrementable;
+    // Drop the 0xFF bytes following the byte we incremented.
+    target.resize(std::distance(target.begin(), last_incrementable) + 1);
+    clustering_key exclusive_end = clustering_key::from_single_value(*schema, target);
+    return query::clustering_range::make(
+            query::clustering_range::bound(ck),
+            query::clustering_range::bound(exclusive_end, false));
+}
+
+// Calculates the clustering range selected by a condition on the sort key.
+// comp_definition holds the comparison operator, and attrs the list of its
+// arguments: two for BETWEEN, and one for any other operator. Throws
+// api_error::validation for a wrong number of arguments, for an operator
+// not supported on the sort key, or for BEGINS_WITH on a key whose type
+// isn't compatible with a string.
+static query::clustering_range calculate_ck_bound(schema_ptr schema, const column_definition& ck_cdef, const rjson::value& comp_definition, const rjson::value& attrs) {
+    using ck_range = query::clustering_range;
+    using ck_bound = query::clustering_range::bound;
+    constexpr bool not_inclusive = false;
+
+    const auto op = get_comparison_operator(comp_definition);
+    const size_t wanted_args = (op == comparison_operator_type::BETWEEN) ? 2 : 1;
+    if (attrs.Size() != wanted_args) {
+        throw api_error::validation(format("{} arguments expected for a sort key restriction: {}", wanted_args, attrs));
+    }
+    bytes first_raw = get_key_from_typed_value(attrs[0], ck_cdef);
+    clustering_key first_key = clustering_key::from_single_value(*schema, first_raw);
+    switch (op) {
+    case comparison_operator_type::EQ:
+        return ck_range(first_key);
+    case comparison_operator_type::LE:
+        return ck_range::make_ending_with(ck_bound(first_key));
+    case comparison_operator_type::LT:
+        return ck_range::make_ending_with(ck_bound(first_key, not_inclusive));
+    case comparison_operator_type::GE:
+        return ck_range::make_starting_with(ck_bound(first_key));
+    case comparison_operator_type::GT:
+        return ck_range::make_starting_with(ck_bound(first_key, not_inclusive));
+    case comparison_operator_type::BETWEEN: {
+        // Both ends of a BETWEEN range are inclusive.
+        bytes second_raw = get_key_from_typed_value(attrs[1], ck_cdef);
+        clustering_key second_key = clustering_key::from_single_value(*schema, second_raw);
+        return ck_range::make(ck_bound(first_key), ck_bound(second_key));
+    }
+    case comparison_operator_type::BEGINS_WITH: {
+        // Every key begins with the empty prefix.
+        if (raw_value_is_empty(first_raw)) {
+            return ck_range::make_open_ended_both_sides();
+        }
+        // NOTICE(sarna): A range starting with given prefix and ending (non-inclusively) with a string "incremented" by a single
+        // character at the end. Throws for NUMBER instances.
+        if (!ck_cdef.type->is_compatible_with(*utf8_type)) {
+            throw api_error::validation(fmt::format("BEGINS_WITH operator cannot be applied to type {}", type_to_string(ck_cdef.type)));
+        }
+        return get_clustering_range_for_begins_with(std::move(first_raw), first_key, schema, ck_cdef.type);
+    }
+    default:
+        throw api_error::validation(format("Operator {} not supported for sort key", comp_definition));
+    }
+}
+
+// Calculates the primary key bounds of a query from its KeyConditions:
+// returns the partition range selected by the condition on the hash key,
+// and the clustering range selected by the condition on the sort key - or
+// an unbounded clustering range if there is no such condition.
+// Throws api_error::validation if a key has more than one condition, if the
+// hash key has none, or if there are conditions on attributes other than
+// the hash key and sort key.
+static std::pair<dht::partition_range_vector, std::vector<query::clustering_range>>
+calculate_bounds_conditions(schema_ptr schema, const rjson::value& conditions) {
+    dht::partition_range_vector pk_ranges;
+    std::vector<query::clustering_range> ck_ranges;
+
+    // The key columns are the same for all the conditions.
+    const column_definition& hash_key = schema->partition_key_columns().front();
+    const column_definition* sort_key = schema->clustering_key_size() > 0 ? &schema->clustering_key_columns().front() : nullptr;
+
+    const auto last = conditions.MemberEnd();
+    for (auto member = conditions.MemberBegin(); member != last; ++member) {
+        sstring attribute = rjson::to_sstring(member->name);
+        const rjson::value& condition = member->value;
+
+        // Every condition, even on a non-key attribute, must have both.
+        const rjson::value& comp_definition = rjson::get(condition, "ComparisonOperator");
+        const rjson::value& attr_list = rjson::get(condition, "AttributeValueList");
+
+        if (attribute == hash_key.name_as_text()) {
+            if (!pk_ranges.empty()) {
+                throw api_error::validation("Currently only a single restriction per key is allowed");
+            }
+            pk_ranges.push_back(calculate_pk_bound(schema, hash_key, comp_definition, attr_list));
+        }
+        if (sort_key && attribute == sort_key->name_as_text()) {
+            if (!ck_ranges.empty()) {
+                throw api_error::validation("Currently only a single restriction per key is allowed");
+            }
+            ck_ranges.push_back(calculate_ck_bound(schema, *sort_key, comp_definition, attr_list));
+        }
+    }
+
+    // Validate that a query's conditions must be on the hash key, and
+    // optionally also on the sort key if it exists.
+    if (pk_ranges.empty()) {
+        throw api_error::validation(format("Query missing condition on hash key '{}'", schema->partition_key_columns().front().name_as_text()));
+    }
+    const auto condition_count = conditions.MemberCount();
+    if (schema->clustering_key_size() == 0) {
+        if (condition_count != 1) {
+            throw api_error::validation("Only one condition allowed in table with only hash key");
+        }
+    } else if (condition_count == 2 && ck_ranges.empty()) {
+        // One of the two conditions is on the hash key, so the other one
+        // should have been on the sort key.
+        throw api_error::validation(format("Query missing condition on sort key '{}'", schema->clustering_key_columns().front().name_as_text()));
+    } else if (condition_count > 2) {
+        throw api_error::validation("Only one or two conditions allowed in table with hash key and sort key");
+    }
+
+    // Without a condition on the sort key, all of the partition is read.
+    if (ck_ranges.empty()) {
+        ck_ranges.push_back(query::clustering_range::make_open_ended_both_sides());
+    }
+
+    return {std::move(pk_ranges), std::move(ck_ranges)};
+}
+
+// Returns the name of the top-level attribute that a KeyConditionExpression
+// operand refers to. Paths with member or index operators are rejected with
+// a ValidationException. A root beginning with '#' is recorded in
+// used_attribute_names and replaced by the string it maps to in
+// ExpressionAttributeNames. The returned string_view may point into either
+// `v` or `expression_attribute_names`, so it must not outlive them.
+static std::string_view get_toplevel(const parsed::value& v,
+        const rjson::value* expression_attribute_names,
+        std::unordered_set<std::string>& used_attribute_names)
+{
+    const auto& attr_path = std::get<parsed::path>(v._value);
+    if (attr_path.has_operators()) {
+        throw api_error::validation("KeyConditionExpression does not support nested attributes");
+    }
+    const std::string_view root = attr_path.root();
+    if (root.empty() || root.front() != '#') {
+        // A plain attribute name needs no resolution.
+        return root;
+    }
+    // A '#name' reference: note that it was used, then look it up.
+    used_attribute_names.emplace(root);
+    if (!expression_attribute_names) {
+        throw api_error::validation(fmt::format(
+                "ExpressionAttributeNames missing, entry '{}' required by KeyConditionExpression", root));
+    }
+    const rjson::value* resolved = rjson::find(*expression_attribute_names, root);
+    if (resolved == nullptr || !resolved->IsString()) {
+        throw api_error::validation(fmt::format(
+                "ExpressionAttributeNames missing entry '{}' required by KeyConditionExpression", root));
+    }
+    return rjson::to_string_view(*resolved);
+}
+
+// Decodes the constant operand of a KeyConditionExpression into the bytes
+// which Scylla uses as the actual key value of `column`.
+// The operand was originally written as a reference (:name) to a member of
+// ExpressionAttributeValues, but by the time we get here resolve_value()
+// has already replaced it with the JSON value it names.
+// A ValidationException is thrown if the value has the wrong type for the
+// column, or if the input had other problems.
+static bytes get_constant_value(const parsed::value& v,
+        const column_definition& column)
+{
+    const auto& json_literal = std::get<parsed::constant::literal>(
+            std::get<parsed::constant>(v._value)._value);
+    return get_key_from_typed_value(*json_literal, column);
+}
+
+// Flattens a condition_expression into the primitive conditions that it ANDs
+// together, appending pointers to them (which refer into the given
+// expression) to `conditions`. KeyConditionExpression may use neither NOT nor
+// OR: a negated node or a multi-member OR list throws a ValidationException.
+static void condition_expression_and_list(
+        const parsed::condition_expression& condition_expression,
+        std::vector<const parsed::primitive_condition*>& conditions)
+{
+    if (condition_expression._negated) {
+        throw api_error::validation("KeyConditionExpression cannot use NOT");
+    }
+    // A leaf is recorded; a list is checked for OR and then walked recursively.
+    auto collect_leaf = [&conditions] (const parsed::primitive_condition& leaf) {
+        conditions.push_back(&leaf);
+    };
+    auto descend = [&conditions] (const parsed::condition_expression::condition_list& sub) {
+        if (sub.op == '|' && sub.conditions.size() > 1) {
+            throw api_error::validation("KeyConditionExpression cannot use OR");
+        }
+        for (const auto& member : sub.conditions) {
+            condition_expression_and_list(member, conditions);
+        }
+    };
+    std::visit(overloaded_functor { collect_leaf, descend }, condition_expression._expression);
+}
+
+// Calculates primary key bounds from KeyConditionExpression.
+// The expression is parsed via parsed_expression_cache and resolved against
+// ExpressionAttributeNames / ExpressionAttributeValues; the used_* sets are
+// handed to the resolution helpers so the references made can be recorded.
+// Returns exactly one partition range (from the mandatory '=' condition on
+// the partition key) and one clustering range (open-ended on both sides if
+// the sort key is unrestricted). Any other use of KeyConditionExpression is
+// rejected with a ValidationException.
+static std::pair<dht::partition_range_vector, std::vector<query::clustering_range>>
+calculate_bounds_condition_expression(schema_ptr schema,
+        const rjson::value& expression,
+        const rjson::value* expression_attribute_values,
+        std::unordered_set<std::string>& used_attribute_values,
+        const rjson::value* expression_attribute_names,
+        std::unordered_set<std::string>& used_attribute_names,
+        parsed::expression_cache& parsed_expression_cache)
+{
+    if (!expression.IsString()) {
+        throw api_error::validation("KeyConditionExpression must be a string");
+    }
+    if (expression.GetStringLength() == 0) {
+        throw api_error::validation("KeyConditionExpression must not be empty");
+    }
+    // We parse the KeyConditionExpression with the same parser we use for
+    // ConditionExpression. But KeyConditionExpression only supports a subset
+    // of the ConditionExpression features, so we have many additional
+    // verifications below that the key condition is legal. Briefly, a valid
+    // key condition must contain a single partition key and a single
+    // sort-key range.
+    parsed::condition_expression p;
+    try {
+        p = parsed_expression_cache.parse_condition_expression(rjson::to_string_view(expression), "KeyConditionExpression");
+    } catch (const expressions_syntax_error& e) {
+        throw api_error::validation(e.what());
+    }
+    resolve_condition_expression(p, expression_attribute_names, expression_attribute_values,
+            used_attribute_names, used_attribute_values);
+    std::vector<const parsed::primitive_condition*> conditions;
+    condition_expression_and_list(p, conditions);
+
+    if (conditions.size() < 1 || conditions.size() > 2) {
+        throw api_error::validation("KeyConditionExpression syntax error: must have 1 or 2 conditions");
+    }
+    // Scylla allows us to have an (equality) constraint on the partition key
+    // pk_cdef, and a range constraint on the *first* clustering key ck_cdef.
+    // Note that this is also good enough for our GSI implementation - the
+    // GSI's user-specified sort key will be the first clustering key.
+    // FIXME: In the case described in issue #5320 (base and GSI both have
+    // just hash key - but different ones), this may allow the user to Query
+    // using the base key which isn't officially part of the GSI.
+    const column_definition& pk_cdef = schema->partition_key_columns().front();
+    const column_definition* ck_cdef = schema->clustering_key_size() > 0 ? &schema->clustering_key_columns().front() : nullptr;
+
+    dht::partition_range_vector partition_ranges;
+    std::vector<query::clustering_range> ck_bounds;
+    for (const parsed::primitive_condition* condp : conditions) {
+        const parsed::primitive_condition& cond = *condp;
+        // An IN list may carry more operands than the switch below accepts,
+        // so reject IN first: a client mistake must yield a ValidationException,
+        // not the std::logic_error that the switch reserves for parser bugs.
+        if (cond._op == parsed::primitive_condition::type::IN) {
+            throw api_error::validation("KeyConditionExpression does not support IN operator");
+        }
+        // In all comparison operators, one operand must be a column name,
+        // the other is a constant (value reference). We remember which is
+        // which in toplevel_ind, and also the column name in key (not just
+        // for comparison operators).
+        std::string_view key;
+        int toplevel_ind;
+        switch (cond._values.size()) {
+        case 1: {
+            // The only legal single-value condition is a begins_with() function,
+            // and it must have two parameters - a top-level attribute and a
+            // value reference.
+            const parsed::value::function_call *f = std::get_if<parsed::value::function_call>(&cond._values[0]._value);
+            if (!f) {
+                throw api_error::validation("KeyConditionExpression cannot be just a value");
+            }
+            if (f->_function_name != "begins_with") {
+                throw api_error::validation(fmt::format("KeyConditionExpression function '{}' not supported", f->_function_name));
+            }
+            if (f->_parameters.size() != 2 || !f->_parameters[0].is_path() ||
+                    !f->_parameters[1].is_constant()) {
+                throw api_error::validation("KeyConditionExpression begins_with() takes attribute and value");
+            }
+            key = get_toplevel(f->_parameters[0], expression_attribute_names, used_attribute_names);
+            toplevel_ind = -1;
+            break;
+        }
+        case 2:
+            if (cond._values[0].is_path() && cond._values[1].is_constant()) {
+                toplevel_ind = 0;
+            } else if (cond._values[1].is_path() && cond._values[0].is_constant()) {
+                toplevel_ind = 1;
+            } else {
+                throw api_error::validation("KeyConditionExpression must compare attribute with constant");
+            }
+            key = get_toplevel(cond._values[toplevel_ind],  expression_attribute_names, used_attribute_names);
+            break;
+        case 3:
+            // Only BETWEEN has three operands. First must be a column name,
+            // two other must be value references (constants):
+            if (cond._op != parsed::primitive_condition::type::BETWEEN) {
+                // Shouldn't happen unless we have a bug in the parser
+                throw std::logic_error(format("Wrong number of values {} in primitive_condition", cond._values.size()));
+            }
+            if (cond._values[0].is_path() && cond._values[1].is_constant() && cond._values[2].is_constant()) {
+                toplevel_ind = 0;
+                key = get_toplevel(cond._values[0], expression_attribute_names, used_attribute_names);
+            } else {
+                throw api_error::validation("KeyConditionExpression must compare attribute with constants");
+            }
+            break;
+        default:
+            // Shouldn't happen unless we have a bug in the parser
+            throw std::logic_error(format("Wrong number of values {} in primitive_condition", cond._values.size()));
+        }
+        // Classify the attribute once; every check below needs the answer.
+        const sstring key_name(key);
+        const bool on_pk = key_name == pk_cdef.name_as_text();
+        const bool on_ck = ck_cdef && key_name == ck_cdef->name_as_text();
+        if (cond._op == parsed::primitive_condition::type::NE) {
+            throw api_error::validation("KeyConditionExpression does not support NE operator");
+        } else if (cond._op == parsed::primitive_condition::type::EQ) {
+            // the EQ operator (=) is the only one which can be used for both
+            // the partition key and sort key:
+            if (on_pk) {
+                if (!partition_ranges.empty()) {
+                    throw api_error::validation("KeyConditionExpression allows only one condition for each key");
+                }
+                bytes raw_value = get_constant_value(cond._values[!toplevel_ind], pk_cdef);
+                partition_key pk = partition_key::from_singular_bytes(*schema, std::move(raw_value));
+                auto decorated_key = dht::decorate_key(*schema, pk);
+                partition_ranges.push_back(dht::partition_range(decorated_key));
+            } else if (on_ck) {
+                if (!ck_bounds.empty()) {
+                    throw api_error::validation("KeyConditionExpression allows only one condition for each key");
+                }
+                bytes raw_value = get_constant_value(cond._values[!toplevel_ind], *ck_cdef);
+                clustering_key ck = clustering_key::from_single_value(*schema, raw_value);
+                ck_bounds.push_back(query::clustering_range(ck));
+            } else {
+                throw api_error::validation(fmt::format("KeyConditionExpression condition on non-key attribute {}", key));
+            }
+            continue;
+        }
+        // If we're still here, it's any other operator besides EQ, and these
+        // are allowed *only* on the clustering key:
+        if (on_pk) {
+            throw api_error::validation(fmt::format("KeyConditionExpression only '=' condition is supported on partition key {}", key));
+        } else if (!on_ck) {
+            throw api_error::validation(fmt::format("KeyConditionExpression condition on non-key attribute {}", key));
+        }
+        if (!ck_bounds.empty()) {
+            throw api_error::validation("KeyConditionExpression allows only one condition for each key");
+        }
+        if (cond._op == parsed::primitive_condition::type::BETWEEN) {
+            clustering_key ck1 = clustering_key::from_single_value(*schema, get_constant_value(cond._values[1], *ck_cdef));
+            clustering_key ck2 = clustering_key::from_single_value(*schema, get_constant_value(cond._values[2], *ck_cdef));
+            ck_bounds.push_back(query::clustering_range::make(
+                    query::clustering_range::bound(ck1), query::clustering_range::bound(ck2)));
+            continue;
+        } else if (cond._values.size() == 1) {
+            // We already verified above that in this case this can only be a
+            // function call to begins_with(), with the first parameter the
+            // key, the second the value reference.
+            bytes raw_value = get_constant_value(
+                    std::get<parsed::value::function_call>(cond._values[0]._value)._parameters[1], *ck_cdef);
+            if (!ck_cdef->type->is_compatible_with(*utf8_type)) {
+                // begins_with() supported on bytes and strings (both stored
+                // in the database as strings) but not on numbers.
+                throw api_error::validation(fmt::format(
+                        "KeyConditionExpression begins_with() not supported on type {}", type_to_string(ck_cdef->type)));
+            } else if (raw_value.empty()) {
+                ck_bounds.push_back(query::clustering_range::make_open_ended_both_sides());
+            } else {
+                clustering_key ck = clustering_key::from_single_value(*schema, raw_value);
+                ck_bounds.push_back(get_clustering_range_for_begins_with(std::move(raw_value), ck, schema, ck_cdef->type));
+            }
+            continue;
+        }
+
+        // All remaining operator have one value reference parameter in index
+        // !toplevel_ind. Note how toplevel_ind==1 reverses the direction of
+        // an inequality.
+        bytes raw_value = get_constant_value(cond._values[!toplevel_ind], *ck_cdef);
+        clustering_key ck = clustering_key::from_single_value(*schema, raw_value);
+        if ((cond._op == parsed::primitive_condition::type::LT && toplevel_ind == 0) ||
+            (cond._op == parsed::primitive_condition::type::GT && toplevel_ind == 1)) {
+            ck_bounds.push_back(query::clustering_range::make_ending_with(query::clustering_range::bound(ck, false)));
+        } else if ((cond._op == parsed::primitive_condition::type::GT && toplevel_ind == 0) ||
+                   (cond._op == parsed::primitive_condition::type::LT && toplevel_ind == 1)) {
+            ck_bounds.push_back(query::clustering_range::make_starting_with(query::clustering_range::bound(ck, false)));
+        } else if ((cond._op == parsed::primitive_condition::type::LE && toplevel_ind == 0) ||
+                   (cond._op == parsed::primitive_condition::type::GE && toplevel_ind == 1)) {
+            ck_bounds.push_back(query::clustering_range::make_ending_with(query::clustering_range::bound(ck)));
+        } else if ((cond._op == parsed::primitive_condition::type::GE && toplevel_ind == 0) ||
+                   (cond._op == parsed::primitive_condition::type::LE && toplevel_ind == 1)) {
+            ck_bounds.push_back(query::clustering_range::make_starting_with(query::clustering_range::bound(ck)));
+        }
+    }
+
+    if (partition_ranges.empty()) {
+        throw api_error::validation(format("KeyConditionExpression requires a condition on partition key {}", pk_cdef.name_as_text()));
+    }
+    if (ck_bounds.empty()) {
+        ck_bounds.push_back(query::clustering_range::make_open_ended_both_sides());
+    }
+    return {std::move(partition_ranges), std::move(ck_bounds)};
+}
+
+static future<executor::request_return_type> query_vector(
+        service::storage_proxy& proxy,
+        vector_search::vector_store_client& vsc,
+        rjson::value request,
+        service::client_state& client_state,
+        tracing::trace_state_ptr trace_state,
+        service_permit permit,
+        bool enforce_authorization,
+        bool warn_authorization,
+        alternator::stats& stats,
+        parsed::expression_cache& parsed_expr_cache) {
+    // If vector search is requested, IndexName must be given and must
+    // refer to a vector index - not to a GSI or LSI.
+    const rjson::value* index_name_v = rjson::find(request, "IndexName");
+    if (!index_name_v || !index_name_v->IsString()) {
+        co_return api_error::validation(
+            "VectorSearch requires IndexName referring to a vector index");
+    }
+    std::string_view index_name = rjson::to_string_view(*index_name_v);
+    schema_ptr base_schema = get_table(proxy, request);
+    bool is_vector = std::ranges::any_of(base_schema->indices(), [&](const index_metadata& im) {
+        const auto& opts = im.options();
+        auto it = opts.find(db::index::secondary_index::custom_class_option_name);
+        return im.name() == index_name && it != opts.end() && it->second == "vector_index";
+    });
+    if (!is_vector) {
+        co_return api_error::validation(
+            format("VectorSearch IndexName '{}' is not a vector index.", index_name));
+    }
+    // QueryVector is required inside VectorSearch.
+    const rjson::value* vector_search = rjson::find(request, "VectorSearch");
+    if (!vector_search || !vector_search->IsObject()) {
+        co_return api_error::validation(
+            "VectorSearch requires a VectorSearch parameter");
+    }
+    const rjson::value* query_vector = rjson::find(*vector_search, "QueryVector");
+    if (!query_vector || !query_vector->IsObject()) {
+        co_return api_error::validation(
+            "VectorSearch requires a QueryVector parameter");
+    }
+    // QueryVector should be a DynamoDB value, which must be of type "L"
+    // (a list), containing only elements of type "N" (numbers). The number
+    // of these elements must be exactly the "dimensions" defined for this
+    // vector index. We'll now validate all these assumptions and parse
+    // all the numbers in the vector into an std::vector<float> query_vec -
+    // the type that ann() wants.
+    int dimensions = 0;
+    for (const index_metadata& im : base_schema->indices()) {
+        if (im.name() == index_name) {
+            auto dims_it = im.options().find("dimensions");
+            if (dims_it != im.options().end()) {
+                try {
+                    dimensions = std::stoi(dims_it->second);
+                } catch (...) {}
+            }
+            break;
+        }
+    }
+    throwing_assert(dimensions > 0);
+    const rjson::value* qv_list = rjson::find(*query_vector, "L");
+    if (!qv_list || !qv_list->IsArray()) {
+        co_return api_error::validation(
+            "VectorSearch QueryVector must be a list of numbers");
+    }
+    const auto& arr = qv_list->GetArray();
+    if ((int)arr.Size() != dimensions) {
+        co_return api_error::validation(
+            format("VectorSearch QueryVector length {} does not match index Dimensions {}",
+                arr.Size(), dimensions));
+    }
+    std::vector<float> query_vec;
+    query_vec.reserve(arr.Size());
+    for (const rjson::value& elem : arr) {
+        if (!elem.IsObject()) {
+            co_return api_error::validation(
+                "VectorSearch QueryVector must contain only numbers");
+        }
+        const rjson::value* n_val = rjson::find(elem, "N");
+        if (!n_val || !n_val->IsString()) {
+            co_return api_error::validation(
+                "VectorSearch QueryVector must contain only numbers");
+        }
+        std::string_view num_str = rjson::to_string_view(*n_val);
+        float f;
+        auto [ptr, ec] = std::from_chars(num_str.data(), num_str.data() + num_str.size(), f);
+        if (ec != std::errc{} || ptr != num_str.data() + num_str.size()) {
+            co_return api_error::validation(
+                format("VectorSearch QueryVector element '{}' is not a valid number", num_str));
+        }
+        query_vec.push_back(f);
+    }
+
+    // Limit is mandatory for vector search: it defines k, the number of
+    // nearest neighbors to return.
+    const rjson::value* limit_json = rjson::find(request, "Limit");
+    if (!limit_json || !limit_json->IsUint()) {
+        co_return api_error::validation("VectorSearch requires a positive integer Limit parameter");
+    }
+    uint32_t limit = limit_json->GetUint();
+    if (limit == 0) {
+        co_return api_error::validation("Limit must be greater than 0");
+    }
+
+    // Consistent reads are not supported for vector search, just like GSI.
+    if (get_read_consistency(request) != db::consistency_level::LOCAL_ONE) {
+        co_return api_error::validation(
+            "Consistent reads are not allowed on vector indexes");
+    }
+
+    // Pagination (ExclusiveStartKey) is not supported for vector search.
+    if (rjson::find(request, "ExclusiveStartKey")) {
+        co_return api_error::validation(
+            "VectorSearch does not support pagination (ExclusiveStartKey)");
+    }
+
+    // ScanIndexForward is not supported for vector search: the ordering of
+    // results is determined by vector distance, not by the sort key.
+    if (rjson::find(request, "ScanIndexForward")) {
+        co_return api_error::validation(
+            "VectorSearch does not support ScanIndexForward");
+    }
+
+    std::unordered_set<std::string> used_attribute_names;
+    std::unordered_set<std::string> used_attribute_values;
+    // Parse the Select parameter and determine which attributes to return.
+    // For a vector index, the default Select is ALL_ATTRIBUTES (full items).
+    // ALL_PROJECTED_ATTRIBUTES is significantly more efficient because it
+    // returns what the vector store returned without looking up additional
+    // base-table data. Currently only the primary key attributes are projected
+    // (but in the future we'll implement projecting additional attributes into
+    // the vector index - these additional attributes will also be usable for
+    // filtering). COUNT returns only the count without items.
+    select_type select = parse_select(request, table_or_view_type::vector_index);
+    std::optional<alternator::attrs_to_get> attrs_to_get_opt;
+    if (select == select_type::projection) {
+        // ALL_PROJECTED_ATTRIBUTES for a vector index: return only key attributes.
+        alternator::attrs_to_get key_attrs;
+        for (const column_definition& cdef : base_schema->partition_key_columns()) {
+            attribute_path_map_add("Select", key_attrs, cdef.name_as_text());
+        }
+        for (const column_definition& cdef : base_schema->clustering_key_columns()) {
+            attribute_path_map_add("Select", key_attrs, cdef.name_as_text());
+        }
+        attrs_to_get_opt = std::move(key_attrs);
+    } else {
+        attrs_to_get_opt = calculate_attrs_to_get(request, parsed_expr_cache, used_attribute_names, select);
+    }
+    // QueryFilter (the old-style API) is not supported for vector search Queries.
+    if (rjson::find(request, "QueryFilter")) {
+        co_return api_error::validation(
+            "VectorSearch does not support QueryFilter; use FilterExpression instead");
+    }
+    // FilterExpression: post-filter the vector search results by any attribute.
+    filter flt(parsed_expr_cache, request, filter::request_type::QUERY,
+               used_attribute_names, used_attribute_values);
+    const rjson::value* expression_attribute_names = rjson::find(request, "ExpressionAttributeNames");
+    verify_all_are_used(expression_attribute_names, used_attribute_names, "ExpressionAttributeNames", "Query");
+    const rjson::value* expression_attribute_values = rjson::find(request, "ExpressionAttributeValues");
+    verify_all_are_used(expression_attribute_values, used_attribute_values, "ExpressionAttributeValues", "Query");
+
+    // Verify the user has SELECT permission on the base table, as we
+    // do for every type of read operation after validating the input
+    // parameters.
+    co_await verify_permission(enforce_authorization, warn_authorization,
+            client_state, base_schema, auth::permission::SELECT, stats);
+
+    // Query the vector store for the approximate nearest neighbors.
+    auto timeout = executor::default_timeout();
+    abort_on_expiry aoe(timeout);
+    rjson::value pre_filter = rjson::empty_object(); // TODO, implement
+    auto pkeys_result = co_await vsc.ann(
+            base_schema->ks_name(), std::string(index_name), base_schema,
+            std::move(query_vec), limit, pre_filter, aoe.abort_source());
+    if (!pkeys_result.has_value()) {
+        const sstring error_msg = std::visit(vector_search::error_visitor{}, pkeys_result.error());
+        co_return api_error::validation(error_msg);
+    }
+    const std::vector<vector_search::primary_key>& pkeys = pkeys_result.value();
+
+    // For SELECT=COUNT with no filter: skip fetching from the base table and
+    // just return the count of candidates returned by the vector store.
+    // If a filter is present, fall through to the base-table fetch to apply it.
+    if (select == select_type::count && !flt) {
+        rjson::value response = rjson::empty_object();
+        rjson::add(response, "Count", rjson::value(static_cast<int>(pkeys.size())));
+        rjson::add(response, "ScannedCount", rjson::value(static_cast<int>(pkeys.size())));
+        co_return rjson::print(std::move(response));
+    }
+
+    // For SELECT=ALL_PROJECTED_ATTRIBUTES with no filter: skip fetching from
+    // the base table and build items directly from the key columns returned by
+    // the vector store. If a filter is present, fall through to the base-table
+    // fetch to apply it.
+    if (select == select_type::projection && !flt) {
+        rjson::value items_json = rjson::empty_array();
+        for (const auto& pkey : pkeys) {
+            rjson::value item = rjson::empty_object();
+            std::vector<bytes> exploded_pk = pkey.partition.key().explode();
+            auto exploded_pk_it = exploded_pk.begin();
+            for (const column_definition& cdef : base_schema->partition_key_columns()) {
+                rjson::value key_val = rjson::empty_object();
+                rjson::add_with_string_name(key_val, type_to_string(cdef.type), json_key_column_value(*exploded_pk_it, cdef));
+                rjson::add_with_string_name(item, std::string_view(cdef.name_as_text()), std::move(key_val));
+                ++exploded_pk_it;
+            }
+            if (base_schema->clustering_key_size() > 0) {
+                std::vector<bytes> exploded_ck = pkey.clustering.explode();
+                auto exploded_ck_it = exploded_ck.begin();
+                for (const column_definition& cdef : base_schema->clustering_key_columns()) {
+                    rjson::value key_val = rjson::empty_object();
+                    rjson::add_with_string_name(key_val, type_to_string(cdef.type), json_key_column_value(*exploded_ck_it, cdef));
+                    rjson::add_with_string_name(item, std::string_view(cdef.name_as_text()), std::move(key_val));
+                    ++exploded_ck_it;
+                }
+            }
+            rjson::push_back(items_json, std::move(item));
+        }
+        rjson::value response = rjson::empty_object();
+        rjson::add(response, "Count", rjson::value(static_cast<int>(items_json.Size())));
+        rjson::add(response, "ScannedCount", rjson::value(static_cast<int>(pkeys.size())));
+        rjson::add(response, "Items", std::move(items_json));
+        co_return rjson::print(std::move(response));
+    }
+
+    // TODO: For SELECT=SPECIFIC_ATTRIBUTES, if they are part of the projected
+    // attributes, we should use the above optimized code path - not fall through
+    // to the read from the base table as below as we need to do if the specific
+    // attributes contain non-projected columns.
+
+    // Fetch the matching items from the base table and build the response.
+    // When a filter is present, we always fetch the full item so that all
+    // attributes are available for filter evaluation, regardless of the
+    // projection required for the final response.
+    auto selection = cql3::selection::selection::wildcard(base_schema);
+    auto regular_columns = base_schema->regular_columns()
+            | std::views::transform(&column_definition::id)
+            | std::ranges::to<query::column_id_vector>();
+    auto attrs_to_get = ::make_shared<const std::optional<alternator::attrs_to_get>>(
+        flt ? std::nullopt : std::move(attrs_to_get_opt));
+
+    rjson::value items_json = rjson::empty_array();
+    int matched_count = 0;
+
+    if (base_schema->clustering_key_size() == 0) {
+        // Hash-only table: query each partition individually, in the order
+        // returned by the vector store, to preserve vector-distance ordering
+        // in the response. A multi-partition batch read would return items in
+        // token order instead, which would be wrong.
+        // FIXME: do this more efficiently with a batched read that preserves
+        // ordering.
+        for (const auto& pkey : pkeys) {
+            std::vector<query::clustering_range> bounds{
+                    query::clustering_range::make_open_ended_both_sides()};
+            auto partition_slice = query::partition_slice(std::move(bounds), {},
+                    regular_columns, selection->get_query_options());
+            auto command = ::make_lw_shared<query::read_command>(
+                    base_schema->id(), base_schema->version(), partition_slice,
+                    proxy.get_max_result_size(partition_slice),
+                    query::tombstone_limit(proxy.get_tombstone_limit()));
+            service::storage_proxy::coordinator_query_result qr =
+                    co_await proxy.query(base_schema, command,
+                            {dht::partition_range(pkey.partition)},
+                            db::consistency_level::LOCAL_ONE,
+                            service::storage_proxy::coordinator_query_options(
+                                    timeout, permit, client_state, trace_state));
+            auto opt_item = executor::describe_single_item(base_schema, partition_slice,
+                    *selection, *qr.query_result, *attrs_to_get);
+            if (opt_item && (!flt || flt.check(*opt_item))) {
+                ++matched_count;
+                if (select != select_type::count) {
+                    if (select == select_type::projection) {
+                        // A filter caused us to fall through here instead of
+                        // taking the projection early-exit above. Reconstruct
+                        // the key-only item from the full item we fetched.
+                        rjson::value key_item = rjson::empty_object();
+                        for (const column_definition& cdef : base_schema->partition_key_columns()) {
+                            if (const rjson::value* v = rjson::find(*opt_item, cdef.name_as_text())) {
+                                rjson::add_with_string_name(key_item, cdef.name_as_text(), rjson::copy(*v));
+                            }
+                        }
+                        rjson::push_back(items_json, std::move(key_item));
+                    } else {
+                        // When a filter caused us to fetch the full item, apply the
+                        // requested projection (attrs_to_get_opt) before returning it.
+                        // This mirrors describe_items_visitor::end_row() which removes
+                        // extra filter attributes from the returned item.
+                        if (flt && attrs_to_get_opt) {
+                            for (const auto& [attr_name, subpath] : *attrs_to_get_opt) {
+                                if (!subpath.has_value()) {
+                                    if (rjson::value* toplevel = rjson::find(*opt_item, attr_name)) {
+                                        if (!hierarchy_filter(*toplevel, subpath)) {
+                                            rjson::remove_member(*opt_item, attr_name);
+                                        }
+                                    }
+                                }
+                            }
+                            std::vector<std::string> to_remove;
+                            for (auto it = opt_item->MemberBegin(); it != opt_item->MemberEnd(); ++it) {
+                                std::string key(it->name.GetString(), it->name.GetStringLength());
+                                if (!attrs_to_get_opt->contains(key)) {
+                                    to_remove.push_back(std::move(key));
+                                }
+                            }
+                            for (const auto& key : to_remove) {
+                                rjson::remove_member(*opt_item, key);
+                            }
+                        }
+                        rjson::push_back(items_json, std::move(*opt_item));
+                    }
+                }
+            }
+        }
+    } else {
+        // Hash+range table: query each (partition, clustering) pair individually.
+        // FIXME: do this more efficiently!!!
+        for (const auto& pkey : pkeys) {
+            std::vector<query::clustering_range> bounds{
+                    query::clustering_range::make_singular(pkey.clustering)};
+            auto partition_slice = query::partition_slice(std::move(bounds), {},
+                    regular_columns, selection->get_query_options());
+            auto command = ::make_lw_shared<query::read_command>(
+                    base_schema->id(), base_schema->version(), partition_slice,
+                    proxy.get_max_result_size(partition_slice),
+                    query::tombstone_limit(proxy.get_tombstone_limit()));
+            service::storage_proxy::coordinator_query_result qr =
+                    co_await proxy.query(base_schema, command,
+                            {dht::partition_range(pkey.partition)},
+                            db::consistency_level::LOCAL_ONE,
+                            service::storage_proxy::coordinator_query_options(
+                                    timeout, permit, client_state, trace_state));
+            auto opt_item = executor::describe_single_item(base_schema, partition_slice,
+                    *selection, *qr.query_result, *attrs_to_get);
+            if (opt_item && (!flt || flt.check(*opt_item))) {
+                ++matched_count;
+                if (select != select_type::count) {
+                    if (select == select_type::projection) {
+                        // A filter caused us to fall through here; project to keys.
+                        rjson::value key_item = rjson::empty_object();
+                        for (const column_definition& cdef : base_schema->partition_key_columns()) {
+                            if (const rjson::value* v = rjson::find(*opt_item, cdef.name_as_text())) {
+                                rjson::add_with_string_name(key_item, cdef.name_as_text(), rjson::copy(*v));
+                            }
+                        }
+                        for (const column_definition& cdef : base_schema->clustering_key_columns()) {
+                            if (const rjson::value* v = rjson::find(*opt_item, cdef.name_as_text())) {
+                                rjson::add_with_string_name(key_item, cdef.name_as_text(), rjson::copy(*v));
+                            }
+                        }
+                        rjson::push_back(items_json, std::move(key_item));
+                    } else {
+                        // When a filter caused us to fetch the full item, apply the
+                        // requested projection (attrs_to_get_opt) before returning it.
+                        // This mirrors describe_items_visitor::end_row() which removes
+                        // extra filter attributes from the returned item.
+                        if (flt && attrs_to_get_opt) {
+                            for (const auto& [attr_name, subpath] : *attrs_to_get_opt) {
+                                if (!subpath.has_value()) {
+                                    if (rjson::value* toplevel = rjson::find(*opt_item, attr_name)) {
+                                        if (!hierarchy_filter(*toplevel, subpath)) {
+                                            rjson::remove_member(*opt_item, attr_name);
+                                        }
+                                    }
+                                }
+                            }
+                            std::vector<std::string> to_remove;
+                            for (auto it = opt_item->MemberBegin(); it != opt_item->MemberEnd(); ++it) {
+                                std::string key(it->name.GetString(), it->name.GetStringLength());
+                                if (!attrs_to_get_opt->contains(key)) {
+                                    to_remove.push_back(std::move(key));
+                                }
+                            }
+                            for (const auto& key : to_remove) {
+                                rjson::remove_member(*opt_item, key);
+                            }
+                        }
+                        rjson::push_back(items_json, std::move(*opt_item));
+                    }
+                }
+            }
+        }
+    }
+
+    rjson::value response = rjson::empty_object();
+    if (select == select_type::count) {
+        rjson::add(response, "Count", rjson::value(matched_count));
+    } else {
+        rjson::add(response, "Count", rjson::value(static_cast<int>(items_json.Size())));
+        rjson::add(response, "Items", std::move(items_json));
+    }
+    rjson::add(response, "ScannedCount", rjson::value(static_cast<int>(pkeys.size())));
+    co_return rjson::print(std::move(response));
+}
+
+// Handles the Query request.
+//
+// A request carrying "VectorSearch" is delegated to query_vector(). Otherwise
+// the key conditions (exactly one of KeyConditions or KeyConditionExpression
+// must be given) are translated into partition ranges and clustering bounds,
+// the optional filter and the attributes to return are parsed, and the read
+// itself is handed over to do_query().
+//
+// Validation failures are reported, depending on the check, either as a ready
+// future holding an api_error or as a thrown api_error.
+future<executor::request_return_type> executor::query(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
+    _stats.api_operations.query++;
+    elogger.trace("Querying {}", request);
+
+    if (rjson::find(request, "VectorSearch")) {
+        // If vector search is requested, we have a separate code path.
+        // IndexName must be given and must refer to a vector index - not
+        // to a GSI or LSI as the code below assumes.
+        return query_vector(_proxy, _vsc, std::move(request), client_state, trace_state, std::move(permit),
+                _enforce_authorization, _warn_authorization, _stats, *_parsed_expression_cache);
+    }
+
+    auto [schema, table_type] = get_table_or_view(_proxy, request);
+    db::consistency_level cl = get_read_consistency(request);
+    maybe_audit(audit_info, audit::statement_category::QUERY, schema->ks_name(), schema->cf_name(), "Query", request, cl);
+
+    get_stats_from_schema(_proxy, *schema)->api_operations.query++;
+    tracing::add_alternator_table_name(trace_state, schema->cf_name());
+
+    rjson::value* exclusive_start_key = rjson::find(request, "ExclusiveStartKey");
+    if (table_type == table_or_view_type::gsi && cl != db::consistency_level::LOCAL_ONE) {
+        return make_ready_future<request_return_type>(api_error::validation(
+                "Consistent reads are not allowed on global indexes (GSI)"));
+    }
+
+    // "Limit" is read as a 64-bit value and only then narrowed. Narrowing
+    // it directly into the 32-bit limit would let an oversized value wrap
+    // around: 2^32 would become 0 (and be rejected with a misleading error)
+    // and 2^32+1 would silently become a limit of 1. Instead, anything
+    // larger than the 32-bit maximum is clamped to that maximum, which is
+    // also the value used when no Limit is given.
+    uint32_t limit = std::numeric_limits<uint32_t>::max();
+    if (const rjson::value* limit_json = rjson::find(request, "Limit")) {
+        const uint64_t requested_limit = limit_json->GetUint64();
+        if (requested_limit == 0) {
+            return make_ready_future<request_return_type>(api_error::validation("Limit must be greater than 0"));
+        }
+        if (requested_limit < limit) {
+            limit = static_cast<uint32_t>(requested_limit);
+        }
+    }
+
+    const bool forward = get_bool_attribute(request, "ScanIndexForward", true);
+
+    rjson::value* key_conditions = rjson::find(request, "KeyConditions");
+    rjson::value* key_condition_expression = rjson::find(request, "KeyConditionExpression");
+    // These two sets collect the ExpressionAttributeNames/Values references
+    // seen while parsing the request's expressions, so that
+    // verify_all_are_used() below can check them against what was supplied.
+    std::unordered_set<std::string> used_attribute_values;
+    std::unordered_set<std::string> used_attribute_names;
+    if (key_conditions && key_condition_expression) {
+        throw api_error::validation("Query does not allow both "
+                "KeyConditions and KeyConditionExpression to be given together");
+    } else if (!key_conditions && !key_condition_expression) {
+        throw api_error::validation("Query must have one of "
+                "KeyConditions or KeyConditionExpression");
+    }
+
+    const rjson::value* expression_attribute_names = rjson::find(request, "ExpressionAttributeNames");
+    const rjson::value* expression_attribute_values = rjson::find(request, "ExpressionAttributeValues");
+
+    // exactly one of key_conditions or key_condition_expression
+    auto [partition_ranges, ck_bounds] = key_conditions
+                ? calculate_bounds_conditions(schema, *key_conditions)
+                : calculate_bounds_condition_expression(schema, *key_condition_expression,
+                        expression_attribute_values,
+                        used_attribute_values,
+                        expression_attribute_names,
+                        used_attribute_names, *_parsed_expression_cache);
+
+    filter filter(*_parsed_expression_cache, request, filter::request_type::QUERY,
+            used_attribute_names, used_attribute_values);
+
+    // A query is not allowed to filter on the partition key or the sort key.
+    for (const column_definition& cdef : schema->partition_key_columns()) { // just one
+        if (filter.filters_on(cdef.name_as_text())) {
+            return make_ready_future<request_return_type>(api_error::validation(
+                    format("QueryFilter can only contain non-primary key attributes: Partition key attribute: {}", cdef.name_as_text())));
+        }
+    }
+    for (const column_definition& cdef : schema->clustering_key_columns()) {
+        if (filter.filters_on(cdef.name_as_text())) {
+            return make_ready_future<request_return_type>(api_error::validation(
+                    format("QueryFilter can only contain non-primary key attributes: Sort key attribute: {}", cdef.name_as_text())));
+        }
+        // FIXME: this "break" can avoid listing some clustering key columns
+        // we added for GSIs just because they existed in the base table -
+        // but not in all cases. We still have issue #5320.
+        break;
+    }
+
+    select_type select = parse_select(request, table_type);
+
+    auto attrs_to_get = calculate_attrs_to_get(request, *_parsed_expression_cache, used_attribute_names, select);
+    // Only after every expression has been parsed do we know the full set of
+    // used names and values.
+    verify_all_are_used(expression_attribute_names, used_attribute_names, "ExpressionAttributeNames", "Query");
+    verify_all_are_used(expression_attribute_values, used_attribute_values, "ExpressionAttributeValues", "Query");
+    query::partition_slice::option_set opts;
+    opts.set_if<query::partition_slice::option::reversed>(!forward);
+    return do_query(_proxy, schema, exclusive_start_key, std::move(partition_ranges), std::move(ck_bounds), std::move(attrs_to_get), limit, cl,
+            std::move(filter), opts, client_state, _stats, std::move(trace_state), std::move(permit), _enforce_authorization, _warn_authorization);
+}
+
+// Converts a query result holding any number of rows into a vector of JSON
+// items, one per row and in the order of the built result set. Each row is
+// rendered by describe_single_item() using *attrs_to_get. If item_callback
+// is set, it is called once per row with the byte length that
+// describe_single_item() reported for that row.
+future<std::vector<rjson::value>> executor::describe_multi_item(schema_ptr schema,
+        const query::partition_slice&& slice,
+        shared_ptr<cql3::selection::selection> selection,
+        foreign_ptr<lw_shared_ptr<query::result>> query_result,
+        shared_ptr<const std::optional<attrs_to_get>> attrs_to_get,
+        noncopyable_function<void(uint64_t)> item_callback) {
+    // Note: "slice" is a reference parameter, and this is the only place it
+    // is used - before the first co_await below.
+    cql3::selection::result_set_builder rows_builder(*selection, gc_clock::now());
+    query::result_view::consume(
+            *query_result,
+            slice,
+            cql3::selection::result_set_builder::visitor(rows_builder, *schema, *selection));
+    auto built_rows = rows_builder.build();
+
+    std::vector<rjson::value> items;
+    for (auto& row : built_rows->rows()) {
+        uint64_t row_size_in_bytes = 0;
+        rjson::value row_json = rjson::empty_object();
+        describe_single_item(*selection, row, *attrs_to_get, row_json, &row_size_in_bytes);
+        if (item_callback) {
+            item_callback(row_size_in_bytes);
+        }
+        items.push_back(std::move(row_json));
+        // Give other tasks a chance to run between rows.
+        co_await coroutine::maybe_yield();
+    }
+    co_return items;
+}
+
+// describe_item() builds the response object of a GetItem request: the item
+// returned by describe_single_item(), if there is one, is placed under the
+// "Item" member of an otherwise empty JSON object. The consumed-capacity
+// counter is then asked to add its entry to that object if needed, and its
+// half-unit count is added to "metric".
+// This function exists only for GetItem; any other caller should use
+// describe_single_item() directly.
+static rjson::value describe_item(schema_ptr schema,
+        const query::partition_slice& slice,
+        const cql3::selection::selection& selection,
+        const query::result& query_result,
+        const std::optional<attrs_to_get>& attrs_to_get,
+        consumed_capacity_counter& consumed_capacity,
+        uint64_t& metric) {
+    rjson::value response = rjson::empty_object();
+    // describe_single_item() is also given the counter's byte total to
+    // update, which is why it must run before get_half_units() below.
+    if (std::optional<rjson::value> found = executor::describe_single_item(
+                std::move(schema), slice, selection, query_result, attrs_to_get,
+                &consumed_capacity._total_bytes)) {
+        rjson::add(response, "Item", std::move(*found));
+    }
+    consumed_capacity.add_consumed_capacity_to_response_if_needed(response);
+    metric += consumed_capacity.get_half_units();
+    return response;
+}
+
+// Handles the GetItem request: reads the single item identified by the
+// request's "Key" from the requested table, at the consistency level chosen
+// by the request, and returns the JSON response produced by describe_item().
+// Along the way it updates the global and per-table operation counters,
+// latency histograms, RCU totals and (when an item was found) the item-size
+// histogram.
+future<executor::request_return_type> executor::get_item(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
+    _stats.api_operations.get_item++;
+    auto started_at = std::chrono::steady_clock::now();
+    elogger.trace("Getting item {}", request);
+
+    schema_ptr schema = get_table(_proxy, request);
+    lw_shared_ptr<stats> table_stats = get_stats_from_schema(_proxy, *schema);
+    table_stats->api_operations.get_item++;
+    tracing::add_alternator_table_name(trace_state, schema->cf_name());
+
+    rjson::value& key_json = request["Key"];
+    db::consistency_level cl = get_read_consistency(request);
+
+    maybe_audit(audit_info, audit::statement_category::QUERY, schema->ks_name(), schema->cf_name(), "GetItem", request, cl);
+
+    // The read targets exactly one partition...
+    partition_key item_pk = pk_from_json(key_json, schema);
+    dht::partition_range_vector ranges{dht::partition_range(dht::decorate_key(*schema, item_pk))};
+
+    // ...and, if the table has a clustering key, exactly one row in it.
+    std::vector<query::clustering_range> ck_ranges;
+    if (schema->clustering_key_size() != 0) {
+        ck_ranges.push_back(query::clustering_range::make_singular(ck_from_json(key_json, schema)));
+    } else {
+        ck_ranges.push_back(query::clustering_range::make_open_ended_both_sides());
+    }
+    check_key(key_json, schema);
+
+    //TODO(sarna): It would be better to fetch only some attributes of the map, not all
+    auto regular_column_ids = schema->regular_columns()
+            | std::views::transform(&column_definition::id)
+            | std::ranges::to<query::column_id_vector>();
+
+    auto selection = cql3::selection::selection::wildcard(schema);
+
+    // The slice is copied (not moved) into the read command because it is
+    // needed again below when the result is converted to JSON.
+    auto slice = query::partition_slice(std::move(ck_ranges), {}, std::move(regular_column_ids), selection->get_query_options());
+    auto read_cmd = ::make_lw_shared<query::read_command>(
+            schema->id(), schema->version(), slice,
+            _proxy.get_max_result_size(slice),
+            query::tombstone_limit(_proxy.get_tombstone_limit()));
+
+    std::unordered_set<std::string> names_in_use;
+    auto projection = calculate_attrs_to_get(request, *_parsed_expression_cache, names_in_use);
+    const rjson::value* attribute_names_json = rjson::find(request, "ExpressionAttributeNames");
+    verify_all_are_used(attribute_names_json, names_in_use, "ExpressionAttributeNames", "GetItem");
+    rcu_consumed_capacity_counter rcu_counter(request, cl == db::consistency_level::LOCAL_QUORUM);
+    co_await verify_permission(_enforce_authorization, _warn_authorization, client_state, schema, auth::permission::SELECT, _stats);
+    service::storage_proxy::coordinator_query_result qr =
+        co_await _proxy.query(
+            schema, std::move(read_cmd), std::move(ranges), cl,
+            service::storage_proxy::coordinator_query_options(executor::default_timeout(), std::move(permit), client_state, trace_state));
+    // Latency is recorded once the read completes, before the result is
+    // converted to JSON.
+    table_stats->api_operations.get_item_latency.mark(std::chrono::steady_clock::now() - started_at);
+    _stats.api_operations.get_item_latency.mark(std::chrono::steady_clock::now() - started_at);
+
+    uint64_t half_units = 0;
+    rjson::value reply = describe_item(schema, slice, *selection, *qr.query_result, projection, rcu_counter, half_units);
+    table_stats->rcu_half_units_total += half_units;
+    _stats.rcu_half_units_total += half_units;
+    // Update item size metrics only if we found an item.
+    const bool found_item = qr.query_result->row_count().value_or(0) > 0;
+    if (found_item) {
+        table_stats->operation_sizes.get_item_op_size_kb.add(bytes_to_kb_ceil(rcu_counter._total_bytes));
+    }
+    co_return rjson::print(std::move(reply));
+}
+
+// Handles the BatchGetItem request: reads multiple items, possibly from
+// several tables, and returns them grouped by table under "Responses".
+// The whole request is parsed and validated first; then one read is started
+// per (table, partition) pair, all in parallel. Keys belonging to reads that
+// failed are returned under "UnprocessedKeys". If no read succeeded at all,
+// one of the captured exceptions is returned instead of a response.
+future<executor::request_return_type> executor::batch_get_item(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
+    // FIXME: In this implementation, an unbounded batch size can cause
+    // unbounded response JSON object to be buffered in memory, unbounded
+    // parallelism of the requests, and unbounded amount of non-preemptable
+    // work in the following loops. So we should limit the batch size, and/or
+    // the response size, as DynamoDB does.
+    _stats.api_operations.batch_get_item++;
+    rjson::value& request_items = request["RequestItems"];
+    auto start_time = std::chrono::steady_clock::now();
+    // We need to validate all the parameters before starting any asynchronous
+    // query, and fail the entire request on any parse error. So we parse all
+    // the input into our own vector "requests", each element a table_requests
+    // listing all the request aimed at a single table. For efficiency, inside
+    // each table_requests we further group together all reads going to the
+    // same partition, so we can later send them together.
+    bool should_add_rcu = rcu_consumed_capacity_counter::should_add_capacity(request);
+    struct table_requests {
+        schema_ptr schema;
+        // Not set by the constructor; assigned right after construction in
+        // the parsing loop below.
+        db::consistency_level cl;
+        ::shared_ptr<const std::optional<alternator::attrs_to_get>> attrs_to_get;
+        // clustering_keys keeps a sorted set of clustering keys. It must
+        // be sorted for the read below (see #10827). Additionally each
+        // clustering key is mapped to the original rjson::value "Key".
+        using clustering_keys = std::map<clustering_key, rjson::value*, clustering_key::less_compare>;
+        std::unordered_map<partition_key, clustering_keys, partition_key::hashing, partition_key::equality> requests;
+        table_requests(schema_ptr s)
+            : schema(std::move(s))
+            , requests(8, partition_key::hashing(*schema), partition_key::equality(*schema))
+        {}
+        // Registers one requested "Key" under its partition. The stored
+        // pointer refers to the given JSON value itself (no copy is made).
+        // Throws a validation error if the same (partition key, clustering
+        // key) pair was already added.
+        void add(rjson::value& key) {
+            auto pk = pk_from_json(key, schema);
+            auto it = requests.find(pk);
+            if (it == requests.end()) {
+                it = requests.emplace(pk, clustering_key::less_compare(*schema)).first;
+            }
+            auto ck = ck_from_json(key, schema);
+            if (auto [_, inserted] = it->second.emplace(ck, &key); !inserted) {
+                throw api_error::validation("Provided list of item keys contains duplicates");
+            }
+        }
+    };
+    std::vector<table_requests> requests;
+    // Accumulates rs.requests.size() over all tables, i.e., the number of
+    // distinct partitions read - one read is issued per partition below.
+    // NOTE(review): this is not the number of requested keys when several
+    // keys share a partition - confirm this is the intended metric.
+    uint batch_size = 0;
+    for (auto it = request_items.MemberBegin(); it != request_items.MemberEnd(); ++it) {
+        table_requests rs(get_table_from_batch_request(_proxy, it));
+        tracing::add_alternator_table_name(trace_state, rs.schema->cf_name());
+        rs.cl = get_read_consistency(it->value);
+        std::unordered_set<std::string> used_attribute_names;
+        rs.attrs_to_get = ::make_shared<const std::optional<attrs_to_get>>(calculate_attrs_to_get(it->value, *_parsed_expression_cache, used_attribute_names));
+        const rjson::value* expression_attribute_names = rjson::find(it->value, "ExpressionAttributeNames");
+        verify_all_are_used(expression_attribute_names, used_attribute_names,"ExpressionAttributeNames", "GetItem");
+        auto& keys = (it->value)["Keys"];
+        for (rjson::value& key : keys.GetArray()) {
+            rs.add(key);
+            check_key(key, rs.schema);
+        }
+        batch_size += rs.requests.size();
+        requests.emplace_back(std::move(rs));
+    }
+
+    // Check the SELECT permission on every table before starting any read.
+    for (const table_requests& tr : requests) {
+        co_await verify_permission(_enforce_authorization, _warn_authorization, client_state, tr.schema, auth::permission::SELECT, _stats);
+    }
+
+    _stats.api_operations.batch_get_item_batch_total += batch_size;
+    _stats.api_operations.batch_get_item_histogram.add(batch_size);
+    // If we got here, all "requests" are valid, so let's start the
+    // requests for the different partitions all in parallel.
+    std::vector<future<std::vector<rjson::value>>> response_futures;
+    // One RCU accumulator (in half units) per table, indexed like "requests".
+    // The vector is sized here once and is not resized later in this
+    // function, so the references captured by item_callback below keep
+    // pointing at valid elements.
+    std::vector<uint64_t> consumed_rcu_half_units_per_table(requests.size());
+    for (size_t i = 0; i < requests.size(); i++) {
+        const table_requests& rs = requests[i];
+        bool is_quorum = rs.cl == db::consistency_level::LOCAL_QUORUM;
+        lw_shared_ptr<stats> per_table_stats = get_stats_from_schema(_proxy, *rs.schema);
+        per_table_stats->api_operations.batch_get_item_histogram.add(rs.requests.size());
+        for (const auto& [pk, cks] : rs.requests) {
+            dht::partition_range_vector partition_ranges{dht::partition_range(dht::decorate_key(*rs.schema, pk))};
+            // One singular clustering range per requested key of this
+            // partition (in the sorted order of "cks"), or the whole
+            // partition when the table has no clustering key.
+            std::vector<query::clustering_range> bounds;
+            if (rs.schema->clustering_key_size() == 0) {
+                bounds.push_back(query::clustering_range::make_open_ended_both_sides());
+            } else {
+                for (auto& ck : cks) {
+                    bounds.push_back(query::clustering_range::make_singular(ck.first));
+                }
+            }
+            auto regular_columns =
+                    rs.schema->regular_columns() | std::views::transform(&column_definition::id)
+                    | std::ranges::to<query::column_id_vector>();
+            auto selection = cql3::selection::selection::wildcard(rs.schema);
+            auto partition_slice = query::partition_slice(std::move(bounds), {}, std::move(regular_columns), selection->get_query_options());
+            auto command = ::make_lw_shared<query::read_command>(rs.schema->id(), rs.schema->version(), partition_slice, _proxy.get_max_result_size(partition_slice),
+                    query::tombstone_limit(_proxy.get_tombstone_limit()));
+            command->allow_limit = db::allow_per_partition_rate_limit::yes;
+            // Passed to describe_multi_item(), which calls it once per
+            // returned row with that row's size in bytes. It adds the row's
+            // cost to this table's RCU accumulator.
+            const auto item_callback = [is_quorum, per_table_stats, &rcus_per_table = consumed_rcu_half_units_per_table[i]](uint64_t size) {
+                rcus_per_table += rcu_consumed_capacity_counter::get_half_units(size, is_quorum);
+                // Update item size only if the item exists.
+                if (size > 0) {
+                    per_table_stats->operation_sizes.batch_get_item_op_size_kb.add(bytes_to_kb_ceil(size));
+                }
+            };
+            future<std::vector<rjson::value>> f = _proxy.query(rs.schema, std::move(command), std::move(partition_ranges), rs.cl,
+                    service::storage_proxy::coordinator_query_options(executor::default_timeout(), permit, client_state, trace_state)).then(
+                    [schema = rs.schema, partition_slice = std::move(partition_slice), selection = std::move(selection), attrs_to_get = rs.attrs_to_get, item_callback = std::move(item_callback)] (service::storage_proxy::coordinator_query_result qr) mutable {
+                // Error-injection hook: the handler throws, which makes this
+                // partition's read fail (presumably only when the injection
+                // is enabled, e.g. by tests - TODO confirm).
+                utils::get_local_injector().inject("alternator_batch_get_item", [] { throw std::runtime_error("batch_get_item injection"); });
+                return describe_multi_item(std::move(schema), std::move(partition_slice), std::move(selection), std::move(qr.query_result), std::move(attrs_to_get), std::move(item_callback));
+            });
+            response_futures.push_back(std::move(f));
+        }
+    }
+
+    // Wait for all requests to complete, and then return the response.
+    // In case of full failure (no reads succeeded), an arbitrary error
+    // from one of the operations will be returned.
+    bool some_succeeded = false;
+    std::exception_ptr eptr;
+    std::set<sstring> table_names; // for auditing
+    // FIXME: will_log() here doesn't pass keyspace/table, so keyspace-level audit
+    // filtering is bypassed — a batch spanning multiple tables is audited as a whole.
+    bool should_audit = _audit.local_is_initialized() && _audit.local().will_log(audit::statement_category::QUERY);
+    rjson::value response = rjson::empty_object();
+    rjson::add(response, "Responses", rjson::empty_object());
+    rjson::add(response, "UnprocessedKeys", rjson::empty_object());
+    // The futures were pushed above in the same nested order (table, then
+    // partition within the table) in which the loops below iterate, so
+    // advancing fut_it in lockstep pairs each future with the keys ("cks")
+    // of the partition it read.
+    auto fut_it = response_futures.begin();
+    rjson::value consumed_capacity = rjson::empty_array();
+    for (size_t i = 0; i < requests.size(); i++) {
+        const table_requests& rs = requests[i];
+        std::string table = table_name(*rs.schema);
+        if (should_audit) {
+            table_names.insert(table);
+        }
+        for (const auto& [_, cks] : rs.requests) {
+            auto& fut = *fut_it;
+            ++fut_it;
+            try {
+                std::vector<rjson::value> results = co_await std::move(fut);
+                some_succeeded = true;
+                // The table's array in "Responses" is created lazily, on the
+                // first successful read for that table.
+                if (!response["Responses"].HasMember(table)) {
+                    rjson::add_with_string_name(response["Responses"], table, rjson::empty_array());
+                }
+                for (rjson::value& json : results) {
+                    rjson::push_back(response["Responses"][table], std::move(json));
+                }
+            } catch(...) {
+                // Remember the error; it is only returned to the caller if
+                // no read at all succeeded (see below).
+                eptr = std::current_exception();
+                // This read of potentially several rows in one partition,
+                // failed. We need to add the row key(s) to UnprocessedKeys.
+                if (!response["UnprocessedKeys"].HasMember(table)) {
+                    // Add the table's entry in UnprocessedKeys. Need to copy
+                    // all the table's parameters from the request except the
+                    // Keys field, which we start empty and then build below.
+                    rjson::add_with_string_name(response["UnprocessedKeys"], table, rjson::empty_object());
+                    rjson::value& unprocessed_item = response["UnprocessedKeys"][table];
+                    rjson::value& request_item = request_items[table];
+                    for (auto it = request_item.MemberBegin(); it != request_item.MemberEnd(); ++it) {
+                        if (it->name != "Keys") {
+                            rjson::add_with_string_name(unprocessed_item,
+                                rjson::to_string_view(it->name), rjson::copy(it->value));
+                        }
+                    }
+                    rjson::add_with_string_name(unprocessed_item, "Keys", rjson::empty_array());
+                }
+                // ck.second points at the original "Key" JSON inside the
+                // request, which is moved (not copied) into the response.
+                // NOTE(review): "request" is still passed to maybe_audit()
+                // further below - confirm that the moved-from keys are
+                // acceptable there.
+                for (auto& ck : cks) {
+                    rjson::push_back(response["UnprocessedKeys"][table]["Keys"], std::move(*ck.second));
+                }
+            }
+        }
+        // All of this table's reads have completed (or failed) by now, so
+        // its RCU accumulator is final.
+        uint64_t rcu_half_units = consumed_rcu_half_units_per_table[i];
+        _stats.rcu_half_units_total += rcu_half_units;
+        lw_shared_ptr<stats> per_table_stats = get_stats_from_schema(_proxy, *rs.schema);
+        per_table_stats->rcu_half_units_total += rcu_half_units;
+        if (should_add_rcu) {
+            rjson::value entry = rjson::empty_object();
+            rjson::add(entry, "TableName", table);
+            // The accumulator counts half units; halve it to get units.
+            rjson::add(entry, "CapacityUnits", rcu_half_units*0.5);
+            rjson::push_back(consumed_capacity, std::move(entry));
+        }
+    }
+
+    if (should_add_rcu) {
+        rjson::add(response, "ConsumedCapacity", std::move(consumed_capacity));
+    }
+    elogger.trace("Unprocessed keys: {}", response["UnprocessedKeys"]);
+    // NOTE: Each table in the batch has its own CL (set by get_read_consistency()),
+    // but the audit entry records a single CL for the whole batch. We use ANY as a
+    // placeholder to indicate "mixed / not applicable".
+    // FIXME: Auditing is executed only for a complete success
+    maybe_audit(audit_info, audit::statement_category::QUERY, "",
+                print_names_for_audit(table_names), "BatchGetItem", request, db::consistency_level::ANY);
+    if (!some_succeeded && eptr) {
+        co_await coroutine::return_exception_ptr(std::move(eptr));
+    }
+    // Latency is recorded only when a response is returned, i.e., not in
+    // the full-failure case above.
+    auto duration = std::chrono::steady_clock::now() - start_time;
+    _stats.api_operations.batch_get_item_latency.mark(duration);
+    for (const table_requests& rs : requests) {
+        lw_shared_ptr<stats> per_table_stats = get_stats_from_schema(_proxy, *rs.schema);
+        per_table_stats->api_operations.batch_get_item_latency.mark(duration);
+    }
+    if (is_big(response)) {
+        co_return make_streamed(std::move(response));
+    } else {
+        co_return rjson::print(std::move(response));
+    }
+}
+
+} // namespace alternator
diff --git a/configure.py b/configure.py
index 23b1822388..d1ce4ef2f8 100755
--- a/configure.py
+++ b/configure.py
@@ -1438,6 +1438,7 @@ alternator = [
        'alternator/controller.cc',
        'alternator/server.cc',
        'alternator/executor.cc',
+       'alternator/executor_read.cc',
        'alternator/stats.cc',
        'alternator/serialization.cc',
        'alternator/expressions.cc',