Fixes https://github.com/scylladb/scylladb/issues/13915
This commit fixes broken links to the Enterprise docs.
They are links to the enterprise branch, which is not
published. The links to the Enterprise docs should include
"stable" instead of the branch name.
This commit must be backported to branch-5.2, because
the broken links are present in the published 5.2 docs.
Closes #13917
(cherry picked from commit 6f4a68175b)
This branch backports to branch-5.2 several fixes related to node operations:
- ba919aa88a (PR #12980; Fixes: #11011, #12969)
- 53636167ca (part of PR #12970; Fixes: #12764, #12956)
- 5856e69462 (part of PR #12970)
- 2b44631ded (PR #13028; Fixes: #12989)
- 6373452b31 (PR #12799; Fixes #12798)
Closes #13531
* github.com:scylladb/scylladb:
Merge 'Do not mask node operation errors' from Benny Halevy
Merge 'storage_service: Make node operations safer by detecting asymmetric abort' from Tomasz Grabiec
storage_service: Wait for normal state handler to finish in replace
storage_service: Wait for normal state handler to finish in bootstrap
storage_service: Send heartbeat earlier for node ops
Fixes https://github.com/scylladb/scylladb/issues/13857
This commit adds the OS support for ScyllaDB Enterprise 2023.1.
The support is the same as for ScyllaDB Open Source 5.2, on which
2023.1 is based.
After this commit is merged, it must be backported to branch-5.2.
In this way, it will be merged to branch-2023.1 and available in
the docs for Enterprise 2023.1.
Closes: #13858
(cherry picked from commit 84ed95f86f)
range_tombstone_change_generator::flush() mishandles the case when two range
tombstones are adjacent and flush(pos, end_of_range=true) is called with pos
equal to the end bound of the lesser-position range tombstone.
In such a case, the start change of the greater-position rtc will be accidentally
emitted, and there won't be an end change, which breaks reader assumptions by
ending the stream with an unclosed range tombstone, triggering an assertion.
This is due to a non-strict inequality used in a place where strict inequality
should be used. The modified line was intended to close range tombstones
which end exactly on the flush position, but this is unnecessary because such
range tombstones are handled by the last `if` in the function anyway.
Instead, this line caused range tombstones beginning right after the flush
position to be emitted sometimes.
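The inequality bug is easiest to see in a reduced model. Below is a minimal, self-contained sketch (illustrative names and integer positions; not the actual ScyllaDB code) of why a non-strict comparison at the flush position leaks the start change of an adjacent tombstone:
```cpp
#include <cassert>
#include <deque>

struct rt_change { int pos; bool is_start; };

// Emit queued changes strictly before `pos`. With `<=` (the bug), a start
// change sitting exactly at `pos` -- e.g. the start of a tombstone adjacent
// to one ending at `pos` -- is emitted without its matching end change.
void flush(std::deque<rt_change>& queue, std::deque<rt_change>& out, int pos) {
    while (!queue.empty() && queue.front().pos < pos) { // was: <= pos
        out.push_back(queue.front());
        queue.pop_front();
    }
}

int main() {
    // Two adjacent tombstones: [1, 5) and [5, 9).
    std::deque<rt_change> queue{{1, true}, {5, true}, {9, false}};
    std::deque<rt_change> out;
    flush(queue, out, 5); // flush at the boundary between the two
    assert(out.size() == 1 && out.front().pos == 1); // start at 5 stays queued
}
```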
Fixes https://github.com/scylladb/scylladb/issues/12462
Closes #13894
* github.com:scylladb/scylladb:
tests: row_cache: Add reproducer for reader producing missing closing range tombstone
range_tombstone_change_generator: fix an edge case in flush()
static report:
sstables/mx/reader.cc:1705:58: error: invalid invocation of method 'operator*' on object 'schema' while it is in the 'consumed' state [-Werror,-Wconsumed]
legacy_reverse_slice_to_native_reverse_slice(*schema, slice.get()), pc, std::move(trace_state), fwd, fwd_mr, monitor);
Fixes #13394.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
(cherry picked from commit 213eaab246)
Use-after-move in the ctor, which potentially leads to a failure
when locating the table from a moved schema object.
static report:
In file included from db/system_keyspace.cc:51:
./db/view/build_progress_virtual_reader.hh:202:40: warning: invalid invocation of method 'operator->' on object 's' while it is in the 'consumed' state [-Wconsumed]
_db.find_column_family(s->ks_name(), system_keyspace::v3::SCYLLA_VIEWS_BUILDS_IN_PROGRESS),
Fixes #13395.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
(cherry picked from commit 1ecba373d6)
static report:
./index/built_indexes_virtual_reader.hh:228:40: warning: invalid invocation of method 'operator->' on object 's' while it is in the 'consumed' state [-Wconsumed]
_db.find_column_family(s->ks_name(), system_keyspace::v3::BUILT_VIEWS),
Fixes #13396.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
(cherry picked from commit f8df3c72d4)
Variant used by
streaming/stream_transfer_task.cc: , reader(cf.make_streaming_reader(cf.schema(), std::move(permit_), prs))
As the full slice is retrieved after the schema is moved (clang evaluates
left-to-right), the stream transfer task can potentially be working
on a stale slice for a particular set of partitions.
static report:
In file included from replica/dirty_memory_manager.cc:6:
replica/database.hh:706:83: error: invalid invocation of method 'operator->' on object 'schema' while it is in the 'consumed' state [-Werror,-Wconsumed]
return make_streaming_reader(std::move(schema), std::move(permit), range, schema->full_slice());
Fixes #13397.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
(cherry picked from commit 04932a66d3)
Adds a reproducer for #12462.
The bug manifests by reader throwing:
std::logic_error: Stream ends with an active range tombstone: {range_tombstone_change: pos={position: clustered,ckp{},-1}, {tombstone: timestamp=-9223372036854775805, deletion_time=2}}
The reason is that prior to the fix range_tombstone_change_generator::flush()
was used with end_of_range=true to produce the closing range_tombstone_change
and it did not correctly handle the case when there are two adjacent range
tombstones and flush(pos, end_of_range=true) is called such that pos is the
boundary between the two.
Cherry-picked from a717c803c7.
range_tombstone_change_generator::flush() mishandles the case when two range
tombstones are adjacent and flush(pos, end_of_range=true) is called with pos
equal to the end bound of the lesser-position range tombstone.
In such a case, the start change of the greater-position rtc will be accidentally
emitted, and there won't be an end change, which breaks reader assumptions by
ending the stream with an unclosed range tombstone, triggering an assertion.
This is due to a non-strict inequality used in a place where strict inequality
should be used. The modified line was intended to close range tombstones
which end exactly on the flush position, but this is unnecessary because such
range tombstones are handled by the last `if` in the function anyway.
Instead, this line caused range tombstones beginning right after the flush
position to be emitted sometimes.
Fixes #12462
The immediate mode is similar to timeout mode with gc_grace_seconds
zero. Thus, the gc_before returned should be the query_time instead of
gc_clock::time_point::max in immediate mode.
With gc_before set to gc_clock::time_point::max, a row could be dropped
by compaction even if its TTL has not expired yet.
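A minimal sketch of the intended gc_before computation (std::chrono stand-ins for gc_clock; names are illustrative, not the actual ScyllaDB code):
```cpp
#include <chrono>

using time_point = std::chrono::system_clock::time_point;
using duration = std::chrono::seconds;

enum class gc_mode { timeout, immediate, disabled };

// In immediate mode, gc_before must be the query time - equivalent to
// timeout mode with gc_grace_seconds == 0. Returning time_point::max()
// (the bug) lets compaction purge data whose TTL has not expired yet.
time_point get_gc_before(gc_mode mode, time_point query_time, duration gc_grace) {
    switch (mode) {
    case gc_mode::timeout:   return query_time - gc_grace;
    case gc_mode::immediate: return query_time;
    default:                 return time_point::min(); // gc disabled: never purge
    }
}
```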
The following procedure reproduces the issue:
- Start 2 nodes
- Insert data
```
CREATE KEYSPACE ks2a WITH REPLICATION = { 'class' : 'SimpleStrategy',
'replication_factor' : 2 };
CREATE TABLE ks2a.tb (pk int, ck int, c0 text, c1 text, c2 text, PRIMARY
KEY(pk, ck)) WITH tombstone_gc = {'mode': 'immediate'};
INSERT into ks2a.tb (pk,ck, c0, c1, c2) values (10 ,1, 'x', 'y', 'z')
USING TTL 1000000;
INSERT into ks2a.tb (pk,ck, c0, c1, c2) values (20 ,1, 'x', 'y', 'z')
USING TTL 1000000;
INSERT into ks2a.tb (pk,ck, c0, c1, c2) values (30 ,1, 'x', 'y', 'z')
USING TTL 1000000;
```
- Run nodetool flush and nodetool compact
- Compaction drops all data
```
~128 total partitions merged to 0.
```
Fixes #13572
Closes #13800
(cherry picked from commit 7fcc403122)
This is not really an error, so print it at debug log level
rather than error log level.
Fixes #13374
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Closes #13462
(cherry picked from commit cc42f00232)
Courtesy of clang-tidy:
row_cache.cc:1191:28: warning: 'entry' used after it was moved [bugprone-use-after-move]
_partitions.insert(entry.position().token().raw(), std::move(entry), dht::ring_position_comparator{_schema});
^
row_cache.cc:1191:60: note: move occurred here
_partitions.insert(entry.position().token().raw(), std::move(entry), dht::ring_position_comparator{_schema});
^
row_cache.cc:1191:28: note: the use and move are unsequenced, i.e. there is no guarantee about the order in which they are evaluated
_partitions.insert(entry.position().token().raw(), std::move(entry), dht::ring_position_comparator{*_schema});
The use-after-move is UB; whether it happens depends on evaluation order.
We haven't hit it yet because clang evaluates left-to-right.
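The standard fix pattern is to sequence the read before the move by naming the intermediate value; a minimal self-contained sketch (illustrative, not the actual row_cache code):
```cpp
#include <map>
#include <string>
#include <utility>

struct entry_t {
    std::string key;
    std::string payload;
};

int main() {
    std::map<std::string, entry_t> partitions;
    entry_t entry{"k1", "v1"};

    // Buggy: `entry.key` and `std::move(entry)` are unsequenced function
    // arguments, so the key may be read from a moved-from object:
    //   partitions.emplace(entry.key, std::move(entry));

    // Fixed: force the read to happen before the move by naming it.
    auto key = entry.key;
    partitions.emplace(std::move(key), std::move(entry));
}
```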
Fixes #13400.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Closes #13401
(cherry picked from commit d2d151ae5b)
An aggregation query on a counter column fails because forward_service looks for a function with counter as an argument, and no such function exists. Instead, the long type should be used.
Fixes: #12939
Closes #12963
* github.com:scylladb/scylladb:
test:boost: counter column parallelized aggregation test
service:forward_service: use long type when column is counter
(cherry picked from commit 61e67b865a)
Consider
- n1, n2, n3
- n3 is down
- n4 replaces n3 with the same ip address 127.0.0.3
- Inside the storage_service::handle_state_normal callback for 127.0.0.3 on n1/n2
```
auto host_id = _gossiper.get_host_id(endpoint);
auto existing = tmptr->get_endpoint_for_host_id(host_id);
```
host_id = new host id
existing = empty
As a result, del_replacing_endpoint() will not be called.
This means 127.0.0.3 will not be removed as a pending node on n1 and n2 when
replacing is done. This is wrong.
This is a regression since commit 9942c60d93
(storage_service: do not inherit the host_id of a replaced node), where
the replacing node uses a different host id than the node being replaced.
To fix, call del_replacing_endpoint() when a node becomes NORMAL and existing
is empty.
Before:
n1:
storage_service - replace[cd1f187a-0eee-4b04-91a9-905ecc499cfc]: Added replacing_node=127.0.0.3 to replace existing_node=127.0.0.3, coordinator=127.0.0.3
token_metadata - Added node 127.0.0.3 as pending replacing endpoint which replaces existing node 127.0.0.3
storage_service - replace[cd1f187a-0eee-4b04-91a9-905ecc499cfc]: Marked ops done from coordinator=127.0.0.3
storage_service - Node 127.0.0.3 state jump to normal
storage_service - Set host_id=6f9ba4e8-9457-4c76-8e2a-e2be257fe123 to be owned by node=127.0.0.3
After:
n1:
storage_service - replace[28191ea6-d43b-3168-ab01-c7e7736021aa]: Added replacing_node=127.0.0.3 to replace existing_node=127.0.0.3, coordinator=127.0.0.3
token_metadata - Added node 127.0.0.3 as pending replacing endpoint which replaces existing node 127.0.0.3
storage_service - replace[28191ea6-d43b-3168-ab01-c7e7736021aa]: Marked ops done from coordinator=127.0.0.3
storage_service - Node 127.0.0.3 state jump to normal
token_metadata - Removed node 127.0.0.3 as pending replacing endpoint which replaces existing node 127.0.0.3
storage_service - Set host_id=72219180-e3d1-4752-b644-5c896e4c2fed to be owned by node=127.0.0.3
Tests: https://github.com/scylladb/scylla-dtest/pull/3126
Closes #13677
Fixes: https://github.com/scylladb/scylla-enterprise/issues/2852
(cherry picked from commit a8040306bb)
The evictable reader must ensure that each buffer fill makes forward
progress, i.e. the last fragment in the buffer has a position larger
than the last fragment from the last buffer-fill. Otherwise, the reader
could get stuck in an infinite loop between buffer fills, if the reader
is evicted in-between.
The code guaranteeing this forward progress has a bug: when the next
expected position is a partition-start (another partition), the code
would loop forever, effectively reading all there is from the underlying
reader.
To avoid this, add a special case to skip the progress guarantee loop
altogether when the next expected position is a partition start. In this
case, progress is guaranteed anyway, because there is exactly one
partition-start fragment in each partition.
Fixes: #13491
Closes #13563
(cherry picked from commit 72003dc35c)
I've no idea why the quotes are there at all; it works even without
them. However, with quotes gdb-13 fails to find the _all_threads static
thread-local variable _unless_ it's printed with gdb "p" command
beforehand.
Fixes: #13125
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Closes #13132
(cherry picked from commit 537510f7d2)
This series handles errors when aborting node operations and prints them rather than letting them leak and be exposed to the user.
Also, it cleans up the node_ops logging formats when aborting different node ops
and adds more error logging around errors on the "worker" nodes.
Closes #12799
* github.com:scylladb/scylladb:
storage_service: node_ops_signal_abort: print a warning when signaling abort
storage_service: s/node_ops_singal_abort/node_ops_signal_abort/
storage_service: node_ops_abort: add log messages
storage_service: wire node_ops_ctl for node operations
storage_service: add node_ops_ctl class to formalize all node_ops flow
repair: node_ops_cmd_request: add print function
repair: do_decommission_removenode_with_repair: log ignore_nodes
repair: replace_with_repair: get ignore_nodes as unordered_set
gossiper: get_generation_for_nodes: get nodes as unordered_set
storage_service: don't let node_ops abort failures mask the real error
(cherry picked from commit 6373452b31)
This patch fixes a problem which affects decommission and removenode
and may lead to data consistency problems under conditions which
lead one of the nodes to unilaterally decide to abort the node
operation without the coordinator noticing.
If this happens during streaming, the node operation coordinator would
proceed to make a change in the gossiper, and only later detect that
one of the nodes aborted during the sending of the decommission_done or
removenode_done command. That's too late, because the operation will
be finalized by all the nodes once gossip propagates.
It's unsafe to finalize the operation when another node has aborted. The
other node has reverted to the old topology, with which it was running
for some time, without considering the pending replica when handling
requests. As a result, we may end up with consistency issues. Writes
made by those coordinators may not be replicated to CL replicas in the
new topology. Streaming may have failed to replicate those writes,
depending on timing.
It's possible that some node aborts but streaming succeeds if the
abort is not due to network problems, or if the network problems are
transient and/or localized and affect only heartbeats.
There is no way to revert after we commit the node operation to the
gossiper, so it's ok to close node_ops sessions before making the
change to the gossiper, and thus detect aborts and prevent later aborts
after the change in the gossiper is made. This is already done during
bootstrap (RBNO enabled) and replacenode. This patch changes removenode
to also take this approach by moving sending of remove_done earlier.
We cannot take this approach with decommission easily, because the
decommission_done command includes a wait for the node to leave the
ring, which won't happen before the change to the gossiper is
made. Separating this from decommission_done would require protocol
changes. This patch adds a second-best solution, which is to check if
sessions are still there right before making a change to the gossiper,
leaving decommission_done where it was.
The race can still happen, but the time window is now much smaller.
The PR also lays down infrastructure which enables testing the scenarios. It makes node ops
watchdog periods configurable, and adds error injections.
Fixes #12989
Refs #12969
Closes #13028
* github.com:scylladb/scylladb:
storage_service: node ops: Extract node_ops_insert() to reduce code duplication
storage_service: Make node operations safer by detecting asymmetric abort
storage_service: node ops: Add error injections
service: node_ops: Make watchdog and heartbeat intervals configurable
(cherry picked from commit 2b44631ded)
Similar to "storage_service: Wait for normal state handler to finish in
bootstrap", this patch enables the check on the replace procedure.
(cherry picked from commit 5856e69462)
In storage_service::handle_state_normal, storage_service::notify_joined
will be called, which drops the rpc connections to the node that becomes
normal. This causes rpc calls to that node to fail with a
seastar::rpc::closed_error error.
Consider this:
- n1 in the cluster
- n2 is added to join the cluster
- n2 sees n1 is in normal status
- n2 starts bootstrap process
- notify_joined on n2 closes rpc connection to n1 in the middle of
bootstrap
- n2 fails to bootstrap
For example, during bootstrap with RBNO, we saw repair failed in a
test that sets ring_delay to zero and does not wait for gossip to
settle.
repair - repair[9cd0dbf8-4bca-48fc-9b1c-d9e80d0313a2]: sync data for
keyspace=system_distributed_everywhere, status=failed:
std::runtime_error ({shard 0: seastar::rpc::closed_error (connection is
closed)})
This patch fixes the race by waiting for the handle_state_normal handler
to finish before the bootstrap process.
Fixes #12764
Fixes #12956
(cherry picked from commit 53636167ca)
Node ops has the following procedure:
1. for node in sync_nodes:
     send prepare cmd to node
2. for node in sync_nodes:
     send heartbeat cmd to node
If any of the prepare cmds in step 1 takes longer than the heartbeat
watchdog timeout, the heartbeat in step 2 will be too late to update the
watchdog; as a result, the watchdog will abort the operation.
To prevent a slow prepare cmd from killing the node operation, we can
start the heartbeat earlier in the procedure.
Fixes #11011
Fixes #12969
Closes #12980
(cherry picked from commit ba919aa88a)
Cranelift-codegen 0.92.0 and wasmtime 5.0.0 have security issues
potentially allowing malicious UDFs to read some memory outside
the wasm sandbox. This patch updates them to versions 0.92.1
and 5.0.1 respectively, where the issues are fixed.
Fixes #13157
Closes #13171
(cherry picked from commit aad2afd417)
Wasmtime added some improvements in recent releases - particularly,
two security issues were patched in version 2.0.2. There were no
breaking changes for our use other than the strategy of returning
Traps - all of them are now anyhow::Errors instead, but we can
still downcast to them, and read the corresponding error message.
The cxx, anyhow and futures dependency versions now match the
versions saved in the Cargo.lock.
Closes #12830
(cherry picked from commit 8b756cb73f)
Ref #13157
Undefined behavior because the evaluation order is undefined.
With GCC, where evaluation is right-to-left, schema will be moved
once it's forwarded to make_flat_mutation_reader_from_mutations_v2().
The consequence is that memory tracking of mutation_fragment_v2
(for tracking only permit used by view update), which uses the schema,
can be incorrect. However, it's more likely that Scylla will crash
when estimating memory usage for a row, which accesses schema column
information using schema::column_at(), which in turn asserts that
the requested column really exists.
Fixes #13093.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Closes #13092
(cherry picked from commit 3fae46203d)
Fixes https://github.com/scylladb/scylladb/issues/13106
This commit removes the information that BYPASS CACHE
is an Enterprise-only feature and replaces that info
with the link to the BYPASS CACHE description.
Closes #13316
(cherry picked from commit 1cfea1f13c)
* tools/python3 279b6c1...cf7030a (1):
> dist: redhat: provide only a single version
s/%{version}/%{version}-%{release}/ in `Requires:` sections.
This enforces runtime dependencies on exactly the same
releases between scylla packages.
Fixes #13222
Signed-off-by: Kefu Chai <kefu.chai@scylladb.com>
(cherry picked from commit 7165551fd7)
The REST test test_storage_service.py::test_toppartitions_pk_needs_escaping
was flaky. It tests the toppartitions request, which unfortunately needs
to choose a sampling duration in advance, and we chose 1 second, which we
considered more than enough - and indeed typically even 1ms is enough!
But very rarely (we know of only one occurrence, in issue #13223) one
second is not enough.
Instead of increasing this 1 second and making this test even slower,
this patch takes a retry approach: the test starts with a 0.01-second
duration, and is then retried with increasing durations until it succeeds
or a 5-second duration is reached. This retry approach has two benefits:
1. It de-flakes the test (allowing a very slow test to take 5 seconds
instead of 1 second, which wasn't enough), and 2. At the same time it
makes a successful test much faster (it used to always take a full
second; now it takes 0.07 seconds on a dev build on my laptop).
A *failed* test may, in some cases, take 10 seconds after this patch
(although in some other cases, an error will be caught immediately),
but I consider this acceptable - this test should pass, after all,
and a failure indicates a regression and taking 10 seconds will be
the last of our worries in that case.
Fixes #13223.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes #13238
(cherry picked from commit c550e681d7)
This patch increases the connection timeout in the get_cql_cluster()
function in test/cql-pytest/run.py. This function is used to test
that Scylla came up, and also test/alternator/run uses it to set
up the authentication - which can only be done through CQL.
The Python driver has 2-second and 5-second default timeouts that should
have been more than enough for everybody (TM), but in #13239 we saw
that in one case it apparently wasn't enough. So to be extra safe,
let's increase the default connection-related timeouts to 60 seconds.
Note this change only affects the Scylla *boot* in the test/*/run
scripts, and it does not affect the actual tests - those have different
code to connect to Scylla (see cql_session() in test/cql-pytest/util.py),
and we already increased the timeouts there in #11289.
Fixes #13239
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes #13291
(cherry picked from commit 4fdcee8415)
sleep_abortable() is aborted on success, which causes a sleep_aborted
exception to be thrown. This causes scylla to throw every 100ms for
each pinged node. Throwing may reduce performance if it happens often.
Also, it spams the logs if --logger-log-level exception=trace is enabled.
Avoid this by swallowing the exception on cancellation.
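A minimal sketch of the pattern, assuming Seastar's sleep_abortable/abort_source API (the ping_loop function and loop shape are illustrative, not the actual patch):
```cpp
#include <seastar/core/abort_source.hh>
#include <seastar/core/coroutine.hh>
#include <seastar/core/sleep.hh>

using namespace std::chrono_literals;

// Ping loop sketch: swallow the expected sleep_aborted so cancellation
// doesn't surface as a thrown exception every 100ms per pinged node.
seastar::future<> ping_loop(seastar::abort_source& as) {
    while (!as.abort_requested()) {
        // ... ping the node ...
        try {
            co_await seastar::sleep_abortable(100ms, as);
        } catch (const seastar::sleep_aborted&) {
            break; // expected on cancellation - not an error
        }
    }
}
```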
Fixes #13278.
Closes #13279
(cherry picked from commit 99cb948eac)
Before this change, we use `round(random.random(), 5)` for
the value of the `bloom_filter_fp_chance` config option. There are
chances that this expression could return a number lower than or equal
to 6.71e-05.
But we do have a minimum for this option, which is defined by
`utils::bloom_calculations::probs`, and the minimal false positive
rate is 6.71e-05.
We are observing test failures where we are using 0 for
the option, and Scylla rightly rejected it with the error message of
```
bloom_filter_fp_chance must be larger than 6.71e-05 and less than or equal to 1.0 (got 0)
```
So, in this change, to address the test failure, we always use a number
greater than or equal to the minimum, to
ensure that the randomly picked number is in the range of supported
false positive rates.
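The guard amounts to clamping the random value to the supported minimum; a trivial sketch of the arithmetic (in C++ for illustration - the actual change is in the Python test):
```cpp
#include <algorithm>

// Clamp a random fp chance into the supported range: the minimum false
// positive rate from utils::bloom_calculations::probs is 6.71e-05.
double pick_bloom_filter_fp_chance(double r) { // r in [0, 1)
    constexpr double min_supported = 6.71e-05;
    return std::clamp(r, min_supported, 1.0);
}
```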
Fixes #13313
Signed-off-by: Kefu Chai <kefu.chai@scylladb.com>
Closes #13314
(cherry picked from commit 33f4012eeb)
Otherwise the null pointer is dereferenced.
Add a unit test reproducing the issue
and testing this fix.
Fixes #13636
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
(cherry picked from commit 12877ad026)
The tombstone_gc was documented as experimental in version 5.0.
It is no longer experimental in version 5.2.
This commit updates the information about the option.
Closes #13469
(cherry picked from commit a68b976c91)
In `make_group0_history_state_id_mutation`, when adding a new entry to
the group 0 history table, if the parameter `gc_older_than` is engaged,
we create a range tombstone in the mutation which deletes entries older
than the new one by `gc_older_than`. In particular if
`gc_older_than = 0`, we want to delete all older entries.
There was a subtle bug there: we were using millisecond resolution when
generating the tombstone, while the provided state IDs used microsecond
resolution. On a super fast machine it could happen that we managed to
perform two schema changes in a single millisecond; this happened
sometimes in `group0_test.test_group0_history_clearing_old_entries`
on our new CI/promotion machines, causing the test to fail because the
tombstone didn't clear the entry corresponding to the previous schema
change when performing the next schema change (since they happened in
the same millisecond).
Use microsecond resolution to fix that. The consecutive state IDs used
in group 0 mutations are guaranteed to be strictly monotonic at
microsecond resolution (see `generate_group0_state_id` in
service/raft/raft_group0_client.cc).
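The resolution mismatch is easy to demonstrate (illustrative values):
```cpp
#include <chrono>
using namespace std::chrono;

// Two state IDs generated within the same millisecond are distinct at
// microsecond resolution, but collapse to the same value when truncated
// to milliseconds (the bug) - so a millisecond-resolution tombstone
// cannot separate "older entries" from the new entry itself.
static_assert(microseconds{123456} != microseconds{123999});
static_assert(duration_cast<milliseconds>(microseconds{123456})
           == duration_cast<milliseconds>(microseconds{123999}));
```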
Fixes #13594
Closes #13604
* github.com:scylladb/scylladb:
db: system_keyspace: use microsecond resolution for group0_history range tombstone
utils: UUID_gen: accept decimicroseconds in min_time_UUID
(cherry picked from commit 10c1f1dc80)
This patch backports https://github.com/scylladb/scylladb/pull/12710 to branch-5.2. To resolve the conflicts that it's causing, it also includes
* https://github.com/scylladb/scylladb/pull/12680
* https://github.com/scylladb/scylladb/pull/12681
Closes #13542
* github.com:scylladb/scylladb:
uda: change the UDF used in a UDA if it's replaced
functions: add helper same_signature method
uda: return aggregate functions as shared pointers
udf: also check reducefunc to confirm that a UDF is not used in a UDA
udf: fix dropping UDFs that share names with other UDFs used in UDAs
pytest: add optional argument for new_function argument types
udt: disallow dropping a user type used in a user function
A problem in compaction reevaluation can cause the SSTable set to be left uncompacted for an indefinite amount of time, potentially causing space and read amplification to be suboptimal.
Two reevaluation problems are being fixed: one after off-strategy compaction ended, and another in the compaction manager, which intends to periodically reevaluate the need for compaction.
Fixes https://github.com/scylladb/scylladb/issues/13429.
Fixes https://github.com/scylladb/scylladb/issues/13430.
Closes #13431
* github.com:scylladb/scylladb:
compaction: Make compaction reevaluation actually periodic
replica: Reevaluate regular compaction on off-strategy completion
(cherry picked from commit 9a02315c6b)
The purpose of `_stop` is to remember whether the consumption of the
last partition was interrupted or it was consumed fully. In the former
case, the compactor allows retrieving the compaction state for the given
partition, so that its compaction can be resumed at a later point in
time.
Currently, `_stop` is set to `stop_iteration::yes` whenever the return
value of any of the `consume()` methods is also `stop_iteration::yes`.
Meaning, if the consuming of the partition is interrupted, this is
remembered in `_stop`.
However, a partition whose consumption was interrupted is not always
continued later. Sometimes consumption of a partition is interrupted
because the partition is not interesting and the downstream consumer
wants to stop it. In these cases the compactor should not return an
engaged optional from `detach_state()`, because there is no state to
detach; the state should be thrown away. This was handled incorrectly so
far and is fixed in this patch by overwriting `_stop` in
`consume_partition_end()` with whatever the downstream consumer returns.
Meaning, if they want to skip the partition, then `_stop` is reset to
`stop_iteration::no` and `detach_state()` will return a disengaged
optional, as it should in this case.
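A reduced sketch of the fixed behavior (illustrative; not the actual compactor code):
```cpp
enum class stop_iteration { no, yes };

struct compactor {
    stop_iteration _stop = stop_iteration::no;

    // The fix: at partition end, overwrite _stop with the downstream
    // consumer's verdict instead of keeping a sticky `yes`. If the consumer
    // merely skipped an uninteresting partition (`no`), there is no
    // mid-partition state worth detaching.
    stop_iteration consume_end_of_partition(stop_iteration downstream) {
        _stop = downstream;
        return downstream;
    }

    // detach_state() should only return an engaged optional when _stop is
    // `yes`, i.e. consumption was genuinely interrupted mid-partition.
    bool has_detachable_state() const { return _stop == stop_iteration::yes; }
};
```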
Fixes: #12629
Closes #13365
(cherry picked from commit bae62f899d)
Currently, if a UDA uses a UDF that's being replaced,
the UDA will still keep using the old UDF until the
node is restarted.
This patch fixes this behavior by checking all UDAs
when replacing a UDF and updating them if necessary.
Fixes #12709
(cherry picked from commit 02bfac0c66)
When deciding whether two functions have the same
signature, we have to check if they have the same name
and parameter types. Additionally, if they're represented
by pointers, we need to check if any of them is a nullptr.
This logic is used multiple times, so it's extracted to
a separate function.
To use this function, the `used_by_user_aggregate` method
now takes a function instead of a name and types list - we
can do it because we always use it with an existing user
function (that we're trying to drop).
The method will also be useful when we're not dropping,
but replacing, a user function.
(cherry picked from commit 58987215dc)
We will want to reuse the functions that we get from an aggregate
without making a deep copy, and it's only possible if we get
pointers from the aggregate instead of actual values.
(cherry picked from commit 20069372e7)
When dropping a UDF, we check that it's not being used in any UDAs
and fail otherwise. However, we're only checking its state function
and final function, and it may also be used as its reduce function.
This patch adds the missing checks and a test for them.
(cherry picked from commit ef1dac813b)
Currently, when dropping a function, we only check if there exists
an aggregate that uses a function with the same name as its state
function or final function. This may cause the drop to fail even
when it's just another UDF with the same name that's used in the
aggregate, and the actual dropped function is not used there.
This patch fixes this by checking not only the name of the
UDA's sfunc and finalfunc, but also their argument types.
(cherry picked from commit 49077dd144)
When multiple functions with the same name but different argument types
are created, the default drop statement for these functions will fail
because it does not include the argument types.
With this change, this problem can be worked around by specifying
argument types when creating the function, as this will cause the drop
statement to include them.
(cherry picked from commit 8791b0faf5)
Currently, nothing prevents us from dropping a user type
used in a user function, even though doing so may make us
unable to use the function correctly.
This patch prevents this behavior by checking all function
argument and return types when executing a drop type statement
and preventing it from completing if the type is referenced
by any of them.
(cherry picked from commit 86c61828e6)
Related: https://github.com/scylladb/scylla-enterprise/issues/2794
This commit adds the information about the metric changes
in version 2023.1 compared to version 5.2.
This commit is part of the 5.2-to-2023.1 upgrade guide and
must be backported to branch-5.2.
Closes #13506
(cherry picked from commit 989a75b2f7)
The patch doesn't apply cleanly, so a targeted backport PR was necessary.
I also needed to cherry-pick two patches from https://github.com/scylladb/scylladb/pull/13255 that the backported patch depends on. Decided against backporting the entire https://github.com/scylladb/scylladb/pull/13255 as it is quite an intrusive change.
Fixes: https://github.com/scylladb/scylladb/issues/11803
Closes #13515
* github.com:scylladb/scylladb:
reader_concurrency_semaphore: don't evict inactive readers needlessly
reader_concurrency_semaphore: add stats to record reason for queueing permits
reader_concurrency_semaphore: can_admit_read(): also return reason for rejection
Our documentation states that writing an item with "USING TTL 0" means it
should never expire. This should be true even if the table has a default
TTL. But Scylla mistakenly handled "USING TTL 0" exactly like having no
USING TTL at all (i.e., it took the default TTL, instead of unlimited).
We had two xfailing tests demonstrating that Scylla's behavior in this
is different from Cassandra. Scylla's behavior in this case was also
undocumented.
By the way, Cassandra used to have the same bug (CASSANDRA-11207) but
it was fixed already in 2016 (Cassandra 3.6).
So in this patch we fix Scylla's "USING TTL 0" behavior to match the
documentation and Cassandra's behavior since 2016. One xfailing test
starts to pass, and the second test gets past this bug and fails on a
different one. This patch also adds a third test for "USING TTL ?"
with UNSET_VALUE - it behaves, on both Scylla and Cassandra, like a
missing "USING TTL".
The origin of this bug was that after parsing the statement, we saved
the USING TTL in an integer, and used 0 for the case of no USING TTL
given. This meant that we couldn't tell if we have USING TTL 0 or
no USING TTL at all. This patch uses an std::optional so we can tell
the case of a missing USING TTL from the case of USING TTL 0.
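A minimal sketch of the distinction (names are illustrative, not the actual ScyllaDB code):
```cpp
#include <cstdint>
#include <optional>

// Keeping the parsed TTL in an optional distinguishes "USING TTL 0"
// (never expire, honored as-is) from no USING TTL at all (fall back to
// the table's default TTL), which a plain int with 0-as-sentinel cannot.
int32_t effective_ttl(std::optional<int32_t> stmt_ttl, int32_t table_default_ttl) {
    if (stmt_ttl) {
        return *stmt_ttl;      // 0 means "never expire"
    }
    return table_default_ttl;  // no USING TTL: use the table default
}
```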
Fixes #6447
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes #13079
(cherry picked from commit a4a318f394)
This patch fixes #12475, where an aggregation (e.g., COUNT(*), MIN(v))
of absolutely no partitions (e.g., "WHERE p = null" or "WHERE p in ()")
resulted in an internal error instead of the "zero" result that each
aggregator expects (e.g., 0 for COUNT, null for MIN).
The problem is that normally our aggregator forwarder picks the nodes
which hold the relevant partition(s), forwards the request to each of
them, and then combines these results. When there are no partitions,
the query is sent to no node, and we end up with an empty result set
instead of the "zero" results. So in this patch we recognize this
case and build those "zero" results (as mentioned above, these aren't
always 0 and depend on the aggregation function!).
The patch also adds two tests reproducing this issue in a fairly general
way (e.g., several aggregators, different aggregation functions) and
confirming the patch fixes the bug.
The test also includes two additional tests for COUNT aggregation, which
uncovered an incompatibility with Cassandra which is still not fixed -
so these tests are marked "xfail":
Refs #12477: Combining COUNT with GROUP BY results in empty results
in Cassandra, and one result with an empty count in Scylla.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes #12715
(cherry picked from commit 3ba011c2be)
The total disk space used metric is telling the amount of
disk space ever used, which is wrong. It should tell the size of
all sstables being used plus the ones waiting to be deleted.
Live disk space used, by this definition, shouldn't account for the
ones waiting to be deleted.
And live sstable count shouldn't account for sstables waiting to
be deleted.
Fix all that.
Fixes #12717.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
(cherry picked from commit 529a1239a9)
Some callees of update_pending_ranges use the variant of get_address_ranges()
which builds a hashmap of all <endpoint, owned range> pairs. For
everywhere_topology, the size of this map is quadratic in the number of
endpoints, making it big enough to cause contiguous allocations of tens of MiB
for clusters of realistic size, potentially causing trouble for the
allocator (as seen e.g. in #12724). This deserves a correction.
This patch removes the quadratic variant of get_address_ranges() and replaces
its uses with its linear counterpart.
Refs #10337
Refs #10817
Refs #10836
Refs #10837
Fixes #12724
(cherry picked from commit 9e57b21e0c)
There was a missing check in the validation of named
bind markers.
Let's say that a user prepares a query like:
```cql
INSERT INTO ks.tab (pk, ck, v) VALUES (:pk, :ck, :v)
```
Then they execute the query, but specify only
values for `:pk` and `:ck`.
We should detect that a value for `:v` is missing
and throw an invalid_request_exception.
Until now there was no such check; in the case of a missing variable,
invalid `query_options` were created and Scylla crashed.
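The added check boils down to something like the following (a self-contained sketch with illustrative names; the actual code validates against the prepared statement's metadata):
```cpp
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <vector>

// Every named bind marker in the prepared statement must have a value;
// otherwise reject the request instead of building invalid query_options.
void validate_named_values(const std::vector<std::string>& marker_names,
                           const std::unordered_map<std::string, std::string>& values) {
    for (const auto& name : marker_names) {
        if (!values.contains(name)) {
            throw std::invalid_argument("missing value for bind marker :" + name);
        }
    }
}
```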
Sadly it's impossible to create a regression test
using `cql-pytest` or `boost`.
`cql-pytest` uses the python driver, which silently
ignores missing named bind variables, deciding
that the user meant to send an UNSET_VALUE for them.
When given values like `{'pk': 1, 'ck': 2}`, it will automatically
extend them to `{'pk': 1, 'ck': 2, 'v': UNSET_VALUE}`.
In `boost` I tried to use `cql_test_env`,
but it only has methods which take valid `query_options`
as a parameter. I could create a separate unit test
for the creation and validation of `query_options`
but it won't be a true end-to-end test like `cql-pytest`.
The bug was found using the rust driver,
the reproducer is available in the issue description.
Fixes: #12727
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
Closes #12730
(cherry picked from commit 2a5ed115ca)
The test test_scan.py::test_scan_long_partition_tombstone_string
checks that a full-table Scan operation ends a page in the middle of
a very long string of partition tombstones, and does NOT scan the
entire table in one page (if we did that, getting a single page could
take an unbounded amount of time).
The test is currently flaky, having failed in CI runs three times in
the past two months.
The reason for the flakiness is that we don't know exactly how long
we need to make the sequence of partition tombstones in the test before
we can be absolutely sure a single page will not read this entire sequence.
For single-partition scans we have the "query_tombstone_page_limit"
configuration parameter, which tells us exactly how long we need to
make the sequence of row tombstones. But for a full-table scan of
partition tombstones, the situation is more complicated - because the
scan is done on several vnodes in parallel, and each of
them needs to read query_tombstone_page_limit before it stops.
In my experiments, using query_tombstone_page_limit * 4 consecutive tombstones
was always enough - I ran this test hundreds of times and it didn't fail
once. But since it did fail on Jenkins very rarely (3 times in the last
two months), maybe the multiplier 4 isn't enough. So this patch doubles
it to 8. Hopefully this would be enough for anyone (TM).
This makes this test even bigger and slower than it was. To make it
faster, I changed this test's write isolation mode from the default
always_use_lwt to forbid_rmw (i.e., not using LWT). This keeps the test's
total run time similar to what it was before this patch - around
0.5 seconds in dev build mode on my laptop.
Fixes #12817
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes #12819
(cherry picked from commit 14cdd034ee)
Inactive readers should only be evicted to free up resources for waiting
readers. Evicting them when waiters are not admitted for any other
reason than resources is wasteful and leads to extra load later on when
these evicted readers have to be recreated and requeued.
This patch changes the logic on both the registering path and the
admission path to not evict inactive readers unless there are readers
actually waiting on resources.
A unit-test is also added, reproducing the overly-aggressive eviction and
checking that it doesn't happen anymore.
Fixes: #11803
Closes #13286
(cherry picked from commit bd57471e54)
When diagnosing problems, knowing why permits were queued is very
valuable. Record the reason in new stats, one for each reason a permit
can be queued.
(cherry picked from commit 7b701ac52e)
After a failed topology operation, like bootstrap / decommission /
removenode, the cluster might contain a garbage entry in either token
ring or group 0. This entry can be cleaned-up by executing removenode on
any other node, pointing to the node that failed to bootstrap or leave
the cluster.
Document this procedure, including a method of finding the host ID of a
garbage entry.
Add references in other documents.
Fixes: #13122
Closes #13186
(cherry picked from commit c2a2996c2b)
This commit removes the Enterprise upgrade guides from
the Open Source documentation. The Enterprise upgrade guides
should only be available in the Enterprise documentation,
with the source files stored in scylla-enterprise.git.
In addition, this commit:
- adds the links to the Enterprise user guides in the Enterprise
documentation at https://enterprise.docs.scylladb.com/
- adds the redirections for the removed pages to avoid
breaking any links.
This commit must be reverted in scylla-enterprise.git.
(cherry picked from commit 61bc05ae49)
Closes #13473
Related: https://github.com/scylladb/scylla-enterprise/issues/2770
This commit adds the upgrade guide from ScyllaDB Open Source 5.2
to ScyllaDB Enterprise 2023.1.
This commit does not cover metric updates (the metrics file has no
content, which needs to be added in another PR).
As this is an upgrade guide, this commit must be merged to master and
backported to branch-5.2 and branch-2023.1 in scylla-enterprise.git.
Closes #13294
(cherry picked from commit 595325c11b)
Fixes #12810
We did not update total_size_on_disk in commitlog totals when o_dsync was off.
This means we essentially ran with no registered footprint, also causing broken comparisons in delete_segments.
Closes #12950
* github.com:scylladb/scylladb:
commitlog: Fix updating of total_size_on_disk on segment alloc when o_dsync is off
commitlog: change type of stored size
(cherry picked from commit e70be47276)
The `database::stop` method is sometimes hanging and it's always hard to spot where exactly it sleeps. A few more logging messages would make this much simpler.
refs: #13100
refs: #10941
Closes #13141
* github.com:scylladb/scylladb:
database: Increase verbosity of database::stop() method
large_data_handler: Increase verbosity on shutdown
large_data_handler: Coroutinize .stop() method
(cherry picked from commit e22b27a107)
This reverts commit c6087cf3a0.
Said commit can cause a deadlock when 2 or more repairs compete for
locks on 2 or more nodes. Consider the following scenario:
Nodes n1 and n2 in the cluster, 1 shard per node, rf = 2, each shard has
1 available unit for the reader lock
n1 starts repair r1
r1-n1 (instance of r1 on node1) takes the reader lock on node1
n2 starts repair r2
r2-n2 (instance of r2 on node2) takes the reader lock on node2
r1-n2 will fail to take the reader lock on node2
r2-n1 will fail to take the reader lock on node1
As a result, r1 and r2 could not make progress and deadlock happens.
The complexity comes from the fact that a repair job needs locks on more
than one node. It is not guaranteed that all the participant nodes could
take the lock in one shot.
There is no simple solution to this, so we have to revert this locking
mechanism and look for another way to prevent reader thrashing when
repairing nodes with mismatching shard count.
Fixes: #12693
Closes #13266
(cherry picked from commit 7699904c54)
We currently don't clean up the system_distributed.view_build_status
table after removed nodes. This can cause a false-positive check for
whether view update generation is needed for streaming.
The proper fix is to clean up this table, but that will be more
involved, and even when done, it might not be immediate. So until then,
and to be on the safe side, filter out entries belonging to unknown
hosts from said table.
Fixes: #11905
Refs: #11836
Closes #11860
(cherry picked from commit 84a69b6adb)
`paxos_response_handler::learn_decision` was calling
`cdc_service::augment_mutation_call` concurrently with
`storage_proxy::mutate_internal`. `augment_mutation_call` was selecting
rows from the base table in order to create the preimage, while
`mutate_internal` was writing rows to the table. It was therefore
possible for the preimage to observe the update that it accompanied,
which doesn't make any sense, because the preimage is supposed to show
the state before the update.
Fix this by performing the operations sequentially. We can still perform
the CDC mutation write concurrently with the base mutation write.
`cdc_with_lwt_test` was sometimes failing in debug mode due to this bug
and was marked flaky. Unmark it.
Fixes #12098
(cherry picked from commit 1ef113691a)
If request processing ended with an error, it is worth
sending the error to the client through
make_error/write_response. Previously in this case we
just wrote a message to the log and didn't handle the
client connection in any way. As a result, the only
thing the client got in this case was timeout error.
A new test_batch_with_error is added. It is quite
difficult to reproduce the error condition in a test,
so we use error injection instead. Passing injection_key
in the body of the request ensures that the exception
will be thrown only for this test request and
will not affect other requests that
the driver may send in the background.
Closes: scylladb#12104
(cherry picked from commit a4cf509c3d)
Fixes https://github.com/scylladb/scylladb/issues/13138
Fixes https://github.com/scylladb/scylladb/issues/13153
This PR:
- Fixes outdated information about the recommended OS. Since version 5.2, the recommended OS should be Ubuntu 22.04 because that OS is used for building the ScyllaDB image.
- Adds the OS support information for version 5.2.
This PR (both commits) needs to be backported to branch-5.2.
Closes #13188
* github.com:scylladb/scylladb:
doc: Add OS support for version 5.2
doc: Updates the recommended OS to be Ubuntu 22.04
(cherry picked from commit f4b5679804)
This PR backports 2f4a793457 to branch-5.2. Said patch depends on some other patches that are not part of any release yet.
This PR should apply to 5.1 and 5.0 too.
Closes #13162
* github.com:scylladb/scylladb:
reader_concurrency_semaphore: clear_inactive_reads(): defer evicting to evict()
reader_permit: expose operator<<(reader_permit::state)
reader_permit: add get_state() accessor
The series fixes the `make_nonforwardable` reader: it shouldn't emit `partition_end` for the previous partition after `next_partition()` and `fast_forward_to()`.
Fixes: #12249
Closes #12978
* github.com:scylladb/scylladb:
flat_mutation_reader_test: cleanup, seastar::async -> SEASTAR_THREAD_TEST_CASE
make_nonforwardable: test through run_mutation_source_tests
make_nonforwardable: next_partition and fast_forward_to when single_partition is true
make_forwardable: fix next_partition
flat_mutation_reader_v2: drop forward_buffer_to
nonforwardable reader: fix indentation
nonforwardable reader: refactor, extract reset_partition
nonforwardable reader: add more tests
nonforwardable reader: no partition_end after fast_forward_to()
nonforwardable reader: no partition_end after next_partition()
nonforwardable reader: no partition_end for empty reader
row_cache: pass partition_start though nonforwardable reader
(cherry picked from commit 46efdfa1a1)
Instead of open-coding the same, in an incomplete way.
clear_inactive_reads() does incomplete eviction in several ways:
* it doesn't decrement _stats.inactive_reads
* it doesn't set the permit to evicted state
* it doesn't cancel the ttl timer (if any)
* it doesn't call the eviction notifier on the permit (if there is one)
The list goes on. We already have an evict() method that does all this
correctly; use that instead of the current badly open-coded alternative.
This patch also enhances the existing test for clear_inactive_reads()
and adds a new one specifically for `stop()` being called while having
inactive reads.
Fixes: #13048
Closes #13049
(cherry picked from commit 2f4a793457)
There was a bug in `expr::search_and_replace`:
it didn't preserve the `order` field of binary_operator.
The `order` field is used to mark relations created
using the SCYLLA_CLUSTERING_BOUND.
It is a CQL feature used for internal queries inside Scylla.
It means that we should handle the restriction as a raw
clustering bound, not as an expression in the CQL language.
Losing the SCYLLA_CLUSTERING_BOUND marker could cause issues,
the database could end up selecting the wrong clustering ranges.
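A toy model of the fix (illustrative names; the real code rewrites the CQL expression AST):
```cpp
// A rewrite that reconstructs a binary_operator must copy every field,
// including the `order` marker that flags SCYLLA_CLUSTERING_BOUND
// relations - dropping it was the bug.
enum class comparison_order { cql, clustering };

struct binary_operator {
    int lhs;
    int rhs;
    comparison_order order;
};

binary_operator replace_operands(const binary_operator& op, int lhs, int rhs) {
    return binary_operator{lhs, rhs, op.order}; // the fix: preserve op.order
}
```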
Fixes: #13055
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
Closes #13056
(cherry picked from commit aa604bd935)
EOF is only guaranteed to be set if one tried to read past the end of the
file. So when checking for EOF, also try to read some more. This
should force the EOF flag into a correct value. We can then check that
the read yielded 0 bytes.
This should ensure that `validate_checksums()` will not falsely declare
the validation to have failed.
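The check boils down to the following pattern (a std::istream sketch for illustration; the actual code uses the sstable checksum-validation reader):
```cpp
#include <istream>

// EOF is only guaranteed to be set after attempting to read *past* the
// end, so poke the stream with one more read before trusting eof().
// Note: the probe consumes a byte when not actually at the end.
bool definitely_at_eof(std::istream& in) {
    char c;
    in.read(&c, 1);                 // forces the EOF flag if we're at the end
    return in.eof() && in.gcount() == 0;
}
```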
Fixes: #11190
Closes #12696
(cherry picked from commit 693c22595a)
This commit makes the following changes to the docs landing page:
- Adds the ScyllaDB enterprise docs as one of three tiles.
- Modifies the three tiles to reflect the three flavors of ScyllaDB.
- Moves the "New to ScyllaDB? Start here!" under the page title.
- Renames "Our Products" to "Other Products" to list the products other
than ScyllaDB itself. In addtition, the boxes are enlarged from to
large-4 to look better.
The major purpose of this commit is to expose the ScyllaDB
documentation.
docs: fix the link
(cherry picked from commit 27bb8c2302)
Closes #13086
This PR adds a note to the Alternator TTL section to specify in which Open Source and Enterprise versions the feature was promoted from experimental to non-experimental.
The challenge here is that OSS and Enterprise are (still) **documented together**, but they're **not in sync** in promoting the TTL feature: it's still experimental in 5.1 (released) but no longer experimental in 2022.2 (to be released soon).
We can take one of the following approaches:
a) Merge this PR with master and ask the 2022.2 users to refer to master.
b) Merge this PR with master and then backport to branch-5.1. If we choose this approach, it is necessary to backport https://github.com/scylladb/scylladb/pull/11997 beforehand to avoid conflicts.
I'd opt for a) because it makes more sense from the OSS perspective and helps us avoid mess and backporting.
Closes #12295
* github.com:scylladb/scylladb:
doc: fix the version in the comment on removing the note
doc: specify the versions where Alternator TTL is no longer experimental
(cherry picked from commit d5dee43be7)
It's known that reading large cells in reverse causes large allocations.
Source: https://github.com/scylladb/scylladb/issues/11642
The loading is preliminary work for splitting large partitions into
fragments composing a run, to then be able to later read such a run
in an efficient way using the position metadata.
The splitting is not turned on yet, anywhere. Therefore, we can
temporarily disable the loading, as a way to avoid regressions in
stable versions. Large allocations can cause stalls due to foreground
memory eviction kicking in.
The default values for position metadata say that the first and last
position include all clustering rows. They aren't used anywhere
other than by sstable_run to determine if a run is disjoint at
the clustering level, but given that no splitting is done yet, it
does not really matter.
Unit tests relying on position metadata were adjusted to enable
the loading, such that they can still pass.
Fixes #11642.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Closes #12979
(cherry picked from commit d73ffe7220)
Currently all consumed range tombstone changes are unconditionally
forwarded to the validator. Even if they are shadowed by a higher level
tombstone and/or purgeable. This can result in a situation where a range
tombstone change was seen by the validator but not passed to the
consumer. The validator expects the range tombstone change to be closed
by end-of-partition but the end fragment won't come as the tombstone was
dropped, resulting in a false-positive validation failure.
Fix by passing to the validator only tombstones that are actually
passed to the consumer too.
Fixes: #12575
Closes #12578
(cherry picked from commit e2c9cdb576)
Check the first fragment before dereferencing it; the fragment might be
empty, in which case move to the next one.
Found by running range scan tests with random schema and random data.
Fixes: #12821
Fixes: #12823
Fixes: #12708
Closes #12824
(cherry picked from commit ef548e654d)
After 5badf20c7a, the applier fiber does not
stop after it gets an abort error from a state machine, which may trigger an
assertion because the previous batch is not applied. Fix it.
Fixes #12863
(cherry picked from commit 9bdef9158e)
We should never return a reference to a local variable.
So in this change, a reference to a static variable is returned
instead. This should address the following warning from Clang 17:
```
/home/kefu/dev/scylladb/tools/schema_loader.cc:146:16: error: returning reference to local temporary object [-Werror,-Wreturn-stack-address]
return {};
^~
```
Fixes #12875
Signed-off-by: Kefu Chai <kefu.chai@scylladb.com>
Closes #12876
(cherry picked from commit 6eab8720c4)
We currently configure only TimeoutStartSec, but probably it's not
enough to prevent coredump timeout, since TimeoutStartSec is the maximum
waiting time for service startup, and there is another directive to
specify the maximum service running time (RuntimeMaxSec).
To fix the problem, we should specify RuntimeMaxSec and TimeoutSec (it
configures both TimeoutStartSec and TimeoutStopSec).
Fixes #5430
Closes #12757
(cherry picked from commit bf27fdeaa2)
Related: https://github.com/scylladb/scylladb/issues/12658.
This PR fixes the bug in the upgrade guides for the released versions.
Closes #12679
* github.com:scylladb/scylladb:
doc: fix the service name in the upgrade guide for patch releases versions 2022
doc: fix the service name in the upgrade guide from 2021.1 to 2022.1
(cherry picked from commit 325246ab2a)
Both patches are important to fix inefficiencies when updating the backlog tracker, which can manifest as a reactor stall on a special event like a schema change.
No conflicts when backporting.
Regression since 1d9f53c881, which is present in branch 5.1 onwards.
Closes #12851
* github.com:scylladb/scylladb:
compaction: Fix inefficiency when updating LCS backlog tracker
table: Fix quadratic behavior when inserting sstables into tracker on schema change
LCS backlog tracker uses STCS tracker for L0. Turns out the LCS tracker
is calling the STCS tracker's replace_sstables() with empty arguments
even when *only* higher levels (> 0) had sstables replaced.
This unnecessary call to STCS tracker will cause it to recompute
the L0 backlog, yielding the same value as before.
As LCS has a fragment size of 0.16G on higher levels, we may be
updating the tracker multiple times during incremental compaction,
which operates on SSTables on higher levels.
Inefficiency is fixed by only updating the STCS tracker if any
L0 sstable is being added or removed from the table.
This may be fixing a quadratic behavior during boot or refresh,
as new sstables are loaded one by one.
Higher levels have a substantially higher number of sstables;
therefore, updating the STCS tracker only when level 0 changes reduces
significantly the number of times the L0 backlog is recomputed.
Refs #12499.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Closes #12676
(cherry picked from commit 1b2140e416)
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Each time the backlog tracker is informed about a new or old sstable, it
will recompute the static part of the backlog, whose complexity is
proportional to the total number of sstables.
On schema change, we're calling backlog_tracker::replace_sstables()
for each existing sstable, therefore producing O(N ^ 2) complexity.
Fixes #12499.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Closes #12593
(cherry picked from commit 87ee547120)
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
The "cluster manager" used by the topology test suite uses a UNIX-domain
socket to communicate between the cluster manager and the individual tests.
The socket is currently located in the test directory but there is a
problem: In Linux the length of the path used as a UNIX-domain socket
address is limited to just a little over 100 bytes. In Jenkins runs, the
test directory names are very long, and we sometimes go over this length
limit and the result is that test.py fails creating this socket.
In this patch we simply put the socket in /tmp instead of the test
directory. We only need to do this change in one place - the cluster
manager, as it already passes the socket path to the individual tests
(using the "--manager-api" option).
Tested by cloning Scylla in a very long directory name.
A test like ./test.py --mode=dev test_concurrent_schema fails before
this patch, and passes with it.
Fixes #12622
Closes #12678
(cherry picked from commit 681a066923)
`ScyllaClusterManager` is used to run a sequence of test cases from
a single test file. Between two consecutive tests, if the previous test
left the cluster 'dirty', meaning the cluster cannot be reused, it would
free up space in the pool (using `steal`), stop the cluster, then get a
new cluster from the pool.
Between the `steal` and the `get`, a concurrent test run (with its own
instance of `ScyllaClusterManager`) would start, because there was free
space in the pool.
This resulted in undesirable behavior when we ran tests with
`--repeat X` for a large `X`: we would start with e.g. 4 concurrent
runs of a test file, because the pool size was 4. As soon as one of the
runs freed up space in the pool, we would start another concurrent run.
Soon we'd end up with 8 concurrent runs. Then 16 concurrent runs. And so
on. We would have a large number of concurrent runs, even though the
original 4 runs hadn't finished yet. All of these concurrent runs would
compete waiting on the pool, and waiting for space in the pool would
take longer and longer (the duration is linear w.r.t. the number of
concurrent competing runs). Tests would then time out because they would
have to wait too long.
Fix that by using the new `replace_dirty` function introduced to the
pool. This function frees up space by returning a dirty cluster and then
immediately takes it away to be used for a new cluster. Thanks to this,
we will only have at most as many concurrent runs as the pool size. For
example with --repeat 8 and pool size 4, we would run 4 concurrent runs
and start the 5th run only when one of the original 4 runs finishes,
then the 6th run when a second run finishes and so on.
The fix is preceded by a refactor that replaces `steal` with `put(is_dirty=True)`
and a `destroy` function passed to the pool (now the pool is responsible
for stopping the cluster and releasing its IPs).
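An illustrative model of the `replace_dirty` idea, written in C++ for this sketch (the real pool is Python code in test/pylib; names here are hypothetical):
```cpp
#include <functional>

// Each leased cluster occupies one of `_capacity` slots. The old flow
// released the slot (steal/put), stopped the cluster, then re-acquired a
// slot (get); in that window another run could take the freed slot and
// push concurrency past the pool size.
template <typename Cluster>
class cluster_pool {
    int _leased = 0;
    const int _capacity;
    std::function<Cluster()> _build;
    std::function<void(Cluster)> _destroy;
public:
    cluster_pool(int capacity, std::function<Cluster()> build,
                 std::function<void(Cluster)> destroy)
        : _capacity(capacity), _build(std::move(build)), _destroy(std::move(destroy)) {}

    // New flow: the dirty cluster is destroyed and replaced without ever
    // decrementing _leased, so no waiter can observe a free slot in between
    // and the number of concurrent runs stays bounded by _capacity.
    Cluster replace_dirty(Cluster dirty) {
        _destroy(std::move(dirty));
        return _build();  // _leased is unchanged: the caller keeps its slot
    }
};
```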
Fixes#11757
Closes#12549
* github.com:scylladb/scylladb:
test/pylib: scylla_cluster: ensure there's space in the cluster pool when running a sequence of tests
test/pylib: pool: introduce `replace_dirty`
test/pylib: pool: replace `steal` with `put(is_dirty=True)`
(cherry picked from commit 132af20057)
From reviews of https://github.com/scylladb/scylladb/pull/12569, avoid
using `async with` and access the `Pool` of clusters with
`get()`/`put()`.
Closes#12612
* github.com:scylladb/scylladb:
test.py: manual cluster handling for PythonSuite
test.py: stop cluster if PythonSuite fails to start
test.py: minor fix for failed PythonSuite test
(cherry picked from commit 5bc7f0732e)
If the after-test check fails (is_after_test_ok is False), discard the cluster and raise an exception so the context manager (pool) does not recycle it.
Ignore the exception re-raised by the context manager.
Fixes#12360
Closes#12569
* github.com:scylladb/scylladb:
test.py: handle broken clusters for Python suite
test.py: Pool discard method
(cherry picked from commit 54f174a1f4)
`ScyllaCluster.server_stop` had this piece of code:
```
server = self.running.pop(server_id)
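# BUG (what this patch fixes): the server has already left self.running
# here; if the stop below hangs or fails, it ends up in neither
# self.running nor self.stopped, so teardown won't kill it before its
# IP is released.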
if gracefully:
await server.stop_gracefully()
else:
await server.stop()
self.stopped[server_id] = server
```
We observed `stop_gracefully()` failing due to a server hanging during
shutdown. We then ended up in a state where neither `self.running` nor
`self.stopped` had this server. Later, when releasing the cluster and
its IPs, we would release that server's IP - but the server might have
still been running (all servers in `self.running` are killed before
releasing IPs, but this one wasn't in `self.running`).
Fix this by popping the server from `self.running` only after
`stop_gracefully`/`stop` finishes.
Make an analogous fix in `server_start`: put `server` into
`self.running` *before* we actually start it. If the start fails, the
server will be considered "running" even though it isn't necessarily,
but that is OK - if it isn't running, then trying to stop it later will
simply do nothing; if it is actually running, we will kill it (which we
should do) when clearing after the cluster; and we don't leak it.
Closes#12613
(cherry picked from commit a0ff33e777)
Don't use a range scan, which is very inefficient, to perform the query that checks for CQL availability.
Improve logging when waiting for server startup times out. Provide details about the failure: whether we managed to obtain the Host ID of the server and whether we managed to establish a CQL connection.
Closes#12588
* github.com:scylladb/scylladb:
test/pylib: scylla_cluster: better logging for timeout on server startup
test/pylib: scylla_cluster: use less expensive query to check for CQL availability
(cherry picked from commit ccc2c6b5dd)
If an endpoint handler throws an exception, the details of the exception
are not returned to the client. Normally this is desirable so that
information is not leaked, but in this test framework we do want to
return the details to the client so it can log a useful error message.
Do it by wrapping every handler into a catch clause that returns
the exception message.
Also modify a bit how HTTPErrors are rendered so it's easier to discern
the actual body of the error from other details (such as the params used
to make the request etc.)
Before:
```
E test.pylib.rest_client.HTTPError: HTTP error 500: 500 Internal Server Error
E
E Server got itself in trouble, params None, json None, uri http+unix://api/cluster/before-test/test_stuff
```
After:
```
E test.pylib.rest_client.HTTPError: HTTP error 500, uri: http+unix://api/cluster/before-test/test_stuff, params: None, json: None, body:
E Failed to start server at host 127.155.129.1.
E Check the log files:
E /home/kbraun/dev/scylladb/testlog/test.py.dev.log
E /home/kbraun/dev/scylladb/testlog/dev/scylla-1.log
```
Closes#12563
(cherry picked from commit 2f84e820fd)
When we obtained a new cluster for a test case after the previous test
case left a dirty cluster, we would release the old cluster's used IP
addresses (`_before_test` function). However, we would not release the
last cluster's IP after the last test case. We would run out of IPs with
sufficiently many test files or `--repeat` runs. Fix this.
Also reorder the operations a bit: stop the cluster (and release its
IPs) before freeing up space in the cluster pool (i.e. call
`self.cluster.stop()` before `self.clusters.steal()`). This reduces
concurrency a bit - fewer Scyllas running at the same time, which is
good (the pool size gives a limit on the desired max number of
concurrently running clusters). Killing a cluster is quick so it won't
make a significant difference for the next guy waiting on the pool.
Closes#12564
(cherry picked from commit 3ed3966f13)
If a cluster fails to boot, it saves the exception in the
`self.start_exception` variable; the exception will be rethrown when
a test tries to start using this cluster. As explained in `before_test`:
```
def before_test(self, name) -> None:
"""Check that the cluster is ready for a test. If
there was a start error, throw it here - the server is
running when it's added to the pool, which can't be attributed
to any specific test, throwing it here would stop a specific
test."""
```
It's arguable whether we should blame some random test for a failure
that it didn't cause, but nevertheless, there's a problem here: the
`start_exception` will be rethrown and the test will fail, but then the
cluster will be simply returned to the pool and the next test will
attempt to use it... and so on.
Prevent this by marking the cluster as dirty the first time we rethrow
the exception.
Closes#12560
(cherry picked from commit 147dd73996)
Commitlog O_DSYNC is intended to make Raft and schema writes durable
in the face of power loss. To make O_DSYNC performant, we preallocate
the commitlog segments, so that the commitlog writes only change file
data and not file metadata (which would require the filesystem to commit
its own log).
However, in tests, this causes each ScyllaDB instance to write 384MB
of commitlog segments. This overloads the disks and slows everything
down.
Fix this by disabling O_DSYNC (and therefore preallocation) during
the tests. They can't survive power loss, and run with
--unsafe-bypass-fsync anyway.
Closes#12542
(cherry picked from commit 9029b8dead)
There was a small chance that we called `timeout_src.request_abort()`
twice in the `with_timeout` function, first by timeout and then by
shutdown. `abort_source` fails on an assertion in this case. Fix this.
Fixes: #12512
Closes#12514
(cherry picked from commit 54170749b8)
Before this change, we returned the total memory managed by Seastar
in the "total" field of system.memory, but this value only reflects
the memory managed by Seastar's allocator. If
`reserve_additional_memory` is set when starting app_template,
Seastar's memory subsystem reserves a chunk of memory of the
specified size for the system, and takes the remaining memory. Since
f05d612da8, we set this value to 50MB for the wasmtime runtime; hence
the `TestRuntimeInfoTable.test_default_content` test in dtest
fails. The test expects the size passed via the `--memory` option
to be identical to the value reported by system.memory's
"total" field.
After this change, the "total" field takes the memory reserved
for WASM UDFs into account. The "total" field should reflect the total
size of memory used by Scylla, no matter how a certain portion
of the allocated memory is used.
Fixes#12522
Signed-off-by: Kefu Chai <kefu.chai@scylladb.com>
Closes#12573
(cherry picked from commit 4a0134a097)
Currently reverse types match the default case (false), even though they
might be wrapping a tuple type. One user-visible effect of this is that
a schema which has a reversed<frozen<UDT>> clustering key component
will have this component incorrectly represented in the schema CQL dump:
the UDT will lose the frozen attribute. When attempting to recreate
this schema based on the dump, it will fail, as only frozen UDTs are
allowed in primary key components.
Fixes: #12576
Closes#12579
(cherry picked from commit ebc100f74f)
Fixes#12601 (maybe?)
Sort the set of tables on ID. This should ensure we never
generate duplicates in a paged listing here. Can obviously miss things if they
are added between paged calls and end up with a "smaller" UUID/ARN, but that
is to be expected.
(cherry picked from commit da8adb4d26)
Since we're potentially searching for the row_lock in parallel with
acquiring the read_lock on the partition, we're racing with
row_locker::unlock, which may erase the _row_locks entry for the same
clustering key: there is no lock to protect it up until the partition
lock has been acquired and the lock_partition future is resolved.
This change moves the code that searches for or allocates the row lock
to _after_ the partition lock has been acquired, making sure we
synchronously start the read/write lock function on it, without
yielding, to prevent this use-after-free.
This adds an allocation for copying the clustering key in advance
even if a row_lock entry already exists, which wasn't needed before.
It only slows us down (a bit) when there is contention and the lock
already existed when we want to lock. In the fast path there
is no contention, and the code already had to create the lock
and copy the key. In any case, the penalty of copying the key once
is tiny compared to the rest of the work that view updates are doing.
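A greatly simplified, synchronous sketch of the reordering (stand-in types; the real row_locker is asynchronous and future-based):
```cpp
#include <functional>
#include <map>

struct rwlock {};            // stand-in for the real read/write lock
using clustering_key = int;  // stand-in; the real key is copied up front

struct row_locker {
    rwlock _partition_lock;
    std::map<clustering_key, rwlock> _row_locks;

    rwlock& lock_row(clustering_key ck, std::function<void(rwlock&)> lock_partition) {
        // May wait (and, in the real code, yield) for the partition lock.
        lock_partition(_partition_lock);
        // Only now, with the partition lock held and no further yields,
        // search for or allocate the row lock: row_locker::unlock can no
        // longer erase the _row_locks entry underneath us.
        return _row_locks[ck];
    }
};
```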
This is required on top of 5007ded2c1 as
seen in https://github.com/scylladb/scylladb/issues/12632
which is closely related to #12168 but demonstrates a different race
causing use-after-free.
Fixes#12632
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
(cherry picked from commit 4b5e324ecb)
Before this change, we constructed an sstring from a comma expression,
which evaluates to the return value of `name.size()`, while what we
expect is `sstring(const char*, size_t)`.
In this change:
* instead of passing only the size of the string_view,
both its address and size are used
* a `std::string_view` is constructed instead of an sstring, for better
performance, as we don't need to perform a deep copy
The issue is reported by GCC-13:
```
In file included from cql3/selection/selectable.cc:11:
cql3/selection/field_selector.hh:83:60: error: ignoring return value of function declared with 'nodiscard' attribute [-Werror,-Wunused-result]
auto sname = sstring(reinterpret_cast<const char*>(name.begin(), name.size()));
^~~~~~~~~~
```
Signed-off-by: Kefu Chai <kefu.chai@scylladb.com>
Closes#12666
(cherry picked from commit 186ceea009)
Fixes#12739.
Currently, segment file removal first calls `f.remove_file()` and
only later does `total_size_on_disk -= f.known_size()`.
However, `remove_file()` resets `known_size` to 0, so in effect
the freed space is not accounted for.
`total_size_on_disk` is not just a metric. It is also responsible
for deciding whether a segment should be recycled -- it is recycled
only if `total_size_on_disk - known_size < max_disk_size`.
Therefore this bug has dire performance consequences:
if `total_size_on_disk - known_size` ever exceeds `max_disk_size`,
the recycling of commitlog segments will stop permanently, because
`total_size_on_disk - known_size` will never go back below
`max_disk_size` due to the accounting bug. All new segments from this
point will be allocated from scratch.
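A minimal model of the ordering bug and its fix (stand-in types, not the actual commitlog code):
```cpp
#include <cstdint>

struct segment_file {
    uint64_t _known_size = 1 << 20;
    uint64_t known_size() const { return _known_size; }
    void remove_file() { _known_size = 0; /* also unlinks the file */ }
};

uint64_t total_size_on_disk = 64 << 20;

void delete_segment_buggy(segment_file& f) {
    f.remove_file();                       // resets known_size() to 0...
    total_size_on_disk -= f.known_size();  // ...so this subtracts nothing
}

void delete_segment_fixed(segment_file& f) {
    total_size_on_disk -= f.known_size();  // account while the size is known
    f.remove_file();
}
```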
The bug was uncovered by a QA performance test. It isn't easy to trigger --
it took the test 7 hours of constant high load to step into it.
However, the fact that the effect is permanent, and degrades the
performance of the cluster silently, makes the bug potentially quite severe.
The bug can be easily spotted with Prometheus as infinitely rising
`commitlog_total_size_on_disk` on the affected shards.
Fixes#12645
Closes#12646
(cherry picked from commit fa7e904cd6)
New clusters that use a fresh conf/scylla.yaml will have `consistent_cluster_management: true`, which will enable Raft, unless the user explicitly turns it off before booting the cluster.
People using existing yaml files will continue without Raft, unless consistent_cluster_management is explicitly requested during/after upgrade.
Also update the docs: cluster creation and node addition procedures.
Fixes#12572.
Closes#12585
* github.com:scylladb/scylladb:
docs: mention `consistent_cluster_management` for creating cluster and adding node procedures
conf: enable `consistent_cluster_management` by default
(cherry picked from commit 5c886e59de)
Make it so that failures in `removenode`/`decommission` don't lead to reduced availability, and any leftovers in group 0 can be removed by `removenode`:
- In `removenode`, make the node a non-voter before removing it from the token ring. This removes the possibility of having a group 0 voting member which doesn't correspond to a token ring member. We can still be left with a non-voter, but that doesn't reduce the availability of group 0.
- As above but for `decommission`.
- Make it possible to remove group 0 members that don't correspond to token ring members from group 0 using `removenode`.
- Add an API to query the current group 0 configuration.
Fixes#11723.
Closes#12502
* github.com:scylladb/scylladb:
test: test_topology: test for removing garbage group 0 members
test/pylib: move some utility functions to util.py
db: system_keyspace: add a virtual table with raft configuration
db: system_keyspace: improve system.raft_snapshot_config schema
service: storage_service: better error handling in `decommission`
service: storage_service: fix indentation in removenode
service: storage_service: make `removenode` work for group 0 members which are not token ring members
service/raft: raft_group0: perform read_barrier in wait_for_raft
service: storage_service: make leaving node a non-voter before removing it from group 0 in decommission/removenode
test: test_raft_upgrade: remove test_raft_upgrade_with_node_remove
service/raft: raft_group0: link to Raft docs where appropriate
service/raft: raft_group0: more logging
service/raft: raft_group0: separate function for checking and waiting for Raft
This test is frequently failing due to a timeout when we try to restart
one of the nodes. The shutdown procedure apparently hangs when we try to
stop the `hints_manager` service, e.g.:
```
INFO 2023-01-13 03:18:02,946 [shard 0] hints_manager - Asked to stop
INFO 2023-01-13 03:18:02,946 [shard 0] hints_manager - Stopped
INFO 2023-01-13 03:18:02,946 [shard 0] hints_manager - Asked to stop
INFO 2023-01-13 03:18:02,946 [shard 1] hints_manager - Asked to stop
INFO 2023-01-13 03:18:02,946 [shard 1] hints_manager - Stopped
INFO 2023-01-13 03:18:02,946 [shard 1] hints_manager - Asked to stop
INFO 2023-01-13 03:18:02,946 [shard 1] hints_manager - Stopped
INFO 2023-01-13 03:22:56,997 [shard 0] hints_manager - Stopped
```
Observe the 5-minute delay at the end.
There is a known issue about `hints_manager` stop hanging: #8079.
Now, for some reason, this is the only test case that is hitting this
issue. We don't completely understand why. There is one significant
difference between this test case and others: this is the only test case
which kills 2 (out of 3) servers in the cluster and then tries to
gracefully shutdown the last server. There's a hypothesis that the last
server gets stuck trying to send hints to the killed servers. We weren't
able to prove/falsify it yet. But if it's true, then this patch will:
- unblock next promotions,
- give us some important information when we see that the issue stops
appearing.
In the patch we shut down all servers gracefully instead of killing them,
as we do in the other test cases.
Closes#12548
The docs mention that method, but it doesn't exist. Instead, the
state_machine interface defines a plain .apply().
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Closes#12541
Add a new virtual table `system.raft_state` that shows the currently
operating Raft configuration for each present group. The schema is the
same as `system.raft_snapshot_config` (the latter shows the config from
the last snapshot). In the future we plan to add more columns to this
table, showing more information (like the current leader and term),
hence the generic name.
Adding the table requires some plumbing of
`sharded<raft_group_registry>&` through function parameters to make it
accessible from `register_virtual_tables`, but it's mostly
straightforward.
Also added some APIs to `raft_group_registry` to list all groups and
find a given group (returning `nullptr` if one isn't found, not throwing
an exception).
Remove the `ip_addr` column which was not used. IP addresses are not
part of Raft configuration now and they can change dynamically.
Swap the `server_id` and `disposition` columns in the clustering key, so
when querying the configuration, we first obtain all servers with the
current disposition and then all servers with the previous disposition
(note that a server may appear both in current and previous).
Improve the error handling in `decommission` in case `leave_group0`
fails, informing the user what they should do (i.e. call `removenode` to
get rid of the group 0 member), and allowing decommission to finish; it
does not make sense to let the node continue to run after it leaves the
token ring. (And I'm guessing it's also not safe. Or maybe impossible.)
Due to failures we might end up in a situation where we have a group 0
member which is not a token ring member: a decommission/removenode
which failed after leaving/removing a node from the token ring but
before leaving / removing a node from group 0.
There was no way to get rid of such a group 0 member. A node that left
the token ring must not be allowed to run further (or it can cause data
loss, data resurrection and maybe other fun stuff), so we can't run
decommission a second time (even if we tried, it would just say that
"we're not a member of the token ring" and abort). And `removenode`
would also not work, because it proceeds only if the node requested to
be removed is a member of the token ring.
We modify `removenode` so it can run in this situation and remove the
group 0 member. The parts of `removenode` related to token ring
modification are now conditioned on whether the node was a member of the
token ring. The final `remove_from_group0` step is in its own branch. Some
minor refactors were necessary. Some log messages were also modified so
it's easier to understand which messages correspond to the "token movement"
part of the procedure.
The `make_nonvoter` step happens only if token ring removal happens,
otherwise we can skip directly to `remove_from_group0`.
We also move `remove_from_group0` outside the "try...catch",
fixing #11723. The "node ops" part of the procedure is related strictly
to token ring movement, so it makes sense for `remove_from_group0` to
happen outside.
Indentation is broken in this commit for easier reviewability, fixed in
the following commit.
Fixes: #11723
Right now wait_for_raft is called before performing group 0
configuration changes. We want to also call it before checking for
membership; there, it's desirable to have the most recent information,
hence the read_barrier call. In the existing use cases it's not strictly
necessary, but it doesn't hurt.
removenode currently works roughly like this:
1. stream/repair data so it ends up on new replica sets (calculated
without the node we want to remove)
2. remove the node from the token ring
3. remove the node from group 0 configuration.
If the procedure fails after step 2 but before step 3 finishes,
we're in trouble: the cluster is left with an additional voting group 0
member, which reduces group 0's availability, and there is no way to
remove this member because `removenode` no longer considers it to be
part of the cluster (it consults the token ring to decide).
Improve this failure scenario by including a new step at the beginning:
make the node a non-voter in group 0 configuration. Then, even if we
fail after removing the node from the token ring but before removing it
from group 0, we'll only be left with a non-voter which doesn't reduce
availability.
We make a similar change for `decommission`: between `unbootstrap()` (which
streams data) and `leave_ring()` (which removes our tokens from the
ring), become a non-voter. The difference here is that we don't become a
non-voter at the beginning, but only after streaming/repair. In
`removenode` it's desirable to make the node a non-voter as soon as
possible because it's already dead. In decommission it may be desirable
for us to remain a voter if we fail during streaming because we're still
alive and functional in that case.
In a later commit we'll also make it possible to retry `removenode` to
remove a node that is only a group 0 member and not a token ring member.
The test would create a scenario where one node was down while the others
started the Raft upgrade procedure. The procedure would get stuck, but
it was possible to `removenode` the downed node using one of the alive
nodes, which would unblock the Raft upgrade procedure.
This worked because:
1. the upgrade procedure starts by ensuring that all peers can be
contacted,
2. `removenode` starts by removing the node from the token ring.
After removing the node from the token ring, the upgrade procedure
becomes able to contact all peers (the peers set no longer contains the
down node). At the end, after removing the node from the token ring,
`removenode` would actually get stuck for a while, waiting for the
upgrade procedure to finish before removing the peer from group 0.
After the upgrade procedure finished, `removenode` would also finish.
(so: first the upgrade procedure waited for removenode, then removenode
waited for the upgrade procedure).
We want to modify the `removenode` procedure and include a new step
before removing the node from the token ring: making the node a
non-voter. The purpose is to improve the possible failure scenarios.
Previously, if the `removenode` procedure failed after removing the node
from the token ring but before removing it from group 0, the cluster
would contain a 'garbage' group 0 member which is a voter - reducing
group 0's availability. If the node is made a non-voter first, then this
failure will not be as big of a problem, because the leftover group 0
member will be a non-voter.
However, to correctly perform group 0 operations including making
someone a nonvoter, we must first wait for the Raft upgrade procedure to
finish (or at least wait until everyone joins group 0). Therefore by
including this 'make the node a non-voter' step at the beginning of
`removenode`, we make it impossible to remove a token ring member in the
middle of the upgrade procedure, on which the test case relied. The test
case would get stuck waiting for the `removenode` operation to finish,
which would never finish because it would wait for the upgrade procedure
to finish, which would not finish because of the dead peer.
We remove the test case; it was "lucky" to pass in the first place. We
have a dedicated mechanism for handling dead peers during Raft upgrade
procedure: the manual Raft group 0 RECOVERY procedure. There are other
test cases in this file which are using that procedure.
leave_group0 and remove_from_group0 functions both start with the
following steps:
- if Raft is disabled or in RECOVERY mode, print a simple log message
and abort
- if Raft cluster feature flag is not yet enabled, print a complex log
message and abort
- wait for Raft upgrade procedure to finish
- then perform the actual group 0 reconfiguration.
Refactor these preparation steps to a separate function,
`wait_for_raft`. This reduces code duplication; the function will also
be used in more operations later (becoming a nonvoter or turning another
server into a nonvoter).
We also change the API so that the preparation function is called from
outside by the caller before they call the reconfiguration function.
This is because in later commits, some of the call sites (mainly
`removenode`) will want to check explicitly whether Raft is enabled and
wait for Raft's availability, then perform a sequence of steps related
to group 0 configuration depending on the result.
Also add a private function `raft_upgrade_complete()` which we use to
assert that Raft is ready to be used.
Currently, we create `forward_aggregates` inside a function that
returns the result of a future lambda that captures these aggregates
by reference. As a result, the aggregates may be destructed before
the lambda finishes, resulting in a heap use-after-free.
To prolong the lifetime of these aggregates, we cannot use a move
capture, because the lambda is wrapped in a with_thread_if_needed()
call on these aggregates. Instead, we fix this by wrapping the
entire return statement in a do_with().
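A hedged sketch of the pattern (hypothetical names; `aggregates` stands in for forward_aggregates, and the real code additionally wraps the work in with_thread_if_needed()):
```cpp
#include <seastar/core/do_with.hh>
#include <seastar/core/future.hh>
#include <seastar/core/sleep.hh>
#include <chrono>
#include <vector>

using namespace seastar;
using namespace std::chrono_literals;

// Buggy shape: the continuation captures a local by reference, but the
// function returns (and destroys the local) before the future resolves.
future<size_t> count_buggy() {
    std::vector<int> aggregates{1, 2, 3};
    return sleep(1ms).then([&aggregates] {
        return aggregates.size();  // heap use-after-free
    });
}

// Fixed shape: do_with() owns the state and keeps it alive until the
// future returned by the inner lambda resolves.
future<size_t> count_fixed() {
    return do_with(std::vector<int>{1, 2, 3}, [](std::vector<int>& aggregates) {
        return sleep(1ms).then([&aggregates] {
            return aggregates.size();
        });
    });
}
```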
Fixes#12528
Closes#12533
Currently they are upgraded during learn on a replica. There are two
problems with this. First, the column mapping may not exist on a replica
if it missed this particular schema (because it was down, for instance),
and the mapping history is not part of the schema. In this case "Failed
to look up column mapping for schema version" will be thrown. Second, the
LWT request coordinator may not have the schema for the mutation either
(because it was already freed from the registry), and when a replica
tries to retrieve the schema from the coordinator, the retrieval will
fail, causing the whole request to fail with "Schema version XXXX not
found".
Both of those problems can be fixed by upgrading stored mutations
during prepare on the node they are stored at. To upgrade the mutation,
its column mapping is needed, and it is guaranteed to be present at the
node the mutation is stored at, since having the corresponding schema
available is a prerequisite for storing the mutation. After that, the
mutation is processed using the latest schema, which will be available
on all nodes.
Fixes#10770
Message-Id: <Y7/ifraPJghCWTsq@scylladb.com>
LCS reshape compacts all levels if a single one breaks
disjointness. That's unnecessary work, because rewriting that single
level is enough to restore disjointness. If multiple levels break
disjointness, each will be reshaped in its own iteration, reducing
the operation time of each step and the disk space requirement,
as input files can be released incrementally.
Incremental compaction is not applied to reshape yet, so we need to
avoid a "major compaction", to avoid the space overhead.
But space overhead is not the only problem: the inefficiency in
deciding what to reshape when overlapping is detected also motivated
this patch.
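A sketch of the selection logic, with stand-in types (the real code works on sstable runs within LCS levels):
```cpp
#include <cstddef>
#include <optional>
#include <vector>

struct run { int first_key, last_key; };
struct level { int n; std::vector<run> runs; };  // runs sorted by first_key

// A level is disjoint when no run overlaps its successor.
bool is_disjoint(const level& l) {
    for (size_t i = 1; i < l.runs.size(); ++i) {
        if (l.runs[i].first_key <= l.runs[i - 1].last_key) {
            return false;
        }
    }
    return true;
}

// Instead of one job covering every level whenever any level overlaps,
// return a job for the first offending level only; later reshape
// iterations handle the remaining levels, so input sstables are released
// incrementally and the per-step space overhead stays bounded.
std::optional<level> next_reshape_job(const std::vector<level>& levels) {
    for (const auto& l : levels) {
        if (!is_disjoint(l)) {
            return l;
        }
    }
    return std::nullopt;
}
```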
Fixes#12495.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Closes#12496
Some functions defined by a template in types.cc are used in other
translation units (via `cql3/untyped_result_set.hh`), but aren't
explicitly instantiated. Therefore their linking can fail, depending
on inlining decisions. (I experienced this when playing with compiler
options).
Fix that.
Closes#12539
As requested by issue #5619, commit 2150c0f7a2
added a sanity check for USING TIMESTAMP - the number specified in the
timestamp must not be more than 3 days into the future (when viewed as
a number of microseconds since the epoch).
This sanity checking helps avoid some annoying client-side bugs and
mis-configurations, but some users genuinely want to use arbitrary
or futuristic-looking timestamps and are hindered by this sanity check
(which Cassandra doesn't have, by the way).
So in this patch we add a new configuration option, restrict_future_timestamp.
If set to "true", futuristic timestamps (more than 3 days into the future)
are forbidden. The "true" setting is the default (as has been the case
since #5619). Setting this option to "false" will allow using any 64-bit
integer as a timestamp, as is allowed in Cassandra (and was allowed in
Scylla prior to #5619).
The error message in the case where a futuristic timestamp is rejected
now mentions the configuration parameter that can be used to disable this
check (this, and the option's name "restrict_*", is similar to other
so-called "safe mode" options).
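The check itself is simple arithmetic on microseconds since the epoch; a hypothetical sketch (the real check lives in Scylla's write path and reads the option from live configuration):
```cpp
#include <chrono>
#include <cstdint>

bool timestamp_allowed(int64_t ts_micros, bool restrict_future_timestamp) {
    using namespace std::chrono;
    if (!restrict_future_timestamp) {
        return true;  // any 64-bit value accepted, as in Cassandra
    }
    auto now_micros = duration_cast<microseconds>(
            system_clock::now().time_since_epoch()).count();
    auto three_days = duration_cast<microseconds>(hours(3 * 24)).count();
    // Reject timestamps more than 3 days into the future.
    return ts_micros <= now_micros + three_days;
}
```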
This patch also includes a test, which works in Scylla and Cassandra,
with either setting of restrict_future_timestamp, checking the right
thing in all these cases (the futuristic timestamp can either be written
and read, or can't be written). I used this test to manually verify that
the new option works, defaults to "true", and when set to "false" Scylla
behaves like Cassandra.
Fixes#12527
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes#12537
Cassandra refuses a request with more than one relation to the same
clustering column, for example
DELETE FROM tbl WHERE p = ? and c = ? AND c > ?
complains that
c cannot be restricted by more than one relation if it includes an Equal
But it produces different error messages for different operators and
even order.
Currently, Scylla doesn't consider such requests an error. Whether or
not we should be compatible with Cassandra here is discussed in
issue #12472. But as long as we do accept these queries, we should be
sure we do the right thing: "WHERE c = 1 AND c > 2" should match
nothing, "WHERE c = 1 AND c > 0" should match the matches of c = 1,
and so on. This patch adds a test to verify that these requests indeed
yield correct results. The test is scylla_only because, as explained
above, Cassandra doesn't support these requests at all.
Refs #12472
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes#12498
Remove an unnecessary bashism, so that this script can be interpreted
by a POSIX shell.
/bin/sh is specified in the shebang line. On Debian derivatives,
/bin/sh is dash, which is POSIX compliant, but this script is
written in the bash dialect.
Before this change, we could run into the following build failure
when building the tree on Debian:
[7/904] ./SCYLLA-VERSION-GEN
./SCYLLA-VERSION-GEN: 37: [[: not found
After this change, the build is able to proceed.
Signed-off-by: Kefu Chai <kefu.chai@scylladb.com>
Closes#12530
The CQL binary protocol introduced "unset" values in version 4
of the protocol. Unset values can be bound to variables, which
cause certain CQL fragments to be skipped. For example, the
fragment `SET a = :var` will not change the value of `a` if `:var`
is bound to an unset value.
Unsets, however, are very limited in where they can appear. They
can only appear at the top-level of an expression, and any computation
done with them is invalid. For example, `SET list_column = [3, :var]`
is invalid if `:var` is bound to unset.
This causes the code to be littered with checks for unset, and there
are plenty of tests dedicated to catching unsets. However, a simpler
way is possible - prevent the infiltration of unsets at the point of
entry (when evaluating a bind variable expression), and introduce
guards to check for the few cases where unsets are allowed.
This is what this long patch does. It performs the following:
(general)
1. unset is removed from the possible values of cql3::raw_value and
cql3::raw_value_view.
(external->cql3)
2. query_options is fortified with a vector of booleans,
unset_bind_variable_vector, where each boolean corresponds to a bind
variable index and is true when it is unset.
3. To avoid churn, two compatibility structs are introduced:
cql3::raw_value{,_view}_vector_with_unset, which can be constructed
from a std::vector<raw_value{,_view}>, which is what most callers
have. They can also be constructed with explicit unset vectors, for
the few cases where they are needed.
(cql3->variables)
4. query_options::get_value_at() now throws if the requested bind variable
is unset. This replaces all the throwing checks in expression evaluation
and statement execution, which are removed.
5. A new query_options::is_unset() is added for the users that can tolerate
unset, though it is not used directly. (A minimal sketch of points 2, 4 and
5 appears after this list.)
6. A new cql3::unset_operation_guard class guards against unsets. It accepts
an expression, and can be queried whether an unset is present. Two
conditions are checked: the expression must be a singleton bind
variable, and at runtime it must be bound to an unset value.
7. The modification_statement operations are split into two, via two
new subclasses of cql3::operation. cql3::operation_no_unset_support
ignores unsets completely. cql3::operation_skip_if_unset checks if
an operand is unset (luckily all operations have at most one operand that
tolerates unset) and applies unset_operation_guard to it.
8. The various sites that accept expressions or operations are modified
to check for should_skip_operation(). These are the loops around
operations in update_statement and delete_statement, and the checks
for unset in attributes (LIMIT and PER PARTITION LIMIT)
(tests)
9. Many unset tests are removed. It's now impossible to enter an
unset value into the expression evaluation machinery (there's
just no unset value), so it's impossible to test for it.
10. Other unset tests now have to be invoked via bind variables,
since there's no way to create an unset cql3::expr::constant.
11. Many tests have their exception message match strings relaxed.
Since unsets are now checked very early, we don't know the context
where they happen. It would be possible to reintroduce it (by adding
a format string parameter to cql3::unset_operation_guard), but it
seems not to be worth the effort. Usage of unsets is rare, and it is
explicit (at least with the Python driver, an unset cannot be
introduced by omission).
I tried as an alternative to wrap cql3::raw_value{,_view} (that doesn't
recognize unsets) with cql3::maybe_unset_value (that does), but that
caused huge amounts of churn, so I abandoned that in favor of the
current approach.
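The sketch referenced in item 5 above, covering the shape described in points 2, 4 and 5 (illustrative stand-ins, not the actual ScyllaDB classes):
```cpp
#include <cstddef>
#include <stdexcept>
#include <string>
#include <vector>

// The bound values travel with a parallel per-index "unset" flag vector,
// so an unset can only be observed through the two accessors below.
struct query_options_sketch {
    std::vector<std::string> values;  // serialized bind-variable values
    std::vector<bool> unset;          // analogue of unset_bind_variable_vector

    // Throws where a real value is required, replacing the unset checks
    // previously scattered through expression evaluation and execution.
    const std::string& get_value_at(size_t i) const {
        if (unset.at(i)) {
            throw std::invalid_argument("unset value in a position that requires a value");
        }
        return values.at(i);
    }

    // For the few call sites (e.g. skippable SET operations) that tolerate unset.
    bool is_unset(size_t i) const { return unset.at(i); }
};
```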
Closes#12517
Allow replacing a node given its Host ID rather than its ip address.
This series adds a replace_node_first_boot option to db/config
and makes use of it in storage_service.
The new option takes priority over the legacy replace_address* options.
When the latter are used, a deprecation warning is printed.
Documentation is updated accordingly,
and a CQL unit test is added.
Ref #12277
Closes#12316
* github.com:scylladb/scylladb:
docs: document the new replace_node_first_boot option
dist/docker: support --replace-node-first-boot
db: config: describe replace_address* options as deprecated
test: test_topology: test replace using host_id
test: pylib: ServerInfo: add host_id
storage_service: get rid of get_replace_address
storage_service: is_replacing: rely directly on config options
storage_service: pass replacement_info to run_replace_ops
storage_service: pass replacement_info to booststrap
storage_service: join_token_ring: reuse replacement_info.address
storage_service: replacement_info: add replace address
init: do not allow cfg.replace_node_first_boot of seed node
db: config: add replace_node_first_boot option
`forward_request` verb carried information about timeouts using
`lowres_clock::time_point` (which came from the local steady clock
`seastar::lowres_clock`). The time point was produced on one node and
later compared against another node's `lowres_clock`. That behavior
was wrong (`lowres_clock::time_point`s produced by different
`lowres_clock`s cannot be compared) and could lead to delayed or
premature timeouts.
To fix this issue, `lowres_clock::time_point` was replaced with
`lowres_system_clock::time_point` in the `forward_request` verb.
The representation both time point types serialize to is the same
(a 64-bit integer denoting the count of elapsed nanoseconds), so it was
possible to do an in-place switch of those types, using the logic
suggested by @avikivity:
- using steady_clock is just broken, so we aren't taking anything
from users by breaking it further
- once all nodes are upgraded, it magically starts to work
Closes#12529
The PR adds an API call that gets the statuses of a given
task and all its descendants.
The parent-child tree is traversed in BFS order and the list of
statuses is returned to the user.
Closes#12317
* github.com:scylladb/scylladb:
test: add test checking recursive task status
api: get task statuses recursively
api: change retrieve_status signature
The replace_address options are still supported,
but mention in their description that they are now deprecated
and that the user should use replace_node_first_boot instead.
While at it, fix a typo in ignore_dead_nodes_for_replace.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Add test cases exercising the --replace-node-first-boot option
by replacing nodes using their host_id rather
than ip address.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Populate replacement_info.address in prepare_replacement_info
as a first step towards getting rid of get_replace_address().
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
For replacing a node given its (now unique) Host ID.
The existing options for replace_address*
will be deprecated in the following patches
and eventually we will stop supporting them.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Rename `system.raft_config` to `system.raft_snapshot_config` to make it clearer
what the table stores.
Remove the `my_server_id` partition key column from
`system.raft_snapshot_config` and a corresponding column from
`system.raft_snapshots` which would store the Raft server ID of the local node.
It's unnecessary: all servers running on a given node in different groups will
use the same ID - the Raft ID of the node, which is equal to its Host ID. There
will never be multiple servers running in a single Raft group on the same node.
Closes#12513
* github.com:scylladb/scylladb:
db: system_keyspace: remove (my_)server_id column from RAFT_SNAPSHOTS and RAFT_SNAPSHOT_CONFIG
db: system_keyspace: rename 'raft_config' to 'raft_snapshot_config'
Leave the guide for manual opening in, though; the script might not work
in all cases.
Also update the version example; we changed how development versions
look.
Closes#12511
Make it clear that the table stores the snapshot configuration, which is
not necessarily the currently operating configuration (the last one
appended to the log).
In the future we plan to have a separate virtual table for showing the
currently operating configuration, perhaps we will call it
`system.raft_config`.
The planned integration of cross-module optimizations in scylladb/scylladb-enterprise requires several changes to `configure.py`. To minimize the divergence between the `configure.py`s of both repositories, this series upstreams some of these changes to scylladb/scylladb.
The changes mostly remove dead code and fix some traps for the unaware.
Closes#12431
* github.com:scylladb/scylladb:
configure.py: prevent deduplication of seastar compile options
configure.py: rename clang_inline_threshold()
configure.py: rework the seastar_cflags variable
configure.py: hoist the pkg_config() call for seastar-testing.pc
configure.py: unify the libs variable for tests and non-tests
configure.py: fix indentation
configure.py: remove a stale code path for .a artifacts
Currently, we call cargo build every time we build Scylla, even
when no Rust files have changed.
This is avoided by adding a depfile to the ninja rule for the Rust
library.
The depfile is generated by default during cargo build,
but it uses the full paths of all dependencies that it includes,
while we use relative paths. This is fixed by specifying
CARGO_BUILD_DEP_INFO_BASEDIR='.', which makes it so the current
path is subtracted from all generated paths.
Instead of using 'always' when specifying when to run the cargo
build, a dependency on Cargo.lock is added in addition to the
depfile. As a result, the Rust files are recompiled not only
when the source files included in the depfile are modified,
but also when some Rust dependency is updated.
Cargo may put an old cached file as the result of the build even
when Cargo.lock was recently updated. Because of that, the
build result may be older than the Cargo.lock file even
if the build was just performed. This may cause ninja to rebuild
the file every following time. To avoid this, we 'touch' the
build result, so that its last modification time is up to date.
Because the dependency on Cargo.lock was added, the new build
command does not modify it. Instead, the developer must
update it when modifying the dependencies - the docs are updated
to reflect that.
Closes#12489
Fixes#12508
In its infinite wisdom, CMake deduplicates the options passed
to `target_compile_options`, making it impossible to pass options which require
duplication, such as -mllvm.
Passing e.g.
`-mllvm;-pgso=false;-mllvm;-inline-threshold=2500` invokes the compiler with
`-mllvm -pgso=false -inline-threshold=2500`, breaking the options.
As a workaround, CMake added the `SHELL:` syntax, which makes it possible to
pass the list of options not as a CMake list, but as a shell-quoted string.
Let's use it, so we can pass multiple -mllvm options.
The name of this variable is misleading. What it really does is pass flags to
static libraries compiled by us, not just to seastar.
We will need this capability to implement cross-artifact optimizations in our
build.
We will also need to pass linker flags, and we will need to vary those flags
depending on the build mode.
This patch splits the seastar_cflags variable into per-mode lib_cflags and
lib_ldflags variables. It shouldn't change the resulting build.ninja for now,
but will be needed by later planned patches.
Put the pkg_config() for seastar-testing.pc in the same area as the call
for seastar.pc, outside of the loop.
This is a cosmetic change aimed at making following commits cleaner.
Scylla hasn't had `.a` artifacts for a long time (since the Urchin days,
I believe), and the piece of code responsible for them is stale and untested.
Remove it.
In 424dbf43f ("transport: drop cql protocol versions 1 and 2"),
we dropped support for protocols 1 and 2, but some code remains
that checks for those versions. It is now dead code, so remove it.
Closes#12497
Our handling of NULLs in expressions is different from Cassandra's,
and more uniform. For example, the filter "WHERE x = NULL" is an
error in Cassandra, but supported in Scylla. Let's explain how and why.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes#12494
Currently, no rule enforces that the cxx.h rust header
is generated before compiling the .cc files generated
from rust. This patch adds this dependency.
Closes#12492
Make it easy to see which clusters are operated on by which tests, in which build modes, and so on.
Add some additional logs.
These improvements would have saved me a lot of debugging time if I had had them last week, and we would have had https://github.com/scylladb/scylladb/pull/12482 much faster.
Closes#12483
* github.com:scylladb/scylladb:
test.py: harmonize topology logs with test.py format
test/pylib: additional logging during cluster setup
test/pylib: prefix cluster/manager logs with the current test name
test/pylib: pool: pass *args and **kwargs to the build function from get()
test.py: include mode in ScyllaClusterManager logs
Sometimes, to debug a task manager module, we may want to inspect
the whole tree of descendants of some task.
To make this easier, an API call is added that gets a list of statuses of
the requested task and all its descendants in BFS order.
We need millisecond resolution in the log to be able to
correlate test log with test.py log and scylla logs. Harmonize
the log format for tests which actively manage scylla servers.
The log file produced by test.py combines logs coming from multiple
concurrent test runs. Each test has its own log file as well, but this
"global" log file is useful when debugging problems with topology tests,
since many events related to managing clusters are stored there.
Make the logs easier to read by including information about the test case
that's currently performing operations such as adding new servers to
clusters and so on. This includes the mode, test run name and the name
of the test case.
We do this by using custom `Logger` objects (instead of calling
`logging.info` etc. which uses the root logger) with `LoggerAdapter`s
that include the prefixes. A bit of boilerplate 'plumbing' through
function parameters is required but it's mostly straightforward.
This doesn't apply to all events, e.g. boost test cases which don't
setup a "real" Scylla cluster. These events don't have additional
prefixes.
Example:
```
17:41:43.531 INFO> [dev/topology.test_topology.1] Cluster ScyllaCluster(name: 7a414ffc-903c-11ed-bafb-f4d108a9e4a3, running: ScyllaServer(1, 127.40.246.1, 29c4ec73-8912-45ca-ae19-8bfda701a6b5), ScyllaServer(4, 127.40.246.4, 75ae2afe-ff9b-4760-9e19-cd0ed8d052e7), ScyllaServer(7, 127.40.246.7, 67a27df4-be63-4b4c-a70c-aeac0506304f), stopped: ) adding server...
17:41:43.531 INFO> [dev/topology.test_topology.1] installing Scylla server in /home/kbraun/dev/scylladb/testlog/dev/scylla-10...
17:41:43.603 INFO> [dev/topology.test_topology.1] starting server at host 127.40.246.10 in scylla-10...
17:41:43.614 INFO> [dev/topology.test_topology.2] Cluster ScyllaCluster(name: 7a497fce-903c-11ed-bafb-f4d108a9e4a3, running: ScyllaServer(2, 127.40.246.2, f59d3b1d-efbb-4657-b6d5-3fa9e9ef786e), ScyllaServer(5, 127.40.246.5, 9da16633-ce53-4d32-8687-e6b4d27e71eb), ScyllaServer(9, 127.40.246.9, e60c69cd-212d-413b-8678-dfd476d7faf5), stopped: ) adding server...
17:41:43.614 INFO> [dev/topology.test_topology.2] installing Scylla server in /home/kbraun/dev/scylladb/testlog/dev/scylla-11...
17:41:43.670 INFO> [dev/topology.test_topology.2] starting server at host 127.40.246.11 in scylla-11...
```
The batch constructor uses an unnecessarily complicated template,
where in fact it only accepts vector<vector<raw_value | raw_value_view>>.
Simplify the constructor to allow exactly that. Delete some confusing
comments around it.
Closes#12488
This will be used to specify a custom logger when building new clusters
before starting tests, making it easy to pinpoint which tests are
waiting for clusters to be built and what's happening to these
particular clusters.
The logs often mention the test run and the current test case in a given
run, such as `test_topology.1` and
`test_topology.1::test_add_server_add_column`. However, if we run
test.py in multiple modes, the different modes might be running the same
test case and the logs become confusing. To disambiguate, prefix the
test run/case names with the mode name.
Example:
```
Leasing Scylla cluster ScyllaCluster(name: 7a414ffc-903c-11ed-bafb-f4d108a9e4a3, running: ScyllaServer(1, 127.40.246.1, 29c4ec73-8912-45ca-ae19-8bfda701a6b5), ScyllaServer(4, 127.40.246.4, 75ae2afe-ff9b-4
760-9e19-cd0ed8d052e7), ScyllaServer(7, 127.40.246.7, 67a27df4-be63-4b4c-a70c-aeac0506304f), stopped: ) for test dev/topology.test_topology.1::test_add_server_add_column
```
Currently, UDAs can't be reused if Scylla has been
restarted since they were created. This is
caused by the missing initialization of saved
UDAs, which should have inserted them into the
cql3::functions::functions::_declared map that
stores all (user-)created functions and
aggregates.
This patch adds the missing implementation in a way
that's analogous to the method of inserting UDFs into
the _declared map.
Using lambda coroutines as arguments can lead to a use-after-free.
Currently, the way these lambdas are used in do_parse_schema_tables
does not lead to such a problem, but it's better to be safe and wrap
them in coroutine::lambda(), so that they can't lead to this problem,
as long as we ensure that the lambda finishes within the
do_parse_schema_tables() statement (for example using co_await).
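A hedged sketch of the hazard and the wrapper, assuming seastar's coroutine::lambda as documented (a lambda coroutine's captures live in the lambda object, so the temporary must outlive the coroutine):
```cpp
#include <seastar/core/coroutine.hh>
#include <seastar/coroutine/maybe_yield.hh>

seastar::future<int> caller() {
    int x = 42;
    // The temporary lambda object holds the captured reference to `x`.
    // Wrapping it in coroutine::lambda and co_await-ing the call in the
    // same statement guarantees the temporary is still alive whenever
    // the coroutine body resumes.
    co_return co_await seastar::coroutine::lambda([&x]() -> seastar::future<int> {
        co_await seastar::coroutine::maybe_yield();  // a suspension point
        co_return x;
    })();
}
```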
Closes#12487
This patch adds a few simple functional tests for the SELECT DISTINCT
feature and how it interacts with other features, especially GROUP BY.
2 of the 5 new tests are marked xfail, and reproduce one old and one
newly-discovered issue:
Refs #5361: LIMIT doesn't work when using GROUP BY (the test here uses
LIMIT and GROUP BY together with SELECT DISTINCT, so the
LIMIT isn't honored).
Refs #12479: SELECT DISTINCT doesn't refuse GROUP BY with clustering
column.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes#12480
In test_apply_is_atomic, a basic form of exception testing is used.
There is a failure_injecting_allocation_strategy which, however, is not
used for any allocation, since for some reason
`with_allocator(r.allocator()` is used instead of
`with_allocator(alloc`. Fix that.
Closes#12354
Regression introduced in 23e4c8315.
position_in_partition::after_key(), which returns a view_and_holder,
triggers undefined behavior when the key was not full, because the
holder is moved, which invalidates the view.
Fixes#12367
Closes#12447
The function underlying_type() returns a data_type by value,
but the code assigned it to a reference.
At first I was sure this was an error
(assigning a temporary value to a reference), but it turns out
that this is most likely correct due to C++ lifetime
extension rules.
I think it's better to avoid such unintuitive tricks.
Assigning to a value makes it clearer that the code
is correct and there are no dangling references.
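A self-contained illustration of the rule in question (stand-in data_type, not the actual Scylla definition):
```cpp
#include <memory>
#include <string>

using data_type = std::shared_ptr<const std::string>;  // stand-in type

data_type underlying_type() {
    return std::make_shared<const std::string>("int");
}

void example() {
    // Works, but only because binding a temporary to a const reference
    // extends the temporary's lifetime to that of the reference - an
    // unintuitive rule that is easy to break during refactoring.
    const data_type& by_ref = underlying_type();

    // Clearer: take the result by value; data_type is cheap to copy/move
    // and there is no question of dangling.
    data_type by_value = underlying_type();

    (void)by_ref;
    (void)by_value;
}
```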
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
Closes#12485
With sufficiently many test cases we would eventually run out of IP
addresses, because IPs (which are leased from a global host registry)
would only be released at the end of an entire test suite.
In fact we already hit this during next promotions, causing much pain
indeed.
Release IPs when a cluster, after being marked dirty, is stopped and
thrown away.
Closes#12482
Introduce a new "script" operation, which loads a script from the specified path, then feeds the mutation fragment stream to it. The script can then extract, process and present information from the sstable as it wishes.
For now only Lua scripts are supported for the simple reason that Lua is easy to write bindings for, it is simple and lightweight and more importantly we already have Lua included in the Scylla binary as it is used as the implementation language for UDF/UDA. We might consider WASM support in the future, but for now we don't have any language support in WASM available.
Example:
```lua
function new_stats(key)
return {
partition_key = key,
total = 0,
partition = 0,
static_row = 0,
clustering_row = 0,
range_tombstone_change = 0,
};
end
total_stats = new_stats(nil);
function inc_stat(stats, field)
stats[field] = stats[field] + 1;
stats.total = stats.total + 1;
total_stats[field] = total_stats[field] + 1;
total_stats.total = total_stats.total + 1;
end
function on_new_sstable(sst)
max_partition_stats = new_stats(nil);
if sst then
current_sst_filename = sst.filename;
else
current_sst_filename = nil;
end
end
function consume_partition_start(ps)
current_partition_stats = new_stats(ps.key);
inc_stat(current_partition_stats, "partition");
end
function consume_static_row(sr)
inc_stat(current_partition_stats, "static_row");
end
function consume_clustering_row(cr)
inc_stat(current_partition_stats, "clustering_row");
end
function consume_range_tombstone_change(crt)
inc_stat(current_partition_stats, "range_tombstone_change");
end
function consume_partition_end()
if current_partition_stats.total > max_partition_stats.total then
max_partition_stats = current_partition_stats;
end
end
function on_end_of_sstable()
if current_sst_filename then
print(string.format("Stats for sstable %s:", current_sst_filename));
else
print("Stats for stream:");
end
print(string.format("\t%d fragments in %d partitions - %d static rows, %d clustering rows and %d range tombstone changes",
total_stats.total,
total_stats.partition,
total_stats.static_row,
total_stats.clustering_row,
total_stats.range_tombstone_change));
print(string.format("\tPartition with max number of fragments (%d): %s - %d static rows, %d clustering rows and %d range tombstone changes",
max_partition_stats.total,
max_partition_stats.partition_key,
max_partition_stats.static_row,
max_partition_stats.clustering_row,
max_partition_stats.range_tombstone_change));
end
```
Running this script will yield the following:
```
$ scylla sstable script --script-file fragment-stats.lua --system-schema system_schema.columns /var/lib/scylla/data/system_schema/columns-24101c25a2ae3af787c1b40ee1aca33f/me-1-big-Data.db
Stats for sstable /var/lib/scylla/data/system_schema/columns-24101c25a2ae3af787c1b40ee1aca33f//me-1-big-Data.db:
397 fragments in 7 partitions - 0 static rows, 362 clustering rows and 28 range tombstone changes
Partition with max number of fragments (180): system - 0 static rows, 179 clustering rows and 0 range tombstone changes
```
Fixes: https://github.com/scylladb/scylladb/issues/9679
Closes#11649
* github.com:scylladb/scylladb:
tools/scylla-sstable: consume_reader(): improve pause heuristincs
test/cql-pytest/test_tools.py: add test for scylla-sstable script
tools: add scylla-sstable-scripts directory
tools/scylla-sstable: remove custom operation
tools/scylla-sstable: add script operation
tools/sstable: introduce the Lua sstable consumer
dht/i_partitioner.hh: ring_position_ext: add weight() accessor
lang/lua: export Scylla <-> lua type conversion methods
lang/lua: use correct lib name for string lib
lang/lua: fix type in aligned_used_data (meant to be user_data)
lang/lua: use lua_State* in Scylla type <-> Lua type conversions
tools/sstable_consumer: more consistent method naming
tools/scylla-sstable: extract sstable_consumer interface into own header
tools/json_writer: add accessor to underlying writer
tools/scylla-sstable: fix indentation
tools/scylla-sstable: export mutation_fragment_json_writer declaration
tools/scylla-sstable: mutation_fragment_json_writer un-implement sstable_consumer
tools/scylla-sstable: extract json writing logic from json_dumper
tools/scylla-sstable: extract json_writer into its own header
tools/scylla-sstable: use json_writer::DataKey() to write all keys
tools/scylla-types: fix use-after-free on main lambda captures
Inferring the shard from the generation is long gone. We still use it in
some scripts, but it's no longer needed in Scylla when loading
sstables, and it also conflicts with the ongoing work on UUID-based
generations.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Closes#12476
The CQL binary protocol version 3 was introduced in 2014. All Scylla
versions support it, as do Cassandra versions 2.1 and newer.
Versions 1 and 2 have 16-bit collection sizes, while protocol 3 and newer
use 32-bit collection sizes.
Unfortunately, we implemented support for multiple serialization formats
very intrusively, by pushing the format everywhere. This avoids the need
to re-serialize (sometimes) but is quite obnoxious. It's also likely to be
broken, since it's almost untested and it's too easy to write
cql_serialization_format::internal() instead of propagating the
client-specified value.
Since protocols 1 and 2 have been obsolete for 9 years, just drop them. It's
easy to verify that they are no longer in use on a running system by
examining the `system.clients` table before upgrade.
Fixes#10607
Closes#12432
* github.com:scylladb/scylladb:
treewide: drop cql_serialization_format
cql: modification_statement: drop protocol check for LWT
transport: drop cql protocol versions 1 and 2
The consume loop had some heuristics in place to determine whether, after
pausing, the consumer wishes to skip just the partition or the remaining
content of the sstable. This heuristic was flawed, so replace it with a
non-heuristic method: track the last consumed fragment and look at it
to determine what should be done.
To test the script operation, we use some of the example scripts from
the example directory. Namely, dump.lua and slice.lua. These two scripts
together have a very good coverage of the entire script API. Testing
their functionality therefore also provides a good coverage of the lua
bindings. A further advantage is that since both scripts dump output in
identical format to that of the data-dump operation, it is trivial to do
a comparison against this already tested operation.
A targeted test is written for the sstable skip functionality of the
consumer API.
To be the home of example scripts for scylla-sstable. For now only a
README.md is added describing the directory's purpose and with links to
useful resources.
One example script is added in this patch, more will come later.
Loads the script from the specified path, then feeds the mutation
fragment stream to it. For now only Lua scripts are supported, for the
simple reason that Lua is easy to write bindings for, it is simple and
lightweight, and, more importantly, we already include Lua in the
Scylla binary, as it is the implementation language for UDF/UDA.
We might consider WASM support in the future, but for now no WASM
language support is available.
The Lua sstable consumer loads a script from the specified path then
feeds the mutation fragment stream to the script via the
sstable_consumer methods, each method of which the script is allowed to
define, effectively overloading the virtual method in Lua.
This allows for very wide and flexible customization opportunities for
what to extract from sstables and how to process and present them,
without the need to recompile the scylla-sstable tool.
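A sketch of how this optional overloading could be dispatched from C++ using the plain Lua C API; the helper below is an assumption for illustration, not the tool's actual code:
```cpp
#include <lua.hpp>

// Returns true if the loaded script defines a global function with the
// given name, so the caller can decide whether to forward a consumer
// method to Lua or fall back to the default behavior.
bool script_defines(lua_State* l, const char* name) {
    lua_getglobal(l, name);   // push the global (or nil) onto the stack
    bool defined = lua_isfunction(l, -1);
    lua_pop(l, 1);            // restore the stack
    return defined;
}
```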
Use lua_State* instead of lua_slice_state, which is local to this file. We
want to reuse the Scylla type <-> Lua type conversion functions, but for
that they have to use the more generic lua_State*. No functionality or
convenience is lost with the switch; the code didn't make use of the
other fields bundled in lua_slice_state.
So it can be used in code outside scylla-sstable.cc. This source file is
quite large already, and as we have yet another large chunk of code to
add, we want to add it in a separate file.
There is no point in the former implementing said interface. For one, it
is a futurized interface, which is not needed for something writing to
stdout. Rename the methods to follow the naming convention of rjson
writers more closely.
We want to split this class into two parts: one with the actual logic
converting mutation fragments to json, and a wrapper over this one,
which implements the sstable_consumer interface.
As a first step we extract the class as is (no changes) and just forward
all calls from the now-empty wrapper to it.
This method was renamed from its previous name of PartitionKey. Since in
JSON partition keys and clustering keys look alike, with the only
difference being that the former may also have a token, it makes sense to
have a single method to write them (with an optional token parameter). This
was the case at some point, with json_dumper::write_key() taking this role.
However, at a later point, json_writer::PartitionKey() was introduced and
now the code uses both. Standardize on the latter and give it a more
generic name.
The main lambda of scylla-types, the one passed to app_template::run(),
was recently made a coroutine. app_template::run() however doesn't keep
this lambda alive, and hence after the first suspension point, accessing
the lambda's captures triggers a use-after-free.
The simple fix is to convert the coroutine into a continuation chain.
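A minimal illustration of the pitfall, assuming Seastar coroutines; this is a generic sketch, not the actual scylla-types code:
```cpp
#include <chrono>
#include <iostream>
#include <seastar/core/future.hh>
#include <seastar/core/sleep.hh>

seastar::future<> demo() {
    // The capture lives inside the lambda object itself.
    auto lambda = [config = 42]() -> seastar::future<> {
        co_await seastar::sleep(std::chrono::seconds(1)); // suspension point
        // `config` is read from the lambda object, which was destroyed when
        // demo() returned: use-after-free.
        std::cout << config << "\n";
    };
    return lambda(); // the running coroutine outlives the local `lambda`
}
```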
Consider the following MVCC state of a partition:
v2: ==== <7> [entry2] ==== <9> ===== <last dummy>
v1: ================================ <last dummy> [entry1]
Where === means a continuous range and --- means a discontinuous range.
After two LRU items are evicted (entry1 and entry2), we will end up with:
v2: ---------------------- <9> ===== <last dummy>
v1: ================================ <last dummy> [entry1]
This will cause readers to incorrectly think there are no rows before
entry <9>, because the range is continuous in v1, and continuity of a
snapshot is a union of continuous intervals in all versions. The
cursor will see the interval before <9> as continuous and the reader
will produce no rows.
This is only temporary, because current MVCC merging rules are such
that the flag on the latest entry wins, so we'll end up with this once
v1 is no longer needed:
v2: ---------------------- <9> ===== <last dummy>
...and the reader will go to sstables to fetch the evicted rows before
entry <9>, as expected.
The bug is in rows_entry::on_evicted(), which treats the last dummy
entry in a special way: it doesn't evict it, and, by omission, doesn't
clear its continuity.
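A sketch of the fix direction, with stand-in types whose names are assumptions loosely following the description above:
```cpp
// Illustrative stand-in; only what the sketch needs.
struct rows_entry {
    bool last_dummy = false;
    bool continuous = true; // continuity of the interval this entry closes
};

// When the last dummy survives eviction, the interval it closes must be
// marked discontinuous, so readers fall back to sstables instead of
// trusting the snapshot's continuity union.
inline void on_evicted(rows_entry& e) {
    if (e.last_dummy) {
        e.continuous = false; // previously left set by omission - the bug
    } else {
        // unlink and dispose of the entry (elided)
    }
}
```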
The situation is not easy to trigger because it requires a certain
eviction pattern concurrent with multiple reads of the same partition
in different versions, i.e. across memtable flushes.
Closes#12452
Sstable-read-related metrics have been broken for a long time now. First, the introduction of inactive reads (https://github.com/scylladb/scylladb/issues/1865) diluted this metric, as it then also contained inactive reads (contrary to the metric's name). Then, after moving the semaphore in front of the cache (3d816b7c1), the metric became completely broken, as it now contains all kinds of reads: disk, in-memory and inactive ones too.
This series aims to remedy this:
* `scylla_database_active_reads` is fixed to only include active reads.
* `scylla_database_active_reads_memory_consumption` is renamed to `scylla_database_reads_memory_consumption` and its description is brought up-to-date.
* `scylla_database_disk_reads` is added to track current reads that have gone to disk.
* `scylla_database_sstables_read` is added to track the number of sstables currently being read.
Fixes: https://github.com/scylladb/scylladb/issues/10065
Closes #12437
* github.com:scylladb/scylladb:
replica/database: add disk_reads and sstables_read metrics
sstables: wire in the reader_permit's sstable read count tracking
reader_concurrency_semaphore: add disk_reads and sstables_read stats
replica/database: fix active_reads_memory_consumption_metric
replica/database: fix active_reads metric
The Azure metadata API may sometimes return an empty zone. If that happens,
shard 0 gets an empty string as its rack, but propagates UNKNOWN_RACK to
other shards.
An empty zone response should be handled the same way regardless of shard.
refs: #12185
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Closes#12274
Cassandra refuses a request with more than one equality relation to the
same column, for example
DELETE FROM tbl WHERE partitionKey = ? AND partitionKey = ?
It complains that
partitionkey cannot be restricted by more than one relation if it
includes an Equal
Currently, Scylla doesn't consider such requests an error. Whether or
not we should be compatible with Cassandra here is discussed in
issue #12472. But as long as we do accept this query, we should be
sure we do the right thing: "WHERE p = 1 AND p = 2" should match
nothing (not the first or last value being tested), and "WHERE p = 1
AND p = 1" should match whatever p = 1 matches. This patch adds a test
to verify that these requests indeed yield correct results. The
test is scylla_only because, as explained above, Cassandra doesn't
support this feature at all.
Refs #12472
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes#12473
* seastar ca586cfb8d...8889cbc198 (14):
> http: request_parser: fix grammar ambiguity in field_content
Fixes#12468
> sstring: use fold expression to simplify copy_str_to()
> sstring: use fold expression to simplify str_len()
> metrics: capture by move in make_function()
> metrics: replace homebrew is_callable<> with is_invocable_v<>
> reactor: use std::move() to avoid copy.
> reactor: remove redundant semicolon.
> reactor: use mutable to make std::move() work.
> build: install liburing explicitly on ArchLinux.
> reactor: use a for loop for submitting ios
> metrics: add spaces around '='
> parallel utils: align concept with implementation
> reactor: s/resize(0)/clear()/
> reactor: fix a typo in comment
Closes#12469
This commit removes consume_in_reverse::legacy_half_reverse, an option
once used to indicate that the given key ranges are sorted descending,
based on the clustering key of the start of the range, and that the
range tombstones inside partition would be sorted (descending, as all
the mutation fragments would) according to their end (but range
tombstone would still be stored according to their start bound).
As it turns out, mutation::consume, when called with the legacy_half_reverse
option, produces an invalid fragment stream, one where all the range
tombstone changes come after all the clustering rows. This was not an
issue, since when constructing results from the query, Scylla would not
pass the tombstones to the client, but instead compact the data beforehand.
In this commit, consume_in_reverse::legacy_half_reverse is removed,
along with all its uses.
As for the swap out in mutation_partition.cc in query_mutation and
to_data_query_result:
The downstream was not prepared to deal with legacy_half_reverse.
mutation::consume contains
```
if (reverse == consume_in_reverse::yes) {
    while (!(stop_opt = consume_clustering_fragments<consume_in_reverse::yes>(_ptr->_schema, partition, consumer, cookie, is_preemptible::yes))) {
        co_await yield();
    }
} else {
    while (!(stop_opt = consume_clustering_fragments<consume_in_reverse::no>(_ptr->_schema, partition, consumer, cookie, is_preemptible::yes))) {
        co_await yield();
    }
}
```
So why did it work at all? to_data_query_result deals with a single slice.
The consumer used (compact_for_query_v2) compacts away the range tombstone
changes, and thus the only difference between consume_in_reverse::no
and consume_in_reverse::yes was that one was ordered increasing wrt. ckeys
and the other was ordered decreasing. This property is maintained if
we swap out for the consume_in_reverse::yes format.
Refs: #12353
Closes #12453
* github.com:scylladb/scylladb:
mutation{,_consumer,_partition}: remove consume_in_reverse::legacy_half_reverse
mutation_partition_view: treat query::partition_slice::option::reversed in to_data_query_result as consume_in_reverse::yes
mutation: move consume_in_reverse def to mutation_consumer.hh
Convert decompressed temporary buffers into tracked buffers just before
returning them to the upper layer. This ensures these buffers are known
to the reader concurrency semaphore and it has an accurate view of the
actual memory consumption of reads.
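A sketch of the conversion step under stated assumptions: consume_memory() stands in for however the permit accounts for memory, and pairing the buffer with the accounting units is illustrative:
```cpp
#include <utility>
#include <seastar/core/temporary_buffer.hh>

// Account a decompressed buffer against the read's permit so the reader
// concurrency semaphore sees the memory as consumed; the units object is
// assumed to release the accounting when dropped together with the buffer.
template <typename Permit>
auto make_tracked(seastar::temporary_buffer<char> buf, Permit& permit) {
    auto units = permit.consume_memory(buf.size());
    return std::make_pair(std::move(buf), std::move(units));
}
```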
Fixes: #12448
Closes #12454
The test would use a trick to start a separate Scylla cluster from the
one provided originally by the test framework. This is not supported by
the test framework and may cause unexpected problems.
Change the test to perform regular node operations. Instead of starting
a fresh cluster of 3 nodes, we join the first of these nodes to the
original framework-provided cluster, then decommission the original
nodes, then bootstrap the other 2 fresh nodes.
Also add some logging to the test.
Refs: #12438, #12442
Closes #12457
This series adds the implementation and usage of rust wasmtime bindings.
The WASM UDFs introduced by this patch are interruptible and use memory allocated using the seastar allocator.
This series includes #11102 (the first two commits) because #11102 required disabling wasm UDFs completely. This patch disables them in the middle of the series, and enables them again at the end.
After this patch, `libwasmtime.a` can be removed from the toolchain.
This patch also removes the workaround for https://github.com/scylladb/scylladb/issues/9387, but it hasn't been tested on ARM yet - if the ARM test causes issues I'll revert this part of the change.
Closes#11351
* github.com:scylladb/scylladb:
build: remove references to unused c bindings of wasmtime
test: assert that WASM allocations can fail without crashing
wasm: limit memory allocated using mmap
wasm: add configuration options for instance cache and udf execution
test: check that wasmtime functions yield
wasm: use the new rust bindings of wasmtime
rust: add Wasmtime bindings
rust: add build profiles more aligned with ninja modes
rust: adjust build according to cxxbridge's recommendations
tools: toolchain: dbuild: prepare for sharing cargo cache
In issue #3668, a discussion spanning several years theorized that several
things are wrong with the "timestamp" type. This patch begins by adding
several tests that demonstrate that Scylla is in fact behaving correctly,
and mostly identically to Cassandra except one esoteric error handling
case.
However, after eliminating the red herrings, we are left with the real
issue that prompted opening #3668, which is a duplicate of issues #2693
and #2694, and this patch also adds a reproducer for that. The issue is
that Cassandra 4 added support for arithmetic expressions on values,
and durations can be added to timestamps, for example:
So the new test - which passes on Cassandra 4 and fails on Scylla
(or Cassandra 3) is marked xfail.
Refs #2693
Refs #2694
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes#12436
The line modified in this patch was supposed to increase the
optimization level of the parsers in debug mode to 1, because they
were too slow otherwise. But as a side effect, it also reduced the
optimization level in release mode to 1. This is not a problem
for the CQL frontend, because statement preparation is not
performance-sensitive, but it is a serious performance problem
for Alternator, where parsing lies on the hot path.
Fix this by only applying -O1 to debug modes.
Fixes #12463
Closes #12460
Before the changes introducing the new wasmtime bindings, we relied
on a downloaded static library, libwasmtime.a. Now that the bindings
are introduced, we no longer rely on it, so all references to
it can be removed.
The main source of big allocations in the WASM UDF implementation
is the WASM Linear Memory. We do not want Scylla to crash even if
a memory allocation for the WASM Memory fails, so we assert that
an exception is thrown instead.
The wasmtime runtime does not actually fail on an allocation failure
(assuming the memory allocator does not abort and returns nullptr
instead - which our seastar allocator does). What happens then
depends on the failed-allocation handling of the code that was
compiled to WASM. If the original code threw an exception or aborted,
the resulting WASM code will trap. To make sure that we can handle
the trap, we need to allow wasmtime to handle SIGILL signals, because
that is what is used to carry information about WASM traps.
The new test uses a special WASM Memory allocator that fails after
n allocations, and the allocations include both memory growth
instructions in WASM, as well as growing memory manually using the
wasmtime API.
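A minimal sketch of such an allocator; the struct and its interface are illustrative, not the test's actual code:
```cpp
#include <cstddef>
#include <new>

// Succeeds `remaining` times and then returns nullptr, simulating an
// allocation failure without aborting, like the seastar allocator.
struct failing_allocator {
    size_t remaining;

    void* allocate(size_t size) {
        if (remaining == 0) {
            return nullptr; // simulated failure
        }
        --remaining;
        return ::operator new(size, std::nothrow);
    }
};
```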
Signed-off-by: Wojciech Mitros <wojciech.mitros@scylladb.com>
The wasmtime runtime allocates memory for the executable code of
the WASM programs using mmap, not the seastar allocator. As
a result, the memory that Scylla actually uses becomes not just
the memory preallocated for the seastar allocator, but the sum of
that and the memory allocated for executable code by the WASM
runtime.
To keep limiting the memory used by Scylla, we measure how much
memory the WASM programs use, and if they use too much, compiled
WASM UDFs (modules) that are currently not in use are evicted to
make room.
To evict a module it is required to evict all instances of this
module (the underlying implementation of modules and instances uses
shared pointers to the executable code). For this reason, we add
reference counts to modules. Each instance using a module is a
reference. When an instance is destroyed, a reference is removed.
If all references to a module are removed, the executable code
for this module is deallocated.
The eviction of a module is actually achieved by evicting all of
its references. When we want to free memory for a new module, we
repeatedly evict instances from the wasm_instance_cache using its
LRU strategy until some module loses all its instances. This
process may not succeed if the instances currently in use (so not
in the cache) use too much memory - in this case the query also
fails. Otherwise the new module is added to the tracking system.
This strategy may evict some instances unnecessarily, but evicting
modules should not happen frequently, and any more efficient
solution requires an even bigger intervention into the code.
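A sketch of that eviction loop under stated assumptions; the cache interface (evict_lru(), module_refcount()) is illustrative, not the actual API:
```cpp
// Evict LRU instances until some module drops its last reference, at which
// point its executable code is reclaimed. Returns false if everything left
// is in use, in which case the caller fails the query.
template <typename Cache>
bool try_reclaim_module_memory(Cache& cache) {
    while (!cache.empty()) {
        auto evicted = cache.evict_lru(); // drops one reference to its module
        if (evicted.module_refcount() == 0) {
            return true; // the module's executable code was freed
        }
    }
    return false;
}
```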
Different users may require different limits for their UDFs. This
patch allows them to configure the size of their WASM instance cache,
the maximum size of individual instances stored in the cache, the
time after which the instances are evicted, the fuel that all WASM
UDFs are allowed to consume before yielding (for the control of
latency), the fuel that WASM UDFs are allowed to consume in total
(to allow performing longer computations in the UDF without
detecting an infinite loop), and the hard limit on the size of UDFs
that are executed (to avoid large allocations).
The new implementation for WASM UDFs allows executing the UDFs
in pieces. This commit adds a test asserting that the UDF is in fact
divided and that each of the execution segments takes no longer than
1ms.
This patch replaces all dependencies on the wasmtime
C++ bindings with our new ones.
The wasmtime.hh and wasm_engine.hh files are deleted.
The libwasmtime.a library is no longer required by
configure.py. The SCYLLA_ENABLE_WASMTIME macro is
removed and wasm udfs are now compiled by default
on all architectures.
In terms of implementation, most of the code using
wasmtime was moved to the Rust source files. The
remaining code uses names from the new bindings
(which are mostly unchanged). Most wasmtime objects
are now stored as a rust::Box<>, to make them compatible
with Rust lifetime requirements.
Signed-off-by: Wojciech Mitros <wojciech.mitros@scylladb.com>
The C++ bindings provided by wasmtime are lacking a crucial
capability: asynchronous execution of the wasm functions.
This forces us to stop the execution of the function after
a short time to prevent increasing the latency. Fortunately,
this feature is implemented in the native language
of Wasmtime - Rust. Support for Rust was recently added to
Scylla, so we can implement the async bindings ourselves,
which is done in this patch.
The bindings expose all the objects necessary for creating
and calling wasm functions. The majority of code implemented
in Rust is a translation of code that was previously present
in C++.
Types exported from Rust are currently required to be defined
by the same crate that contains the bridge using them, so
wasmtime types can't be exported directly. Instead, for each
class that was supposed to be exported, a wrapper type is
created, where its first member is the wasmtime class. Note
that the members are not visible from C++ anyway, the
difference only applies to Rust code.
Aside from wasmtime types and methods, two additional types
are exported with some associated methods.
- The first one is ValVec, which is a wrapper for a rust Vec
of wasmtime Vals. The underlying vector is required by
wasmtime methods for calling wasm functions. By having it
exported we avoid multiple conversions from a Val wrapper
to a wasmtime Val, as would be required if we exported a
rust Vec of Val wrappers (the rust Vec itself does not
require wrappers if the type it contains is already wrapped)
- The second one is Fut. This class represents a computation
that may or may not be ready. We're currently using it
to control the execution of wasm functions from C++. This
class exposes one method, resume(), which returns a bool
that signals whether the computation is finished or not.
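A sketch of how the C++ side could drive such a future; resume() comes from the description above, while the wrapper and the yield call are assumptions:
```cpp
#include <seastar/core/future.hh>
#include <seastar/coroutine/maybe_yield.hh>

// Poll the Rust-side computation, yielding to the reactor between
// resumptions so a long-running WASM UDF cannot monopolize the shard.
template <typename Fut>
seastar::future<> drive_wasm_call(Fut& fut) {
    while (!fut.resume()) {
        co_await seastar::coroutine::maybe_yield();
    }
}
```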
Signed-off-by: Wojciech Mitros <wojciech.mitros@scylladb.com>
A cargo profile is created for each of the build modes: dev, debug,
sanitize, release and coverage. The names of the cargo profiles are
prefixed with "rust-" because cargo does not allow separate "dev"
and "debug" profiles.
The main difference between the profiles is their optimization levels,
which correlate to the levels used in configure.py. The debug info
is stripped only in the dev mode, and only this mode uses
"incremental" compilation to speed it up.
Currently, the rust build system in Scylla creates a separate
static library for each included rust package. This could cause
duplicate-symbol issues when linking against multiple libraries
compiled from rust.
This issue is fixed in this patch by creating a single static library
to link against, which combines all rust packages implemented in
Scylla.
The Cargo.lock for the combined build is now tracked, so that all
users of the same scylla version also use the same versions of
imported rust modules.
Additionally, the rust package implementation and usage
docs are modified to be compatible with the build changes.
This patch also adds a new header file 'rust/cxx.hh' that contains
definitions of additional rust types available in c++.
Rust's cargo caches downloaded sources in ~/.cargo. However dbuild
won't provide access to this directory since it's outside the source
directory.
Prepare for sharing the cargo cache between the host and the dbuild
environment by:
- Creating the cache if it doesn't already exist. This is likely if
the user only builds in a dbuild environment.
- Propagating the cache directory as a mounted volume.
- Respecting the CARGO_HOME override.
It's been a long while since we built ScyllaDB for s390x, and in
fact the last time I checked, it was broken due to the ragel parser
generator generating bad source files for the HTTP parser. So just
drop it from the list.
I kept s390x in the architecture mapping table since it's still valid.
Closes#12455
Aborting of a repair operation is fully managed by the task manager.
Repair tasks are aborted:
- on shutdown; top level repair tasks subscribe to the global abort source, and on shutdown all tasks are aborted recursively
- through node operations (applies to data_sync_repair_task_impls and their descendants only); data_sync_repair_task_impl subscribes to the node_ops_info abort source
- with the task manager api (top level tasks are abortable)
- with the storage_service api and on failure; these cases were modified to be aborted the same way as the ones above.
Closes#12085
* github.com:scylladb/scylladb:
repair: make top level repair tasks abortable
repair: unify a way of aborting repair operations
repair: delete sharded abort source from node_ops_info
repair: delete unused node_ops_info from data_sync_repair_task_impl
repair: delete redundant abort subscription from shard_repair_task_impl
repair: add abort subscription to data sync task
tasks: abort tasks on system shutdown
Similar to the way we allow aborting streaming-based
removenode, subscribe to storage_service::_abort_source
to request abort locally and pass a shared_ptr<abort_source>
to `node_ops_info`, used to abort removenode_with_repair
on shutdown.
Fixes #12429
Closes #12430
* github.com:scylladb/scylladb:
storage_service: restore_replica_count: demote status_checker related logging to debug level
storage_service: restore_replica_count: allow aborting removenode_with_repair
storage_service: coroutinize restore_replica_count
storage_service: restore_replica_count: undefer stop_status_checker
storage_service: restore_replica_count: handle exceptions from stream_async and send_replication_notification
storage_service: restore_replica_count: coroutinize status_checker
Unlike other experimental features, we want Raft to be opt-in even
after it leaves experimental mode. For that we need a separate
option to enable it. The patch adds the binary option "consistent-cluster-management"
for that.
* 'consistent-cluster-management-flag' of github.com:scylladb/scylla-dev:
raft: replace experimental raft option with dedicated flag
main: move supervisor notification about group registry start where it actually starts
Sometimes we may need task status to be nothrow move constructible.
httpd::task_manager_json::task_status does not satisfy this requirement.
retrieve_status returns future<full_task_status> instead of future<task_status>
to provide an intermediate struct with better properties. The argument
is passed by reference to avoid having to copy the foreign_ptr.
Fixes https://github.com/scylladb/scylladb/issues/12314
This PR adds the upgrade guide for ScyllaDB Enterprise - from version
2022.1 to 2022.2. Using this opportunity, I've replaced "Scylla" with
"ScyllaDB" in the upgrade-enterprise index file.
In previous releases, we added several upgrade guides - one per platform
(and version). In this PR, I've merged the information for different
platforms to create one generic upgrade guide. It is similar to what
@kbr- added for the Open Source upgrade guide from 5.0 to 5.1. See
https://docs.scylladb.com/stable/upgrade/upgrade-opensource/upgrade-guide-from-5.0-to-5.1/.
Closes#12339
* github.com:scylladb/scylladb:
docs: add the info about minor release
docs: add the new upgrade guide 2022.1 to 2022.2 to the index and the toctree
docs: add the index file for the new upgrade guide from 2022.1 to 2022.2
docs: add the metrics update file to the upgrade guide 2022.1 to 2022.2
docs: add the upgrade guide for ScyllaDB Enterprise from 2022.1 to 2022.2
The status_checker is not the main line of business
of restore_replica_count; starting and stopping it
do not seem to deserve info-level logging, which
might have been useful in the past to debug issues
surrounding that.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Now that all exceptions in the rest of the function
are swallowed, just execute the stop_status_checker
deferred action serially before returning, on the
way to coroutinizing restore_replica_count (since
we can't co_await status_checker inside the deferred
action).
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
On the way to coroutinizing restore_replica_count,
extract awaiting stream_async and send_replication_notification
into try/catch blocks so we can later undefer stop_status_checker.
The exception is still returned as an exceptional future,
which is logged by the caller as a warning.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
There is no need to start a thread for the status_checker;
it can be implemented using a background coroutine.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Timeouts are benign, especially on a read-ahead that turned out to be not
needed at all. They just introduce noise in the logs, so silence them.
Fixes: #12435
Closes #12441
raft_group0 used to register RPC verbs only on shard 0. This worked on
clusters with the same --smp setting on all nodes, since RPCs in this
case are processed on the same shard as the calling code, and
raft_group0 methods only run on shard 0.
A new test, test_nodes_with_different_smp, was added to identify the
problem. Since --smp can only be specified via the command line, a
corresponding parameter was added to the ManagerClient.server_add
method. It allows overriding the default parameters set by the
SCYLLA_CMDLINE_OPTIONS variable by changing, adding or deleting
individual items.
Fixes: #12252
Closes #12374
* github.com:scylladb/scylladb:
raft: raft_group0, register RPC verbs on all shards
raft: raft_append_entries, copy entries to the target shard
test.py, allow to specify the node's command line in test
Due to the lack of the NDEBUG macro, inlining was disabled. It's
important for parsing and printing performance.
Testing with perf_simple_query shows that the patch saves around
7000 insns/op, thus increasing median tps by 4.2% for the alternator frontend.
Because inlined functions are called for every character
in the json, this scales with request/response size. When
the default write size is increased by around 7x (from ~180 to ~1255
bytes), the median tps increased by 12%.
Running:
./build/release/test/perf/perf_simple_query_g --smp 1 \
--alternator forbid --default-log-level error \
--random-seed=1235000092 --duration=60 --write
Results before the patch:
median 46011.50 tps (197.1 allocs/op, 12.1 tasks/op, 170989 insns/op, 0 errors)
median absolute deviation: 296.05
maximum: 46548.07
minimum: 42955.49
Results after the patch:
median 47974.79 tps (197.1 allocs/op, 12.1 tasks/op, 163723 insns/op, 0 errors)
median absolute deviation: 303.06
maximum: 48517.53
minimum: 44083.74
The change affects both json parsing and printing.
Closes#12440
Run tests for parallelized aggregation with
`enable_parallelized_aggregation` set always to true, so the tests work
even if the default value of the option is false.
Closes#12409
Now that we don't accept cql protocol versions 1 or 2, we can
drop cql_serialization_format everywhere, except in the IDL
(since it's part of the inter-node protocol).
A few functions had duplicate versions, one with and one without
a cql_serialization_format parameter. They are deduplicated.
Care is taken that `partition_slice`, which communicates
the cql_serialization_format across nodes, still presents
a valid cql_serialization_format to other nodes when
transmitting itself, and rejects the protocol 1 and 2 serialization
format when receiving. The IDL is unchanged.
One test checking the 16-bit serialization format is removed.
Version 3 was introduced in 2014 (Cassandra 2.1) and was supported
in the very first version of Scylla (2a7da21481 "CQL binary protocol").
Cassandra 3.0 (2015) dropped protocols 1 and 2 as well.
It's safe enough to drop them now, 9 years after the introduction of v3
and 7 years after Cassandra stopped supporting them.
Dropping them allows dropping cql_serialization_format, which causes
quite a lot of pain, and is probably broken. This will be done in the
following patch.
* seastar 3db15b5681...ca586cfb8d (28):
> reactor: trim returned buffer to received number of bytes
> util/process: include used header
> build: drop unused target_include_directories()
> build: use BUILD_IN_SOURCE instead of chdir <SOURCE_DIR>
> build: specify CMake policy CMP0135 to new
> tests: only destroy allocated pending connections
> build: silence the output when generating private keys
> tests, httpd: Limit loopback connection factory sharding
> lw_shared_ptr: Add nullptr_t comparing operators
> noncopyable_function: Add concept for (Func func) constructor
> reactor: add process::terminate() and process::kill()
> Merge 'tests, include: include headers without ".." in path' from Kefu Chai
> build: customize toolset for building Boost
> build: use different toolset base on specified compiler
> allocator: add an option to reserve additional memory for the OS
> Merge 'build: pass cflags and ldflags to cooking.sh' from Kefu Chai
> build: build static library of cryptopp
> gate: add gate holders debugging
> build: detect debug build of yaml-cpp also
> build: do not use pkg_search_module(IMPORTED_TARGET) for finding yaml-cpp
> build: bump yaml-cpp to 0.7.0 in cooking_recipe
> build: bump cryptopp to 8.7.0 in cooking_recipe
> build: bump boost to 1.81.0 in cooking_recipe
> build: bump fmtlib to 9.1.0 in cooking_recipe
> shared_ptr: add overloads for fmt::ptr()
> chunked_fifo: const_iterator: use the base class ctor
> build: s/URING_LIBARIES/URING_LIBRARIES/
> build: export the full path of uring with URING_LIBRARIES
Closes#12434
In very slow debug builds the default driver timeouts are too low and
tests might fail. Bump up the values to a more reasonable time.
Signed-off-by: Alejo Sanchez <alejo.sanchez@scylladb.com>
Closes#12408
The alternator compatibility.md document mentions the missing ACL
(access control) feature, but unlike other missing features we
forgot to link to the open issue about this missing feature.
So let's add that link.
Refs #5047.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes#12399
Fixes https://github.com/scylladb/scylladb/issues/12318
This PR removes all occurrences of the `auto_bootstrap` option in the docs.
In most cases, I've simply removed the option name and its definition, but sometimes additional changes were necessary:
- In node-joined-without-any-data.rst, I removed the `auto_bootstrap` option as one of the causes of the problem.
- In rebuild-node.rst, I removed the first step in the procedure (enabling the `auto_bootstrap` option).
- In admin.rst, I removed the section about manual bootstrapping - it's based on setting `auto_bootstrap` to false, which is not possible now.
Closes#12419
* github.com:scylladb/scylladb:
docs: remove the auto_bootstrap option from the admin procedures - involves removing the Manual Bootstrapping section
docs: remove the auto_bootstrap option from the procedure to replace a dead node
docs: remove the auto_bootstrap option from the Troubleshooting article about a node joining with no data
docs: remove the auto_bootstrap option from the procedure to rebuild a node after losing the data volume
docs: remove the auto_bootstrap option from the procedures to create a cluster or add a DC
And the infrastructure in reader_permit to update them. The
infrastructure is not wired in yet.
These metrics will be used to count the number of reads that have gone to
disk and the number of sstables currently being read, respectively.
Rename to reads_memory_consumption and drop the "active" from the
description as well. This metric tracks the memory consumption of all
reads, active or inactive. We don't currently even have a way to track
the memory consumption of only active reads.
Drop the part of the description which explains the interaction with
other metrics: it is outdated, and the new interactions are much too
complicated to explain in a metric description.
Also ask the semaphore to calculate the memory amount, instead of doing
it in the metric itself.
raft_group0 used to register RPC verbs only on shard 0.
This worked on clusters with the same --smp setting on
all nodes, since RPCs in this case are (usually)
processed on the same shard as the calling code,
and raft_group0 methods only run on shard 0.
A new test test_nodes_with_different_smp was added
to identify the problem.
Fixes: #12252
This metric has been broken for a long time, since inactive reads were
introduced. As calculated currently, it includes all permits that passed
admission, including inactive reads. On the other hand, it excludes
permits created bypassing admission.
Fix by using the newly introduced (in this patch)
reader_concurrency_semaphore::active_reads() as the basis of this
metric: this now includes all permits (reads) that are currently active,
excluding waiters and inactive reads.
If the append_entries RPC is received on a non-zero shard, we may
need to pass it to shard zero (or, potentially, some other shard).
The problem is that raft::append_request contains entries in the form
of raft::log_entry_ptr == lw_shared_ptr<log_entry>, which doesn't
support cross-shard reference counting. In debug mode it contains
a special ref-counting facility, debug_shared_ptr_counter_type,
which resorts to on_internal_error if it detects such a case.
To solve this, we just copy log entries to the target shard if it
isn't equal to the current one. In most cases, if the --smp setting
is the same on all nodes, the RPC will be handled on shard zero,
so there will be no overhead.
An optional parameter, cmdline, has been added to
the ManagerClient.server_add method.
It allows you to override the default parameters
set by the SCYLLA_CMDLINE_OPTIONS variable
by changing, adding or deleting individual
items. To change or add a parameter, just specify
its name and value one after the other.
To remove a parameter, use the special keyword
__remove__ as the value. To set a parameter
without a value (such as --overprovisioned),
use the special keyword __missing__ as the value.
Add to test/cql-pytest/README.md an explanation of the philosophy
of the cql-pytest test suite, and some guidelines on how to write
good tests in that framework.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes#12400
The PR introduces changes to the task manager api:
- extends the task list returned by get_tasks with task type,
keyspace, table, entity, and sequence number
- extends the status returned by get_task_status and wait_task
with a list of children's ids
Closes#12338
* github.com:scylladb/scylladb:
api: extend status in task manager api
api: extend get_tasks in task manager api
Fixes https://github.com/scylladb/scylladb/issues/11999.
This PR adds a description of scylla-api-cli.
Closes#12392
* github.com:scylladb/scylladb:
docs: fix the description of the system log POST example
docs: update the curl tool name
docs: describe how to use the scylla-api-client tool
docs: fix the scylla-api-client tool name
docs: document scylla-api-cli
When closing _lower_bound and _upper_bound
in the final close() call, they are currently left with
an engaged current_list member.
If the index_reader uses a _local_index_cache,
it is evicted with evict_gently, which will, rightfully,
see the respective pages as referenced, so they won't be
evicted gently (only later, when the index_reader is destroyed).
Reset index_bound.current_list on close(index_bound&)
to free up the reference.
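A sketch of the fix with minimal stand-in types; the member names follow the description above, everything else is an assumption:
```cpp
#include <memory>

// Stand-ins: a cached index page list held by a bound of the index reader.
struct partition_index_page_list {};
struct index_bound {
    std::shared_ptr<partition_index_page_list> current_list;
};

// On close, drop the bound's page reference so a subsequent evict_gently()
// on the local index cache no longer sees the page as referenced.
inline void close_bound(index_bound& bound) {
    // ... close the bound's readers and context (elided) ...
    bound.current_list = nullptr;
}
```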
Ref #12271
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Closes#12370
Since [repair: Always use run_replace_ops](2ec1f719de), nodes no longer publish the HIBERNATE state, so we don't need to support handling it.
Replace is now always done using node operations (using repair or streaming),
so nodes are never expected to change status to HIBERNATE.
Therefore, storage_service::handle_state_replacing is not needed anymore.
This series gets rid of it and updates the documentation related to STATUS:HIBERNATE accordingly.
Fixes #12330
Closes #12349
* github.com:scylladb/scylladb:
docs: replace-dead-node: get rid of hibernate status
storage_service: get rid of handle_state_replacing
We already check if the remote node's topology is missing before creating a
connection, but the local node's topology can be missing too when we
use raft to manage it. Raft needs to be able to create connections before
the topology is known.
Message-Id: <20221228144944.3299711-7-gleb@scylladb.com>
database_test is timing out because it has to run the tests calling
do_with_cql_env_and_compaction_groups 3x, once for each compaction group
setting. Reduce it to 2 settings instead of 3 when running in debug mode.
Refs #12396.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Closes#12421
This lets us carry fewer things and rely on the distribution
for maintenance.
The frozen toolchain is updated. Incidental updates include clang 15.0.6,
and pytest that doesn't need workarounds.
Closes#12397
This reverts commit ac2e2f8883. It causes
a regression ("std::bad_variant_access in load_view_build_progress").
Commit 2978052113 (a reindent) is also reverted as part of
the process.
Fixes#12395
In commit acfa180766 we added to
test/cql-pytest a mechanism to detect when Scylla crashes in the middle
of a test function - in which case we report the culprit test and exit
immediately to avoid having a hundred more tests report that they failed
as well just because Scylla was down.
However, if Scylla was *never* up - e.g., if the user ran "pytest" without
ever running Scylla - we still report hundreds of tests as having failed,
which is confusing and not helpful.
So with this patch, if a connection cannot be made to Scylla at all,
the test exits immediately, explaining what went wrong, not blaming
any specific test:
$ pytest
...
! _pytest.outcomes.Exit: Cannot connect to Scylla at --host=localhost --port=9042 !
============================ no tests ran in 0.55s =============================
Beyond being a helpful reminder for a developer who runs "pytest" without
having started Scylla first (instead of using test/cql-pytest/run or test.py
to start Scylla easily), this patch is also important when running tests
through test.py, if it reuses an instance of Scylla that crashed during an
earlier pytest file's run.
This patch does not fix test.py - it can still try to run pytest with
a dead Scylla server without checking. But at least with this patch
pytest will notice this problem immediately and won't report hundreds of
test functions as having failed. The only report the user will see will be
for the last test - the one which crashed Scylla - which will make it easier
to find this failure without it being hidden among hundreds of spurious failures.
Fixes#12360
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes#12401
This patch enables offstrategy compaction for all classic streaming-based
node ops. We can use this method because tables are streamed one
after another. As long as there is still streamed data for a given
table, we update the automatic trigger timer. When all the streaming has
finished, the trigger timer will time out and fire the offstrategy
compaction for the given table.
I checked that with this patch, rebuild is 3X faster. There was no compaction
in the middle of the streaming. The streamed sstables are compacted
together after streaming is done.
Time Before:
INFO 2022-11-25 10:06:08,213 [shard 0] range_streamer - Rebuild
succeeded, took 67 seconds, nr_ranges_remaining=0
Time After:
INFO 2022-11-25 09:42:50,943 [shard 0] range_streamer - Rebuild
succeeded, took 23 seconds, nr_ranges_remaining=0
Compaction Before:
88 sstables were written -> 88 sstables were added into the main set
Compaction After:
88 sstables were written -> after offstrategy, 2 sstables were added into the main set
Closes#11848
invoke_on_task is used in translation units where its definition is not
visible, yet it has no explicit instantiations. If the compiler
decides to always inline the definition rather than instantiate it
implicitly, linking invoke_on_task will fail. (It happened to me when I
turned up the inline threshold.) Fix that.
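A generic, self-contained illustration of the failure mode and the fix; the names are made up for the example, not taken from the task manager code:
```cpp
// lib.hh - callers in other translation units only see this declaration.
template <typename T> T twice(T v);

// lib.cc - the definition lives in one translation unit.
template <typename T> T twice(T v) { return v + v; }

// Explicit instantiation: forces the compiler to emit the symbol here, so
// callers link successfully even if implicit instantiation never happens
// on their side (e.g. because every local call was inlined).
template int twice<int>(int);
```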
Closes#12387
In very slow debug builds the default driver timeouts are too low and
tests might fail. Bump up the values to a more reasonable time.
These timeout values are the same as used in topology tests.
Signed-off-by: Alejo Sanchez <alejo.sanchez@scylladb.com>
Closes#12405
When the mutation compactor has all the rows it needs for a page, it
saves the decision to stop in a member flag: _stop.
For single partition queries, the mutation compactor is kept alive
across pages and so it has a method, start_new_page() to reset its state
for the next page. This method didn't clear the _stop flag. This meant
that the value set at the end of the previous could cause the new page
and subsequently the entire query to be stopped prematurely.
This can happen if the new page starts with a row that is covered by a
higher level tombstone and is completely empty after compaction.
Reset the _stop flag in start_new_page() to prevent this.
This commit also adds a unit test which reproduces the bug.
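A minimal sketch of the fix; the member names mirror the description above, but the surrounding class is illustrative:
```cpp
// Illustrative only; the real compactor resets more per-page state.
struct compactor_state {
    bool _stop = false; // set once the page has all the rows it needs

    void start_new_page() {
        // Without this reset, the stop decision recorded at the end of the
        // previous page leaks into the new page and can end the query early.
        _stop = false;
        // ... reset the rest of the per-page state (elided) ...
    }
};
```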
Fixes: #12361
Closes #12384
On some docker instance configurations, hostname resolution does not
work, so our script will fail on startup, because we use hostname -i to
construct cqlshrc.
To prevent the error, we can use --rpc-address or --listen-address
for the address, since it should be the same.
Fixes #12011
Closes #12115
In case a table is dropped, we should ignore it in the repair_updater,
since we cannot update the off-strategy trigger for a dropped table.
Refs #12373
Closes #12388
Every hour, the compaction manager submits every registered table_state
for a regular compaction attempt, all without yielding.
This can potentially cause a reactor stall if there are thousands of table
states, as compaction strategy heuristics will run on behalf of each,
and processing all buckets and picking the best one is not cheap.
This problem can be magnified with compaction groups, as each group
is represented by a table state.
This might appear on a dashboard as periodic stalls, every hour, misleading
the investigator into believing that the problem is caused by some
scheduled job.
This is fixed by piggybacking on the compaction reevaluation loop, which
can yield between each submission attempt if needed.
Fixes#12390.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Closes#12391
They are currently missing from the printout
when a table is created, but they are essential
to understanding the mode with which tombstones are
garbage-collected in the table. gcGraceSeconds alone
is no longer enough since the introduction of
tombstone_gc_option in a8ad385ecd.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Closes#12381
In issue #10767, concerns were raised that the CLUSTERING ORDER BY
clause is handled incorrectly in a CREATE MATERIALIZED VIEW definition.
The tests in this patch try to explore the different ways in which
CLUSTERING ORDER BY can be used in CREATE MATERIALIZED VIEW, and allow
us to compare Scylla's behavior to Cassandra's, and to common sense.
The tests discover that the CLUSTERING ORDER BY feature in materialized
views generally works as expected, but there are *three* differences
between Scylla and Cassandra in this feature. We consider two of the
differences to be bugs (and hence the tests are marked xfail) and one a
Scylla extension:
1. When a base table has a reverse-order clustering column and this
clustering column is used in the materialized view, in Cassandra
the view's clustering order inherits the reversed order. In Scylla,
the view's clustering order reverts to the default order.
Arguably, both behaviors can be justified, but usually when in doubt
we should implement Cassandra's behavior - not pick a different
behavior, even if the different behavior is also reasonable. So
this test (test_mv_inherit_clustering_order()) is marked "xfail",
and a new issue was created about this difference: #12308.
If we want to fix this behavior to match Cassandra's we should also
consider backward compatibility - what happens if we change this
behavior in Scylla now, after we had the opposite behavior in
previous releases? We may choose to enshrine Scylla's Cassandra-
incompatible behavior here - and document this difference.
2. The CLUSTERING ORDER BY should, as its name suggests, only list
clustering columns. In Scylla, specifying other things, like regular
columns, partition-key columns, or non-existent columns, is silently
ignored, whereas it should result in an Invalid Request error (as it
does in Cassandra). So test_mv_override_clustering_order_error()
is marked "xfail".
This is the difference already discovered in #10767.
3. When a materialized view has several clustering columns, Cassandra
requires that a CLUSTERING ORDER BY clause, if present, must specify
the order of *all* clustering columns. Scylla, in contrast,
allows the user to override the order of only *some* of these columns -
and the rest get the default order. I consider this to be a
legitimate Scylla extension, and not a compatibility bug, so I marked
the test with "scylla_only", and no issue was opened about it.
Refs #10767
Refs #12308
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes#12307
This patch adds scylla_inject_error(), a context manager which tests
can use to temporarily enable some error injection while some test
code is running. It can be used to write tests that artificially
inject certain errors instead of trying to reach the elaborate (and
often requiring precise timing or high amounts of data) situation where
they occur naturally.
The error-injection API is Scylla-specific (it uses the Scylla REST API)
and does not work on "release"-mode builds (all other modes are supported),
so when Cassandra or a release-mode build is being tested, a test which
uses scylla_inject_error() gets skipped.
Example usage:
```python
from rest_api import scylla_inject_error

with scylla_inject_error(cql, "injection_name", one_shot=True):
    # do something here
    ...
```
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes#12264
Retrieves the configuration item with the given name and prints its
value as well as its metadata.
Example:
(gdb) scylla get-config-value compaction_static_shares
value: 100, type: "float", source: SettingsFile, status: Used, live: MustRestart
Closes#12362
* github.com:scylladb/scylladb:
scylla-gdb.py: add scylla get-config-value gdb command
scylla-gdb.py: extract $downcast_vptr logic to standalone method
test: scylla-gdb/run: improve diagnostics for failed tests
These options have been nonsense since 2017.
--pie and --so are ignored, --static disables (sic!) static linking of
libraries.
Remove them.
Closes#12366
Due to an oversight, the local index cache isn't evicted gently
when _upper_bound exists. This is a source of reactor stalls.
Fix that.
Fixes #12271
Closes #12364
The consume_in_reverse::legacy_half_reverse format is soon to be phased
out. This commit starts treating frozen_mutations from replicas for
reversed queries so that they are consumed with consume_in_reverse::yes.
Currently the scylla tools (`scylla-types` and `scylla-sstable`) have documentation in two places: high level documentation can be found at `docs/operating-scylla/admin-tools/scylla-{types,sstable}.rst`, while low level, more detailed documentation is embedded in the tool itself. This is especially pronounced for `scylla-sstable`, which only has a short description of its operations online, all details being found only in the command-line help.
We want to move away from this model, such that all documentation can be found online, with the command-line help being reserved to documenting how the various switches and flags work, on top of a short description of the operation and a link to the detailed online docs.
Closes#12284
* github.com:scylladb/scylladb:
tool/scylla-sstable: move documentation online
docs: scylla-sstable.rst: add sstable content section
docs: scylla-{sstable,types}.rst: drop Syntax section
Allows static configuration of the number of compaction groups per table per shard.
To bootstrap the project, a config option x_log2_compaction_groups was added, which controls both the number of groups and the partitioning within a shard.
With a value of 0 (the default), there is 1 compaction group, so all tokens go there.
With a value of 3, there are 8 compaction groups, with the 3 most significant bits of a token deciding which group owns it.
And so on.
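A sketch of the token-to-group mapping under stated assumptions (a 64-bit signed token, as with Murmur3 partitioning); the helper is illustrative, not the actual replica code:
```cpp
#include <cstdint>

// Map a token to its compaction group by taking its x_log2_compaction_groups
// most significant bits, after shifting the signed token range into unsigned
// space so that token ordering is preserved.
unsigned compaction_group_of(int64_t token, unsigned x_log2_compaction_groups) {
    if (x_log2_compaction_groups == 0) {
        return 0; // a single group owns all tokens
    }
    uint64_t biased = uint64_t(token) + (uint64_t(1) << 63);
    return unsigned(biased >> (64 - x_log2_compaction_groups));
}
```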
It's still missing:
- integration with repair / streaming
- integration with reshard / reshape.
perf/perf_simple_query --smp 1 --memory 1G
BEFORE
-----
median 61358.55 tps ( 71.1 allocs/op, 12.2 tasks/op, 56375 insns/op, 0 errors)
median 61322.80 tps ( 71.1 allocs/op, 12.2 tasks/op, 56391 insns/op, 0 errors)
median 61058.58 tps ( 71.1 allocs/op, 12.2 tasks/op, 56386 insns/op, 0 errors)
median 61040.94 tps ( 71.1 allocs/op, 12.2 tasks/op, 56381 insns/op, 0 errors)
median 61118.40 tps ( 71.1 allocs/op, 12.2 tasks/op, 56379 insns/op, 0 errors)
AFTER
-----
median 61656.12 tps ( 71.1 allocs/op, 12.2 tasks/op, 56486 insns/op, 0 errors)
median 61483.29 tps ( 71.1 allocs/op, 12.2 tasks/op, 56495 insns/op, 0 errors)
median 61638.05 tps ( 71.1 allocs/op, 12.2 tasks/op, 56494 insns/op, 0 errors)
median 61726.09 tps ( 71.1 allocs/op, 12.2 tasks/op, 56509 insns/op, 0 errors)
median 61537.55 tps ( 71.1 allocs/op, 12.2 tasks/op, 56491 insns/op, 0 errors)
Closes#12139
* github.com:scylladb/scylladb:
test: mutation_test: Test multiple compaction groups
test: database_test: Test multiple compaction groups
test: database_test: Adapt it to compaction groups
db: Add config for setting static number of compaction groups
replica: Introduce static compaction groups
test: sstable_test: Stop referencing single compaction group
api: compaction_manager: Stop a compaction type for all groups
api: Estimate pending tasks on all compaction groups
api: storage_service: Run maintenance compactions on all compaction groups
replica: table: Adapt assertion to compaction groups
replica: database: stop and disable compaction on behalf of all groups
replica: Introduce table::parallel_foreach_table_state()
replica: disable auto compaction on behalf of all groups
replica: table: Rework compaction triggers for compaction groups
replica: Adapt table::get_sstables_including_compacted_undeleted() to compaction groups
replica: Adapt table::rebuild_statistics() to compaction groups
replica: table: Perform major compaction on behalf of all groups
replica: table: Perform off-strategy compaction on behalf of all groups
replica: table: Perform cleanup compaction on behalf of all groups
replica: Extend table::discard_sstables() to operate on all compaction groups
replica: table: Create compound sstable set for all groups
replica: table: Set compaction strategy on behalf of all groups
replica: table: Return min memtable timestamp across all groups
replica: Adapt table::stop() to compaction groups
replica: Adapt table::clear() to compaction groups
replica: Adapt table::can_flush() to compaction groups
replica: Adapt table::flush() to compaction groups
replica: Introduce parallel_foreach_compaction_group()
replica: Adapt table::set_schema() to compaction groups
replica: Add memtables from all compaction groups for reads
replica: Add memtable_count() method to compaction_group
replica: table: Reserve reader list capacity through a callback
replica: Extract addition of memtables to reader list into a new function
replica: Adapt table::occupancy() to compaction groups
replica: Adapt table::active_memtable() to compaction groups
replica: Introduce table::compaction_groups()
replica: Preparation for multiple compaction groups
scylla-gdb: Fix backward compatibility of scylla_memtables command
Said mechanism broke tools and tests to some extent: the read it executes at sstable load time means that if the sstable is broken enough to fail this read, it will fail to load, preventing diagnostic tools from loading and examining it, and preventing tests from producing broken sstables for testing purposes.
Closes#12359
* github.com:scylladb/scylladb:
sstables: allow bypassing first/last position metadata loading
sstables: sstable::{load,open_data}(): fix indentation
sstables: coroutinize sstable::open_data()
sstables: sstable::open_data(): use clear_gently() to clear token ranges
sstables: coroutinize sstable::load()
The type of the id of node operations is changed from utils::UUID
to node_ops_id. This way the ids of node operations can be easily
distinguished from the ids of other entities.
Closes#11673
* seastar 3a5db04197...3db15b5681 (27):
> build: get the full path of c-ares
> build: unbreak pkgconfig output
> http: Add 206 Partial Content response code
> http: Carry integer content_length on reply
> tls_test: drop duplicated includes
> tls_test: remove duplicated test case
> reactor: define __NR_pidfd_open if not defined
> sockets: Wait on socket peer closing the connection
> tcp: Close connection when getting RST from server
> Merge 'Enhance rpc tester with delays, timeouts and verbosity' from Pavel Emelyanov
> Merge 'build: use pkg_search_module(.. IMPORTED_TARGET ..) ' from Kefu Chai
> build: define GnuTLS_{LIBRARIES, INCLUDE_DIRS} only if GnuTLS is found
> build: use pkg_search_module(.. IMPORTED_TARGET ..)
> addr2line: extend asan regex
> abort_source: move-assign operator: call base class unlink
> coroutine: correct syntax error in doxygen comment
> demo: Extend http connection demo with https
> test: temporarily disable warning for tests triggering warnings
> tests/unit/coroutine: Include <ranges>
> sstring: Document why sstring exists at all
> test: log error when read/write to pipe fails
> test: use executables in /bin
> tests: spawn_test: use BOOST_CHECK_EQUAL() for checking equality of temporary_buffer
> docker: bump up to clang {14,15} and gcc {11,12}
> shared_ptr: ignore false alarm from GCC-12
> build: check for fix of CWG2631
> circleci: use versioned container image
Closes#12355
In the past we had issue #7933 where very long strings of consecutive
tombstones caused Alternator's paging to take an unbounded amount of
time and/or memory for a single page. This issue was fixed (by commit
e9cbc9ee85) but the two tests we had
reproducing that issue were left with the "xfail" mark.
They were also marked "veryslow" - each taking about 100 seconds - so
they didn't run by default so nobody noticed they started to pass.
In this patch I make these tests much faster (taking less than a second
together), confirm that they pass - and remove the "xfail" mark and
improve their descriptions.
The trick to making these tests faster is to not create a million
tombstones like we used to: We now know that after a string of just 10,000
tombstones ('query_tombstone_page_limit') the page should end, so
we can check specifically this number. The story is more complicated for
partition tombstones, but there too it should be a multiple of
query_tombstone_page_limit. To make the tests even faster, we change
run.py to lower the query_tombstone_page_limit from the default 10,000
to 1000. The tests work correctly even without this change, but they are
ten times faster with it.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes#12350
* Update Nixpkgs base
* Clarify some comments
* Get rid of custom-packaged cxxbridge (it's now present in Nixpkgs as
cxx-rs)
* Add missing libraries (libdeflate, libxcrypt)
* Fix expected hash of the gdb patch
* Fix a couple of small build problems
Fixes#12259
Closes#12346
* github.com:scylladb/scylladb:
build: fix Nix devenv
cql3: mark several private fields as maybe_unused
configure.py: link with more abseil libs
* Update Nixpkgs base
* Clarify some comments
* Get rid of custom-packaged cxxbridge (it's now present in Nixpkgs as
cxx-rs)
* Add missing libraries (libdeflate, libxcrypt)
* Fix expected hash of the gdb patch
* Bump Python driver to 3.25.20-scylla
Fixes#12259
Because they are indeed unused -- they are initialized, passed down
through some layers, but not actually used. No idea why only Clang 12
in debug mode in Nix devenv complains about it, though.
Specifically libabsl_strings{,_internal}.a.
This fixes failure to link tests in the Nix devenv; since presumably
all is good in other setups, it must be something weird having to do
with inlining?
The extra linked libraries shouldn't hurt in any case.
Extends mutation_test to run the tests with more than one
compaction group, in addition to a single one (default).
Piggyback on existing tests. Avoids duplication.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Extends database_test to run the tests with more than one
compaction group, in addition to a single one (default).
Piggyback on existing tests. Avoids duplication.
This caught a bug in the implementation of table::can_flush() when
snapshotting, showing the tests' usefulness.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
data_sync_repair_task_impl subscribes to corresponding node_ops_info
abort source and then, when requested, all its descendants are
aborted recursively. Thus, shard_repair_task_impl does not need
to subscribe to the node_ops_info abort source, since the parent
task will take care of aborting once it is requested.
abort_subscription and connected attributes are deleted from
the shard_repair_task_impl.
When node operation is aborted, same should happen with
the corresponding task manager's repair task.
Subscribe data_sync_repair_task_impl abort() to node_ops_info
abort_source.
This new option allows the user to control the number of compaction groups
per table per shard. It defaults to 0, which implies a single compaction
group, as today.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
This is the initial support for multiple groups.
_x_log2_compaction_groups controls the number of compaction groups
and the partitioning strategy within a single table.
The value in _x_log2_compaction_groups refers to log base 2 of the
actual number of groups.
0 means 1 compaction group.
1 means 2 groups, with the most significant bit of the token
used to pick the target group.
The group partitioner should later be abstracted to make tablet
integration easier in the future.
_x_log2_compaction_groups is still a constant but a config option
will come next.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
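A hypothetical sketch of the partitioning described above (the real selection code may differ): the log2 count selects how many of the token's most significant bits pick the target group.

#include <cstddef>
#include <cstdint>

std::size_t compaction_group_of(uint64_t token, unsigned x_log2_compaction_groups) {
    if (x_log2_compaction_groups == 0) {
        return 0; // a single compaction group
    }
    // the top x_log2_compaction_groups bits of the token pick the group
    return token >> (64 - x_log2_compaction_groups);
}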
Estimates # of compaction jobs to be performed on a table.
Adaptation is done by adding up the estimates from all groups.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
With the compaction group model, truncate_table_on_all_shards() needs
to stop and disable compaction for all groups.
replica::table::as_table_state() will be removed once no user
remains, as each table may map to multiple groups.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
This will replace table::as_table_state(). The latter will be
killed once its usage drops to zero.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Allow a table-wide compaction trigger, as well as a fine-grained trigger,
e.g. after flushing the memtable of a single group.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
discard_sstables() runs in the context of truncate, which is a table-wide
operation today, and will remain so with multiple static groups.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
clear() clears the memtable content and the cache.
The cache is shared by all groups, therefore adaptation happens by
clearing only the memtables of all groups.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
can_flush() is used externally to determine if a table has an active
memtable that can be flushed. Therefore, adaptation happens by
returning true if any of the groups can be flushed. A subsequent
flush request will flush the memtables of all groups that are ready
for it.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
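A minimal sketch of this adaptation, assuming each group can report whether its active memtable has anything to flush (type names are stand-ins, not the actual classes):

#include <algorithm>
#include <vector>

struct compaction_group {
    bool has_flushable_memtable = false;
    bool can_flush() const { return has_flushable_memtable; }
};

bool table_can_flush(const std::vector<compaction_group>& groups) {
    // true if any group is flushable; a subsequent flush() request then
    // flushes the memtables of all groups that are ready for it
    return std::any_of(groups.begin(), groups.end(),
                       [] (const compaction_group& cg) { return cg.can_flush(); });
}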
Adaptation of flush() happens by triggering a flush on the memtables of
all groups.
table::seal_active_memtable() bails out if the memtable is empty, so
it's not a problem to call flush on a group whose memtable is empty.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
This variant will be useful when iterating through groups
and performing async actions on each. It guarantees that all
groups are alive by the time they're reached in the loop.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
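A sketch of the idea under assumptions (member and pointer types assumed; the actual signature may differ): copying the shared pointer into the loop body keeps each group alive until the async action on it completes.

#include <seastar/core/future.hh>
#include <seastar/core/loop.hh>
#include <seastar/core/shared_ptr.hh>
#include <functional>
#include <vector>

struct compaction_group {};

seastar::future<> parallel_foreach_compaction_group(
        std::vector<seastar::lw_shared_ptr<compaction_group>>& groups,
        std::function<seastar::future<> (compaction_group&)> action) {
    // the lw_shared_ptr is copied into the lambda argument, so each group
    // stays alive while the action runs on it
    return seastar::parallel_for_each(groups,
            [action = std::move(action)] (seastar::lw_shared_ptr<compaction_group> cg) {
        return action(*cg);
    });
}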
set_schema() is used by the database to apply schema changes to
table components which include memtables.
Adaptation happens by setting the schema on the memtable(s) of all groups.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Let's add memtables of all compaction groups. Point queries are
optimized by picking a single group.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
add_memtables_to_reader_list() will be adapted to compaction groups.
For point queries, it will add memtables of a single group.
With the callback, add_memtables_to_reader_list() can tell its
caller the exact number of memtable readers to be added, so it
can reserve the reader capacity precisely.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
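An illustrative sketch of the callback idea (reader/memtable types are stand-ins, not Scylla's): the callback reports the exact number of memtable readers about to be added, letting the caller reserve capacity precisely.

#include <cstddef>
#include <functional>
#include <vector>

struct reader {};
struct memtable {
    reader make_reader() const { return {}; }
};

void add_memtables_to_reader_list(std::vector<reader>& readers,
                                  const std::vector<memtable>& memtables,
                                  const std::function<void (std::size_t)>& reserve) {
    reserve(memtables.size()); // the caller reserves exactly this many slots
    for (const auto& mt : memtables) {
        readers.push_back(mt.make_reader());
    }
}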
table::occupancy() provides accumulated occupancy stats from
memtables.
Adaptation happens by accumulating stats from memtables of
all groups.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
active_memtable() was fine for a single group, but with multiple groups,
there will be one active memtable per group. Let's change the
interface to reflect that.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Useful for iterating through all groups. This is an intermediate
implementation which requires an allocation, as only one group
is supported today.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
ea99750de7 ("test: give tests less-unique identifiers") made
the disambiguating ids only be unambiguous within a single test
case. This made all tests named "run" have the same name, "run.1".
Fix that by adding the suite name everywhere: in test paths, and
in junit test case names.
Fixes#12310.
Closes#12313
`ranges_to_stream` is a map of `std::unordered_multimap<dht::token_range, inet_address>` per keyspace.
On large clusters with a large number of keyspaces, copying it may cause reactor stalls, as seen in #12332.
This series eliminates this copy by using std::move and also
turns `stream_ranges` into a coroutine, adding maybe_yield calls to avoid further stalls down the road.
Fixes#12332
Closes#12343
* github.com:scylladb/scylladb:
storage_service: stream_ranges: unshare streamer
storage_service: stream_ranges: maybe_yield
storage_service: coroutinize stream_ranges
storage_service: unbootstrap: move ranges_to_stream_by_keyspace to stream_ranges
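A minimal sketch of the shape of this fix (Scylla types replaced by stand-ins): take the map by value, let callers std::move it in, and yield between keyspaces instead of copying the whole map up front.

#include <seastar/core/coroutine.hh>
#include <seastar/core/future.hh>
#include <seastar/coroutine/maybe_yield.hh>
#include <string>
#include <unordered_map>

using token_range = int;   // stand-in for dht::token_range
using inet_address = int;  // stand-in for gms::inet_address
using ranges_map = std::unordered_multimap<token_range, inet_address>;

seastar::future<> stream_ranges(std::unordered_map<std::string, ranges_map> by_keyspace) {
    for (auto& [keyspace, ranges] : by_keyspace) {
        co_await seastar::coroutine::maybe_yield(); // avoid stalls between keyspaces
        // ... build a streamer for `ranges` and start streaming ...
        (void)keyspace;
        (void)ranges;
    }
}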
With replace using node operations, the HIBERNATE
gossip status is not used anymore.
This change updates documentation to reflect that.
During replace, the replacing node shows in gossipinfo
with STATUS:NORMAL.
Also, the replaced node shows as DN in `nodetool status`
while being replaced, so remove the paragraph saying it's
not listed in `nodetool status`.
Plus, tidy up the text alignment.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Since 2ec1f719de, nodes no longer
publish the HIBERNATE state, so we don't need to support handling it.
Replace is now always done using node operations (using
repair or streaming).
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Now that stream_ranges is a coroutine,
streamer can be an automatic variable on the
coroutine stack frame.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Avoid a potentially large memory copy causing
a reactor stall with a large number of keyspaces.
Fixes#12332
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
This is to define the API an sstable needs from the underlying storage. When implementing an object-storage backend, one will need to implement these. The API looks like:
future<> snapshot(const sstable& sst, sstring dir, absolute_path abs) const;
future<> quarantine(const sstable& sst, delayed_commit_changes* delay);
future<> move(const sstable& sst, sstring new_dir, generation_type generation, delayed_commit_changes* delay);
void open(sstable& sst, const io_priority_class& pc); // runs in async context
future<> wipe(const sstable& sst) noexcept;
future<file> open_component(const sstable& sst, component_type type, open_flags flags, file_open_options options, bool check_integrity);
It doesn't have "list" or alike, because it's not a method of an individual sstable, but rather the one from sstables_manager. It will come as separate PR.
Closes#12217
* github.com:scylladb/scylladb:
sstable, storage: Mark dir/temp_dir private
sstable: Remove get_dir() (well, almost)
sstable: Add quarantine() method to storage
sstable: Use absolute/relative path marking for snapshot()
sstable: Remove temp_... stuff from sstable
sstable: Move open_component() on storage
sstable: Mark rename_new_sstable_component_file() const
sstable: Print filename(type) on open-component error
sstable: Reorganize new_sstable_component_file()
sstable: Mark filename() private
sstable: Introduce index_filename()
tests: Disclosure private filename() calls
sstable: Move wipe_storage() on storage
sstable: Remove temp dir in wipe_storage()
sstable: Move unlink parts into wipe_storage
sstable: Remove get_temp_dir()
sstable: Move write_toc() to storage
sstable: Shuffle open_sstable()
sstable: Move touch_temp_dir() to storage
sstable: Move move() to storage
sstable: Move create_links() to storage
sstable: Move seal_sstable() to storage
sstable: Tossing internals of seal_sstable()
sstable: Move remove_temp_dir() to storage
sstable: Move create_links_common() to storage
sstable: Move check_create_links_replay() to storage
sstable: Remove one of create_links() overloads
sstable: Remove create_links_and_mark_for_removal()
sstable: Indentation fix after previous patch
sstable: Coroutinize create_links_common()
sstable: Rename create_links_common()'s "dir" argument
sstable: Make mark_for_removal bool_class
sstable, table: Add sstable::snapshot() and use in table::take_snapshot
sstable: Move _dir and _temp_dir on filesystem_storage
sstable: Use sync_directory() method
test, sstable: Use component_basename in test
sstables: Move read_{digest|checksum} on sstable
Fix a bug in failure handling and log level.
Closes#12336
* github.com:scylladb/scylladb:
test.py: convert param to str
test.py: fix error level for CQL tests
The type of an operation is tied to a specific implementation
of a task. Thus, it should rather be accessed via a virtual
method in tasks::task_manager::task::impl than be
its attribute.
Closes#12326
* github.com:scylladb/scylladb:
api: delete unused type parameter from task_manager_test api
tasks: repair: api: remove type attribute from task_manager::task::status
tasks: add type() method to task_manager::task::impl
repair: add reason attribute to repair_task
After commit a57724e711, off-strategy no longer races with view
building, therefore the deletion code can be simplified to piggyback
on the mechanism for deleting all sstables atomically, meaning a crash
midway won't result in some of the files coming back to life,
which leads to unnecessary work on restart.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Closes#12245
The format_unidiff() function takes str, not a pathlib PosixPath, so
convert it to str.
The bug prevented the diff output of an unexpected result from being
shown in the log file.
Signed-off-by: Alejo Sanchez <alejo.sanchez@scylladb.com>
Recently, the pytest script shipped by Fedora started invoking python
with the `-s` flag, which disables python considering user site
packages. This caused problems for our tests which install the cassandra
driver in the user site packages. This was worked around in e5e7780f32
by providing our own pytest interposer launcher script which does not
pass the above mentioned flag to python. Said patch fixed test.py but
not the run.py in cql-pytest. So if the cql-pytest suite is run via
test.py it works fine, but if it is invoked via the run script, it fails
because it cannot find the cassandra driver. This patch changes run.py
to use our own pytest launcher script, so the suite can be run via the
run script as well.
Since run.py is shared with the alternator pytest suite, this patch also
fixes said test suite too.
Closes#12253
The inline-help of operations will only contain a short summary of the
operation and the link to the online documentation.
The move is not a straightforward copy-paste. First and foremost because
we move from simple markdown to RST. Informal references are also
replaced with proper RST links. Some small edits were also done on the
texts.
The intent is the following:
* the inline help serves as a quick reference for what the operation
does and what flags it has;
* the online documentation serves as the full reference manual,
explaining all details;
Provides a link to the architecture/sstable page for more details on the
sstable format itself. It also describes the mutation-fragment stream,
the parts of it that are relevant to the sstable operations.
The purpose of this section is to provide a target for links that want to
point to a common explanation on the topic. In particular, we will soon
move the detailed documentation of the scylla-sstable operations into
this file and we want to have a common explanation of the mutation
fragment stream that these operations can point to.
In both files, the section hierarchy is as follows:
Usage
Syntax
Sections with actual content
This scheme uses up 3 levels of hierarchy, leaving little room to
expand the sections with actual content with subsections of their own.
Remove the Syntax level altogether, directly embedding the sections with
content under the Usage section.
This PR fixes several bugs related to handling of non-full
clustering keys.
One is in trim_clustering_row_ranges_to(), which is broken for non-full keys in reverse
mode. It will trim the range to position_in_partition_view::after_key(full_key) instead of
position_in_partition_view::before_key(key), hence it will include the
key in the resulting range rather than exclude it.
Fixes#12180
after_key() was creating a position which is after all keys prefixed
by a non-full key, rather than a position which is right after that
key.
This issue will be caught by cql_query_test::test_compact_storage
in debug mode when mutation_partition_v2 merging starts inserting
sentinels at position after_key() on preemption.
It probably already causes problems for such keys as after_key() is used
in various parts of the read path.
Refs #1446
Closes#12234
* github.com:scylladb/scylladb:
position_in_partition: Make after_key() work with non-full keys
position_in_partition: Introduce before_key(position_in_partition_view)
db: Fix trim_clustering_row_ranges_to() for non-full keys and reverse order
types: Fix comparison of frozen sets with empty values
Now all storage access via sstable happens with the help of storage
class API so its internals can be finally made private.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The sstable::get_dir() is now gone; no caller knows that an sstable lives
at any path on a filesystem. There are only a few callers left.
One is several places in code that need sstable datafile, toc and index
paths to print them in logs. The other one is sstable_directory that is
to be patched separately.
For both there's a storage.prefix() method that prepends component name
with where the sstable is "really" located.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Moving an sstable to quarantine has a quirk -- if the sstable is in
the staging/ directory it's moved into the root/quarantine dir anyway, not into
the quarantine subdir of its current location.
Encapsulate this feature in a storage class method.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The snapshotting code uses full paths to files to manipulate snapshotted
sstables. Until this code is patched to use some proper snapshotting API
from sstable/ module, it will continue doing so.
However, to remove the get_dir() method from sstable, the
seal_sstable() needs to pass a relative "backup" directory to the
storage::snapshot() method. This patch adds a temporary bool_class
to make this distinction.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
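The flag sketched with seastar's bool_class idiom, which the storage API listing earlier in this log already shows as snapshot()'s absolute_path parameter (the tag name here is assumed):

#include <seastar/util/bool_class.hh>

using absolute_path = seastar::bool_class<struct absolute_path_tag>;

// a caller passes absolute_path::yes or absolute_path::no instead of a bare
// bool, so the intent is readable at the call site, e.g.:
// storage.snapshot(sst, dir, absolute_path::no); // `dir` is relative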
There's a bunch of helpers around the XFS-specific temp dir sitting in
the public sstable part. Drop them altogether; no code really needs them.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The file path is going to disappear soon, so print the filename() on
error. For now it's the same, but the meaning of the string returned by
filename() is changing to become a "random label for the log
reader".
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The helper consists of three stages:
1. open a file (probably in a temp dir)
2. decorate it with extensions and checked_file
3. optionally rename a file from temp dir
The latter is done to make XFS allocate this file in a separate block
group if the file was created in a temp dir in step 1.
This patch swaps steps 2 and 3 to keep the filesystem-specific opening
steps next to each other.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Currently the sstable::filename(Index) is used in several places that
get the filename as a printable or throwable string and don't treat it
as a real location of any file.
For those, add the index_filename() helper symmetrical to toc_filename()
and (in some sense) the get_filename() one.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The sstable::filename() is going to become a private method. Lots of tests
call it, but tests do call a lot of other sstable private methods,
that's OK. Make the sstable::filename() yet another one of that kind in
advance.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Now that the filesystem cleaning code is sitting in one method, it can
finally be made a storage class one.
Exception-safe allocation of toc_name (spoiler: it's copied anyway one
step later, so it's "not that safe" actually) is moved into storage as
well. The caller is left with toc_filename() call in its exception
handler.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
When unlinking an sstable for whatever reason it's good to check if the
temp dir is hanging around. In some cases it's not (compaction), but
keeping the whole wiping code together makes it easier to move it on
storage class in one go.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
This method initiates the sstable creation. Effectively it's the first
step in the sstable creation transaction implemented on top of the rename()
call. Thus this method is moved onto storage under the respective name.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
When an sstable is prepared to be written to disk, .write_toc() is
called on it, which creates a temporary TOC file. Prior to this, the writer
code calls generate_toc() to collect components on the sstable.
This patch adds the .open_sstable() API call that does both. This
prepares the write_toc() part to be moved to storage, because it's not
just "write data into TOC file", it's the first step in transaction
implemeted on top of rename()s.
The tests need care -- there's the rewrite_toc_without_scylla_component()
thing in utils that doesn't want the generate_toc() part to be called.
It's not patched here and continues calling .write_toc().
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The sstable can be "moved" in two cases -- to move from staging or to
move to quarantine. Both operations are sstable API ones, but the
implementation is storage-specific. This patch makes the latter a method
of storage class.
One thing to note is that previously only quarantine() touched the target
directly. Now the move_to_new_dir() happening on load does it too, but
that's harmless.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
This method is currently used in two places: sstable::snapshot() and
sstable::seal_sstable(). The latter additionally touches the target
backup/ subdir.
This patch moves the whole thing onto storage and adds the touch for all
cases. For snapshots this might be excessive, but harmless.
Tests get their private-disclosure way to access sstable._storage in
a few places to call create_links directly.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Now the sstable sealing is split into storage part, internal-state part
and the seal-with-backup kick.
This move makes remove_temp_dir() private.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
There are two of them -- one API call and the other one that just
"seals" it. The latter one also changes the _marked_for_deletion bit on
the sstable.
This patch makes the latter method prepared to be moved onto storage,
because sealing means committing the TOC file on disk with the help of the
rename system call, which is purely a storage thing.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Same as previous patch. This move makes the previously moved
check_create_links_replay() a private method of the storage class.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
It needs to get an sstable const reference to get the filename(s) from it.
Other than that, it's a pure filesystem-accessing method.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
There are two -- one that accepts generation and the other one that does
not. The latter is only called by the former, so there's no need to keep both.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
There's only one user of it; it can document its "and mark for removal"
intention via a dedicated bool_class argument.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Looks much shorter and easier-to-patch this way.
The dst_dir argument is changed from a const reference to a value; the old
code copied it with do_with() anyway.
Indentation is deliberately left broken until next patch.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The whole method is going to move onto the newly introduced
filesystem_storage, which already has a field of the same name onboard. To
avoid confusion, rename the argument to dst_dir.
No functional changes, _just_ s/dir/dst_dir/g throughout the method.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Its meaning is comment-documented anyway. Also, next patches will remove
the create_links_and_mark_for_removal(), so callers need a verbose
spelling of this boolean in advance.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The replica/ code now "knows" that snapshotting an sstable means
creating a bunch of hard-links on disk. Abstract that via
sstable::snapshot() method.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Those two fields define the way an sstable is stored as a collection of
on-disk files. The first step towards making the storage access abstract is
moving the paths onto the embedded filesystem_storage class.
Both are made public for now, the rest of the code is patched to access
them via _storage.<smth>. The rest of the set moves parts of sstable::
methods into the filesystem_storage, then marks the paths private.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
One case gets the full sstable datafile path just to take the basename from
it. There's already a basename helper on the sstable class.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
These methods access sstables as files on disk; in order to hide the
"path on filesystem" meaning of sstables::filename(), the whole method
should be made an sstable:: one.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Now, with a44ca06906, is_normal_token_owner, which replaced is_member,
no longer relies on the pending status
of endpoints in topology.
With that we can get rid of this state and just keep all endpoints we know about in the topology.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Closes#12294
* github.com:scylladb/scylladb:
topology: get rid of pending state
topology: debug log update and remove endpoint
Refactor the existing stats tracking and updating
code into struct latency_stats_tracker and while at it,
count lock_acquisitions only on success.
Decrement operations_currently_waiting_for_lock in the destructor
so it's always balanced with the unconditional increment
in the ctor.
As for updating estimated_waiting_for_lock, it is always
updated in the dtor, both on success and failure since
the wait for the lock happened, whether waiting
timed out or not.
Fixes#12190
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Closes#12225
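A minimal sketch of the RAII idea described above (struct and field names assumed):

#include <atomic>
#include <cstdint>

struct lock_stats {
    std::atomic<uint64_t> operations_currently_waiting_for_lock{0};
    std::atomic<uint64_t> lock_acquisitions{0};
};

struct latency_stats_tracker {
    lock_stats& _s;
    explicit latency_stats_tracker(lock_stats& s) : _s(s) {
        ++_s.operations_currently_waiting_for_lock; // unconditional, in the ctor
    }
    ~latency_stats_tracker() {
        --_s.operations_currently_waiting_for_lock; // always balanced in the dtor
        // estimated_waiting_for_lock would also be updated here, on both
        // success and failure, since the wait happened either way
    }
    void on_lock_acquired() {
        ++_s.lock_acquisitions; // counted only on success
    }
};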
The schedule_repair() receives a bunch of endpoint:mutations pairs and tries to create handlers for those. When creating the handlers it re-obtains the topology from the schema->ks->effective_replication_map chain, but this new topology can be outdated compared to the list of endpoints at hand.
The fix is to carry the e.r.m. pointer used by read executor reconciliation all the way down to repair handlers creation. This requires some manipulations with mutate_internal() and mutate_prepare() argument lists.
fixes: #12050 (it was the same problem)
Closes#12256
* github.com:scylladb/scylladb:
proxy: Carry replication map with repair mutation(s)
proxy: Wrap read repair entries into read_repair_mutation
proxy: Turn ref to forwardable ref in mutations iterator
This fixes a long-standing bug related to handling of non-full
clustering keys, issue #1446.
after_key() was creating a position which is after all keys prefixed
by a non-full key, rather than a position which is right after that
key.
This issue will be caught by cql_query_test::test_compact_storage
in debug mode when mutation_partition_v2 merging starts inserting
sentinels at position after_key() on preemption.
It probably already causes problems for such keys.
A complicated function (in continuation style) that benefits
from this simplification.
Closes#12289
* github.com:scylladb/scylladb:
sstables: update_info_for_opened_data: reindent
sstables: update_info_for_opened_data: coroutinize
Sometimes a single modification to a base partition requires updates to
a large number of view rows. A common example is deletion of a base
partition containing many rows. A large BATCH is also possible.
To avoid large allocations, we split the large amount of work into
batch of 100 (max_rows_for_view_updates) rows each. The existing code
assumed an empty result from one of these batches meant that we are
done. But this assumption was incorrect: There are several cases when
a base-table update may not need a view update to be generated (see
can_skip_view_updates()) so if all 100 rows in a batch were skipped,
the view update stopped prematurely. This patch includes two tests
showing when this bug can happen - one test using a partition deletion
with a USING TIMESTAMP causing the deletion to not affect the first
100 rows, and a second test using a specially-crafted large BATCH.
These use cases are fairly esoteric, but in fact hit a user in the
wild, which led to the discovery of this bug.
The fix is fairly simple: to detect when build_some() is done, it is no
longer enough to check if it returned zero view-update rows; rather,
it now explicitly returns whether or not it is done via an std::optional.
The patch includes several tests for this bug, which pass on Cassandra,
failed on Scylla before this patch, and pass with this patch.
Fixes#12297.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes#12305
The abseil and tools/java submodules were accidentally updated in
71bc12eecc
(merged to master in 51f867339e)
This series reverts those changes.
Closes#12311
* github.com:scylladb/scylladb:
Revert accidental update of tools/java submodule
Revert accidental update of abseil submodule
The create_write_response_handler() for read repair needs the e.r.m.
from the caller, because it effectively accepts the list of endpoints from
it.
So this patch equips all read_repair_mutation-s with the e.r.m. pointer
so that the handler creation can use it. It's the same for all
mutations, so it's a waste of space, but it's not bad -- there are
typically few mutations in this range and the entry passed there is
temporary, so even lots of them won't occupy much memory for long.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The schedule_repair() operates on a map of endpoint:mutations pairs.
Next patch will need to extend this entry and it's going to be easier if
the entry is wrapped in a helper structure in advance.
This is where the forwardable reference cursor from the previous patch
gets its user. The schedule_repair() produces a range of rvalue
wrappers, but the create_write_response_handler accepting it is OK with
that; it copies the mutations anyway.
The printing operator is added to facilitate mutations logging from
mutate_internal() method.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The mutate_prepare() is iterating over a range of mutations with an 'auto&'
cursor, thus accepting only lvalues. This is very restrictive; the caller
of mutate_prepare() may as well provide rvalues if the target
create_write_response_handler() or lambda accepts it.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
This PR implements two things:
* Getting the value of a conjunction of elements separated by `AND` using `expr::evaluate`
* Preparing conjunctions using `prepare_expression`
---
`NULL` is treated as an "unkown value" - maybe `true` maybe `false`.
`TRUE AND NULL` evaluates to `NULL` because it might be `true` but also might be `false`.
`FALSE AND NULL` evaluates to `FALSE` because no matter what value `NULL` acts as, the result will still be `FALSE`.
Unset and empty values are not allowed.
Usually in CQL the rule is that when `NULL` occurs in an operation the whole expression becomes `NULL`, but here we decided to deviate from this behavior.
Treating `NULL` as an "unkown value" is the standard SQL way of handing `NULLs` in conjunctions.
It works this way in MySQL and Postgres so we do it this way as well.
The evaluation short-circuits. Once `FALSE` is encountered the function returns `FALSE` immediately without evaluating any further elements.
It works this way in Postgres as well, for example:
`SELECT true AND NULL AND 1/0 = 0` will throw a division by zero error,
but `SELECT false AND 1/0 = 0` will successfully evaluate to `FALSE`.
Closes#12300
* github.com:scylladb/scylladb:
expr_test: add unit tests for prepare_expression(conjunction)
cql3: expr: make it possible to prepare conjunctions
expr_test: add tests for evaluate(conjunction)
cql3: expr: make it possible to evaluate conjunctions
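A hedged sketch of the three-valued AND semantics described above, with std::optional<bool> standing in for a nullable boolean (not the actual evaluator code):

#include <optional>

std::optional<bool> and3(std::optional<bool> a, std::optional<bool> b) {
    if (a == false || b == false) {
        return false;        // FALSE AND anything (even NULL) is FALSE
    }
    if (!a.has_value() || !b.has_value()) {
        return std::nullopt; // TRUE AND NULL is NULL
    }
    return true;             // TRUE AND TRUE
}

In the real evaluator the elements are evaluated left to right and evaluation stops at the first FALSE, which is the short-circuiting described above.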
Fixes https://github.com/scylladb/scylladb/issues/11712
Updates added with this PR:
- Added a new section with the description of AzureSnitch (similar to others + examples and language improvements).
- Fixed the headings so that they render properly.
- Replaced "Scylla" with "ScyllaDB".
Closes#12254
* github.com:scylladb/scylladb:
docs: replace Scylla with ScyllaDB on the Snitches page
docs: fix the headings on the Snitches page
doc: add the description of AzureSnitch to the documentation
prepare_expression used to throw an error
when encountering a conjunction.
Now it's possible to use prepare_expression
to prepare an expression that contains
conjunctions.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
Previously it was impossible to use expr::evaluate()
to get the value of a conjunction of elements
separated by ANDs.
Now it has been implemented.
NULL is treated as an "unkown value" - maybe true maybe false.
`TRUE AND NULL` evaluates to NULL because it might be true but also might be false.
`FALSE AND NULL` evaluates to FALSE because no matter what value NULL acts as, the result will still be FALSE.
Unset and empty values are not allowed.
Usually in CQL the rule is that when NULL occurs in an operation the whole expression
becomes NULL, but here we decided to deviate from this behavior.
Treating NULL as an "unkown value" is the standard SQL way of handing NULLs in conjunctions.
It works this way in MySQL and Postgres so we do it this way as well.
The evaluation short-circuits. Once FALSE is encountered the function returns FALSE
immediately without evaluating any further elements.
It works this way in Postgres as well, for example:
`SELECT true AND NULL AND 1/0 = 0` will throw a division by zero error
but `SELECT false AND 1/0 = 0` will successfully evaluate to FALSE.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
The infinitely high time_point of `db_clock::time_point::max()`
used in ba42852b0e
is too high for some clients that can't represent
that as a date_time string.
Instead, limit it to 9999-12-31T00:00:00+0000,
which is practically sufficient to ensure truncation of
all sstables and should be within the clients' limits.
Fixes#12239
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Closes#12273
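For reference, the chosen limit expressed as seconds since the Unix epoch (a sketch; the actual code uses db_clock): 9999-12-31T23:59:59Z is 253402300799, so midnight on 9999-12-31 is 253402300800 - 86400:

#include <ctime>

constexpr std::time_t max_truncation_ts = 253402214400; // 9999-12-31T00:00:00+0000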
Add instructions on how to backport a feature to an older version of Scylla.
It contains detailed step-by-step instructions so that people unfamiliar with the intricacies of Scylla's repository organization can easily get the hang of it.
This is the guide I wish I had when I had to do my first backport.
I put it in backport.md because that looks like the file responsible for this sort of information.
For a moment I thought about `CONTRIBUTING.md`, but this is a really short file with general information, so it doesn't really fit there. Maybe in the future there will be some sort of unification (see #12126)
Closes#12138
* github.com:scylladb/scylladb:
dev/docs: add additional git pull to backport docs
docs/dev: add a note about cherry-picking individual commits
docs/dev: use 'is merged into' instead of 'becomes'
docs/dev: mention that new backport instructions are for the contributor
docs/dev: Add backport instructions for contributors
Several snitch drivers make http requests to get
region/dc/zone/rack/whatever from the cloud provider. They blindly rely
on the response being successful and read the response body to parse the
data they need.
That's not nice; add checks that requests finish with HTTP OK statuses.
refs: #12185
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Closes#12287
Now, with a44ca06906,
is_normal_token_owner, which replaced is_member,
no longer relies on the pending status
of endpoints in topology.
With that we can get rid of this state and just keep
all endpoints we know about in the topology.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
update_normal_tokens checks that the endpoint is in topology. Currently we call update_topology on this path only if it's not a normal_token_owner, but there are paths where the endpoint could be a normal token owner and still
be pending in topology, so always update it, just in case.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Closes#12080
* github.com:scylladb/scylladb:
storage_service: handle_state_normal: always update_topology before update_normal_tokens
storage_service: handle_state_normal: delete outdated comment regarding update pending ranges race
Thanks to #12250, Host IDs uniquely identify nodes. We can use them as Raft IDs which simplifies the code and makes reasoning about it easier, because Host IDs are always guaranteed to be present (while Raft IDs may be missing during upgrade).
Fixes: https://github.com/scylladb/scylladb/issues/12204
Closes#12275
* github.com:scylladb/scylladb:
service/raft: raft_group0: take `raft::server_id` parameter in `remove_from_group0`
gms, service: stop gossiping and storing RAFT_SERVER_ID
Revert "gms/gossiper: fetch RAFT_SERVER_ID during shadow round"
service: use HOST_ID instead of RAFT_SERVER_ID during replace
service/raft: use gossiped HOST_ID instead of RAFT_SERVER_ID to update Raft address map
main: use Host ID as Raft ID
This series improves the add-node-to-cluster document, in particular around the documentation for the associated cleanup procedure, and the prerequisite steps.
It also removes information about outdated releases.
Closes#12210
* github.com:scylladb/scylladb:
docs: operating-scylla: add-node-to-cluster: deleted instructions for unsupported releases
docs: operating-scylla: add-node-to-cluster: cleanup: move tips to a note
docs: operating-scylla: add-node-to-cluster: improve wording of cleanup instructions
docs: operating-scylla: prerequisites: system_auth is a keyspace, not a table
docs: operating-scylla: prerequisites: no Authentication status is gathered
docs: operating-scylla: prerequisites: simplify grep commands
docs: operating-scylla: add-node-to-cluster: prerequisites: number sub-sections
docs: operating-scylla: add-node-to-cluster: describe other nodes in plural
Revert version change made by PR #11106, which increased it to `4.0.0`
to enable server-side describe on latest cqlsh.
Turns out that our tooling somehow depends on it (e.g. `sstableloader`)
and it breaks dtests.
Reverting only the version allows us to leave the describe code unchanged
and it fixes the dtests.
cqlsh 6.0.0 will return a warning when running `DESC ...` commands.
Closes#12272
We no longer need to translate from IP to Raft ID using the address map,
because Raft ID is now equal to the Host ID - which is always available
at the call site of `remove_from_group0`.
It is equal to HOST_ID (if present) and no longer used for anything.
The application state was only gossiped if `experimental-features`
contained `raft`, so we can free this slot.
Similarly, `raft_server_id`s were only persisted in `system.peers` if
the `SUPPORTS_RAFT` cluster feature was enabled, which happened only
when `experimental-features` contained `raft`. The `raft_server_id`
field in the schema was also introduced recently in `master` and didn't
get to be in a release yet. Given either of these reasons, we can remove
this field safely.
Fixes /scylladb/scylla-enterprise/issues#1262
Changes the somewhat ambiguous "none" into "not set" to clarify that "none" is not an
option to be written out, but an absence of a choice (in which case you also have made
a choice).
Closes#12270
The Host ID now uniquely identifies a node (we no longer steal it during
node replace) and Raft is still experimental. We can reuse the Host ID
of a node as its Raft ID. This will allow us to remove and simplify a
lot of code.
With this we can already remove some dead code in this commit.
Script for "one-click" opening of coredumps.
It extracts the build-id from the coredump, retrieves metadata for that
build, downloads the binary package, the source code and finally
launches the dbuild container, with everything ready to load the
coredump.
The script is idempotent: running it after the preparatory steps will
re-use what is already downloaded.
The script is not trying to provide a debugging environment that caters
to all the different ways and preferences of debugging. Instead, it just
sets up a minimalistic environment for debugging, while providing
opportunities for the user to customize it according to their
preferences.
I'm not entirely sure coredumps from the master branch will work, but we
can address this later when we confirm they don't.
Example:
$ ~/ScyllaDB/scylla/worktree0/scripts/open-coredump.sh ./core.scylla.113.bac3650b616f4f09a4d1ab160574b6a5.4349.1669185225000000000000
Build id: 5009658b834aaf68970135bfc84f964b66ea4dee
Matching build is scylla-5.0.5 0.20221009.5a97a1060 release-x86_64
Downloading relocatable package from http://downloads.scylladb.com/downloads/scylla/relocatable/scylladb-5.0/scylla-x86_64-package-5.0.5.0.20221009.5a97a1060.tar.gz
Extracting package scylla-x86_64-package-5.0.5.0.20221009.5a97a1060.tar.gz
Cloning scylla.git
Downloading scylla-gdb.py
Copying scylla-gdb.py from /home/bdenes/ScyllaDB/storage/11961/open-coredump.sh.dir/scylla.repo
Launching dbuild container.
To examine the coredump with gdb:
$ gdb -x scylla-gdb.py -ex 'set directories /src/scylla' --core ./core.scylla.113.bac3650b616f4f09a4d1ab160574b6a5.4349.1669185225000000000000 /opt/scylladb/libexec/scylla
See https://github.com/scylladb/scylladb/blob/master/docs/dev/debugging.md for more information on how to debug scylla.
Good luck!
[root@fedora workdir]#
Closes#12223
We want to always be able to distinguish between
the replacing node and the replacee by using different,
unique, host identifiers.
This will allow us to use the host_id authoritatively
to identify the node (rather than its endpoint IP address)
for token mapping and node operations.
Also, it will be used in the following patch to never allow the
replaced node to rejoin the cluster, as its host_id should never
be reused.
This change does not affect #5523; the replaced node may still steal back its tokens if restarted.
Refs #9839
Refs #12040
Closes#12250
* github.com:scylladb/scylladb:
docs: replace-dead-node: update host_id of replacing node
docs: replace-dead-node: fix alignment
db: system_keyspace: change set_local_host_id to private set_local_random_host_id
storage_service: do not inherit the host_id of a replaced a node
The generic task holds and destroys a task::impl,
but we want the derived class's destructor to be called
when the task is destroyed; otherwise, for example, a
member like an abort_source subscription will not be destroyed
(and auto-unlinked).
Fixes#12183
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Closes#12266
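A minimal sketch of the bug and the fix: without a virtual destructor in the base impl, destroying the task through the base pointer skips the derived destructor, so members like an abort_source subscription are never auto-unlinked (type names here are illustrative).

#include <memory>

struct task_impl_base {
    virtual ~task_impl_base() = default; // the fix: make destruction virtual
};

struct repair_task_impl : task_impl_base {
    // e.g. an abort_source subscription member would live here
    ~repair_task_impl() override = default; // now runs when the task is destroyed
};

void destroy_example() {
    std::unique_ptr<task_impl_base> task = std::make_unique<repair_task_impl>();
    // ~repair_task_impl() is invoked when `task` goes out of scope
}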
It is moved into the async thread, so the encapsulating
function should be defined mutable to move the func
rather than copying it.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Closes#12267
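A sketch of the pattern (not the actual call site): the lambda handed to seastar::async must be declared mutable so the captured func can be moved rather than copied.

#include <seastar/core/future.hh>
#include <seastar/core/thread.hh>
#include <functional>
#include <utility>

seastar::future<> run_in_thread(std::function<void ()> func) {
    // without `mutable` the capture is const and std::move degrades to a copy
    return seastar::async([func = std::move(func)] () mutable {
        auto f = std::move(func); // moved inside the async thread
        f();
    });
}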
This patch includes a translation of two more test files from
Cassandra's CQL unit test directory cql3/validation/operations.
All tests included here pass on Cassandra. Several tests fail on Scylla
and are marked "xfail". These failures discovered two previously-unknown
bugs:
#12243: Setting USING TTL of "null" should be allowed
#12247: Better error reporting for oversized keys during INSERT
And also added reproducers for two previously-known bugs:
#3882: Support "ALTER TABLE DROP COMPACT STORAGE"
#6447: TTL unexpected behavior when setting to 0 on a table with
default_time_to_live
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes#12248
We recently (commit 6a5d9ff261) started
to use std::source_location instead of std::experimental::source_location.
However, this does not work on clang 14, because libc++ 12's
<source_location> only works if __builtin_source_location is available,
and it is not on clang 14.
clang 15 is just three months old, and several relatively-recent
distributions still carry clang 14 so it would be nice to support it
as well.
So this patch adds a trivial compatibility header file, which, when
included and compiled with clang 14, aliases the functional
std::experimental::source_location to std::source_location.
It turns out it's enough to include the new header file from three
headers that included <source_location> - I guess all other uses
of source_location depend on those header files directly or indirectly.
We may later need to include the compatibility header file in additional
places, but for now we don't.
Refs #12259
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes#12265
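A sketch of what such a compatibility header could look like (the detection logic is assumed, not the actual file; injecting the alias into namespace std is formally not allowed, but works as a stopgap):

#if defined(__has_builtin) && !__has_builtin(__builtin_source_location)
#include <experimental/source_location>
namespace std {
    using source_location = std::experimental::source_location;
}
#else
#include <source_location>
#endif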
This PR adds a server-side `DESCRIBE` statement, which is required by the latest cqlsh version.
The only change from the user perspective is that the `DESC ...` statement can be used with cqlsh version >= 6.0. Previously the statement was executed on the client side, but starting with Cassandra 4.0 and cqlsh 6.0, execution of describe was moved to the server side, so the user was unable to do `DESC ...` with Scylla and cqlsh 6.0.
Implemented describe statements:
- `DESC CLUSTER`
- `DESC [FULL] SCHEMA`
- `DESC [ONLY] KEYSPACE`
- `DESC KEYSPACES/TYPES/FUNCTIONS/AGGREGATES/TABLES`
- `DESC TYPE/FUNCTION/AGGREGATE/MATERIALIZED VIEW/INDEX/TABLE`
- `DESC`
[Cassandra's implementation for reference](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/cql3/statements/DescribeStatement.java)
Changes in this patch:
- cql3::util: added `single_quote()` function
- added `data_dictionary::keyspace_element` interface
- implemented `data_dictionary::keyspace_element` for:
- keyspace_metadata,
- UDT, UDF, UDA
- schema
- cql3::functions: added `get_user_functions()` and `get_user_aggregates()` to get all UDFs/UDAs in specified keyspace
- data_dictionary::user_types_metadata: added `has_type()` function
- extracted `describe_ring()` from storage_service to standalone helper function in `locator/util.hh`
- storage_proxy: added `describe_ring()` (implemented using helper function mentioned above)
- extended CQL grammar to handle describe statement
- increased version in `version.hh` to 4.0.0, so cqlsh will use server-side describe statement
Referring: https://github.com/scylladb/scylla/issues/9571, https://github.com/scylladb/scylladb/issues/11475
Closes#11106
* github.com:scylladb/scylladb:
version: Increasing version
cql-pytest: Add tests for server-side describe statement
cql-pytest: creating random elements for describe's tests
cql3: Extend CQL grammar with server-side describe statement
cql3:statements: server-side describe statement
data_dictionary: add `get_all_keyspaces()` and `get_user_keyspaces()`
storage_proxy: add `describe_ring()` method
storage_service, locator: extract describe_ring()
data_dictionary:user_types_metadata: add has_type() function
cql3:functions: `get_user_functions()` and `get_user_aggregates()`
implement `keyspace_element` interface
data_dictionary: add `keyspace_element` interface
cql3: single_quote() util function
view: row_lock: lock_ck: reindent
test/topology: enable replace tests
service/raft: report an error when Raft ID can't be found in `raft_group0::remove_from_group0`
service: handle replace correctly with Raft enabled
gms/gossiper: fetch RAFT_SERVER_ID during shadow round
service: storage_service: sleep 2*ring_delay instead of BROADCAST_INTERVAL before replace
The `current()` version in version.hh has to be increased to at
least 4.0.0, so server-side describe will be used. Otherwise,
cqlsh returns a warning that client-side describe is not supported.
Add helper functions to create random elements (keyspaces, tables, types)
to increase the coverage of the describe statement's tests.
This commit also adds a `random_seed` fixture. The fixture should
always be used when using random functions. In case of a test failure, the
seed will be present in the test's signature and the case can be easily
recreated.
After the test finishes, the fixture restores the state of `random` to
its before-test state.
Starting from cqlsh 6.0.0, execution of the describe statement was moved
from the client to the server.
This patch implements the server-side describe statement. It's done by
simply fetching all needed keyspace elements (keyspace/table/index/view/UDT/UDF/UDA)
and generating the desired description or list of names of all elements.
The description of any element has to respect CQL restrictions (like
name quoting) to allow quickly recreating the schema by simply copy-pasting the description.
In order to execute `DESC CLUSTER`, there has to be a way to describe the
ring. `storage_service` is not available at query execution. This patch
adds `describe_ring()` as a method of `storage_proxy()` (using helper
function from `locator/util.hh`).
`describe_ring()` was implemented as a method of `storage_service`. This
patch extracts it from there to a standalone helper function in
`locator/util.hh`.
A common interface for all keyspace elements, which are:
keyspace, UDT, UDF, UDA, tables, views, indexes.
The interface is to have a unified way to describe those elements.
`single_quote()` takes a string and transforms it to a string
which can be safely used in CQL commands.
Single quoting involves wrapping the name in single-quotes ('). A single-quote
character itself is quoted by doubling it.
Single quoting is necessary for dates, IP addresses or string literals.
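A minimal sketch of the described quoting rules (assumed to match the semantics above, not the actual cql3::util code):

#include <string>

std::string single_quote(const std::string& s) {
    std::string out = "'";
    for (char c : s) {
        out += c;
        if (c == '\'') {
            out += '\''; // double any embedded single-quote
        }
    }
    out += '\'';
    return out; // e.g. single_quote("it's") yields 'it''s'
}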
Also simplify the code and improve logging in general.
The previous code did this: search for the ID in the address map. If it
couldn't be found, perform a read barrier and search again. If it again
couldn't be found, return.
This algorithm depended on the fact that IP addresses were stored in
group 0 configuration. The read barrier was used to obtain the most
recent configuration, and if the IP was not a part of address map after
the read barrier, that meant it's simply not a member of group 0.
This logic no longer applies so we can simplify the code.
Furthermore, when I was fixing the replace operation with Raft enabled,
at some point I had a "working" solution with all tests passing. But I
was suspicious and checked if the replaced node got removed from
group 0. It wasn't. So the replace finished "successfully", but we had
an additional (voting!) member of group 0 which didn't correspond to
a token ring member.
The last version of my fixes ensures that the node gets removed by the
replacing node. But the system is fragile and nothing prevents us from
breaking this again. At least log an error for now. Regression tests
will be added later.
We must place the Raft ID obtained during the shadow round in the
address map. It won't be placed by the regular gossiping route if we're
replacing using the same IP, because we override the application state
of the replaced node. Even if we replace a node with a different IP, it
is not guaranteed that background gossiping manages to update the
address map before we need it, especially in tests where we set
ring_delay to 0 and disable wait_for_gossip_to_settle. The shadow round,
on the other hand, performs a synchronous request (and if it fails
during bootstrap, bootstrap will fail - because we also won't be able to
obtain the tokens and Host ID of the replaced node).
Fetch the Raft ID of the replaced node in `prepare_replacement_info`,
which runs the shadow round. Return it in `replacement_info`. Then
`join_token_ring` passes it to `setup_group0`, which stores it in the
address map. It does that after `join_group0` so the entry is
non-expiring (the replaced node is a member of group 0). Later in the
replace procedure, we call `remove_from_group0` for the replaced node.
`remove_from_group0` will be able to reverse-translate the IP of the
replaced node to its Raft ID using the address map.
During the replace operation we need the Raft ID of the replaced node.
The shadow round is used for fetching all necessary information before
the replace operation starts.
Most of the sleeps related to gossiping are based on `ring_delay`,
which is configurable and can be set to lower value e.g. during tests.
But for some reason there was one case where we slept for a hardcoded
value, `service::load_broadcaster::BROADCAST_INTERVAL` - 60 seconds.
Use `2 * get_ring_delay()` instead. With the default value of
`ring_delay` (30 seconds) this will give the same behavior.
When we have a table with partition key p and an indexed regular column
v, the test included in this patch checks the query
SELECT p FROM table WHERE v = 1 AND TOKEN(p) > 17
This can work and not require ALLOW FILTERING, because the secondary index
posting-list of "v=1" is ordered in p's token order (to allow SELECT with
and without an index to return the same order - this is explained in
issue #7443). So this test should pass, and indeed it does on both current
Scylla, and Cassandra.
However, it turns out that this was a bug - issue #7043 - in older
versions of Scylla, and only fixed in Scylla 4.6. In older versions,
the SELECT wasn't accepted, claiming it requires ALLOW FILTERING,
and if ALLOW FILTERING was added, the TOKEN(p) > 17 part was silently
ignored.
The fix for issue #7043 actually included regression tests, C++ tests in
test/boost/secondary_index_test.cc. But in this patch we also add a Python
test in test/cql-pytest.
One of the benefits of cql-pytest is that we can (and I did) run the same
test on Cassandra to verify we're not implementing a wrong feature.
Another benefit is that we can run a new test on an old version, and
not even require re-compilation: You can run this new test on any
existing installation of Scylla to check if it still has issue #7043.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes#12237
The replacing node no longer assumes the host_id
of the replacee. It will continue to use a random,
unique host_id.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Now that the local host_id is never changed externally
(by the storage_service upon replace-node),
the method can be made private and be used only for initializing the
local host_id to a random one.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
We want to always be able to distinguish between
the replacing node and the replacee by using different,
unique, host identifiers.
This will allow us to use the host_id authoritatively
to identify the node (rather than its endpoint IP address)
for token mapping and node operations.
Also, it will be used in the following patch to never allow the
replaced node to rejoin the cluster, as its host_id should never
be reused.
Refs #9839
Refs #12040
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Currently the header includes storage_proxy.hh and spreads this over the
code via raft_group0_client.hh -> group0_state_machine.hh -> lang.hh
Forward declaring the proxy class eliminates ~100 indirect dependencies on
storage_proxy.hh via this chain.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Closes#12241
The latter is a pretty popular test/lib header that disseminates the
former over a whole lot of unit tests. The former, in turn, naturally
includes sstables.hh, thus making tons of unrelated tests depend on the
sstables class they don't use.
However, simple removal doesn't work, because of the local_shard_only bool
class definition in sstable_utils.hh used in simple_schema.hh. This
thing, in turn, is used in key-making helpers that don't belong in
sstable utils, so these are moved into simple_schema as well.
When done, this affects the mutation_source_test.hh, which needs the
local_shard_only bool class (and helps spreading the sstables.hh
throughout more unrelated tests) and a bunch of .cc test sources that
used sstable_utils.hh to indirectly include various headers they
need.
After patching, sstables.hh touches 2x fewer tests. As a side
effect, sstables_manager.hh also becomes depended on by 2x fewer
tests.
Continuation of 9bdea110a6
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Closes#12240
trim_clustering_row_ranges_to() is broken for non-full keys in reverse
mode. It will trim the range to
position_in_partition_view::after_key(full_key) instead of
position_in_partition_view::before_key(key), hence it will include the
key in the resulting range rather than exclude it.
Fixes#12180
Refs #1446
A frozen set can be part of the clustering key, and with compact
storage, the corresponding key component can have an empty value.
The comparison code was not prepared for this: the iterator attempts to
deserialize the item count and will fail if the value is empty.
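The shape of the fix is roughly the following (illustrative only, not the
actual comparator code):
```
#include <compare>
#include <cstddef>
#include <span>

using bytes_view = std::span<const std::byte>; // stand-in for the real type

std::strong_ordering compare_frozen_collections(bytes_view a, bytes_view b) {
    if (a.empty() || b.empty()) {
        // an empty value sorts before any non-empty one; crucially, don't
        // attempt to deserialize an item count from an empty value
        return a.size() <=> b.size();
    }
    // ... deserialize item counts and compare element-wise ...
    return std::strong_ordering::equal; // placeholder for the real comparison
}
```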
Fixes#12242
This pull request introduces support for global secondary indexes based on static columns.
Local secondary indexes based on static columns are not planned to be supported and are explicitly forbidden. Because there is only one static row per partition and local indexes require the full partition key when querying, such indexes wouldn't be very useful and would only waste resources.
The index table for a secondary index on a static column, unlike for other secondary indexes, does not contain clustering keys from the base table. A static column's value determines a set of full partitions, so the clustering keys would only be unnecessary.
The already existing logic for querying using secondary indexes works after minimal modifications. The view update generation path now works on a common representation of static and clustering rows, and the new representation made it possible to keep most of the logic intact.
New cql-pytests are added. All but one of the existing tests for secondary indexes on static columns - ported from Cassandra - now work and have their `xfail` marks lifted; the remaining test requires support for collection indexing, so it will start working only after #2962 is fixed.
Materialized views with a static column as a key are __not__ implemented in this PR.
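For a rough illustration of the new capability, in the style of the existing
C++ tests (schema and names are made up for this sketch):
```
#include <seastar/testing/test_case.hh>
#include "test/lib/cql_test_env.hh"

SEASTAR_TEST_CASE(test_index_on_static_column) {
    return do_with_cql_env_thread([] (cql_test_env& e) {
        e.execute_cql("CREATE TABLE t (p int, c int, s int STATIC,"
                      " PRIMARY KEY (p, c))").get();
        // a global index on a static column - newly allowed by this PR
        e.execute_cql("CREATE INDEX ON t (s)").get();
        // returns whole base-table partitions whose static column matches
        e.execute_cql("SELECT * FROM t WHERE s = 1").get();
    });
}
```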
Fixes: #2963
Closes#11166
* github.com:scylladb/scylladb:
test_materialized_view: verify that static columns are not allowed
test_secondary_index: add (currently failing) test for static index paging
test_secondary_index: add more tests for secondary indexes on static columns
cassandra_tests: enable existing tests for static columns
create_index_statement: lift restriction on secondary indexes on static rows
db/view: fetch and process static rows when building indexes
gms/feature_service: introduce SECONDARY_INDEXES_ON_STATIC_COLUMNS cluster feature
create_index_statement: disallow creation of local indexes with static columns
select_statement: prepare paging for indexes on static columns
select_statement: do not attempt to fetch clustering columns from secondary index's table
secondary_index_manager: don't add clustering key columns to index table of static column index
replica/table: adjust the view read-before-write to return static rows when needed
db/view: process static rows in view_update_builder::on_results
db/view: adjust existing view update generation path to use clustering_or_static_row
column_computation: adjust to use clustering_or_static_row
db/view: add clustering_or_static_row
deletable_row: add column_kind parameter to is_live
view_info: adjust view_column to accept column_kind
db/view: base_dependent_view_info: split non-pk columns into regular and static
abseil::hash depends on abseil::city and declares CityHash32
as an external symbol. The city static library, however,
precedes hash in the link list, which apparently makes the linker
simply drop it from the object list, since its symbols are not
used elsewhere.
Fix the library ordering to help the linker see that CityHash32
is used.
Closes#12231
Adds a test which verifies that static columns are not allowed in
materialized views. Although we added support for static columns in
secondary indexes, which share a lot of code with materialized views,
static columns in materialized views are not yet ready to use.
Currently, when executing queries accelerated by an index on a static
column, paging is unable to break base table partitions across pages and
is forced to return them in whole. This will cause problems if such a
query must return a very large base table partition because it will have
to be loaded into memory.
Fixing this issue will require a more sophisticated approach than what
was done in the PR. For the time being, an xfailing pytest is added
which should start passing after paging is improved.
This PR adds to the 5.1 upgrade guide a link to the KB article about updating the mode after the upgrade.
In addition, I have:
- updated the KB article to include the versions affected by that change.
- fixed the broken link to the page about metric updates (it is not related to the KB article, but I fixed it in the same PR to limit the number of PRs that need to be backported).
Related: https://github.com/scylladb/scylladb/pull/11122
Closes#12148
* github.com:scylladb/scylladb:
doc: update the releases in the KB about updating the mode after upgrade
doc: fix the broken link in the 5.1 upgrade guide
doc: add the link to the 5.1-related KB article to the 5.1 upgrade guide
The reason is an alloc-dealloc mismatch of position_in_partition objects
allocated by cursors inside the coroutine object stored in the update
variable in row_cache::do_update().
It is allocated under the cache region, but in case of an exception it will
be destroyed under the standard allocator. If the update is successful, it
will be cleared under the region allocator, so there is no problem in the
normal case.
Fixes#12068
Closes#12233
2.3 and 2018.1 reached their end of life and are long gone.
No need to keep instructions for them in the master version of this
document.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
"use `nodetool cleanup` cleanup command" repeats words, change to
"run the `nodetool cleanup` command".
Also, improve the description of the cleanup action
and how it relates to the bootstrapping process.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Fix the phrase referring to it as a table accordingly.
Also, do some minor phrasing touch-ups in this area.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Authentication status isn't gathered from scylla.yaml,
only the authenticator is, so change the caption accordingly.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Writing `cat X | grep Y` is both inefficient and somewhat
unprofessional. The grep command works very well on a file argument
so `grep Y X` will do the job perfectly without the need for a pipe.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Typically data will be streamed from multiple existing nodes
to the new node, not from a single one.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
We need to obtain the Raft ID of the replaced node during the shadow round and
place it in the address map. It won't be placed by the regular gossiping route
if we're replacing using the same IP, because we override the application state
of the replaced node. Even if we replace a node with a different IP, it is not
guaranteed that background gossiping manages to update the address map before we
need it, especially in tests where we set ring_delay to 0 and disable
wait_for_gossip_to_settle. The shadow round, on the other hand, performs a
synchronous request (and if it fails during bootstrap, bootstrap will fail -
because we also won't be able to obtain the tokens and Host ID of the replaced
node).
Fetch the Raft ID of the replaced node in `prepare_replacement_info`,
which runs the shadow round. Return it in `replacement_info`. Then
`join_token_ring` passes it to `setup_group0`, which stores it in the
address map. It does that after `join_group0` so the entry is
non-expiring (the replaced node is a member of group 0). Later in the
replace procedure, we call `remove_from_group0` for the replaced node.
`remove_from_group0` will be able to reverse-translate the IP of the
replaced node to its Raft ID using the address map.
Also remove an unconditional 60 seconds sleep from the replace code. Make it
dependent on ring_delay.
Enable the replace tests.
Modify some code related to removing servers from group 0 which depended on
storing IP addresses in the group 0 configuration.
Closes#12172
* github.com:scylladb/scylladb:
test/topology: enable replace tests
service/raft: report an error when Raft ID can't be found in `raft_group0::remove_from_group0`
service: handle replace correctly with Raft enabled
gms/gossiper: fetch RAFT_SERVER_ID during shadow round
service: storage_service: sleep 2*ring_delay instead of BROADCAST_INTERVAL before replace
This change removes sstables.hh from some other headers, replacing it
with version.hh and shared_sstable.hh. It also drops
sstables_manager.hh from some more headers, because that header
propagates sstables.hh by itself. The change is pretty straightforward,
but has a ricochet in database.hh, which needs disk-error-handler.hh.
Without the patch, touching sstables/sstable.hh results in recompiling
409 targets; with the patch -- 299 targets.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Closes#12222
Tidy up namespaces, move code to the right file, and
move the whole thing to the replica module where it
belongs.
Closes#12219
* github.com:scylladb/scylladb:
dirty_memory_manager: move implementaton from database.cc
dirty_memory_manager: move to replica module
test: dirty_memory_manager_test: disambiguate classes named 'test_region_group'
dirty_memory_manager: stop using using namespace
There are two similarly named classes: ::test_region_group and
dirty_memory_manager_logalloc::test_region_group. Rename the
former to ::raii_region_group (that's what it's for) and the
latter to ::test_region_group, to reduce confusion.
Serializes a value that is an instance of a type. The opposite of `deserialize` (previously known as `print`).
All other actions operate on serialized values, yet up to now we were missing a way to go from human-readable values to serialized ones. This prevented, for example, using `scylla types tokenof $pk` if one only had the human-readable key value.
Example:
```
$ scylla types serialize -t Int32Type -- -1286905132
b34b62d4
$ scylla types serialize --prefix-compound -t TimeUUIDType -t Int32Type -- d0081989-6f6b-11ea-0000-0000001c571b 16
0010d00819896f6b11ea00000000001c571b000400000010
$ scylla types serialize --prefix-compound -t TimeUUIDType -t Int32Type -- d0081989-6f6b-11ea-0000-0000001c571b
0010d00819896f6b11ea00000000001c571b
```
Closes#12029
* github.com:scylladb/scylladb:
docs: scylla-types.rst: add mention of per-operation --help
tools/scylla-types: add serialize operation
tools/scylla-types: prepare for action handlers with string arguments
tools/scylla-types: s/print/deserialize/ operation
docs: scylla-types.rst: document tokenof and shardof
docs: scylla-types.rst: fix typo in compare operation description
Update the CODEOWNERS file with some people who joined different parts
of the project, and one person that left.
Note that despite its name, CODEOWNERS does not list "ownership" in any
strict sense of the word - it is more about who is willing and/or
knowledgeable enough to participate in reviewing changes to particular
files or directories. GitHub uses this file to automatically suggest
who should review a pull request.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes#12216
The problematic scenario this patch fixes might happen due to
unfortunate serialization of locks/unlocks between lock_pk and lock_ck,
as follows:
1. lock_pk acquires an exclusive lock on the partition.
2.a lock_ck attempts to acquire shared lock on the partition
and any lock on the row. both cases currently use a fiber
returning a future<rwlock::holder>.
2.b since the partition is locked, the lock_partition times out
returning an exceptional future. lock_row has no such problem
and succeeds, returning a future holding a rwlock::holder,
pointing to the row lock.
3.a the lock_holder previously returned by lock_pk is destroyed,
calling `row_locker::unlock`
3.b row_locker::unlock sees that the partition is not locked
and erases it, including the row locks it contains.
4.a when_all_succeeds continuation in lock_ck runs. Since
the lock_partition future failed, it destroys both futures.
4.b the lock_row future is destroyed with the rwlock::holder value.
4.c ~holder attempts to return the semaphore units to the row rwlock,
but the latter was already destroyed in 3.b above.
Acquiring the partition lock and the row lock in parallel
doesn't gain anything, but it complicates error handling,
as seen above.
This patch serializes acquiring the row lock in lock_ck
after locking the partition to prevent the above race.
This way, erasing the unlocked partition is never expected
to happen while any of its row locks is held.
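A simplified sketch of the resulting lock order (hypothetical, simplified
signatures - not the actual row_locker API):
```
future<row_locker::lock_holder>
row_locker::lock_ck(const partition_key& pk, const clustering_key& ck,
                    db::timeout_clock::time_point timeout) {
    // 1. take the shared partition lock first; if this times out, no row
    //    lock has been taken yet, so there is nothing to undo
    auto pk_lock = co_await lock_partition_shared(pk, timeout);
    // 2. only then take the row lock; unlock() can no longer erase a
    //    partition entry while one of its row locks is still held
    auto ck_lock = co_await lock_row(pk, ck, timeout);
    co_return lock_holder(std::move(pk_lock), std::move(ck_lock));
}
```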
Fixes#12168
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Closes#12208
The diagnostics dumped by the reader concurrency semaphore are a pretty
common sight in logs as soon as a node becomes problematic. The reason
is that the reader concurrency semaphore acts as the canary in the coal
mine: it is the first to start screaming when the node or workload is
unhealthy. This patch adds documentation of the content of the
diagnostics and how to diagnose common problems based on it.
Fixes: #10471
Closes#11970
Takes human readable values and converts them to serialized hex encoded
format. Only regular atomic types are supported for now, no
collection/UDT/tuple support, not even in frozen form.
Currently all action handlers have bytes arguments, parsed from
hexadecimal string representations. We plan on adding a serialize
command which will require raw string arguments. Prepare the
infrastructure for supporting both types of action handlers.
Soon we will have a serialize operation. Rename the current print
operation to deserialize in preparation to that. We want the two
operations (serialize and deserialize) to reflect their relation in
their names too.
Secondary indexes on static columns should work now. This commit lifts
the existing restriction after the cluster is fully upgraded to a
version which supports such indexes.
This commit modifies the view builder and its consumer so that static
rows are always fetched and properly processed during view build.
Currently, the view builder will always fetch both static and clustering
rows, regardless of the type of indexes being built. For indexes on
static columns this is wasteful and could be improved so that only the
types of rows relevant to indexes being built are fetched - however,
doing this sounds a bit complicated and I would rather start with
something simpler which has a better chance of working.
Local indexes on static columns don't make sense because there is only
one static row per partition. It's always better to just run SELECT
DISTINCT on the base table. Allowing for such an index would only make
such queries slower (due to double lookup), would take unnecessary space
and could pose potential consistency problems, so this commit explicitly
forbids them.
When performing a query on a table which is accelerated by a secondary
index, the paging state returned along with the query contains a
partition key and a clustering key of the secondary index table. The
logic wasn't prepared to handle the case of secondary indexes on static
columns - notably, it tried to put base table's clustering key columns
into the paging state which caused problems in other places.
This commit fixes the paging logic so that the PK and CK of a secondary
index table is calculated correctly. However, this solution has a major
drawback: because it is impossible to encode the clustering key of the base
table in the paging state, partitions returned by queries accelerated by
secondary indexes on static columns will _not_ be split by paging. This
can be problematic in case there are large partitions in the base table.
The main advantage of this fix is that it is simple. Moreover, the
problem described above is not unique to static column indexes, but also
happens e.g. in case of some indexes on clustering columns (see case 2
of scylladb/scylla#7432). Fixing this issue will require a more
sophisticated solution and may affect more than only secondary indexes
on static columns, so this is left for a followup.
The previous commit made sure that the index table for secondary indexes
on static columns doesn't have columns corresponding to clustering keys in
the base table - therefore, we must make sure that we don't try to fetch
them when querying the index table.
The implementation of secondary indexes on static columns relies on the
fact that the index table only includes partition key columns of the
base table, but not clustering key columns. A static column's value
determines a set of full partitions, so including the clustering key
would only be redundant. It would also generate more work as a single
static column update would require a large portion of the index to be
updated.
This commit makes sure that clustering columns are not included in the
index table for indexes based on a static column.
Adjusts the read-before-write query issued in
`table::do_push_view_replica_updates` so that, when needed, it requests
static columns and makes sure that the static row is present.
The `view_update_builder::on_results()` function is changed to react to
static rows when comparing read-before-write results with the base table
mutation.
Adjusts the column_computation interface so that it is able to accept
both clustering and static rows through the common
db::view::clustering_or_static_row interface.
Adds a `clustering_or_static_row`, which is a common, immutable
representation of either a static or a clustering row. It will allow
handling view update generation based on static or clustering rows in a
uniform way.
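A minimal sketch of the class' shape (member types are approximated,
assuming Scylla's clustering_key, row and column_kind types; the real
definition lives in db/view):
```
#include <optional>

class clustering_or_static_row {
    std::optional<clustering_key> _key; // disengaged means "the static row"
    row _cells;                         // static or regular cells, per _key
public:
    bool is_static_row() const { return !_key.has_value(); }
    column_kind kind() const {
        return is_static_row() ? column_kind::static_column
                               : column_kind::regular_column;
    }
};
```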
While deletable_row is used to hold regular columns of a clustering row,
neither its name nor its implementation suggests that this is a requirement. In
fact, some of its methods already take a column_kind parameter which is
used to interpret the kind of columns held in the row.
This commit removes the assumption about the column kind from the
`deletable_row::is_live` method.
The `view_info::view_column()` function and `view_column` in view.cc allow
getting a view's column definition which corresponds to a given base
table column. They currently assume that the given column id corresponds to a
regular column. In preparation for secondary indexes based on static
columns, this commit adjusts those functions so that they accept other
kinds of columns, including static columns.
Currently, `base_dependent_view_info::_base_non_pk_columns_in_view_pk`
field keeps a list of non-primary-key columns from the base table which
are a part of the view's primary key. Because the current code does not
allow indexes on static columns yet, the columns kept in the
aforementioned field are always assumed to be regular columns of the
base table and are kept as `column_id`s which do not contain information
about the column kind.
This commit splits the `_base_non_pk_columns_in_view_pk` field into two,
one for regular columns and the other for static columns, so that it is
possible to keep both kinds of columns in `base_dependent_view_info` and
the structure can be used for secondary indexes on static columns.
Three lambdas were removed, simplifying the code.
Closes#12207
* github.com:scylladb/scylladb:
compaction_manager: reindent postponed_compactions_reevaluation()
compaction_manager: coroutinize postponed_compactions_reevaluation()
compaction_manager: make postponed_compactions_reevaluation() return a future
The first patch in this small series fixes a hang during shutdown, where the expired-item scanning thread could get stuck in a retry loop instead of quitting. These hangs were seen in some test runs (issue #12145).
The second patch is a failsafe against additional bugs like those solved by the first patch: if any bug causes the same page fetch to repeatedly time out, let's stop the attempts after 10 retries instead of retrying forever. When we stop the retries, a warning is printed to the log, and Scylla waits until the next scan period and starts a new scan from scratch - from a random position in the database - instead of hanging potentially forever waiting for the same page.
Closes#12152
* github.com:scylladb/scylladb:
alternator ttl: in scanning thread, don't retry the same page too many times
alternator: fix hang during shutdown of expiration-scanning thread
As it has a do_with(), coroutinizing it is an automatic win.
Closes#12195
* github.com:scylladb/scylladb:
cql3: batch_statement: reindent get_mutations()
cql3: batch_statement: coroutinize get_mutations()
postponed_compactions_reevaluation() runs until compaction_manager is
stopped, checking if it needs to launch new compactions.
Make it return a future instead of stashing its completion somewhere.
This makes it easier to convert it to a coroutine.
* abseil 7f3c0d78...4e5ff155 (125):
> Add a compilation test for recursive hash map types
> Add AbslStringify support for enum types in Substitute.
> Use a c++14-style constexpr initialization if c++14 constexpr is available.
> Move the vtable into a function to delay instantiation until the function is called. When the variable is a global the compiler is allowed to instantiate it more aggresively and it might happen before the types involved are complete. When it is inside a function the compiler can't instantiate it until after the functions are called.
> Cosmetic reformatting in a test.
> Reorder base64 unescape methods to be below the escaping methods.
> Fixes many compilation issues that come from having no external CI coverage of the accelerated CRC implementation and some differences bewteen the internal and external implementation.
> Remove static initializer from mutex.h.
> Import of CCTZ from GitHub.
> Remove unused iostream include from crc32c.h
> Fix MSVC builds that reject C-style arrays of size 0
> Remove deprecated use of absl::ToCrc32c()
> CRC: Make crc32c_t as a class for explicit control of operators
> Convert the full parser into constexpr now that Abseil requires C++14, and use this parser for the static checker. This fixes some outstanding bugs where the static checker differed from the dynamic one. Also, fix `%v` to be accepted with POSIX syntax.
> Write (more) directly into the structured buffer from StringifySink, including for (size_t, char) overload.
> Avoid using the non-portable type __m128i_u.
> Reduce flat_hash_{set,map} generated code size.
> Use ABSL_HAVE_BUILTIN to fix -Wundef __has_builtin warning
> Add a TODO for the deprecation of absl::aligned_storage_t
> TSAN: Remove report_atomic_races=0 from CI now that it has been fixed
> absl: fix Mutex TSan annotations
> CMake: Remove trailing commas in `AbseilDll.cmake`
> Fix AMD cpu detection.
> CRC: Get CPU detection and hardware acceleration working on MSVC x86(_64)
> Removing trailing period that can confuse a url in str_format.h.
> Refactor btree iterator generation code into a base class rather than using ifdefs inside btree_iterator.
> container.h: fix incorrect comments about the location of <numeric> algorithms.
> Zero encoded_remaining when a string field doesn't fit, so that we don't leave partial data in the buffer (all decoders should ignore it anyway) and to be sure that we don't try to put any subsequent operands in either (there shouldn't be enough space).
> Improve error messages when comparing btree iterators when generations are enabled.
> Document the WebSafe* and *WithPadding variants more concisely, as deltas from Base64Encode.
> Drop outdated comment about LogEntry copyability.
> Release structured logging.
> Minor formatting changes in preparation for structured logging...
> Update absl::make_unique to reflect the C++14 minimum
> Update Condition to allocate 24 bytes for MSVC platform pointers to methods.
> Add missing include
> Refactor "RAW: " prefix formatting into FormatLogPrefix.
> Minor formatting changes due to internal refactoring
> Fix typos
> Add a new API for `extract_and_get_next()` in b-tree that returns both the extracted node and an iterator to the next element in the container.
> Use AnyInvocable in internal thread_pool
> Remove absl/time/internal/zoneinfo.inc. It was used to guarantee availability of a few timezones for "time_test" and "time_benchmark", but (file-based) zoneinfo is now secured via existing Bazel data/env attributes, or new CMake environment settings.
> Updated documentation on use of %v Also updated documentation around FormatSink and PutPaddedString
> Use the correct Bazel copts in crc targets
> Run the //absl/time timezone tests with a data dependency on, and a matching ${TZDIR} setting for, //absl/time/internal/cctz:zoneinfo.
> Stop unnecessary clearing of fields in ~raw_hash_set.
> Fix throw_delegate_test when using libc++ with shared libraries
> CRC: Ensure SupportsArmCRC32PMULL() is defined
> Improve error messages when comparing btree iterators.
> Refactor the throw_delegate test into separate test cases
> Replace std::atomic_flag with std::atomic<bool> to avoid the C++20 deprecation of ATOMIC_FLAG_INIT.
> Add support for enum types with AbslStringify
> Release the CRC library
> Improve error messages when comparing swisstable iterators.
> Auto increase inlined capacity whenever it does not affect class' size.
> drop an unused dep
> Factor out the internal helper AppendTruncated, which is used and redefined in a couple places, plus several more that have yet to be released.
> Fix some invalid iterator bugs in btree_test.cc for multi{set,map} emplace{_hint} tests.
> Force a conservative allocation for pointers to methods in Condition objects.
> Fix a few lint findings in flags' usage.cc
> Narrow some _MSC_VER checks to not catch clang-cl.
> Small cleanups in logging test helpers
> Import of CCTZ from GitHub.
> Merge pull request abseil/abseil-cpp#1287 from GOGOYAO:patch-1
> Merge pull request abseil/abseil-cpp#1307 from KindDragon:patch-1
> Stop disabling some test warnings that have been fixed
> Support logging of user-defined types that implement `AbslStringify()`
> Eliminate span_internal::Min in favor of std::min, since Min conflicts with a macro in a third-party library.
> Fix -Wimplicit-int-conversion.
> Improve error messages when dereferencing invalid swisstable iterators.
> Cord: Avoid leaking a node if SetExpectedChecksum() is called on an empty cord twice in a row.
> Add a warning about extract invalidating iterators (not just the iterator of the element being extracted).
> CMake: installed artifacts reflect the compiled ABI
> Import of CCTZ from GitHub.
> Import of CCTZ from GitHub.
> Support empty Cords with an expected checksum
> Move internal details from one source file to another more appropriate source file.
> Removes `PutPaddedString()` function
> Return uint8_t from CappedDamerauLevenshteinDistance.
> Remove the unknown CMAKE_SYSTEM_PROCESSOR warning when configuring ABSL_RANDOM_RANDEN_COPTS
> Enforce Visual Studio 2017 (MSVC++ 15.0) minumum
> `absl::InlinedVector::swap` supports non-assignable types.
> Improve b-tree error messages when dereferencing invalid iterators.
> Mutex: Fix stall on single-core systems
> Document Base64Unescape() padding
> Fix sign conversion warnings in memory_test.cc.
> Fix a sign conversion warning.
> Fix a truncation warning on Windows 64-bit.
> Use btree iterator subtraction instead of std::distance in erase_range() and count().
> Eliminate use of internal interfaces and make the test portable and expose it to OSS.
> Fix various warnings for _WIN32.
> Disables StderrKnobsDefault due to order dependency
> Implement btree_iterator::operator-, which is faster than std::distance for btree iterators.
> Merge pull request abseil/abseil-cpp#1298 from rpjohnst:mingw-cmake-build
> Implement function to calculate Damerau-Levenshtein distance between two strings.
> Change per_thread_sem_test from size medium to size large.
> Support stringification of user-defined types in AbslStringify in absl::Substitute.
> Fix "unsafe narrowing" warnings in absl, 12/12.
> Revert change to internal 'Rep', this causes issues for gdb
> Reorganize InlineData into an inner Rep structure.
> Remove internal `VLOG_xxx` macros
> Import of CCTZ from GitHub.
> `absl::InlinedVector` supports move assignment with non-assignable types.
> Change Cord internal layout, which reduces store-load penalties on ARM
> Detects accidental multiple invocations of AnyInvocable<R(...)&&>::operator()&& by producing an error in debug mode, and clarifies that the behavior is undefined in the general case.
> Fix a bug in StrFormat. This issue would have been caught by any compile-time checking but can happen for incorrect formats parsed via ParsedFormat::New. Specifically, if a user were to add length modifiers with 'v', for example the incorrect format string "%hv", the ParsedFormat would incorrectly be allowed.
> Adds documentation for stringification extension
> CMake: Remove check_target calls which can be problematic in case of dependency cycle
> Changes mutex unlock profiling
> Add static_cast<void*> to the sources for trivial relocations to avoid spurious -Wdynamic-class-memaccess errors in the presence of other compilation errors.
> Configure ABSL_CACHE_ALIGNED for clang-like and MSVC toolchains.
> Fix "unsafe narrowing" warnings in absl, 11/n.
> Eliminate use of internal interfaces
> Merge pull request abseil/abseil-cpp#1289 from keith:ks/fix-more-clang-deprecated-builtins
> Merge pull request abseil/abseil-cpp#1285 from jun-sheaf:patch-1
> Delete LogEntry's copy ctor and assignment operator.
> Make sinks provided to `AbslStringify()` usable with `absl::Format()`.
> Cast unused variable to void
> No changes in OSS.
> No changes in OSS
> Replace the kPower10ExponentTable array with a formula.
> CMake: Mark absl::cord_test_helpers and absl::spy_hash_state PUBLIC
> Use trivial relocation for transfers in swisstable and b-tree.
> Merge pull request abseil/abseil-cpp#1284 from t0ny-peng:chore/remove-unused-class-in-variant
> Removes the legacy spellings of the thread annotation macros/functions by default.
Closes#12201
The CQL server uses an execution stage to process and execute queries.
However, an execution stage is best utilized by a recurrent flow
that is invoked repeatedly, since that makes better use of the
instruction cache.
Up until now, every request was sent through the processing stage, but
most requests are not meant to be executed repeatedly at high volume.
This change processes and executes the data queries asynchronously,
through an execution stage, while all the rest are processed one by
one, only continuing once the request has been handled end to end.
Tests:
Unit tests in dev and debug.
Signed-off-by: Eliran Sinvani <eliransin@scylladb.com>
Closes#12202
This PR hits two goals for the "object storage" effort
1. The sstables loader "knows" that sstable components are stored in a Linux directory and uses utils/lister to access it. This is not going to work with sstables over object storage; the loader should be abstracted from the underlying storage.
2. Currently class keyspace and class column_family carry "datadir" and "all_datadirs" on board, which are paths on the local filesystem where sstable files are stored (those usually start with /var/lib/scylla/data). The paths include subdirs like "snapshots", "staging", etc. This is not going to look nice for object storage; the /var/lib/ prefix is excessive and meaningless in this case. Instead, ks and cf should know their "location", and some other component should know the directory in which the files are stored.
That said, this PR prepares distributed_loader and sstable_directory to stop using Linux paths explicitly by making both call sstables_manager to list and open sstable objects. After that it will be possible to teach the manager to list sstables from object storage. This also opens the way to removing paths from the keyspace and column_family classes and replacing them with relative "location"s.
Closes#12128
* github.com:scylladb/scylladb:
sstable_directory: Get components lister from manager
sstable_directory: Extract directory lister
sstable_directory: Remove sstable creation callback
sstable_directory: Call manager to make sstables
sstable_directory: Keep error handler generator
sstable_directory: Keep schema_ptr
sstable_directory: Use directory semaphore from manager
sstable_directory: Keep reference on manager
tests: Use sstables creation helper in some cases
sstables_manager: Keep directory semaphore reference
sstables, code: Wrap directory semaphore with concurrency
Currently, if a node that is outside of the config tries to add an entry
or modify the config, a transient error is returned, and this causes the node
to retry. But the error is not transient. If a node tries to do one of
the operations above, it means it was part of the cluster at some point;
and since a node with the same id should not be added back to a cluster,
if it is not in the cluster now, it never will be.
Return a new error, not_a_member, to the caller instead.
Message-Id: <Y42mTOx8bNNrHqpd@scylladb.com>
We currently try to detect a replaced node, so as to insert it into
endpoints_to_remove when it has no owned tokens left.
However, for each token we first generate a multimap using
get_endpoint_to_token_map_for_reading().
There are 2 problems with that:
1. unless the replaced node owns a single token, this map will not
be empty after erasing one token out of it, since the
token metadata has not changed yet (this is done later with
update_normal_tokens(owned_tokens, endpoint)).
2. generating this map for each token is inefficient, making this
algorithm's complexity quadratic in the number of tokens.
This change copies the current token_to_endpoint map
to a temporary map and erases replaced tokens from it,
while maintaining a set of candidates_for_removal.
After traversing all replaced tokens, we check
the `token_to_endpoint_map` again, erasing from `candidates_for_removal`
any endpoint that still owns tokens.
The leftover candidates are endpoints that own no tokens,
and so they are added to `hosts_to_remove`.
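In standard-container terms, the new pass looks roughly like this
(illustrative type and function names; the real code operates on the token
metadata):
```
#include <unordered_map>
#include <unordered_set>
#include <vector>

void find_hosts_to_remove(std::unordered_multimap<token, endpoint> token_to_endpoint, // a copy
                          const std::vector<token>& replaced_tokens,
                          std::unordered_set<endpoint>& hosts_to_remove) {
    std::unordered_set<endpoint> candidates_for_removal;
    // one linear pass: erase each replaced token, remembering its former owner
    for (const auto& t : replaced_tokens) {
        auto [first, last] = token_to_endpoint.equal_range(t);
        for (auto it = first; it != last; it = token_to_endpoint.erase(it)) {
            candidates_for_removal.insert(it->second);
        }
    }
    // a candidate that still owns any token must not be removed
    for (const auto& p : token_to_endpoint) {
        candidates_for_removal.erase(p.second);
    }
    hosts_to_remove.insert(candidates_for_removal.begin(), candidates_for_removal.end());
}
```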
Fixes#12082
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Closes#12141
Also simplify the code and improve logging in general.
The previous code did this: search for the ID in the address map. If it
couldn't be found, perform a read barrier and search again. If it again
couldn't be found, return.
This algorithm depended on the fact that IP addresses were stored in
group 0 configuration. The read barrier was used to obtain the most
recent configuration, and if the IP was not a part of address map after
the read barrier, that meant it's simply not a member of group 0.
This logic no longer applies so we can simplify the code.
Furthermore, when I was fixing the replace operation with Raft enabled,
at some point I had a "working" solution with all tests passing. But I
was suspicious and checked if the replaced node got removed from
group 0. It wasn't. So the replace finished "successfully", but we had
an additional (voting!) member of group 0 which didn't correspond to
a token ring member.
The last version of my fixes ensures that the node gets removed by the
replacing node. But the system is fragile and nothing prevents us from
breaking this again. At least log an error for now. Regression tests
will be added later.
We must place the Raft ID obtained during the shadow round in the
address map. It won't be placed by the regular gossiping route if we're
replacing using the same IP, because we override the application state
of the replaced node. Even if we replace a node with a different IP, it
is not guaranteed that background gossiping manages to update the
address map before we need it, especially in tests where we set
ring_delay to 0 and disable wait_for_gossip_to_settle. The shadow round,
on the other hand, performs a synchronous request (and if it fails
during bootstrap, bootstrap will fail - because we also won't be able to
obtain the tokens and Host ID of the replaced node).
Fetch the Raft ID of the replaced node in `prepare_replacement_info`,
which runs the shadow round. Return it in `replacement_info`. Then
`join_token_ring` passes it to `setup_group0`, which stores it in the
address map. It does that after `join_group0` so the entry is
non-expiring (the replaced node is a member of group 0). Later in the
replace procedure, we call `remove_from_group0` for the replaced node.
`remove_from_group0` will be able to reverse-translate the IP of the
replaced node to its Raft ID using the address map.
During the replace operation we need the Raft ID of the replaced node.
The shadow round is used for fetching all necessary information before
the replace operation starts.
Most of the sleeps related to gossiping are based on `ring_delay`,
which is configurable and can be set to a lower value, e.g. during tests.
But for some reason there was one case where we slept for a hardcoded
value, `service::load_broadcaster::BROADCAST_INTERVAL` - 60 seconds.
Use `2 * get_ring_delay()` instead. With the default value of
`ring_delay` (30 seconds) this will give the same behavior.
For now this is almost a no-op, because the manager just calls the
sstables_directory code back to create the lister.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Currently the utils/lister.cc code is in use to list regular files in a
directory. This patch wraps the lister into a more abstract
components lister class.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Now the directory code has everything it needs to create an sstable object
and can stop using the external lambda.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Yet another continuation of the previous patch -- an IO error handler
generator is also needed to create sstables.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Continuation of the patch before the previous one. In order to create an
sstable without an external lambda, the directory code needs the schema.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
After the previous patch, the sstables_directory code no longer requires
the semaphore argument, because it can get one from the manager. This makes
the directory API shorter and simpler.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The sstables_directory accesses /var/lib/scylla/data in two ways -- it lists
files in it and opens sstables. The latter is abstracted with the help
of lambdas passed around, but the former (listing) is done by using
directory listers from utils.
Listing sstable components with a directory lister won't work for object
storage; the directory code will need to call some abstraction layer
instead. Opening sstables with the help of a lambda is a bit of an
overkill; having the sstables manager at hand could make it much simpler.
That said, this patch makes sstables_directory reference sstables_manager
on start.
This change will also simplify directory semaphore usage (next patch).
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Several test cases push an sstables-creation lambda into the
with_sstables_directory helper. There's a ready-to-use helper class that
does the same. The next patch will make additional use of that.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Currently this is a sharded<semaphore> started/stopped in main and
referenced by the database in order to be fed into the sstables code. This
semaphore always comes with a "concurrency" parameter that limits the
parallel_for_each parallelism.
This patch wraps both together into a directory_semaphore class. This
makes its usage simpler and will allow extending it in the future.
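A rough sketch of the wrapper's shape (illustrative only, assuming func
returns a future; the real class differs):
```
#include <seastar/core/loop.hh>
#include <seastar/core/semaphore.hh>

class directory_semaphore {
    seastar::semaphore _sem;
public:
    explicit directory_semaphore(size_t concurrency) : _sem(concurrency) {}

    // walk the given directory entries with bounded parallelism
    template <typename Range, typename Func>
    seastar::future<> parallel_for_each(Range& entries, Func func) {
        return seastar::parallel_for_each(entries, [this, func] (auto& entry) {
            return seastar::with_semaphore(_sem, 1, [&entry, func] {
                return func(entry);
            });
        });
    }
};
```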
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
When repair master and followers have different shard count, the repair
followers need to create multi-shard readers. Each multi-shard reader
will create one local reader on each shard, N (smp::count) local readers
in total.
There is a hard limit on the number of readers that can work in parallel.
When there are more readers than this limit, the readers start to
evict each other, causing buffers already read from disk to be dropped
and readers to be recreated, which is not very efficient.
To optimize and reduce the reader eviction overhead, a global reader permit
is introduced which accounts for the extra local readers that a
multi-shard reader creates.
With this patch, at any point in time, the number of readers created by
repair will not exceed the reader limit.
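Conceptually, the accounting resembles the following sketch (a hypothetical
helper around real seastar primitives; the real permit machinery is more
involved):
```
#include <seastar/core/semaphore.hh>
#include <seastar/core/smp.hh>

// A multi-shard reader creates one local reader on each of the smp::count
// shards, so charge smp::count units for it instead of just one.
template <typename Func>
seastar::future<> with_multishard_reader_permit(seastar::semaphore& reader_sem,
                                                Func make_and_use_reader) {
    return seastar::with_semaphore(reader_sem, seastar::smp::count,
                                   std::move(make_and_use_reader));
}
```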
Test Results:
1) with stream sem 10, repair global sem 10, 5 ranges in parallel, n1=2
shards, n2=8 shards, memory wanted =1
1.1)
[asias@hjpc2 mycluster]$ time nodetool -p 7200 repair ks2 (repair on n2)
[2022-11-23 17:45:24,770] Starting repair command #1, repairing 1
ranges for keyspace ks2 (parallelism=SEQUENTIAL, full=true)
[2022-11-23 17:45:53,869] Repair session 1
[2022-11-23 17:45:53,869] Repair session 1 finished
real 0m30.212s
user 0m1.680s
sys 0m0.222s
1.2)
[asias@hjpc2 mycluster]$ time nodetool repair ks2 (repair on n1)
[2022-11-23 17:46:07,507] Starting repair command #1, repairing 1
ranges for keyspace ks2 (parallelism=SEQUENTIAL, full=true)
[2022-11-23 17:46:30,608] Repair session 1
[2022-11-23 17:46:30,608] Repair session 1 finished
real 0m24.241s
user 0m1.731s
sys 0m0.213s
2) with stream sem 10, repair global sem no_limit, 5 ranges in
parallel, n1=2 shards, n2=8 shards, memory wanted =1
2.1)
[asias@hjpc2 mycluster]$ time nodetool -p 7200 repair ks2 (repair on n2)
[2022-11-23 17:49:49,301] Starting repair command #1, repairing 1
ranges for keyspace ks2 (parallelism=SEQUENTIAL, full=true)
[2022-11-23 17:52:01,414] Repair session 1
[2022-11-23 17:52:01,415] Repair session 1 finished
real 2m13.227s
user 0m1.752s
sys 0m0.218s
2.2)
[asias@hjpc2 mycluster]$ time nodetool repair ks2 (repair on n1)
[2022-11-23 17:52:19,280] Starting repair command #1, repairing 1
ranges for keyspace ks2 (parallelism=SEQUENTIAL, full=true)
[2022-11-23 17:52:42,387] Repair session 1
[2022-11-23 17:52:42,387] Repair session 1 finished
real 0m24.196s
user 0m1.689s
sys 0m0.184s
Comparing 1.1) and 2.1) shows that eviction played a major role here.
The patch gives a 73s / 30s = 2.5X speed up in this setup.
Comparing 1.1) and 1.2) shows that even if we limit the readers, starting
on the node with fewer shards is faster: 30s / 24s = 1.25X (the total
number of multishard readers is lower).
Fixes#12157
Closes#12158
Split the simple (and common) case from the complex case,
and coroutinize the latter. Hopefully this generates better
code for the simple case, and it makes the complex case a
little nicer.
Closes#12194
* github.com:scylladb/scylladb:
cql3: select_statement: reindent process_results_complex()
cql3: select_statement: coroutinize process_results_complex()
cql3: select_statement: split process_results() into fast path and complex path
run_snapshot_list_operation() takes a continuation, so passing it
a lambda coroutine without protection is dangerous.
Protect the coroutine with coroutine::lambda so it doesn't lose its
contents.
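The pattern, roughly (the callee and the request_context/process names are
simplified stand-ins; coroutine::lambda comes from seastar's
<seastar/core/coroutine.hh>):
```
#include <seastar/core/coroutine.hh>

seastar::future<> list_snapshots(request_context& ctx) {
    // the lambda temporary stays alive until the enclosing co_await
    // completes, so its captures remain valid across suspension points
    co_await run_snapshot_list_operation(
            seastar::coroutine::lambda([&ctx] () -> seastar::future<> {
        co_await process(ctx); // hypothetical body
    }));
}
```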
Fixes#12192.
Closes#12193
Not a huge gain, since it's just a do_with, but still a little better.
Note the inner lambda is not a coroutine, so it isn't susceptible to
the lambda coroutine fiasco.
One of the prerequisites to make sstables reside on object storage is not to let the rest of the code "know" the filesystem path they are located on (because sometimes they will not be on any filesystem path). This patch makes the methods that can reveal this path private again, so that later they can be abstracted out.
Closes#12182
* github.com:scylladb/scylladb:
sstable: Mark some methods private
test: Don't get sstable dir when known
test: Use move_to_quarantine() helper
test: Use sstable::filename() overload without dir name
sstables: Reimplement batch directory sync after move
table, tests: Make use of move_to_new_dir() default arg
sstables: Remove fsync_directory() helper
table: Simplify take_snapshot()'s collecting sstables names
The test enables an error injection inside the Raft upgrade procedure
on one of the nodes which will cause the node to throw an exception
before entering `synchronize` state. Then it restarts other nodes with
Raft enabled, waits until they enter `synchronize` state, puts them in
RECOVERY mode, removes the error-injected node and creates a new Raft
group 0.
As soon as the other nodes entered `synchronize`, the test disabled the
error injection (the rest of the test was outside the `async with
inject_error(...)` block). There was a small chance that we disabled the
error injection before the node reached it. In that case the node also
entered `synchronize` and the cluster managed to finish the upgrade
procedure. We encountered this during the next promotion.
Eliminate this possibility by extending the scope of the `async with
inject_error(...)` block, so that the RECOVERY mode steps on the other
nodes are performed within that block.
Closes#12162
There are several class sstable methods that reveal the internal directory
path to the caller. That's not object-storage-friendly. Fortunately, all the
callers of those methods have been patched not to work with full paths,
so these methods can be marked private.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The sstable_move_test creates sstables in its own temp directories and
then requests these dirs' paths back from the sstables. The test can use
the paths it already has at hand; there's no need to call sstables for them.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Two places in tests move an sstable to the quarantine subdir by hand. There's
a class sstable method that does the same, so use it.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The dir this place currently uses is the directory where the sstable was
created, so dropping this argument would just yield the same path.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
There's a table::move_sstables_from_staging() method that gets a bunch
of sstables and moves them from the staging subdir into the table's root
datadir. To avoid flushing the root dir for every sstable move, it asks
sstable::move_to_new_dir() not to flush, but collects the staging dir names
and flushes them and the root dir at the end altogether.
In order to make it more friendly to object storage, and to remove one
more caller of sstable::get_dir(), the delayed_commit_changes struct is
introduced. It collects _all_ the affected dir names in an unordered_set,
then allows flushing them. By default move_to_new_dir() doesn't
receive this object and flushes the directories instantly.
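The struct's shape is roughly the following (simplified sketch):
```
#include <unordered_set>
#include <seastar/core/loop.hh>
#include <seastar/core/seastar.hh>
#include <seastar/core/sstring.hh>

struct delayed_commit_changes {
    std::unordered_set<seastar::sstring> _dirs; // every affected dir, once

    // sync all the collected directories in one batch at the end
    seastar::future<> commit() {
        return seastar::parallel_for_each(_dirs, [] (const seastar::sstring& dir) {
            return seastar::sync_directory(dir);
        });
    }
};
```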
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The method in question accepts a boolean indicating whether or not it should
sync directories at the end. It's always true except in one case, so there's
a default value for it. Make use of it.
Anticipating the suggestion to replace the bool with a bool_class -- the next
patch will replace it with something else.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
This helper effectively wraps the existing seastar sync_directory() helper
in two io_check-s. It's simpler to just call the latter directly.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The method in question "snapshots" all sstables it can find, then writes
their Datafile names into the manifest file. To get the list of file
names it iterates over the sstables list again and does a silly conversion
of the full file path to a file name with the help of the directory path
length.
This can all be made much simpler by just collecting component names
directly at the time the sstable is hardlinked.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
compaction_state shouldn't be moved once emplaced. Moving it could
theoretically cause the task's gate holder to have a dangling pointer to
compaction_state's gate, but it turns out the gate's move ctor will actually
fail under this assertion:
assert(!_count && "gate reassigned with outstanding requests");
This cannot happen today, but let's make it more future proof.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Closes#12167
We used GOSSIP_ECHO verb to perform failure detection. Now we use
a special verb DIRECT_FD_PING introduced for this purpose.
There are multiple reasons to do so.
One minor reason: we want to use the same connection as other Raft
verbs: if we can't deliver Raft append_entries or vote messages
somewhere, that endpoint should be marked dead; if we can, the
endpoint should be marked alive. So putting pings on the same
connection as the other Raft verbs is important when dealing with
weird situations where some connections are available but others are
not. Observe that in `do_get_rpc_client_idx`, we put the new verb in
the right place.
Another minor reason: we remove the awkward gossiper `echo_pinger`
abstraction which required storing and updating gossiper generation
numbers. This also removes one dependency from Raft service code to
gossiper.
Major reason 1: the gossip echo handler has a weird mechanism where a
replacing node returns errors during the replace operation to some of
the nodes. In Raft however, we want to mark servers as alive when they
are alive, including a server running on a node that's replacing
another node.
Major reason 2, related to the previous one: when server B is
replacing server A with the same IP, the failure detector will try to
ping both servers. Both servers are mapped to the same IP by the
address map, so pings to both servers will reach server B. We want
server B to respond to the pings destined for server B, but not to
pings destined for server A, so the sender can mark B alive but keep A
marked dead.
To do this, we include the destination's Raft ID in our RPCs. The
destination compares the received ID with its own. If it's different,
it returns a `wrong_destination` response, and the failure detector
knows that the ping did not reach the destination (it reached someone
else).
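Sketched on the handler side, the check is roughly (hypothetical reply
types; the real verb handler lives in the Raft service code):
```
seastar::future<ping_reply> handle_direct_fd_ping(raft::server_id dst_id) {
    if (dst_id != _my_raft_id) {
        // this ping was destined for the server we replaced: report that it
        // reached someone else, so the sender keeps that server marked dead
        co_return ping_reply{.result = wrong_destination{}};
    }
    co_return ping_reply{}; // we are the intended destination, and alive
}
```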
Yet another reason: removes "Not ready to respond gossip echo
message" log spam during replace.
Closes#12107
* github.com:scylladb/scylladb:
service/raft: specialized verb for failure detector pinger
db: system_keyspace: de-staticize `{get,set}_raft_server_id`
service/raft: make this node's Raft ID available early in group registry
According to seastar/doc/lambda-coroutine-fiasco.md, a lambda coroutine
that co_awaits loses its capture frame. In the distributed_loader
code there's at least one lambda of that kind.
fixes: #12175
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Closes#12170
The method has been unused since 70e5252a (table: no longer accept online
loading of SSTable files in the main directory), and the whole concept of
reshuffling sstables was dropped later by 7351db7c (Reshape upload files
and reshard+reshape at boot).
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Closes#12165
Raft ID was loaded or created late in the boot procedure, in
`storage_service::join_token_ring`.
Create it earlier, as soon as it's possible (when `system_keyspace`
is started), pass it to `raft_group_registry::start` and store it inside
`raft_group_registry`.
We will use this Raft ID stored in group registry in following patches.
Also this reduces the number of disk accesses for this node's Raft ID.
It's now loaded from disk once, stored in `raft_group_registry`, then
obtained from there when needed.
This moves `raft_group_registry::start` a bit later in the startup
procedure - after `system_keyspace` is started - but it doesn't make
a difference.
In a recent commit 757d2a4, we removed the "xfail" mark from the test
test_manual_requests.py::test_too_large_request_content_length
because it started to pass on more modern versions of Python, with a
urllib3 bug fixed.
Unfortunately, the celebration was premature: It turns out that although
the test now *usually* passes, it sometimes fails. This is caused by
a Seastar bug scylladb/seastar#1325, which I opened #12166 to track
in this project. So unfortunately we need to add the "xfail" mark back
to this test.
Note that although the test will now be marked "xfail", it will actually
pass most of the time, so it will appear as "xpass" to people who run it.
I put a note in the xfail reason string as a reminder of why this is
happening.
Fixes#12143
Refs #12166
Refs scylladb/seastar#1325
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes#12169
The field was not used for anything. We can keep a decommissioned server
in the `stopped` field.
In fact it caused us a problem: since recently, we're using
`ScyllaCluster.uninstall` to clean up servers after a test suite finishes
(previously we were using `ScyllaServer.uninstall` directly). But
`ScyllaCluster.uninstall` didn't look into the `decommissioned` field,
so if a server got decommissioned, we wouldn't uninstall it, and it left
us some unnecessary artifacts even for successful tests. This is now
fixed.
Closes#12163
Mainly this PR removes the global db::config and feature service that are used by sstables::test_env as dependencies for the embedded sstables_manager. Other than that -- drop unused methods, remove nested test_env-s, and relax a few cases that use two temp dirs at a time for no gain.
Closes#12155
* github.com:scylladb/scylladb:
test, utils: Use only one tempdir
sstable_compaction_test: Dont create nested envs
mutation_reader_test: Remove unused create_sstable() helper
tests, lib: Move globals onto sstables::test_env
tests: Use sstables::test_env.db_config() to access config
features: Mark feature_config_from_db_config const
sstable_3_x_test: Use env method to create sst
sstable_3_x_test: Indentation fix after previous patch
sstable_3_x_test: Use sstable::test_env
test: Add config to sstable::test_env creation
config: Add constexpr value for default murmur ignore bits
There's a do_with_cloned_tmp_directory that makes two temp dirs to toss
sstables between them. Make it go with just one, all the more so that it
would resemble the existing manipulations around the staging/ subdir.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The "compact" test case runs in sstables::test_env and additionally
wraps it with another instance provided by the do_with_tmp_directory helper.
It's simpler to create the temp dir by hand and use the outer env.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
There's a bunch of objects that are used by test_env as sstables_manager
dependencies. Now that no other code needs those globals, they better sit
on the test_env next to the manager.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Currently some places use the global test config, but it's going to be
removed soon, so switch to using the config from the environment.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
It is in fact const. Other than that, the next patch will call it with a
const config at hand and would fail to compile without this fix.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
There are several cases there that construct sstables_manager by hand
with the help of a bunch of global dependencies. It's nicer to use the
existing wrapper.
(indentation left broken until next patch)
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
To make callers (tests) construct it with different options. In
particular, one test will soon want to construct it with a custom large
data handler of its own.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
... and use it in some places of sstable_compaction_test. This will allow
getting rid of the global test_db_config later.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The PR introduces shard_repair_task_impl, which represents a repair task
that spans a single shard's repair.
repair_info is replaced with shard_repair_task_impl, since both serve a
similar purpose.
Closes#12066
* github.com:scylladb/scylladb:
repair: reindent
repair: replace repair_info with shard_repair_task_impl
repair: move repair_info methods to shard_repair_task_impl
repair: rename methods of repair_module
repair: change type of repair_module::_repairs
repair: keep a reference to shard_repair_task_impl in row_level_repair
repair: move repair_range method to shard_repair_task_impl
repair: make do_repair_ranges a method of shard_repair_task_impl
repair: copy repair_info methods to shard_repair_task_impl
repair: corutinize shard task creation
repair: define run for shard_repair_task_impl
repair: add shard_repair_task_impl
Since fixing issue #11737, when the expiration scanner times out reading
a page of data, it retries asking for the same page instead of giving up
on the scan and starting anew later. This retry was infinite, which can
cause problems if we have a bug in the code or several nodes are down:
the scan can get stuck in the same place for a very long (potentially
infinite) time without making any progress.
An example of such a bug was issue #12145, where we forgot to handle
shutdowns, so on shutdown of the cluster we just hung forever repeating
the same request that will never succeed. It's better in this case to
just give up on the current scan, and start it anew (from a random
position) later.
Refs #12145 (that issue was already fixed, by a different patch which
stops the iteration when shutting down - not waiting for an infinite
number of iterations and not even one more).
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
The expiration-scanning thread is a long-running thread which can scan
data for hours, but checks for its abort-source before fetching each
page to allow for timely shutdown. Recently, we added the ability to
retry the page fetching in case of timeout, but forgot to check the
abort source in this new retry loop - which led to an infinitely-long
shutdown in some tests while the retry loop retried forever.
In this patch we fix this bug by using sleep_abortable() instead of
sleep(). sleep_abortable() will throw an exception if the abort source
was triggered before or during the sleep - and this exception will
stop the scan immediately.
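For illustration, here is a minimal standalone analogue of the
abortable-sleep pattern (plain C++ with a condition variable; a sketch of
the idea only, not Seastar's implementation):
```
#include <chrono>
#include <condition_variable>
#include <mutex>
#include <stdexcept>

struct abort_source {
    std::mutex m;
    std::condition_variable cv;
    bool aborted = false;

    void request_abort() {
        { std::lock_guard<std::mutex> g(m); aborted = true; }
        cv.notify_all();
    }

    // Sleep for d, but throw immediately if an abort was requested
    // before or during the sleep - the retry loop then stops.
    void sleep_abortable(std::chrono::milliseconds d) {
        std::unique_lock<std::mutex> lk(m);
        if (cv.wait_for(lk, d, [this] { return aborted; })) {
            throw std::runtime_error("sleep aborted");
        }
    }
};
```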
Fixes#12145
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Some people prefer to cherry-pick individual commits
so that they have less conflicts to resolve at once.
Add a comment about this possibility.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
This is the core of dynamic IP address support in Raft, moving the
IP address sourcing out of the Raft Group 0 configuration into gossip. At
Raft start, the raft id <-> IP address translation map is subscribed to
gossiper notifications and learns the IP addresses of Raft hosts from them.
The series intentionally doesn't contain the part which speeds up the
initial cluster assembly by persisting the translation cache and using
more sources besides gossip (discovery, RPC) to show correctness of the
approach.
Closes#12035
* github.com:scylladb/scylladb:
raft: (rpc) do not throw in case of a missing IP address in RPC
raft: (address map) actively maintain ip <-> raft server id map
The backport instructions said that after passing
the tests next `becomes` master, but it's more
exact to say that next `is merged into` master.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
Previously the section was called:
"How to backport a patch", which could be interpreted
as instructions for the maintainer.
The new title clearly states that these instructions
are for the contributor in case the maintainer couldn't
backport the patch by themselves.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
Since we switched the scylla-machine-image locale to C.UTF-8 because
the ubuntu-minimal image does not have en_US.UTF-8 by default, we should
do the same on our docker image to reduce image size.
Verified that #9570 does not occur on the new image, since it still uses
a UTF-8 locale.
Closes#12122
The site member is created in ScyllaCluster.start(); on startup failure
it might not be initialized, so check that it's present before stop()ing
it, and delete it afterwards, as it's not running and proper initialization
should go through ScyllaCluster.start().
Signed-off-by: Alejo Sanchez <alejo.sanchez@scylladb.com>
Closes#11939
A question was raised on what fetch_size (the requested page size
in a paged scan) counts when there is a filter: does it count the
rows before filtering (as scanned from disk) or after filtering (as
will be returned to the client)?
This patch adds a test which demonstrates that Cassandra and Scylla
behave differently in this respect: Cassandra counts post-filtering -
so fetch_size results are actually returned, while Scylla currently
counts pre-filtering.
It is arguable which behavior is the "correct" one - we discuss this in
issue #12102. But we have already had several users (such as #11340)
who complained about Scylla's behavior and expected Cassandra's behavior,
so if we decide to keep Scylla's behavior we should at least explain and
justify this decision in our documentation. Until then, let's have this
test which reminds us of this incompatibility. This test currently passes
on Cassandra and fails (xfail) on Scylla.
Refs #11340
Refs #12102
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes#12103
This patch adds a regression test for the old issue #65 which is about
a multi-column (tuple) clustering-column relation in a SELECT when one
of these columns has reversed order. It turns out that this issue was
already solved without us noticing, but we didn't have a regression test
for it, so this patch adds one. The test confirms that
Scylla now behaves like was desired when that issue was opened. The test
also passes on Cassandra, confirming that Scylla and Cassandra behave
the same for such requests.
Fixes#65
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes#12130
Better logging, less code, a minor fix.
Closes#12135
* github.com:scylladb/scylladb:
service/raft: raft_group0: less repetitive logging calls
service/raft: raft_group0: fix sleep_with_exponential_backoff
Add instructions on how to backport a feature
to an older version of Scylla.
They contain detailed step-by-step instructions
so that people unfamiliar with the intricacies
of Scylla's repository organization can
easily get the hang of it.
This is the guide I wish I had when I had
to do my first backport.
I put it in backport.md because that
looks like the file responsible
for this sort of information.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
Remove raft_address_map::get_inet_address()
While at it, coroutinize some rpc methods.
To propagate up the event of a missing IP address, use coroutine::exception()
with a proper type (raft::transport_error) and a proper error message.
This is a building block toward removing
raft_address_map::get_inet_address(), which is too generic, and shifting
the responsibility of handling missing addresses to the address map
clients. E.g. one-way RPC shouldn't throw if an address is missing, but
just drop the message.
PS An attempt to use a single template function turned out to be too
complex:
- some functions require a gate, some don't
- some return void, some future<> and some future<raft::data_type>
1) make address map API flexible
Before this patch:
- having a mapping without an actual IP address was an
internal error
- not having a mapping for an IP address was an internal
error
- re-mapping to a new IP address wasn't allowed
After this patch:
- the address map may contain a mapping
without an actual IP address, and the caller must be prepared for it:
find() will return a nullopt. This happens when we first add an entry
to Raft configuration and only later learn its IP address, e.g. via
gossip.
- it is allowed to re-map an existing entry to a new address
(see the sketch at the end of this message);
2) subscribe to gossip notifications
Learning IP addresses from gossip allows us to adjust
the address map whenever a node IP address changes.
Gossiper is also the only valid source of re-mapping, other sources
(RPC) should not re-map, since otherwise a packet from a removed
server can remap the id to a wrong address and impact liveness of a Raft
cluster.
3) populate address map state with app state
Initialize the raft address map with initial
gossip application state, specifically IPs of members
of the cluster. With this, we no longer need to store
these IPs in Raft configuration (and update them when they change).
The obvious drawback of this approach is that a node
may join Raft config before it propagates its IP address
to the cluster via gossip - so the boot process has to
wait until it happens.
Gossip also doesn't tell us which IPs are members of Raft configuration,
so we subscribe to Group0 configuration changes to mark the
members of Raft config "non-expiring" in the address translation
map.
Thanks to the changes above, Raft configuration no longer
stores IP addresses.
We still keep the 'server_info' column in the raft_config system table,
in case we change our mind or decide to store something else in there.
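A minimal sketch of the resulting address-map contract (illustrative types
and names, not the actual raft_address_map code):
```
#include <cstddef>
#include <optional>
#include <unordered_map>

struct server_id {
    unsigned long id;
    bool operator==(const server_id&) const = default;
};
struct inet_address { unsigned long addr; };

template <> struct std::hash<server_id> {
    std::size_t operator()(const server_id& s) const {
        return std::hash<unsigned long>{}(s.id);
    }
};

class address_map {
    // An entry may exist before its IP address is known, hence optional.
    std::unordered_map<server_id, std::optional<inet_address>> _map;
public:
    // May return nullopt: the id is known but its IP was not learned yet.
    std::optional<inet_address> find(server_id id) const {
        if (auto it = _map.find(id); it != _map.end()) {
            return it->second;
        }
        return std::nullopt;
    }
    // Re-mapping an id to a new address is allowed (gossip should be
    // the only source that does it).
    void set(server_id id, inet_address addr) { _map[id] = addr; }
    void add_unmapped(server_id id) { _map.emplace(id, std::nullopt); }
};
```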
Some log messages in retry loops in the Raft upgrade procedure included
a sentence like "sleeping before retrying..."; but not all of them.
With the recently added `sleep_with_exponential_backoff` abstraction we
can put this "sleeping..." message in a single place, and it's also easy
to say how long we're going to sleep.
I also enjoy using this `source_location` thing.
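For example, a retry helper can pick up the caller's location automatically -
a sketch only, not the actual sleep_with_exponential_backoff signature:
```
#include <cstdio>
#include <source_location>

// The defaulted source_location argument is evaluated at the call
// site, so every retry loop gets a "sleeping..." message pointing
// at its own file and line, without repeating the message in each loop.
void log_sleeping_before_retry(
        std::source_location loc = std::source_location::current()) {
    std::printf("%s:%u: sleeping before retrying...\n",
                loc.file_name(), static_cast<unsigned>(loc.line()));
}
```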
The SELECT JSON statement, just like SELECT, allows the user to rename
selected columns using an "AS" specification. E.g., "SELECT JSON v AS foo".
This specification was not honored: We simply forgot to look at the
alias in SELECT JSON's implementation (we did it correctly in regular
SELECT). So this patch fixes this bug.
We had two tests in cassandra_tests/validation/entities/json_test.py
that reproduced this bug. The checks in those tests now pass, but these
two tests still continue to fail after this patch because of two other
unrelated bugs that were discovered by the same tests. So in this patch
I also add a new test just for this specific issue - to serve as a
regression test.
Fixes#8078
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes#12123
* seastar 4f4cc00660...3a5db04197 (16):
> tls: add missing include <map>
> Merge 'util/process: use then_unpack to help automatically unpack tuple.' from Jianyong Chen
> HTTP: define formatter for status_type to fix build.
> fsnotifier: move it into namespace experimental and add docs.
> Move fsnotify.hh to the 'include' directory for public use.
> Merge 'reactor: define make_pipe() and use make_pipe() in reactor::spawn()' from Kefu Chai
> Merge 'Fix: error when compiling http_client_demo' from Amossss
> util/process: using `data_sink_impl::put`
> Merge 'dns: serialize UDP sends.' from Calle Wilund
> build: use correct version when finding liburing
> Merge 'Add simple http client' from Pavel Emelyanov
> future: use invoke_result instead of nested requirements
> Merge 'reactor: use separate calls in reactor and reactor_backend for read/write/sendmsg/recvmsg' from Kefu Chai
> util, core: add spawn_process() helper
> parallel utils: add note about shard-local parallelism
> shared_mutex: return typed exceptional future in with_* error handlers
Closes#12131
Some of the tests in test/alternator/test_ttl.py need an expiration scan
pass to complete and expire items. In development builds on developer
machines, this usually takes less than a second (our scanning period is
set to half a second). However, in debug builds on Jenkins each scan
often takes up to 100 (!) seconds (this is the record we've seen so far).
This is why we set the tests' timeout to 120.
But recently we saw another test run failing. I think the problem is
that in some cases, we need not one, but *two* scanning passes to
complete before the timeout: It is possible that the test writes an
item right after the current scan passed it, so it doesn't get expired,
and then a second scan starts at a random position, possibly making
the item we mentioned one of the last items to be considered - so in
total we need to wait for two scanning periods, not one, for the item
to expire.
So this patch increases the timeout from 120 seconds to 240 seconds -
more than twice the highest scanning time we ever saw (100 seconds).
Note that this timeout is just a timeout, it's not the typical test
run time: The test can finish much more quickly, as little as one
second, if items expire quickly on a fast build and machine.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes#12106
Fix some issues found with gcc 12. Note we can't fully compile with gcc yet, due to [1].
[1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98056
Closes#12121
* github.com:scylladb/scylladb:
utils: observer: qualify seastar::noncopyable_function
sstables: generation_type: forgo constexpr on hash of generation_type
logalloc: disambiguate types and non-type members
task_manager: disambiguate types and non-type members
direct_failure_detector: don't change meaning of endpoint_liveness
schema: abort on illegal per column computation kind
database: abort on illegal per partition rate limit operation
mutation_fragment: abort on illegal fragment type
per_partition_rate_limit_options: abort on illegal operation type
schema: drop unused lambda
mutation_partition: drop unused lambda
cql3: create_index_statement: remove unused lambda
transport: prevent signed and unsigned comparison
database: don't compare signed and unsigned types
raft: don't compare signed and unsigned types
compaction: don't compare signed and unsigned compaction counts
bytes_ostream: don't take reference to packed variable
Test identifiers are unique across runs, but this makes them less
useful in the Jenkins Test Result Analyzer view. For example,
counter_test can be counter_test.432 in one run and counter_test.442
in another. Jenkins considers them different and so we don't see
a trend.
Limit the id uniqueness to within a test case, so that we'll have
counter_test.{1, 2, 3} consistently. Those tests will be grouped
together so we can see pass/fail trends.
Closes#11946
Fix https://github.com/scylladb/scylla-docs/issues/4126
Closes#11122
* github.com:scylladb/scylladb:
doc: add info about the time-consuming step due to resharding
doc: add the new KB to the toctree
doc: doc: add a KB about updating the mode in perftune.yaml after upgrade
Our `null` expression, after the prepare stage, is redundant with a
`constant` expression containing the value NULL.
Remove it. Its role in the unprepared stage is taken over by
untyped_constant, which gains a new type_class enumeration to
represent it.
Some subtleties:
- Usually, handling of null and untyped_constant, or null and constant
was the same, so they are just folded into each other
- LWT "like" operator now has to discriminate between a literal
string and a literal NULL
- prepare and test_assignment were folded into the corresponding
untyped_constant functions. Some care had to be taken to preserve
error messages.
Closes#12118
Currently, each data sync repair task is started (and hence run) twice.
Thus, when the two runs are spread over a long enough time frame,
the following situation may occur:
- the first run finishes
- after some time (ttl) the task is unregistered from the task manager
- the second run finishes and attempts to finish the task which does
not exist anymore
- memory access causes a segfault.
The second call to start is deleted. A check is added
to the start method to ensure that each task is started at most once.
Fixes: #12089
Closes#12090
In ad3d2ee47d, we replaced `bool` as an expression element
(representing a boolean constant) with `constant`. But a comment
and a concept continue to mention it.
Remove the comment and the concept fragment.
Closes#12119
Recent changes in topology restricted the get_dc/get_rack calls. Older
code was trying to locate the endpoint in the gossiper, then in the system
keyspace cache, and if the endpoint was not found in either -- returned
the "default" location.
The new code generates an internal error in this case. This approach already
helped to spot several bugs in the code that were eventually fixed, but
echoes of that change still pop up.
This patch relaxes the "missing endpoint" case by printing a warning in
the logs and returning the "default" location like the old code did.
tests: update_cluster_layout_tests.py::*
hintedhandoff_additional_test.py::TestHintedHandoff::test_hintedhandoff_rebalance
bootstrap_test.py::TestBootstrap::test_decommissioned_wiped_node_can_join
bootstrap_test.py::TestBootstrap::test_failed_bootstap_wiped_node_can_join
materialized_views_test.py::TestMaterializedViews::test_decommission_node_during_mv_insert_4_nodes
refs: #11900
refs: #12054
fixes: #11870
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Closes#12067
std::hash isn't constexpr, so gcc refuses to make hash of generation_type
constexpr. It's pointless anyway since we never have a compile-time
sstable generation.
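A minimal illustration of the point (illustrative type, not the actual
sstables code):
```
#include <cstddef>
#include <functional>

struct generation_type { long value; };

// gcc rejects the constexpr version: std::hash's call operator is not
// constexpr, so it cannot appear in a constant expression:
//   constexpr std::size_t h = std::hash<long>{}(42);   // error with gcc
// The plain runtime version is all we need - sstable generations never
// exist at compile time anyway:
inline std::size_t hash_generation(const generation_type& g) {
    return std::hash<long>{}(g.value);
}
```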
logalloc::tracker has some members with the same names as types from
namespace scope. gcc (rightfully) complains that this changes
the meaning of the name. Qualify the types to disambiguate.
task_manager has some members with the same names as types from
namespace scope. gcc (rightfully) complains that this changes
the meaning of the name. Qualify the types to disambiguate.
Without memory corruption it's not possible for the switch to
fall through, and the compiler will error if we forget to add
a case. The compiler however is obliged to consider that we might
store some other value in the variable.
Without memory corruption it's not possible for the switch to
fall through, and the compiler will error if we forget to add
a case. The compiler however is obliged to consider that we might
store some other value in the variable.
Without memory corruption it's not possible for the switch to
fall through, and the compiler will error if we forget to add
a case. The compiler however is obliged to consider that we might
store some other value in the variable.
Without memory corruption it's not possible for the switch to
fall through, and the compiler will error if we forget to add
a case. The compiler however is obliged to consider that we might
store some other value in the variable.
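All four commits above apply the same pattern; a generic sketch with
illustrative names:
```
#include <cstdlib>

enum class fragment_kind { row, range_tombstone };

int weight(fragment_kind k) {
    switch (k) {
    case fragment_kind::row:             return 1;
    case fragment_kind::range_tombstone: return 2;
    }
    // Unreachable without memory corruption: the switch covers every
    // enumerator, and the compiler errors out (with -Wswitch -Werror)
    // if a new enumerator is added but not handled. abort() satisfies
    // the compiler, which must assume k could hold an out-of-range value.
    abort();
}
```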
bytes_ostream is packed, so its _begin member is packed as well.
gcc (correctly) disallows binding an unaligned variable to an ordinary
(aligned) reference, and complains.
Make it happy by open-coding the exchange operation.
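A minimal sketch of the problem and of the open-coded fix (illustrative
struct, not the actual bytes_ostream layout):
```
#include <cstdint>

struct __attribute__((packed)) stream_like {
    uint8_t tag;
    void* begin;   // potentially unaligned due to the packed layout
};

void* release(stream_like& s) {
    // std::exchange(s.begin, nullptr) would bind s.begin to a void*&,
    // which gcc rejects for a packed (possibly unaligned) field.
    // Open-code the exchange instead:
    void* old = s.begin;
    s.begin = nullptr;
    return old;
}
```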
The `add_server` function now takes an optional `ReplaceConfig` struct
(implemented using `NamedTuple`), which specifies the ID of the replaced
server and whether to reuse the IP address.
If we want to reuse the IP address, we don't allocate one using the host
registry. This required certain refactors: moving the code responsible
for allocation of IPs outside `ScyllaServer`, into `ScyllaCluster`.
Add two tests, but they are skipped for now: one of them is failing (inability
of the new node to join group 0) and both suffer from a hardcoded 60-second
sleep in Scylla.
Closes#12032
* github.com:scylladb/scylladb:
test/topology: simple node replace tests (currently disabled)
test/pylib: scylla_cluster: support node replace operation
test/pylib: scylla_cluster: move members initialization to constructor
test/pylib: scylla_cluster: (re)lease IP addr outside ScyllaServer
test/pylib: scylla_cluster: refactor create_server parameters to a struct
test.py: stop/uninstall clusters instead of servers when cleaning up
test/pylib: artifact_registry: replace `Awaitable` type with `Coroutine`
test.py: prepare for adding extra config from test when creating servers
test/pylib: manager_client: convert `add_server` to use `put_json`
test/pylib: rest_client: allow returning JSON data from `put_json`
test/pylib: scylla_cluster: don't import from manager_client
This reverts commit e2fe8559ca. I
ran all the release mode tests on aarch64 with it reverted, and
they pass. So it looks like whatever problems we had with it
were fixed.
Closes#12072
As a part of the CQL rewrite we want to be able to perform filtering by calling `evaluate()` on an expression and checking if it evaluates to `true`. Currently, trying to do that for a binary operator would result in an error.
Right now, checking whether a binary operation like `col1 = 123` is true is done using `is_satisfied_by`, which is able to check if a binary operation evaluates to true for a small set of predefined cases.
Eventually, once the grammar is relaxed, we will be able to write expressions like `(col1 < col2) = (1 > ?)`, which doesn't fit with what `is_satisfied_by` is supposed to do.
Additionally, expressions like `1 = NULL` should evaluate to `NULL`, not `true` or `false`. `is_satisfied_by` is not able to express that properly.
The proper way to go is implementing `evaluate(binary_operator)`, which takes a binary operation and returns what the result of it would be.
Implementing `prepare_expression` for `binary_operator` requires us to be able to evaluate it first. In the next PR I will add support for `prepare_expression`.
Closes#12052
* github.com:scylladb/scylladb:
cql-pytest: enable two unset value tests that pass now
cql-pytest: reduce unset value error message
cql3: expr: change unset value error messages to lowercase
cql_pytest: ensure that where clauses like token(p) = 0 AND p = 0 are rejected
cql3: expr: remove needless braces around switch cases
cql3: move evaluation IS_NOT NULL to a separate function
expr_test: test evaluating LIKE binary_operator
expr_test: test evaluating IS_NOT binary_operator
expr_test: test evaluating CONTAINS_KEY binary_operator
expr_test: test evaluating CONTAINS binary_operator
expr_test: test evaluating IN binary_operator
expr_test: test evaluating GTE binary_operator
expr_test: test evaluating GT binary_operator
expr_test: test evaluating LTE binary_operator
expr_test: test evaluating LT binary_operator
expr_test: test evaluating NEQ binary_operator
expr_test: test evaluating EQ binary_operator
cql3: expr properly handle null in is_one_of()
cql3: expr properly handle null in like()
cql3: expr properly handle null in contains_key()
cql3: expr properly handle null in contains()
cql3: expr: properly handle null in limits()
cql3: expr: remove unneeded overload of limits()
cql3: expr: properly handle null in equality operators
cql3: expr: remove unneeded overload of equal()
cql3: expr: use evaluate(binary_operator) in is_satisfied_by
cql3: expr: handle IS NOT NULL when evaluating binary_operator
cql3: expr: make it possible to evaluate binary_operator
cql3: expr: accept expression as lhs argument to like()
cql3: expr: accept expression as lhs in contains_key
cql3: expr: accept expression as lhs argument to contains()
The Alternator TTL expiration scanner scans an entire table using many
small pages. If any of those pages time out for some reason (e.g., an
overload situation), we currently consider the entire scan to have failed
and wait for the next scan period (which by default is 24 hours) when
we start the scan from scratch (at a random position). There is a risk
that if these timeouts are common enough to occur once or more per
scan, the result is that we double or more the effective expiration lag.
A better solution, done in this patch, is to retry from the same position
if a single page timed out - immediately (or almost immediately, we add
a one-second sleep).
Fixes#11737
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes#12092
scylla-driver causes dtests to fail randomly (likely
due to incorrect handling of the USE statement). Revert
it.
* tools/java 73422ee114...1c06006447 (2):
> Revert "Add Scylla Cloud serverless support"
> Revert "Switch cqlsh to use scylla-driver"
update_normal_tokens checks that the endpoint is in the topology.
Currently we call update_topology on this path only if it's
not a normal_token_owner, but there are paths where the
endpoint could be a normal token owner and still
be pending in the topology, so always update it, just in case.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
asias@scylladb.com said:
> This comment was moved up to the wrong place when tmptr->update_topology was added.
> There is no race now since we use the copy-update-replace method to update token_metadata.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Recently, clang started complaining about std::unexpected_handler being
deprecated:
```
In file included from utils/exceptions.cc:18:
./utils/abi/eh_ia64.hh:26:10: warning: 'unexpected_handler' is deprecated [-Wdeprecated-declarations]
std::unexpected_handler unexpectedHandler;
^
/usr/bin/../lib/gcc/x86_64-redhat-linux/12/../../../../include/c++/12/exception:84:18: note: 'unexpected_handler' has been explicitly marked deprecated here
typedef void (*_GLIBCXX11_DEPRECATED unexpected_handler) ();
^
/usr/bin/../lib/gcc/x86_64-redhat-linux/12/../../../../include/c++/12/x86_64-redhat-linux/bits/c++config.h:2343:32: note: expanded from macro '_GLIBCXX11_DEPRECATED'
^
/usr/bin/../lib/gcc/x86_64-redhat-linux/12/../../../../include/c++/12/x86_64-redhat-linux/bits/c++config.h:2334:46: note: expanded from macro '_GLIBCXX_DEPRECATED'
^
1 warning generated.
```
According to cppreference.com, it was deprecated in C++11 and removed in
C++17 (!).
This commit gets rid of the warning by inlining the
std::unexpected_handler typedef, which is defined as a pointer to a
function taking no arguments and returning void.
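A sketch of the change (the field name matches the warning; the
surrounding struct is illustrative):
```
struct exception_state {
    // Before - uses the typedef deprecated in C++11 and removed in C++17:
    //   std::unexpected_handler unexpectedHandler;
    // After - the same type spelled out inline:
    void (*unexpectedHandler)();
};
```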
Fixes: #12022
Closes#12074
On rare occasions a SELECT on a DROPped table throws
cassandra.ReadFailure instead of cassandra.InvalidRequest. This could
not be reproduced locally.
Catch both exceptions as the table is not present anyway and it's
correctly marked as a failure.
Signed-off-by: Alejo Sanchez <alejo.sanchez@scylladb.com>
Closes#12027
Global index page caching, as introduced in 4.6
(078a6e422b and 9f957f1cf9) has proven to be misdesigned,
because it poses a risk of catastrophic performance regressions in
common workloads by flooding the cache with useless index entries.
Because of that risk, it should be disabled by default.
Refs #11202
Fixes#11889
Closes#11890
As a preparation for replacing repair_info with shard_repair_task_impl,
the type of _repairs in the repair module is changed from
std::unordered_map<int, lw_shared_ptr<repair_info>> to
std::unordered_map<int, tasks::task_id>.
As a part of replacing repair_info with shard_repair_task_impl,
instead of a reference to repair_info, row_level_repair keeps
a reference to shard_repair_task_impl.
The do_repair_ranges function is directly connected to shard repair tasks.
Turning it into a shard_repair_task_impl method enables access to the
tasks' members with no additional intermediate layers.
Methods of repair_info are copied to shard_repair_task_impl. They are
not used yet, it's a preparation for replacing repair_info with
shard_repair_task_impl.
In system_keyspace::get_repair_history the value of repair_uuid
was read from the row as tasks::task_id.
tasks::task_id is represented by an abstract_type specific
to utils::UUID; thus, since their typeids differ, bad_cast
was thrown.
Now repair_uuid is read from the row as utils::UUID and then cast.
Since it is no longer needed, data_type_for<tasks::task_id> is deleted.
Fixes: #11966
Closes#12062
While implementing evaluate(binary_operator),
missing checks for unset values were added
to the comparisons in the filtering code.
Because of that, some tests for unset values
started passing.
There are still other unset-value tests
that fail because Scylla doesn't
have all the checks that it should.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
When unset value appears in an invalid place
both Cassandra and Scylla throw an error.
The tests were written with Cassandra
and thus the expected error messages were
exactly the same as produced by Cassandra.
Scylla produces different error messages,
but both databases return messages with
the text 'unset value'.
Reduce the expected message text
from the whole message to something
that contains 'unset value'.
It would be hard to mimic Cassandra's
error messages in Scylla. There is no
point in spending time on that.
Instead it's better to modify the tests
so that they are able to work with
both Cassandra and Scylla.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
The messages used to contain UNSET_VALUE
in capital letters, but the tests
expect messages with 'unset value'.
Change the message so that it can
match the expected error text in tests.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
Add two node replace tests using the freshly added infrastructure.
One test replaces a node while using a different IP. It is disabled
because the replace operation has an unconditional 60-second sleep
(it doesn't depend on the ring_delay setting for some reason). The sleep
needs to be fixed before we can enable this test.
The other test replaces a node while reusing the replaced node's IP.
In addition to the sleep, the test fails because the node cannot join
group 0; it's stuck in an infinite loop of trying to join:
```
INFO 2022-11-18 15:56:19,933 [shard 0] raft_group0 - server 8de951fd-a528-4a82-ac54-592ea269537f found no local group 0. Discovering...
INFO 2022-11-18 15:56:19,933 [shard 0] raft_group0 - server 8de951fd-a528-4a82-ac54-592ea269537f found group 0 with group id 25d2b050-6751-11ed-b534-c3c40c275dd3, leader b7047f7e-03e6-4797-a723-24054201f91d
INFO 2022-11-18 15:56:19,934 [shard 0] raft_group0 - Server 8de951fd-a528-4a82-ac54-592ea269537f is starting group 0 with id 25d2b050-6751-11ed-b534-c3c40c275dd3
WARN 2022-11-18 15:56:20,935 [shard 0] raft_group0 - failed to modify config at peer b7047f7e-03e6-4797-a723-24054201f91d: seastar::rpc::timeout_error (rpc call timed out). Retrying.
INFO 2022-11-18 15:56:21,937 [shard 0] raft_group0 - server 8de951fd-a528-4a82-ac54-592ea269537f found group 0 with group id 25d2b050-6751-11ed-b534-c3c40c275dd3, leader ee0175ea-6159-4d4c-9d7c-95c934f8a408
WARN 2022-11-18 15:56:22,937 [shard 0] raft_group0 - failed to modify config at peer ee0175ea-6159-4d4c-9d7c-95c934f8a408: seastar::rpc::timeout_error (rpc call timed out). Retrying.
INFO 2022-11-18 15:56:23,938 [shard 0] raft_group0 - server 8de951fd-a528-4a82-ac54-592ea269537f found group 0 with group id 25d2b050-6751-11ed-b534-c3c40c275dd3, leader ee0175ea-6159-4d4c-9d7c-95c934f8a408
WARN 2022-11-18 15:56:24,939 [shard 0] raft_group0 - failed to modify config at peer ee0175ea-6159-4d4c-9d7c-95c934f8a408: seastar::rpc::timeout_error (rpc call timed out). Retrying.
```
and so on.
The `add_server` function now takes an optional `ReplaceConfig` struct
(implemented using `NamedTuple`), which specifies the ID of the replaced
server and whether to reuse the IP address.
If we want to reuse the IP address, we don't allocate one using the host
registry.
Since now multiple servers can have the same IP, introduce a
`leased_ips` set in `ScyllaCluster`, which is used when `uninstall`ing
the cluster - to make sure we don't `release_host` the same host twice.
Previously some members had to be initialized in `install` because
that's when we first knew the IP address.
Now we know the IP address during construction, which allows us to make
the code a bit shorter and simpler, and establish invariants: some
members (such as `self.config`) are now valid for the entire lifetime of
the server object.
`install()` is reduced to performing only side effects (creating
directories, writing config files), all calculation is done inside the
constructor.
`ScyllaServer`s were constructed without IP addresses. They leased an IP
address from `HostRegistry` and released it in `uninstall`.
This responsibility was now moved into `ScyllaCluster`, which leases an
IP address for a server before constructing it, and passes it to the
constructor. It releases the addresses of its servers when uninstalling
itself.
This will allow the cluster to reuse the IP address of an existing
server in that cluster when adding a new server which wants to replace
the existing one. Instead of leasing a new address, it will pass
the existing IP address to the new server's constructor.
The refactor is also nice in that it establishes an invariant for
`ScyllaServer`, simplifying reasoning about the class: now it has
an `ip_addr` field at all times.
`host_registry` was moved from `ScyllaServer` to `ScyllaCluster`.
`ScyllaCluster` constructor takes a function `create_server` which
itself takes 3 parameters now. Soon it will take a 4th. The list of
parameters is repeated at the constructor definition and the call site
of the constructor; with many parameters this becomes tiresome.
Refactor the list of parameters to a `NamedTuple`.
`self.artifacts` was calling `ScyllaServer.stop` and
`ScyllaServer.uninstall`. Now it calls `ScyllaCluster.stop` and
`ScyllaCluster.uninstall`, which underneath stops/uninstalls
servers in this cluster.
We must be a bit more careful now in case installing/starting a
server inside a cluster fails: there are no server cleanup artifacts,
and a server is added to cluster's `running` map only after
`install_and_start` finishes (until that happens,
`ScyllaCluster.stop/uninstall` won't catch this server).
So handle failures explicitly in `install_and_start`.
This commit does not logically change how the tests are run - every
started server belongs to some cluster, so it will be cleaned up
- but it's an important refactor.
It will allow us to move IP address (de)allocation code outside
`ScyllaServer`, into `ScyllaCluster`, which in turn will allow us to
implement node replace operation for the case where we want to reuse
the replaced node's IP.
Also, `ScyllaCluster.uninstall` was unused before this change, now it's
used.
Currently, TTL is listed as one of the experimental features: https://docs.scylladb.com/stable/alternator/compatibility.html#experimental-api-features
This PR moves the feature description from the Experimental Features section to a separate section.
I've also added some links and improved the formatting.
@tzach I've relied on your release notes for RC1.
Refs: https://github.com/scylladb/scylladb/issues/5060
Closes#11997
* github.com:scylladb/scylladb:
Update docs/alternator/compatibility.md
doc: update the link to Enabling Experimental Features
doc: remove the note referring to the previous ScyllaDB versions and add the relevant limitation to the paragraph
doc: update the links to the Enabling Experimental Features section
doc: add the link to the Enabling Experimental Features section
doc: move the TTL Alternator feature from the Experimental Features section to the production-ready section
Until now, the Alternator TTL feature was considered "experimental",
and had to be manually enabled on all nodes of the cluster to be usable.
This patch removes this requirement and in essence GAs this feature.
Even after this patch, Alternator TTL is still a "cluster feature",
i.e., for this feature to be usable every node in the cluster needs
to support it. If any of the nodes is old and does not yet support this
feature, the UpdateTimeToLive request will not be accepted, so although
the expiration-scanning threads may exist on the newer nodes, they will
not do anything because none of the tables can be marked as having
expiration enabled.
This patch does not contain documentation fixes - the documentation
still suggests that the Alternator TTL feature is experimental.
The documentation patch will come separately.
Fixes#12037
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes#12049
The `cleanup_before_exit` method of `ArtifactRegistry` calls `close()`
on artifacts. mypy complains that `Awaitable` has no such method. In
fact, the `artifact` objects that we pass to `ArtifactRegistry`
(obtained by calling `async def` functions) do have a `close()` method,
and they are a particular case of `Awaitable`s, but in general not
all `Awaitable`s have `close()`.
Replace `Awaitable` with one of its subtypes: `Coroutine`. `Coroutine`s
have a `close()` method, and `async def` functions return objects of
this type. mypy no longer complains.
[PR](https://github.com/scylladb/scylladb/pull/9314) fixed a similar issue with regular insert statements
but missed the LWT code path.
It's expected behaviour of
`modification_statement::create_clustering_ranges` to return an
empty range in this case, since the `possible_lhs_values` function it
uses explicitly returns `empty_value_set` if it evaluates `rhs`
to null, and has a comment about it (All NULL
comparisons fail; no column values match.) On the other hand,
all components of the primary key are required to be set,
this is checked at the prepare phase, in
`modification_statement::process_where_clause`. So the only
problem was `modification_statement::execute_with_condition`
was not expecting an empty `clustering_range` in case of
a null clustering key.
Also, this patch contains a fix for the problem of a wrong
column name in Scylla error messages. If an `INSERT` or `DELETE`
statement is missing a non-last element of
the primary key, the generated error message contains
an invalid column name.
The problem occurs if the query contains a column with the list type,
otherwise
`statement_restrictions::process_clustering_columns_restrictions`
checks that all the components of the key are specified.
Closes#12047
* github.com:scylladb/scylladb:
cql: refactor, inline modification_statement::validate_primary_key_restrictions
cql: DELETE with null value for IN parameter should be forbidden
cql: add column name to the error message in case of null primary key component
cql: batch statement, inserting a row with a null key column should be forbidden
cql: wrong column name in error messages
modification_statement: fix LWT insert crash if clustering key is null
Release 5.1 introduced a new CQL extension that applies to the CREATE TABLE and ALTER TABLE statements. The ScyllaDB-specific extensions are described on a separate page, so the CREATE TABLE and ALTER TABLE sections should include links to that page and section.
Note: CQL extensions are described with Markdown, while the Data Definition page is RST. Currently, there's no way to link from an RST page to an MD subsection (using a section heading or anchor), so a URL is used as a temporary solution.
Related: https://github.com/scylladb/scylladb/pull/9810
Closes#12070
* github.com:scylladb/scylladb:
doc: move the info about per-partition rate limit for the ALTER TABLE statemet from the paragraph to the list
doc: add the links to the per-partition rate limit extention to the CREATE TABLE and ALTER TABLE sections
Don't call get_datacenter(ep) without checking
first has_endpoint(ep) since the former may abort
on internal error if the endpoint is not listed
in topology.
Refs #11870
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Closes#12054
If a DELETE statement contains an IN operator and the
parameter value for it is NULL, this should also trigger
an error. This is in line with how Cassandra
behaves in this case.
Regular INSERT statements with null values for primary key
components are rejected by Scylla since #9286 and #9314.
Batch statements missed a similar check, this patch
fixes it.
Fixes: #12060
If an INSERT or DELETE statement is missing a non-last element of
the primary key, the generated error message contains
an invalid column name.
The problem occurs if the query contains a column with the list type,
otherwise
statement_restrictions::process_clustering_columns_restrictions
checks that all the components of the key are specified.
Fixes: #12046
Returns an unordered set of datacenter names
to be used by network_topology_replication_strategy
and for ks_prop_defs.
The set is kept in sync with _dc_endpoints.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Closes#12023
Since we moved all IaaS code to scylla-machine-image, we no longer need
the AMI variable in the sysconfig file or the --ami parameter in the setup
scripts; we also never used /etc/scylla/ami_disabled.
So let's drop all of them from Scylla core.
Related with scylladb/scylla-machine-image#61
Closes#12043
When started, the sstable_directory is constructed with a bunch of booleans that control the way its process_sstable_dir method works. It's shorter and simpler to pass these booleans into the method directly, especially since another flag is already passed like this.
Closes#12005
* github.com:scylladb/scylladb:
sstable_directory: Move all RAII booleans onto flags
sstable_directory: Convert sort-sstables argument to flags struct
sstable_directory: Drop default filter
Scylla doesn't support combining restrictions
on token with other restrictions on partition key columns.
Some pieces of code depend on the assumption
that such combinations are rejected.
If they were allowed in the future,
these functions would silently start
returning wrong results, and we would
return invalid rows.
Add a test that will start failing once
this restriction is removed. It will
warn the developer to change the
functions that used to depend
on the assumption.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
The PR introduces top level repair tasks representing repair and node operations
performed with repair. The actions performed as a part of these operations are
moved to the corresponding tasks' run methods.
A small change to the repair module is also included.
Closes#11869
* github.com:scylladb/scylladb:
repair: define run for data_sync_repair_task_impl
repair: add data_sync_repair_task_impl
tasks: repair: add noexcept to task impl constructor
repair: define run for user_requested_repair_task_impl
repair: add user_requested_repair_task_impl
repair: allow direct access to max_repair_memory_per_range
Originally I put braces around the cases
because there were local variables that I didn't want
to be shadowed.
Now there are no variables, so the braces
can be removed without any problems.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
When evaluating a binary operation with
operations like EQUAL, LESS_THAN, IN
the logic of the operation is put
in a separate function to keep things clean.
IS_NOT NULL is the only exception,
it has its evaluate implementation
right in the evaluate(binary_operator)
function.
It would be cleaner to have it in
a separate dedicated function,
so it's moved to one.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
There is a more general version of limits()
which takes expressions as both the lhs and rhs
arguments.
There is no need for a specialized overload.
This specialized overload takes a tuple_constructor
as lhs, but we call evaluate() on both sides
of a binary operator before checking equality,
so this won't be useful at all.
Having multiple functions increases the risk
that one of them has a bug, while giving
dubious benefit.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
Expressions like:
123 = NULL
NULL = 123
NULL = NULL
NULL != 123
should be tolerated, but evaluate to NULL.
The current code assumes that a binary operator
can only evaluate to a boolean - true or false.
Now a binary operator can also evaluate to NULL.
This should happen in cases when one of the
operator's sides is NULL.
A special class is introduced to represent a value
that can be one of three things: true, false or null.
It's better than using std::optional<bool>,
because optional has implicit conversions to bool
that could cause confusion and bugs.
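A minimal sketch of such a three-valued class (illustrative, not the
actual Scylla code):
```
#include <optional>

class bool_or_null {
    std::optional<bool> _value;  // nullopt represents NULL
    explicit bool_or_null(std::optional<bool> v) : _value(v) {}
public:
    static bool_or_null null() { return bool_or_null(std::nullopt); }
    static bool_or_null yes()  { return bool_or_null(true); }
    static bool_or_null no()   { return bool_or_null(false); }

    bool is_null() const { return !_value.has_value(); }
    bool is_true() const { return _value.has_value() && *_value; }
    // Deliberately no implicit conversion to bool: the caller must say
    // explicitly which of the three states it is asking about.
};

// For example, evaluating `123 = NULL` would yield bool_or_null::null()
// rather than false.
```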
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
When processing a query, we keep a pointer to an effective_replication_map.
In a couple of places we used the latest topology instead of the one held by the effective_replication_map
that the query uses, which might lead to inconsistencies if, for example, a node is removed from the topology by a decommission that happens concurrently with the query.
This change gets the topology& from the e_r_m in those cases.
Fixes#12050
Closes#12051
* github.com:scylladb/scylladb:
storage_proxy: pass topology& to sort_endpoints_by_proximity
storage_proxy: pass topology& to is_worth_merging_for_range_query
Currently the ctor of said class always allocates as it copies the
provided name string and creates a new name via format().
We want to avoid this, now that the validator is used on the read path.
So defer creating the formatted name to when we actually want to log
something, which is either when log level is debug or when an error is
found. We don't care about performance in either case, but we do care
about it on the happy path.
Further to the above, provide a constructor for string literal names and
when this is used, don't copy the name string, just save a view to it.
Refs: #11174Closes#12042
Contains fixes requested in the issue (and some tiny extras), together with an analysis of why they don't affect the users (see commit messages).
Fixes [#11800](https://github.com/scylladb/scylladb/issues/11800)
Closes#11926
* github.com:scylladb/scylladb:
alternator: add maybe_quote to secondary indexes 'where' condition
test/alternator: correct xfail reason for test_gsi_backfill_empty_string
test/alternator: correct indentation in test_lsi_describe
alternator: fix wrong 'where' condition for GSI range key
There's a bunch of booleans that control the behavior of sstable
directory scanning. Currently they are described as verbose
bool_class<>-es and are passed in at sstable_directory construction time.
However, they are not used outside of the .process_sstable_dir() method, and
moving them onto the recently added flags struct makes the code much
shorter (29 insertions(+), 121 deletions(-)).
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The sstable_directory::process_sstable_dir() accepts a boolean to
control its behavior when collecting sstables. Turn this boolean into a
structure of flags. The intention is to extend this flags set in the
future (next patch).
This boolean is true almost all the time, but one place sets it to false in a
"verbose" manner, like this:
bool sort_sstables_according_to_owner = false;
process_sstable_dir(directory, sort_sstables_according_to_owner).get();
where the local variable is not used for anything else. Using designated
initializers solves the verbosity in a nicer manner.
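A sketch of the conversion (illustrative flag names):
```
struct process_flags {
    bool sort_sstables_according_to_owner = true;
    bool throw_on_missing_toc = false;
};

// The verbose local variable disappears; a designated initializer
// names the flag right at the call site:
//   process_sstable_dir(directory,
//       process_flags{ .sort_sstables_according_to_owner = false }).get();
```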
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
It's used as the default argument for the .reshape() method, but callers specify
it explicitly. At the same time the filter is simple enough and is only
used in one place, so the caller can just use an explicit lambda.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
There is a more general version of equal()
which takes expressions as both the lhs and rhs
arguments.
There is no need for a specialized overload.
This specialized overload takes a tuple_constructor
as lhs, but we call evaluate() on both sides
of a binary operator before checking equality,
so this won't be useful at all.
Having multiple functions increases the risk
that one of them has a bug, while giving
dubious benefit.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
It mustn't use the latest topology that may differ from the
one used by the query as it may be missing nodes
(e.g. after concurrent decommission).
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
It mustn't use the latest topology that may differ from the
one used by the query as it may be missing nodes
(e.g. after concurrent decommission).
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
PR #9314 fixed a similar issue with regular insert statements
but missed the LWT code path.
It's expected behaviour of
modification_statement::create_clustering_ranges to return an
empty range in this case, since the possible_lhs_values function it
uses explicitly returns empty_value_set if it evaluates rhs
to null, and has a comment about it (All NULL
comparisons fail; no column values match.) On the other hand,
all components of the primary key are required to be set,
this is checked at the prepare phase, in
modification_statement::process_where_clause. So the only
problem was modification_statement::execute_with_condition
was not expecting an empty clustering_range in case of
a null clustering key.
Fixes: #11954
This bug doesn't affect anything; the reason is described in the commit:
'alternator: fix wrong 'where' condition for GSI range key'.
But it's theoretically correct to escape those key names and
the difference can be observed via CQL's describe table. Before
the patch 'where' condition is missing one double quote in variable
name making it mismatched with corresponding column name.
Otherwise, I think, the assert is not executed in a loop. And I am not sure why the lsi variable can be bound
to anything. As I tested, it was pointing to the last element in lsis...
This bug doesn't manifest in a way visible to the user.
Adding the index to an existing table via GlobalSecondaryIndexUpdates is not supported,
so we don't need to consider what could happen for empty values of the index range key.
After the index is added, the only interesting thing a user can do is omit
the value (null or empty are not allowed, see test_gsi_empty_value and
test_gsi_null_value).
In practice, regardless of the 'where' condition, the underlying materialized
view code skips row updates with missing keys, as per this comment:
'If one of the key columns is missing, set has_new_row = false
meaning that after the update there will be no view row'.
That's why the added test passes both before and after the patch.
But it's still useful to include it to exercise those code paths.
Fixes#11800
This patch includes a translation of several additional small test files
from Cassandra's CQL unit test directory cql3/validation/operations.
All tests included here pass on both Cassandra and Scylla, so they did
not discover any new Scylla bugs, but can be useful in the future as
regression tests.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes#12045
Take advantage of the facts that both the owned ranges
and the initial non_owned_ranges (derived from the set of sstables)
are deoverlapped and sorted by start token to turn
the calculation of the final non_owned_ranges from
quadratic to linear.
Fixes#11922
Closes#11903
* github.com:scylladb/scylladb:
dht: optimize subtract_ranges
compaction: refactor dht::subtract_ranges out of get_ranges_for_invalidation
compaction_manager: needs_cleanup: get first/last tokens from sstable decorated keys
is_satisfied_by has to check if a binary_operator is satisfied
by some values. It used to be impossible to evaluate
a binary_operator, so is_satisfied_by had code to check
if it is satisfied for a limited number of cases
occurring when filtering queries.
Now evaluate(binary_operator) has been implemented
and is_satisfied_by can use it to check if a binary_operator
evaluates to true.
This is cleaner and reduces code duplication.
Additionally, cql tests will exercise the new evaluate() implementation.
There is one special case with token().
When is_satisfied_by sees a restriction on token
it assumes that it's satisfied because it's
sure that these token restrictions were used
to generate partition ranges.
I had to leave this special case in because it's impossible
to evaluate(token). Once this is implemented I will remove
the special case because it's risky and prone to cause
bugs.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
The code to evaluate binary operators
was copied from is_satisfied_by.
is_satisfied_by wasn't able to evaluate
IS NOT NULL restrictions, so when such restriction
is encountered it throws an exception.
Implement proper handling for IS NOT NULL binary operators.
The switch ensures that all variants of oper_t are handled,
otherwise there would be a compilation error.
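A sketch of the exhaustive-switch idea (abbreviated, illustrative oper_t):
```
enum class oper_t { EQ, NEQ, LT, IS_NOT };

bool is_null_check(oper_t op) {
    // No default case: the compiler (with -Wswitch, promoted to an
    // error by -Werror) rejects the switch if a new oper_t enumerator
    // is added but not handled here.
    switch (op) {
    case oper_t::EQ:
    case oper_t::NEQ:
    case oper_t::LT:
        return false;
    case oper_t::IS_NOT:
        return true;
    }
    __builtin_unreachable();
}
```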
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
In order to support different storage kinds for sstable files (e.g. S3) we need to localize all the places that manipulate files on a POSIX filesystem, so that a custom storage backend could implement them in its own way. This set moves the deletion log manipulations to sstable_directory.cc, which already "knows" that it works over a directory.
Closes#12020
* github.com:scylladb/scylladb:
sstables: Delete log file in replay_pending_delete_log()
sstables: Move deletion log manipulations to sstable_directory.cc
sstables: Open-code delete_sstables() call
sstables: Use fs::path in replay_pending_delete_log()
sstables: Indentation fix after previous patch
sstables: Coroutinize replay_pending_delete_log
sstables: Read pending delete log with one line helper
sstables: Dont write pending log with file_writer
evaluate() takes an expression and evaluates it
to a constant value. It wasn't possible to evaluate
binary operators before, so support for them is added.
The code is based on is_satisfied_by,
which is currently used to check
whether a binary operator evaluates
to true or false.
It looks like is_satisfied_by and evaluate()
do pretty much the same thing; one could be
implemented using the other.
In the future they might get merged
into a single function.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
like() used to only accept column_value as the lhs
to evaluate. Changed it to accept any generic expression.
This will allow evaluating a more diverse set of
binary operators.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
contains_key() used to only accept column_value as the lhs
to evaluate. Changed it to accept any generic expression.
This will allow evaluating a more diverse set of
binary operators.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
contains() used to only accept column_value as the lhs
to evaluate. Changed it to accept any generic expression.
This will allow evaluating a more diverse set of
binary operators.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
Take advantage of the fact that both ranges and
ranges_to_subtract are deoverlapped and sorted by start token
to reduce the calculation complexity from
quadratic to linear.
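A minimal sketch of the linear subtraction, assuming both inputs are
sorted by start and internally non-overlapping (plain half-open integer
ranges stand in for token ranges):
```
#include <algorithm>
#include <cstddef>
#include <vector>

struct range { long start, end; };  // half-open [start, end)

std::vector<range> subtract(const std::vector<range>& rs,
                            const std::vector<range>& to_subtract) {
    std::vector<range> out;
    std::size_t j = 0;
    for (auto r : rs) {
        long cur = r.start;
        // Skip subtrahends that end before this range starts; j never
        // moves backwards, which is what makes the pass linear.
        while (j < to_subtract.size() && to_subtract[j].end <= cur) ++j;
        for (std::size_t k = j;
             k < to_subtract.size() && to_subtract[k].start < r.end; ++k) {
            if (to_subtract[k].start > cur) {
                out.push_back({cur, to_subtract[k].start});
            }
            cur = std::max(cur, to_subtract[k].end);
        }
        if (cur < r.end) {
            out.push_back({cur, r.end});
        }
    }
    return out;
}
```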
Fixes#11922
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
The algorithm is generic and can be used elsewhere.
Add a unit test for the function before it gets
optimized in the following patch.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Currently, the function is inefficient in two ways:
1. unnecessary copy of first/last keys to automatic variables
2. redecorating the partition keys with the schema passed to
needs_cleanup.
We can just use the tokens from the sstable first/last decorated keys.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
The deletion log concept uses the fact that files are on a POSIX
filesystem. Support for another storage type will have to reimplement
this place, so keep the FS-specific code in the _directory.cc file.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
It's not used by any other code, and to be used it requires the caller to
transform TOC file names by prepending the sstable directory to them. Things
get shorter and simpler by merging the helper code into the caller.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
It's called by a code that has fs::path at hand and internally uses
helpers that need fs::path too, so no need to convert it back and forth.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
It's a wrapper over output_stream with offset tracking, and the tracking
is not needed to generate a log file. As a bonus of switching back we
get the stream.write(sstring) sugar.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Fix https://github.com/scylladb/scylladb/issues/11598
This PR adds the troubleshooting article submitted by @syuu1228 in the deprecated _scylla-docs_ repo, with https://github.com/scylladb/scylla-docs/pull/4152.
I copied and reorganized the content and rewrote it a little according to the RST guidelines so that the page renders correctly.
@syuu1228 Could you review this PR to make sure that my changes didn't distort the original meaning?
Closes#11626
* github.com:scylladb/scylladb:
doc: apply the feedback to improve clarity
doc: add the link to the new Troubleshooting section and replace Scylla with ScyllaDB
doc: add the new page to the toctree
doc: add a troubleshooting article about the missing configuration files
There's a logical dependency from `manager_client` to `scylla_cluster`
(`ManagerClient` defined in `manager_client` talks to
`ScyllaClusterManager` defined in `scylla_cluster` over RPC). There is
no such dependency in the other way. Do not introduce it accidentally.
We can import these types from the `internal_types` module.
We had an xfailing test that reproduced a case where Alternator tried
to report an error when the request was too long, but the boto library
didn't see this error and threw a "Broken Pipe" error instead. It turns
out that this wasn't a Scylla bug but rather a bug in urllib3, which
overzealously reported a "Broken Pipe" instead of trying to read the
server's response. It turns out this issue was already fixed in
https://github.com/urllib3/urllib3/pull/1524
and now, on modern installations, the test that used to fail now passes
and reports "XPASS".
So in this patch we remove the "xfail" tag, and skip the test if
running an old version of urllib3.
Fixes #8195
Closes #12038
Fragment reordering and fragment dropping bugs have been plaguing us since forever. To fight them we added a validator to the sstable write path to prevent really messed up sstables from being written.
This series adds validation to the mutation compactor. This will cover reads and compaction among others, hopefully ridding us of such bugs on the read path too.
This series fixes some benign-looking issues found by unit tests after the validator was added -- although how benign a producer emitting two partition-ends is depends entirely on how the consumer reacts to it, so no such bug is actually benign.
Fixes: https://github.com/scylladb/scylladb/issues/11174
Closes #11532
* github.com:scylladb/scylladb:
mutation_compactor: add validator
mutation_fragment_stream_validator: add a 'none' validation level
test/boost/mutation_query_test: test_partition_limit: sort input data
querier: consume_page(): use partition_start as the sentinel value
treewide: use ::for_partition_end() instead of ::end_of_partition_tag_t{}
treewide: use ::for_partition_start() instead of ::partition_start_tag_t{}
position_in_partition: add for_partition_{start,end}()
Adds unit tests for the function `expr::prepare_expression`.
Three minor bugs were found by these tests, all fixed in this PR.
1. When preparing a map, the type for the tuple constructor was taken from an unprepared tuple, which has `nullptr` as its type.
2. Preparing an empty nonfrozen list or set resulted in `null`, but preparing a map didn't. Fixed this inconsistency.
3. Preparing a `bind_variable` with a `nullptr` receiver was allowed. The `bind_variable` ended up with a `nullptr` type, which is incorrect. Changed it to throw an exception.
Closes #11941
* github.com:scylladb/scylladb:
test preparing expr::usertype_constructor
expr_test: test that prepare_expression checks style_type of collection_constructor
expr_test: test preparing expr::collection_constructor for map
prepare_expr: make preparing nonfrozen empty maps return null
prepare_expr: fix a bug in map_prepare_expression
expr_test: test preparing expr::collection_constructor for set
expr_test: test preparing expr::collection_constructor for list
expr_test: test preparing expr::tuple_constructor
expr_test: test preparing expr::untyped_constant
expr_test_utils: add make_bigint_raw/const
expr_test_utils: add make_tinyint_raw/const
expr_test: test preparing expr::bind_variable
cql3: prepare_expr: forbid preparing bind_variable without a receiver
expr_test: test preparing expr::null
expr_test: test preparing expr::cast
expr_test_utils: add make_receiver
expr_test_utils: add make_smallint_raw/const
expr_test: test preparing expr::token
expr_test: test preparing expr::subscript
expr_test: test preparing expr::column_value
expr_test: test preparing expr::unresolved_identifier
expr_test_utils: mock data_dictionary::database
We had a test that used to fail because of issue #8745. But this issue
was already fixed, and we forgot to remove the "xfail" marker. The test
now passes, so let's remove the xfail marker.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes #12039
Add new guide for upgrading 5.1 to 5.2.
In this new upgrade doc, include additional steps for enabling
Raft using the `consistent_cluster_management` flag. Note that we don't
have this flag yet but it's planned to replace the experimental flag in
5.2.
In the "Raft in ScyllaDB" document, add sections about:
- enabling Raft in existing clusters in Scylla 5.2,
- verifying that the internal Raft upgrade procedure finishes
successfully,
- recovering from a stuck Raft upgrade procedure or from a majority loss
situation.
Fix some problems in the documentation, e.g. it is not possible to
enable Raft in an existing cluster in 5.0, but the documentation claimed
that it is.
Follow-up items:
- if we decide for a different name for `consistent_cluster_management`,
use that name in the docs instead
- update the warnings in Scylla to link to the Raft doc
- mention Enterprise versions once we know the numbers
- update the appropriate upgrade docs for Enterprise versions
once they exist
Closes #11910
* github.com:scylladb/scylladb:
docs: describe the Raft upgrade and recovery procedures
docs: add upgrade guide 5.1 -> 5.2
We recently (in 7fbad8de87) made sure all admission paths can trigger the eviction of inactive reads. As reader eviction happens in the background, a mechanism was added to ensure only a single eviction fiber is running at any given time. This mechanism, however, had a preemption point between stopping the fiber and releasing the evict lock. This gave an opportunity for new waiters or inactive readers to be added without the fiber acting on them. Since the fiber still held the lock, it also prevented other eviction fibers from starting. This could create a situation where the semaphore could admit new reads by evicting inactive ones, yet it still had waiters. Since an empty waitlist is also an admission criterion, once one waiter is wrongly added, many more can accumulate.
This series fixes this by ensuring the lock is released in the instant the fiber decides there is no more work to do.
It also fixes the assert failure on recursive eviction and adds detection for the inactive-read/waiter contradiction.
Fixes: #11923
Refs: #11770
Closes #12026
* github.com:scylladb/scylladb:
reader_concurrency_semaphore: do_wait_admission(): detect admission-waiter anomaly
reader_concurrency_semaphore: evict_readers_in_the_background(): eliminate blind spot
reader_concurrency_semaphore: do_detach_inactive_read(): do a complete detach
This series contains a mixed bag of improvements to `scylla sstable dump-data`. These improvements are mostly aimed at making the json output clearer, getting rid of any ambiguities.
Closes #12030
* github.com:scylladb/scylladb:
tools/scylla-sstable: traverse sstables in argument order
tools/scylla-sstable: dump-data docs: s/clustering_fragments/clustering_elements
tools/scylla-sstable: dump-data/json: use Null instead of "<unknown>"
tools/scylla-sstable: dump-data/json: use more uniform format for collections
tools/scylla-sstable: dump-data/json: make cells easier to parse
Since recently the framework uses a separate set of unique IDs to
identify servers, but the log file and workdir are still named using the
last part of the IP address.
This is confusing: the test logs sometimes don't provide the IP addr
(only the ID), and even if they do, the reader of the test log may not
know that they need to look at the last part of the IP to find the
node's log/workdir.
Also, using the ID will be necessary if we want to reuse IP addresses (e.g.
during node replace, or simply not to run out of IP addresses during
testing).
So use the ID instead to name the workdir and log file.
Also, when starting a test case, print the used cluster. This will make
it easier to map server IDs to their IP addresses when browsing through
the test logs.
Closes #12018
* github.com:scylladb/scylladb:
test/pylib: manager_client: print used cluster when starting test case
test/pylib: scylla_cluster: use server ID to name workdir and log file, not IP address
BOOST_CHECK_EQUAL is a weaker form of assertion: it reports an error
and causes the test case to fail, but continues running. This makes the
test harder to debug because there's no obvious way to catch the
failure in GDB, and the test output is also flooded with things which
happen after the failed assertion.
Message-Id: <20221119171855.2240225-1-tgrabiec@scylladb.com>
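A minimal illustration of the difference between the two assertion macros (the asserted values are made up):
```
#define BOOST_TEST_MODULE example
#include <boost/test/unit_test.hpp>

BOOST_AUTO_TEST_CASE(check_vs_require) {
    int actual = 41;                  // stand-in for a real computation
    BOOST_CHECK_EQUAL(actual, 42);    // reports the mismatch, test keeps running
    BOOST_REQUIRE_EQUAL(actual, 42);  // reports and stops this test case here
}
```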
When filtering with a multi column restriction present, all other restrictions were ignored.
So a query like:
`SELECT * FROM WHERE pk = 0 AND (ck1, ck2) < (0, 0) AND regular_col = 0 ALLOW FILTERING;`
would ignore the restriction `regular_col = 0`.
This was caused by a bug in the filtering code:
2779a171fc/cql3/selection/selection.cc (L433-L449)
When multi column restrictions were detected, the code checked if they are satisfied and returned immediately.
This is fixed by returning only when these restrictions are not satisfied. When they are satisfied the other restrictions are checked as well to ensure all of them are satisfied.
This code was introduced back in 2019, when fixing #3574.
Perhaps back then it was impossible to mix multi column and regular columns and this approach was correct.
Fixes: #6200
Fixes: #12014
Closes #12031
* github.com:scylladb/scylladb:
cql-pytest: add a reproducer for #12014, verify that filtering multi column and regular restrictions works
boost/restrictions-test: uncomment part of the test that passes now
cql-pytest: enable test for filtering combined multi column and regular column restrictions
cql3: don't ignore other restrictions when a multi column restriction is present during filtering
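To make the fix concrete, here is a hedged sketch of the corrected control flow (illustrative names, not the actual selection.cc code):
```
// Returns whether a row passes all ALLOW FILTERING restrictions.
bool row_passes_filter(const row_type& row) {
    if (has_multi_column_restrictions) {
        if (!multi_column_restrictions_satisfied(row)) {
            return false;  // multi column check failed: filter the row out
        }
        // The bug: the old code returned true here, skipping the check
        // below; the fix falls through instead.
    }
    return other_restrictions_satisfied(row);
}
```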
The table UUIDs are the same on all shards
so we might as well get them on shard 0
(as we already do) and reuse them on other shards.
It is more efficient and accurate to look up the table
eventually on the shard using its uuid rather than
its name. If the table was dropped and recreated
using the same name in the background, the new
table will have a new uuid, so the api function
does not apply to it anymore.
A following change will handle the no_such_column_family
cases.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
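A sketch of the pattern under assumed surroundings; `find_uuid()` and the UUID overload of `find_column_family()` follow the description above, while the coroutine scaffolding and `apply_api_function` are illustrative:
```
// Resolve the UUID once on shard 0, then address the table by UUID
// on every shard.
auto uuid = db.local().find_uuid(ks_name, cf_name);
co_await db.invoke_on_all([uuid] (replica::database& local_db) -> future<> {
    // Throws no_such_column_family if the table was dropped meanwhile,
    // even if a new table reuses the same name.
    replica::table& t = local_db.find_column_family(uuid);
    return apply_api_function(t);  // hypothetical per-shard operation
});
```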
In issue #12014 a user has encountered an instance of #6200.
When filtering a WHERE clause which contained
both multi-column and regular restrictions,
the regular restrictions were ignored.
Add a test which reproduces the issue
using a reproducer provided by the user.
This problem is tested in another similar test,
but this one reproduces the issue in the exact
way it was found by the user.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
A part of the test was commented out due to #6200.
Now #6200 has been fixed and it can be uncommented.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
The test test_multi_column_restrictions_and_filtering was marked as xfail,
because issue #6200 wasn't fixed. Now that filtering
multi column and other restrictions together has been fixed
the test passes.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
When filtering with a multi column restriction present, all other restrictions were ignored.
So a query like:
`SELECT * FROM WHERE pk = 0 AND (ck1, ck2) < (0, 0) AND regular_col = 0 ALLOW FILTERING;`
would ignore the restriction `regular_col = 0`.
This was caused by a bug in the filtering code:
2779a171fc/cql3/selection/selection.cc (L433-L449)
When multi column restrictions were detected,
the code checked if they are satisfied and returned immediately.
This is fixed by returning only when these restrictions
are not satisfied. When they are satisfied the other
restrictions are checked as well to ensure all
of them are satisfied.
This code was introduced back in 2019, when fixing #3574.
Perhaps back then it was impossible to mix multi column
and regular columns and this approach was correct.
Fixes: #6200
Fixes: #12014
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
The currently used "<unknown>" marker for invalid values/types is
indistinguishable from a normal value in some cases. Use the much more
distinct and unique JSON Null instead.
Instead of trying to be clever and switching the output format on the type of
collection, always use the same format: a list of objects, where each
object has a key and a value attribute, containing the respective
collection item's key and value. This makes processing much easier for
machines (and humans too, since the previous system wasn't working well).
There are several slightly different cell types in scylla: regular
cells, collection cells (frozen and non-frozen) and counter cells
(update and shards). In C++ code the type of the cell is always
available to code wishing to determine exactly what kind of cell a cell
is. In the JSON output of dump-data this is currently really hard to
do, as there is not enough information to disambiguate all the different
cell types. We wish to make the JSON output self-sufficient, so in this
patch we introduce a "type" field which contains one of:
* regular
* counter-update
* counter-shards
* frozen-collection
* collection
Furthermore, we bring the different types closer together by also printing the
counter shards under the 'value' key, not under the 'shards' key as
before. The separate 'shards' key is no longer needed for disambiguation.
The documentation and the write operation are also updated to reflect the
changes.
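Under this scheme, a non-frozen collection cell might look roughly like the following illustrative sample; only the "type", "key" and "value" field names come from the description above, the sample values are made up:
```
{
  "type": "collection",
  "value": [
    { "key": "k1", "value": "v1" },
    { "key": "k2", "value": "v2" }
  ]
}
```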
Some tests may take longer than a few seconds to run. We want to
mark such tests in some way, so that we can run them selectively.
This patch proposes to use pytest markers for this. The markers
from the test.py command line are passed to pytest
as is via the -m parameter.
By default, the marker filter is not applied and all tests
will be run without exception. To exclude e.g. slow tests
you can write --markers 'not slow'.
The --markers parameter is currently only supported
by Python tests, other tests ignore it. We intend to
support this parameter for other types of tests in the future.
Another possible improvement is not to run suites for which
all tests have been filtered out by markers. The markers are
currently handled by pytest, which means that the logic in
test.py (e.g., running a scylla test cluster) will be run
for such suites.
Closes #11713
This reverts commit 22f13e7ca3, and reinstates
commit df8e1da8b2 ("Merge 'cql3: select_statement:
coroutinize indexed_table_select_statement::do_execute_base_query()' from
Avi Kivity"). The original commit was reverted due to failures in debug
mode on aarch64, but after commit 224a2877b9
("build: disable -Og in debug mode to avoid coroutine asan breakage"), it
works again.
Closes #12021
We plan to stop storing IP addresses in Raft configuration, and instead
use the information disseminated through gossip to locate Raft peers.
Implement patches that are building up to that:
* improve Raft API of configuration change notifications
* disseminate raft host id in Gossip
* avoid using Raft addresses from Raft configuration, and instead
consistently use the translation layer between raft server id <-> IP
address
Closes #11953
* github.com:scylladb/scylladb:
raft: persist the initial raft address map
raft: (upgrade) do not use IP addresses from Raft config
raft: (and gossip) begin gossiping raft server ids
raft: change the API of conf change notifications
The lister accepts a sort of filter -- what kind of entries to list: regular files, directories, or both. It currently uses an unordered_set, but enum_set is shorter and better describes the intent.
Closes #12017
* github.com:scylladb/scylladb:
lister: Make lister::dir_entry_types an enum_set
database: Avoid useless local variable
The semaphore should admit readers as soon as it can. So at any point in
time there should be either no waiters, or the semaphore shouldn't be
able to admit new reads. Otherwise something went wrong. Detect this
when queuing up reads and dump the diagnostics if detected.
Even though tests should ensure this never happens, recently we've
seen a race between eviction and enqueuing producing such situations.
This is very hard to write tests for, so add built-in detection and
protection instead. Detecting this is very cheap anyway.
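A sketch of the check, with invented names (the real logic lives in `do_wait_admission()`):
```
// Before queuing a read as a waiter: if admission is currently
// possible, the invariant is broken -- dump diagnostics.
if (can_admit_read(permit)) {
    dump_diagnostics("semaphore could admit a read but has waiters");
}
_wait_list.push_back(std::move(permit));
```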
Said method has protection against concurrent (recursive, more like)
calls to itself: it sets a flag `_evicting` and returns early if
this flag is set. The eviction loop however has at least one preemption
point between deciding there is nothing more to evict and resetting said
flag. This window provides an opportunity for new inactive reads or waiters
to be queued without this loop noticing, while any other
concurrent invocations are still denied from reacting to them.
Eliminate this by using repeat() instead of do_until() and setting
`_evicting = false` the moment the loop's run condition becomes false.
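A minimal sketch of the fix, assuming seastar::repeat() and illustrative member names: the flag is cleared in the same continuation that decides to stop, leaving no preemption window.
```
future<> evict_readers_in_the_background_sketch() {
    _evicting = true;
    return repeat([this] {
        if (!try_evict_one_inactive_read()) {
            _evicting = false;  // reset before stopping, atomically
                                // with the decision that nothing is left
            return stop_iteration::yes;
        }
        return stop_iteration::no;
    });
}
```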
Currently this method detaches the inactive read from the handle and
notifies the permit, calls the notify handler if any and does some stat
bookkeeping. Extend it to do a complete detach: unlink the entry from
the inactive reads list and also cancel the ttl timer.
After this, all that is left to the caller is to destroy the entry.
This will prevent any recursive eviction from causing assertion failure.
Although recursive eviction shouldn't happen, it shouldn't trigger an
assert.
Since commit a980f94 (token_metadata: impl: keep the set of normal token owners as a member), we have a set, _normal_token_owners, which contains all the nodes in the ring.
We can use _normal_token_owners to check if a node is part of the ring directly instead of going through the _topology indirectly.
Fixes #11935
Closes #11936
* github.com:scylladb/scylladb:
token_metadata: Rename is_member to is_normal_token_owner
token_metadata: Add docs for is_member
token_metadata: Do not use topology info for is_member check
token_metadata: Check node is part of the topology instead of the ring
Since commit a980f94 (token_metadata: impl: keep the set of normal token
owners as a member), we have a set, _normal_token_owners, which contains
all the nodes in the ring.
We can use _normal_token_owners to check if a node is part of the ring
directly instead of going through the _topology indirectly.
Fixes #11935
update_normal_tokens is the way to add a new node into the ring. We
should not require a new node to already be in the ring to be able to
add it to the ring. The current code works accidentally because
is_member is checking if a node is in the topology.
We should use _topology.has_endpoint to check if a node is part of the
topology explicitly.
In Scylla and Cassandra, inserting an empty collection
that is not frozen is interpreted as inserting a null value.
list_prepare_expression and set_prepare_expression
have an if which handles this behavior, but there
wasn't one in map_prepare_expression.
As a result, preparing an empty list or set would result in null,
but preparing an empty map wouldn't. This is inconsistent;
it's better to return null in all cases of empty nonfrozen
collections.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
map_prepare_expression takes a collection_constructor
of unprepared items and prepares it.
Elements of a map collection_constructor are tuples (key and value).
map_prepare_expression creates a prepared collection_constructor
by preparing each tuple and adding it to the result.
During this preparation it needs to set the type of the tuple.
There was a bug here - it took the type from unprepared
tuple_constructor and assigned it to the prepared one.
An unprepared tuple_constructor doesn't have a type
so it ended up assigning nullptr.
Instead of that it should create a tuple_type_impl instance
by looking at the types of map key and values,
and use this tuple_type_impl as the type of the prepared tuples.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
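A sketch of the fix idea: derive the entry tuple's type from the map's key and value types. tuple_type_impl and map_type_impl are real Scylla types; the surrounding variables are illustrative:
```
auto& map_type = static_cast<const map_type_impl&>(*receiver_type);
data_type entry_type = tuple_type_impl::get_instance({
    map_type.get_keys_type(),    // type of the map key
    map_type.get_values_type()   // type of the map value
});
// entry_type (never nullptr) is then assigned to each prepared
// tuple_constructor, instead of copying the unprepared tuple's type.
```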
prepare_expression treats the receiver as an optional argument:
it can be set to nullptr and the preparation should
still succeed when it's possible to infer the type of an expression.
Preparing a bind_variable, however, requires the receiver to be present,
because the variable doesn't contain any information about the type
of the bound value.
Added a check that the receiver is present.
Allowing a bind_variable to be prepared without
a receiver was a bug.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
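A sketch of the check (the surrounding shape is assumed; exceptions::invalid_request_exception is the real exception type):
```
// A bind variable carries no type of its own, so a missing receiver
// makes preparation impossible.
if (!receiver) {
    throw exceptions::invalid_request_exception(
        "cannot infer the type of a bind variable without a receiver");
}
```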
task_manager::task::impl contains an abort source which can
be used to check whether it is aborted and an abort method
which aborts the task (request_abort on abort_source) and all
its descendants recursively.
When the start method is called after the task was aborted,
then its state is set to failed and the task does not run.
Fixes: #11995
Closes #11996
* github.com:scylladb/scylladb:
tasks: do not run tasks that are aborted
tasks: delete unused variable
tasks: add abort_source to task_manager::task::impl
`get_rpc_client` calculates a `topology_ignored` field when creating a
client which says whether the client's endpoint had topology information
when this client was created. This is later used to check if that client
needs to be dropped and replaced with a new client which uses the
correct topology information.
The `topology_ignored` field was incorrectly calculated as `true` for
pending endpoints even though we had topology information for them. This
would lead to unnecessary drops of RPC clients later. Fix this.
Remove the default parameter for `with_pending` from
`topology::has_endpoint` to avoid similar bugs in the future.
Apparently this fixes#11780. The verbs used by decommission operation
use RPC client index 1 (see `do_get_rpc_client_idx` in
message/messaging_service.cc). From local testing with additional
logging I found that by the time this client is created (i.e. the first
verb in this group is used), we already know the topology. The node is
pending at that point - hence the bug would cause us to assume we don't
know the topology, leading us to drop the RPC client later, possibly
in the middle of a decommission operation.
Fixes: #11780
Closes #11942
* github.com:scylladb/scylladb:
message: messaging_service: check for known topology before calling is_same_dc/rack
test: reenable test_topology::test_decommission_node_add_column
test/pylib: util: configurable period in wait_for
message: messaging_service: fix topology_ignored for pending endpoints in get_rpc_client
message: messaging_service: topology independent connection settings for GOSSIP verbs
It's interesting that prepare_expression
for column identifiers doesn't require a receiver.
I hope this won't break validation in the future.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
Add a function which creates a mock instance
of data_dictionary::database.
prepare_expression requires a data_dictionary::database
as an argument, so unit tests for it need something
to pass there. make_data_dictionary_database can
be used to create an instance that is sufficient for tests.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
This type is currently an unordered_set, but only consists of at most
two elements. Making it an enum_set renders it into a size_t variable
and better describes the intention.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
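Illustrative use, patterned after utils/enum_set.hh; the exact member functions shown are assumptions for the sketch:
```
using dir_entry_types = enum_set<super_enum<directory_entry_type,
        directory_entry_type::regular,
        directory_entry_type::directory>>;

auto filter = dir_entry_types::of<directory_entry_type::directory>();
if (filter.contains(directory_entry_type::directory)) {
    // list directories only; the whole filter fits in one integer
}
```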
It's used to run lister::scan_dir() with directory_entry_type::directory
only, but for that it is copied around in lambda captures. It's simpler
to just use the value directly.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Since recently the framework uses a separate set of unique IDs to
identify servers, but the log file and workdir are still named using the
last part of the IP address.
This is confusing: the test logs sometimes don't provide the IP addr
(only the ID), and even if they do, the reader of the test log may not
know that they need to look at the last part of the IP to find the
node's log/workdir.
Also, using the ID will be necessary if we want to reuse IP addresses (e.g.
during node replace, or simply not to run out of IP addresses during
testing).
As described in #11993, per-shard repair_info instances get the effective_replication_map on their own with no centralized synchronization.
This series ensures that the effective replication maps used by repair (and other associated structures like the token metadata and topology) are all in sync with the one used to initiate the repair operation.
While at it, the series includes other cleanups in this area in repair and view that are not fixes, as the calls happen in synchronous functions that do not yield.
Fixes #11993
Closes #11994
* github.com:scylladb/scylladb:
repair: pass erm down to get_hosts_participating_in_repair and get_neighbors
repair: pass effective_replication_map down to repair_info
repair: coroutinize sync_data_using_repair
repair: futurize do_repair_start
effective_replication_map: add global_effective_replication_map
shared_token_metadata: get_lock is const
repair: sync_data_using_repair: require to run on shard 0
repair: require all node operations to be called on shard 0
repair: repair_info: keep effective_replication_map
repair: do_repair_start: use keyspace erm to get keyspace local ranges
repair: do_repair_start: use keyspace erm for get_primary_ranges
repair: do_repair_start: use keyspace erm for get_primary_ranges_within_dc
repair: do_repair_start: check_in_shutdown first
repair: get_db().local() where needed
repair: get topology from erm/token_metdata_ptr
view: get_view_natural_endpoint: get topology from erm
Always use raft address map to obtain the IP addresses
of upgrade peers. Right now the map is populated
from Raft configuration, so it's an equivalent transformation,
but in the future raft address map will be populated from other sources:
discovery and gossip, hence the logic of upgrade will change as well.
Do not proceed with the upgrade if an address is
missing from the map, since it means we failed to contact a raft member.
This series moves the topology code from locator/token_metadata.{cc,hh} out to locator/topology.{cc,hh}
and introduces a shared header file: locator/types.hh contains shared, low-level definitions, in anticipation of https://github.com/scylladb/scylladb/pull/11987
While at it, the token_metadata functions are turned into coroutines
and the topology copy constructor is deleted. The copy functionality is moved into an async `clone_gently` function that allows yielding while copying the topology.
Closes #12001
* github.com:scylladb/scylladb:
locator: refactor topology out of token_metadata
locator: add types.hh
topology: delete copy constructor
token_metadata: coroutinize clone functions
compaction_manager::task (and thus compaction_data) can be stopped
for many different reasons. Thus, abort can be requested more
than once on the compaction_data abort source, causing a crash.
To prevent this, before each request_abort() we check whether an abort
was requested before.
Closes #12004
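A sketch of the guard; abort_requested() and request_abort() are the real seastar::abort_source API, while cdata.abort is an assumed member name:
```
if (!cdata.abort.abort_requested()) {
    cdata.abort.request_abort();  // now requested at most once
}
```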
An unordered_set is more efficient and there is no need
to return an ordered set for this purpose.
This change facilitates a follow-up change of adding
topology::get_datacenters(), returning an unordered_set
of datacenter names.
Refs #11987
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Closes #12003
Fix a typo introduced by the recent patch fixing the spelling of
Barrett. The patch introduced a typo in the aarch64 version of the code,
which wasn't found by promotion, as that only builds on X86_64.
Closes #12006
We plan to use gossip data to educate Raft RPC about IP addresses
of raft peers. Add raft server ids to application state, so
that when we get a notification about a gossip peer we can
identify which raft server id this notification is for,
specifically, we can find what IP address stands for this server
id, and, whenever the IP address changes, we can update Raft
address map with the new address.
By the same token, at boot time, we now have to start Gossip
before Raft, since Raft won't be able to send any messages
without gossip data about IP addresses.
Pass a change diff into the notification callback,
rather than add or remove servers one by one, so that
if we need to persist the state, we can do it once per
configuration change, not for every added or removed server.
For now still pass added and removed entries in two separate calls
for a single configuration change. This is done mainly to fulfill the
library contract that it never sends messages to servers
outside the current configuration. The group0 RPC
implementation doesn't need the two calls, since it simply
marks the removed servers as expired: they are not removed immediately
anyway, and messages can still be delivered to them.
However, there may be test/mock implementations of RPC which
could benefit from this contract, so we decided to keep it.
And make sure the token_metadata ring version is the same as the
reference one (from the erm on shard 0) when starting the
repair on each shard.
Refs #11993
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Turn it into a coroutine to prepare for the next patch
that will co_await make_global_effective_replication_map.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Class to hold a coherent view of a keyspace
effective replication map on all shards.
To be used in a following patch to pass the sharded
keyspace effective_replication_maps to repair.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Now that our toolchain is based on Fedora 37, we can rely on its
libdeflate rather than have to carry our own in a submodule.
Frozen toolchain is regenerated. As a side effect clang is updated
from 15.0.0 to 15.0.4.
Closes #12000
And with that do_sync_data_using_repair can be folded into
sync_data_using_repair.
This will simplify using the effective_replication_map
throughout the operation.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Rather than calling db.get_keyspace_local_ranges, which
looks up the keyspace and its erm again.
We want all the information derived from the erm to
be based on the same source.
The function is synchronous so this change doesn't
fix anything, it just cleans up the code.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Ensure that the primary ranges are in sync with the
keyspace erm.
The function is synchronous so this change doesn't fix anything,
it just cleans up the code.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Ensure the erm and topology are in sync.
The function is synchronous so this change doesn't fix
anything, just cleans up the code.
Fix mistake in comment while at it.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
In several places we get the sharded database using get_db()
and then we only use db.local(). Simplify the code by keeping
reference only to the local database upfront.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
We want the topology to be synchronized with the respective
effective_replication_map / token_metadata.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Get the topology from the effective replication map rather
than from the storage_proxy, to ensure it's synchronized
with the natural endpoints.
Since there's no preemption between the two calls,
currently there is no issue, so this is merely a cleanup
of the code and not supposed to fix anything.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
This patch adds a reproducer for issue #11954: Attempting an
"IF NOT EXISTS" (LWT) write with a null key crashes Scylla,
instead of producing a simple error message (like happens
without the "IF NOT EXISTS" after #7852 was fixed).
The test passed on Cassandra, but crashes Scylla. Because of this
crash, we can't just mark the test "xfail" and it's temporarily
marked "skip" instead.
Refs #11954.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes #11982
When `modify_config` or `add_entry` is forwarded to the leader, it may
reach the node at an "inappropriate" time and result in an exception. There
are two reasons for it: the leader is changing or, in the case of
`modify_config`, another `modify_config` is currently in progress. In both
cases the command is retried, but before this patch there was no delay
before retrying, which could lead to a tight loop.
The patch adds a new exception type `transient_error`. When the client
receives it, it is obliged to retry the request after some delay.
Previously leader-side exceptions were converted to `not_a_leader`,
which is strange, especially for `conf_change_in_progress`.
Fixes: #11564
Closes #11769
* github.com:scylladb/scylladb:
raft: rafactor: remove duplicate code on retries delays
raft: use wait_for_next_tick in read_barrier
raft: wait for the next tick before retrying
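A hedged sketch of the retry shape (names and signatures assumed): retry on transient errors, delaying when no new leader information has arrived, so forwarding never degenerates into a tight loop.
```
future<> do_on_leader_with_retries_sketch(command cmd) {
    while (true) {
        try {
            co_return co_await forward_to_leader(cmd);
        } catch (const raft::transient_error&) {
            if (!learned_new_leader_since_last_attempt()) {
                co_await wait_for_next_tick();  // back off before retrying
            }
        }
    }
}
```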
Currently, in the start() method, a task is run even if it was already
aborted.
When start() is called on an aborted task, its state is set to
task_manager::task_state::failed and it doesn't run.
As P. T. Barnum famously said, "write what you like but spell my name
correctly". Following that, we correct the spelling of Barrett's name
in the source tree.
Closes #11989
Topology is copied only from token_metadata_impl::clone_only_token_map
which copies the token_metadata_impl with yielding to prevent reactor
stalls. This should apply to topology as well, so
add a clone_gently function for cloning the topology
from token_metadata_impl::clone_only_token_map.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
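A sketch of the clone_gently pattern (member names invented); seastar::coroutine::maybe_yield() is the real yielding primitive:
```
future<topology> topology::clone_gently() const {
    topology t;
    for (const auto& [endpoint, location] : _endpoint_locations) {
        t._endpoint_locations.emplace(endpoint, location);
        co_await coroutine::maybe_yield();  // no reactor stall on big maps
    }
    co_return t;
}
```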
`is_same_dc` and `is_same_rack` assume that the peer's topology is
known. If it's unknown, `on_internal_error` will be called inside
topology.
When these functions are used in `get_rpc_client`, they are already
protected by an earlier check for knowing the peer's topology
(the `has_topology()` lambda).
Another use is in `do_start_listen()`, where we create a filter for RPC
module to check if it should accept incoming connections. If cross-dc or
cross-rack encryption is enabled, we will reject connection attempts to
the regular (non-ssl) port from other dcs/racks using `is_same_dc/rack`.
However, it might happen that something (other Scylla node or otherwise)
tries to contact us on the regular port and we don't know that thing's
topology, which would result in `on_internal_error`. But this is not a
fatal error; we simply want to reject that connection. So protect these
calls as well.
Finally, there's `get_preferred_ip` with an unprotected `is_same_dc`
call which, for a given peer, may return a different IP from the preferred IP
cache if the endpoint resides in the same DC. If there is no entry in
the preferred IP cache, we return the original (external) IP of the
peer. We can do the same if we don't know the peer's topology. It's
interesting that we didn't see this particular place blowing up. Perhaps
the preferred IP cache is always populated after we know the topology.
Also improve the test to increase the probability of reproducing #11780
by injecting sleeps in appropriate places.
Without the fix for #11780 from the earlier commit, the test reproduces
the issue in roughly half of all runs in dev build on my laptop.
`get_rpc_client` calculates a `topology_ignored` field when creating a
client which says whether the client's endpoint had topology information
when this client was created. This is later used to check if that client
needs to be dropped and replaced with a new client which uses the
correct topology information.
The `topology_ignored` field was incorrectly calculated as `true` for
pending endpoints even though we had topology information for them. This
would lead to unnecessary drops of RPC clients later. Fix this.
Remove the default parameter for `with_pending` from
`topology::has_endpoint` to avoid similar bugs in the future.
Apparently this fixes#11780. The verbs used by decommission operation
use RPC client index 1 (see `do_get_rpc_client_idx` in
message/messaging_service.cc). From local testing with additional
logging I found that by the time this client is created (i.e. the first
verb in this group is used), we already know the topology. The node is
pending at that point - hence the bug would cause us to assume we don't
know the topology, leading us to drop the RPC client later, possibly
in the middle of a decommission operation.
Fixes: #11780
The gossip verbs are used to learn about topology of other nodes.
If inter-dc/rack encryption is enabled, the knowledge of topology is
necessary to decide whether it's safe to send unencrypted messages to
nodes (i.e., whether the destination lies in the same dc/rack).
The logic in `messaging_service::get_rpc_client`, which decided whether
a connection must be encrypted, was this (given that encryption is
enabled): if the topology of the peer is known, and the peer is in the
same dc/rack, don't encrypt. Otherwise encrypt.
However, it may happen that node A knows node B's topology, but B
doesn't know A's topology. A deduces that B is in the same DC and rack
and tries sending B an unencrypted message. As the code currently
stands, this would cause B to call `on_internal_error`. This is what I
encountered when attempting to fix #11780.
To guarantee that it's always possible to deliver gossiper verbs (even
if one or both sides don't know each other's topology), and to simplify
reasoning about the system in general, choose connection settings that
are independent of the topology - for the connection used by gossiper
verbs (other connections are still topology-dependent and use complex
logic to handle the situation of unknown-and-later-known topology).
This connection only contains 'rare' and 'cheap' verbs, so it's not a
performance problem to always encrypt it (given that encryption is
configured). And this is what already was happening in the past; it was
at some point removed during topology knowledge management refactors. We
just bring this logic back.
Fixes #11992.
Inspired by xemul/scylla@45d48f3d02.
When we write to a materialized view, we need to know some information
defined in the base table such as the columns in its schema. We have
a "view_info" object that tracks each view and its base.
This view_info object has a couple of mutable attributes which are
used to lazily-calculate and cache the SELECT statement needed to
read from the base table. If the base-table schema ever changes -
and the code calls set_base_info() at that point - we need to forget
this cached statement. If we don't (as before this patch), the SELECT
will use the wrong schema and writes will no longer work.
This patch also includes a reproducing test that failed before this
patch, and passes afterwards. The test creates a base table with a
view that has a non-trivial SELECT (it has a filter on one of the
base-regular columns), makes a benign modification to the base table
(just a silly addition of a comment), and then tries to write to the
view - and before this patch it fails.
Fixes #10026
Fixes #11542
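A sketch of the fix idea (assumed member names): updating the base info must also drop the SELECT statement cached against the old base schema, so the next write to the view rebuilds it.
```
void view_info::set_base_info(base_info_ptr info) {
    _base_info = std::move(info);
    _select_statement = nullptr;  // lazily recomputed on next use
}
```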
This PR is V2 of the [PR created by @psarna](https://github.com/scylladb/scylladb/pull/11560).
I have:
- copied the content.
- applied the suggestions left by @nyh.
- made minor improvements, such as replacing "Scylla" with "ScyllaDB", fixing punctuation, and fixing the RST syntax.
Fixes https://github.com/scylladb/scylladb/issues/11378
Closes #11984
* github.com:scylladb/scylladb:
doc: label user-defined functions as Experimental
doc: restore the note for the Count function (removed by mistatke)
doc: document user defined functions (UDFs)
Although the docs discourage using INADDR_ANY as the listen address, it is not disabled in code. Worse -- some snitch drivers may gossip it around as the INTERNAL_IP state. This patch set prevents this from happening and also adds a sanity check not to use this value if it somehow sneaks in.
Closes #11846
* github.com:scylladb/scylladb:
messaging_service: Deny putting INADD_ANY as preferred ip
messaging_service: Toss preferred ip cache management
gossiping_property_file_snitch: Dont gossip INADDR_ANY preferred IP
gossiping_property_file_snitch: Make _listen_address optional
When we translate from docker/go arch names to the kernel arch
names, we use an associative array hack with computed variable
names ("${!variable_name}"). But it turns out bash has real
associative arrays, introduced with "declare -A". Use them to make
the code a little clearer.
Closes #11985
Fix https://github.com/scylladb/scylla-docs/issues/4144
Closes #11226
* github.com:scylladb/scylladb:
Update docs/getting-started/system-requirements.rst
doc: specify the recommended AWS instance types
doc: replace the tables with a generic description of support for Im4gn and Is4gen instances
doc: add support for AWS i4g instances
doc: extend the list of supported CPUs
As indicated in #11816, we'd like to enable deserializing vectors in reverse.
Forward deserialization is achieved by reading from an input_stream, which
internally is a singly linked list with complicated logic. In order to
allow going through it in reverse, when creating the reverse vector
initializer we instead scan the stream and store substreams for all the
places that are a starting point for a next element. The iterator itself
just deserializes elements from the remembered substreams, this time in reverse.
Fixes #11816
Closes #11956
* github.com:scylladb/scylladb:
test/boost/serialization_test.cc: add test for reverse vector deserializer
serializer_impl.hh: add reverse vector serializer
serializer_impl: remove unneeded generic parameter
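A minimal sketch of the idea with invented types and helpers: one forward scan records a substream per element, then elements are decoded back to front.
```
std::vector<stream_view> starts;
stream_view in = vector_body;
for (size_t i = 0; i < element_count; ++i) {
    starts.push_back(in);   // substream beginning at element i
    skip_one_element(in);   // advance past element i
}
for (auto it = starts.rbegin(); it != starts.rend(); ++it) {
    consume(read_one_element(*it));  // deserialize in reverse order
}
```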
As noted in issue #11979, Scylla inconsistently (and unlike Cassandra)
requires "IS NOT NULL" one some but not all materialized-view key
columns. Specifically, Scylla does not require "IS NOT NULL" on the
base's partition key, while Cassandra does.
This patch is a test which demonstrates this inconsistency. It currently
passes on Cassandra and fails on Scylla, so is marked xfail.
Refs #11979
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes #11980
The get_live_token_owners function returns the nodes that are part of the ring
and alive.
The get_unreachable_token_owners function returns the nodes that are part of the ring
and not alive.
token_metadata::get_all_endpoints returns the nodes that are part of the
ring.
The patch changes both functions to use the more authoritative source to
get the nodes that are part of the ring and call is_alive to check if
each node is up or down, so that the correctness does not depend on
any derived information.
This patch fixes a truncate issue in storage_proxy::truncate_blocking,
which calls get_live_token_owners and get_unreachable_token_owners to
decide which nodes to talk to for the truncate operation. The truncate
failed because incorrect nodes were returned.
Fixes #10296
Fixes #11928
Closes #11952
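A sketch of the approach (assumed names): both sets are derived from the authoritative ring membership plus a liveness check, instead of separately maintained caches.
```
std::unordered_set<gms::inet_address> live, unreachable;
for (const auto& node : token_metadata.get_all_endpoints()) {
    if (gossiper.is_alive(node)) {
        live.insert(node);
    } else {
        unreachable.insert(node);
    }
}
```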
This PR is a follow-up to https://github.com/scylladb/scylladb/pull/11918.
With this PR:
- The "ScyllaDB Enterprise" label is added to all the features that are only available in ScyllaDB Enterprise.
- The previous Enterprise-only note is removed (it was included in multiple files as _/rst_include/enterprise-only-note.rst_ - this file is removed as it is no longer used anywhere in the docs).
- "Scylla Enterprise" was removed from `versionadded `because now it's clear that the feature was added for Enterprise.
Closes#11975
* github.com:scylladb/scylladb:
doc: remove the enterprise-only-note.rst file, which was replaced by the ScyllaDB Enterprise label and is not used anymore
doc: add the ScyllaDB Enterprise label to the descriptions of Enterprise-only features
The helper is already widely used; one (last) test case can benefit from using it too.
Closes #11978
* github.com:scylladb/scylladb:
test: Indentation fix after previous patch
test: Wse with_sstable_directory() helper
We use Barrett tables (misspelled in the code unfortunately) to fold
crc computations of multiple buffers into a single crc. This is important
because it turns out to be faster to compute crc of three different buffers
in parallel rather than compute the crc of one large buffer, since the crc
instruction has latency 3.
Currently, we have a separate code generation step to compute the
fold tables. The step generates a new C++ source file with the tables.
But modern C++ allows us to do this computation at compile time, avoiding
the code generation step. This simplifies the build.
This series does that. There is some complication in that the code uses
compiler intrinsics for the computation, and these are not constexpr friendly.
So we first introduce constexpr-friendly alternatives and use them.
To prove the transformation is correct, I compared the generated code from
before the series and from just before the last step (where we use constexpr
evaluation but still retain the generated file) and saw no difference in the values.
Note that constexpr is not strictly needed - we could have run the code in the
global variables' initializer. But that would cause a crash if we run on a pre-clmul
machine, and is not as fun.
Closes #11957
* github.com:scylladb/scylladb:
test: crc: add unit tests for constexpr clmul and barrett fold
utils: crc combine table: generate at compile time
utils: barrett: inline functions in header
utils: crc combine table: generate tables at compile time
utils: crc combine table: extract table generation into a constexpr function
utils: crc combine table: extract "pow table" code into constexpr function
utils: crc combine table: store tables std::arrray rather than C array
utils: barrett: make the barrett reduction constexpr friendly
utils: clmul: add 64-bit constexpr clmul
utils: barrett: extract barrett reduction constants
utils: barrett: reorder functions
utils: make clmul() constexpr
Introduce a templated function do_on_leader_with_retries and
use it in add_entries/modify_config/read_barrier. The
function implements the basic logic of retries with abort
and leader-change handling, and adds a delay between
iterations to protect against tight loops.
Replaced the yield on transport_error
with wait_for_next_tick. Added delays for retries, similar
to add_entry/modify_config: we postpone the next
call attempt if we haven't received new information
about the current leader.
When modify_config or add_entry is forwarded
to the leader, it may reach the node at an
"inappropriate" time and result in an exception.
There are two reasons for it: the leader is
changing or, in the case of modify_config, another
modify_config is currently in progress. In
both cases the command is retried, but before
this patch there was no delay before retrying,
which could lead to a tight loop.
The patch adds a new exception type transient_error.
When the client node receives it, it is obliged to retry
the request, possibly after some delay. Previously, leader-side
exceptions were converted to the not_a_leader exception,
which is strange, especially for conf_change_in_progress.
We add a delay before retrying in modify_config
and add_entry if the client hasn't received any new
information about the leader since the last attempt.
This can happen if the server
responds with a transient_error with an empty leader
and the current node has not yet learned the new leader.
We disregard the possibly excessive delay if the newly elected leader
is the same as the previous one; this is supposed to be rare.
Fixes: #11564
It's already used everywhere, but one test case wires up the
sstable_directory by hand. Fix it too, but keep in mind that the caller
function stops the directory early.
(indentation is deliberately left broken)
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Currently, when we want to deserialize a mutation in reverse, we unfreeze
it and consume it from the end. This new reverse vector deserializer
goes through the input stream remembering the substreams that contain a given
output range member, and, while traversing from the back, deserializes
each substream.
Currently, when replacing a node's ip while keeping the old host,
we might end up with the old endpoint in system.peers
if it is inserted back into the topology by `handle_state_normal`
when on_join is called with the old endpoint.
Then, later on, on_change sees that:
```
if (get_token_metadata().is_member(endpoint)) {
co_await do_update_system_peers_table(endpoint, state, value);
```
As described in #11925.
Fixes #11925
Closes #11930
* github.com:scylladb/scylladb:
storage_service, system_keyspace: add debugging around system.peers update
storage_service: handle_state_normal: update topology and notify_joined endpoint only if not removed
Today, compaction_backlog_tracker is managed in each compaction_strategy
implementation. So every compaction strategy is managing its own
tracker and providing a reference to it through get_backlog_tracker().
But this prevents each group from having its own tracker, because
there's only a single compaction_strategy instance per table.
To remove this limitation, compaction_strategy impl will no longer
manage trackers but will instead provide an interface for trackers
to be created, such that each compaction_group will be allowed to
create its own tracker and manage it by itself.
Now table's backlog will be the sum of all compaction_group backlogs.
The normalization factor is applied on the sum, so we don't have
to adjust each individual backlog to any factor.
Closes #11762
* github.com:scylladb/scylladb:
replica: Allow one compaction_backlog_tracker for each compaction_group
compaction: Make compaction_state available for compaction tasks being stopped
compaction: Implement move assignment for compaction_backlog_tracker
compaction: Fix compaction_backlog_tracker move ctor
compaction: Use table_state's backlog tracker in compaction_read_monitor_generator
compaction: kill undefined get_unimplemented_backlog_tracker()
replica: Refactor table::set_compaction_strategy for multiple groups
Fix exception safety when transferring ongoing charges to new backlog tracker
replica: move_sstables_from_staging: Use tracker from group owning the SSTable
replica: Move table::backlog_tracker_adjust_charges() to compaction_group
replica: table::discard_sstables: Use compaction_group's backlog tracker
replica: Disable backlog tracker in compaction_group::stop()
replica: database_sstable_write_monitor: use compaction_group's backlog tracker
replica: Move table::do_add_sstable() to compaction_group
test/sstable_compaction_test: Switch to table_state::get_backlog_tracker()
compaction/table_state: Introduce get_backlog_tracker()
Check that the constexpr variants indeed match the runtime variants.
I verified manually that exactly one computation in each test is
executed at run time (and is compared against a constant).
By now the crc combine tables are generated at compile time,
but still in a separate code generation step. We now eliminate
the code generation step and instead link the global variables
directly into the main executable. The global variables have
been conveniently named exactly as the code generation step
names them, so we don't need to touch any users.
Move the tables into global constinit variables that are
generated at compile time. Note the code that creates
the generated crc32_combine_table.cc is still called; it
transforms compile-time generated tables into a C++ source
that contains the same values, as literals.
If we generate a diff between gen/utils/gz/crc_combine_table.cc
before this series and after this patch, we see the only change
in the file is the type of the variable (which changed to
std::array), proving our constexpr code is correct.
Move the code to a constexpr function, so we can later generate the tables at
compile time. Note that although the function is constexpr, it is still
evaluated at runtime, since the calling function (main()) isn't constexpr
itself.
A "pow table" is used to generate the Barrett fold tables. Extract its
code into a constexpr function so we can later generate the fold tables
at compile time.
C arrays cannot be returned from functions and therefore aren't suitable
for constexpr processing. std::array<> is a regular value and so is
constexpr friendly.
This is used when generating the Barrett reduction tables, and also when
applying the Barrett reduction at runtime, so we need it to be constexpr
friendly.
clmul() is a pure function and so should already be constexpr,
but it uses intrinsics that aren't defined as constexpr and
so the compiler can't really compute it at compile time.
Fix by defining a constexpr variant and dispatching based
on whether we're being constant-evaluated or not.
The implementation is simple, but in any case proof that it
is correct will be provided later on.
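A simplified sketch of the dispatch; std::is_constant_evaluated() is standard C++20, while clmul_intrinsic() stands in for the wrapper over the CPU carry-less multiply instruction:
```
#include <cstdint>
#include <type_traits>

constexpr uint64_t clmul_lo_sketch(uint64_t a, uint64_t b) {
    if (std::is_constant_evaluated()) {
        uint64_t r = 0;
        for (unsigned i = 0; i < 64; ++i) {
            if (b & (uint64_t(1) << i)) {
                r ^= a << i;  // carry-less multiply: XOR instead of add
            }
        }
        return r;
    }
    return clmul_intrinsic(a, b);  // hypothetical runtime fast path
}
```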
Today, compaction_backlog_tracker is managed in each compaction_strategy
implementation. So every compaction strategy is managing its own
tracker and providing a reference to it through get_backlog_tracker().
But this prevents each group from having its own tracker, because
there's only a single compaction_strategy instance per table.
To remove this limitation, compaction_strategy impl will no longer
manage trackers but will instead provide an interface for trackers
to be created, such that each compaction group will be allowed to
have its own tracker, which will be managed by compaction manager.
On compaction strategy change, table will update each group with
the new tracker, which is created using the previously introduced
compaction_group_sstable_set_updater.
Now table's backlog will be the sum of all compaction_group backlogs.
The normalization factor is applied on the sum, so we don't have
to adjust each individual backlog to any factor.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
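A sketch with invented names: the table backlog is the sum of the per-group backlogs, normalized once on the total rather than per group.
```
double table_backlog_sketch(const std::vector<compaction_group*>& groups,
                            double normalization_factor) {
    double total = 0;
    for (const auto* cg : groups) {
        total += cg->get_backlog_tracker().backlog();
    }
    return total / normalization_factor;  // single adjustment on the sum
}
```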
compaction_backlog_tracker will be managed by compaction_manager, in the
per table state. As compaction tasks can access the tracker throughout
its lifetime, remove() can only deregister the state once we're done
stopping all tasks which map to that state.
remove() extracted the state upfront, then performed the stop, to
prevent new tasks from being registered and left behind. But we can
avoid the leak of new tasks by only closing the gate, which waits
for all tasks (which are stopped a step earlier) and once closed,
prevents new tasks from being registered.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Luckily it's not used anywhere. The default move ctor was picked, but
it won't clear _manager of the old object, meaning that its destructor
will incorrectly deregister the tracker from
compaction_backlog_manager.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
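A sketch of the fix (member names assumed; std::exchange is standard): the move must null out the source's manager pointer, otherwise the moved-from object's destructor deregisters the still-live tracker.
```
compaction_backlog_tracker(compaction_backlog_tracker&& other) noexcept
    : _impl(std::move(other._impl))
    , _manager(std::exchange(other._manager, nullptr)) {
}
```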
Refactoring the function for it to accommodate multiple compaction
groups.
To still provide strong exception guarantees, preparation and
execution of changes will be separated.
Once multiple groups are supported, each group will be prepared
first, and the noexcept execution will be done as a last step.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
When setting a new strategy, the charges of the old tracker are transferred
to the new one.
The problem is that we're not reverting the changes if an exception is
triggered before the new strategy is successfully set.
To fix this exception safety issue, let's copy the charges instead
of moving them. If an exception is triggered, the old tracker is still
the one used and remains intact.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
When moving SSTables from staging directory, we'll conditionally add
them to backlog tracker. As each group has its own tracker, a given
sstable will be added to the tracker of the group that owns it.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Procedures that call this function happen to be in compaction_group,
so let's move it to group. Simplifies the change where the procedure
retrieves tracker from the group itself.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
As we're moving backlog tracker to compaction group, we need to
stop the tracker there too. We're moving it a step earlier in
table::stop(), before sstables are cleared, but that's okay
because it's still done after the group was deregistered
from compaction manager, meaning no compactions are running.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
All callers of do_add_sstable() live in compaction_group, so it
should be moved into compaction_group too. It also makes it easier
for the function to retrieve the backlog tracker from the group.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
This interface will be helpful for allowing replica::table, unit
tests and sstables::compaction to access the compaction group's tracker
which will be managed by the compaction manager, once we complete
the decoupling work.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
In commit 544ef2caf3 we fixed a bug where
a reveresed clustering-key order caused problems using a secondary index
because of incorrect type comparison. That commit also included a
regression test for this fix.
However, that fix was incomplete, and improved later in commit
c8653d1321. That later fix was labeled
"better safe than sorry", and did not include a test demonstrating
any actual bug, so unsurprisingly we never backported that second
fix to any older branches.
Recently we discovered that missing the second patch does cause real
problems, and this patch includes a test which fails when the first
patch is in, but the second patch isn't (and passes when both patches
are in, and also passes on Cassandra).
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes #11943
The mutation compactor is used on most read-paths we have, so adding a
validator to it gives us a good coverage, in particular it gives us full
coverage of queries and compaction.
The validator validates mutation token (and mutation fragment kind)
monotonicity as that is quite cheap, while it is enough to catch the
most common problems. As we already have a validator on the compaction
path (in the sstable writer), the validator is disabled when the
mutation compactor is instantiated for compaction.
We should probably make this configurable at some point. The addition
of this validator should prevent the worst of the fragment reordering
bugs from affecting reads.
Which, as its name suggests, makes the validating filter not validate
anything at all. This validation level can effectively be used to make
it as if the validator was not there at all.
The test's input data is currently out of order, violating the fundamental
invariant that data is always sorted. This doesn't cause any problems
right now, but soon it will. Sort it to avoid that.
Said method calls `compact_mutation_state::start_new_page()`, which
requires the kind of the next fragment in the reader. When there is no
fragment (the reader is at EOS), we use partition-end. This was a poor
choice: if the reader is at EOS, partition-end was the last fragment's
kind; if the stream were to continue, the next fragment would be a
partition-start.
Instead of using assigned IP addresses, use a local integer ID for
managing servers, since an IP address can be reused by a different server.
While there, also fetch the host ID (UUID). It can likewise be reused with
`node replace`, so it's not good enough for tracking.
Closes#11747
* github.com:scylladb/scylladb:
test.py: use internal id to manage servers
test.py: rename hostname to ip_addr
test.py: get host id
test.py: use REST api client in ScyllaCluster
test.py: remove unnecessary reference to web app
test.py: requests without aiohttp ClientSession
In the 5.1 -> 5.2 upgrade doc, include additional steps for enabling
Raft using the `consistent_cluster_management` flag. Note that we don't
have this flag yet but it's planned to replace the experimental flag in
5.2.
In the "Raft in ScyllaDB" document, add sections about:
- enabling Raft in existing clusters in Scylla 5.2,
- verifying that the internal Raft upgrade procedure finishes
successfully,
- recovering from a stuck Raft upgrade procedure or from a majority loss
situation.
Fix some problems in the documentation, e.g. it is not possible to
enable Raft in an existing cluster in 5.0, but the documentation claimed
that it is.
Follow-up items:
- if we decide for a different name for `consistent_cluster_management`,
use that name in the docs instead
- update the warnings in Scylla to link to the Raft doc
- mention Enterprise versions once we know the numbers
- update the appropriate upgrade docs for Enterprise versions
once they exist
It's a copy-paste from the 5.0 -> 5.1 guide with substitutions:
s/5.1/5.2,
s/5.0/5.1
The metric update guide is not written; I left a TODO.
Also I didn't include the guide in
docs/upgrade/upgrade-opensource/index.rst, since 5.2 is not released
yet.
The guide can be accessed by manually following the link:
/upgrade/upgrade-opensource/upgrade-guide-from-5.1-to-5.2/
Instead of using assigned IP addresses, use an internal server id.
Define types to distinguish local server id, host ID (UUID), and IP
address.
This is needed to test servers changing IP address and for node replace
(host UUID).
Signed-off-by: Alejo Sanchez <alejo.sanchez@scylladb.com>
The code explicitly manages an IP as a string, so make that explicit in the
variable name.
Define its type, and check whether it is set in the instance instead of
using an empty string as a placeholder.
Signed-off-by: Alejo Sanchez <alejo.sanchez@scylladb.com>
When initializing a ScyllaServer, try to get the host id instead of only
checking that the REST API is up.
Use the existing aiohttp session from ScyllaCluster.
In case of an HTTP error, check that the status was not an internal error (500+).
Signed-off-by: Alejo Sanchez <alejo.sanchez@scylladb.com>
Move the REST api client to ScyllaCluster. This will allow the cluster
to query its own servers.
Signed-off-by: Alejo Sanchez <alejo.sanchez@scylladb.com>
The aiohttp.web.Application only needs to be passed, so don't store a
reference to it in the ScyllaCluster object.
Signed-off-by: Alejo Sanchez <alejo.sanchez@scylladb.com>
Simplify the REST helper by doing requests without a session.
Reusing an aiohttp.ClientSession causes knock-on effects on
`rest_api/test_task_manager` due to handling exceptions outside of an
`async with` block.
Requests for cluster management and the Scylla REST API don't need a
session anyway.
Raise HTTPError with the status code, text reason, params, and json.
In ScyllaCluster.install_and_start(), instead of adding one more custom
exception, just catch all exceptions, as they will be re-raised later.
While there, avoid code duplication and improve sanity, type checking,
and the lint score.
Signed-off-by: Alejo Sanchez <alejo.sanchez@scylladb.com>
Currently, when we set a single value, we need
to call broadcast_to_all_shards to let observers on all
shards get notified of the new value.
However, the latter broadcasts all values to all shards,
so it's terribly inefficient.
Instead, add async set_value_on_all_shards functions
to broadcast a single value to all shards.
Use those in system_keyspace for the db_config_table virtual table
and in task_manager_test to update the task_manager ttl.
Refs #7316
Closes#11893
* github.com:scylladb/scylladb:
tests: check ttl on different shards
utils: config_src: add set_value_on_all_shards functions
utils: config_file: add config_source::API
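The following sketch shows the shape of such a per-value broadcast using a seastar::sharded service; the names (config_service, notify_observers, set_ttl_on_all_shards) are hypothetical stand-ins, not the real utils::config_file interfaces.
```cpp
#include <seastar/core/sharded.hh>
#include <seastar/core/future.hh>

// Hypothetical per-shard service holding one configuration value.
struct config_service {
    int ttl = 0;                       // e.g. the task_manager ttl
    void notify_observers() { /* run shard-local observers */ }
};

// Broadcast a single value to every shard's instance, instead of
// re-broadcasting the entire configuration.
seastar::future<> set_ttl_on_all_shards(seastar::sharded<config_service>& svc, int new_ttl) {
    return svc.invoke_on_all([new_ttl] (config_service& local) {
        local.ttl = new_ttl;
        local.notify_observers();
    });
}
```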
This mini-series introduces dht::tokens_filter and uses it for consuming staging sstables in the view_update_generator.
The tokens_filter uses the token ranges owned by the current node, as retrieved by get_keyspace_local_ranges.
Refs #9559
Closes#11932
* github.com:scylladb/scylladb:
db: view_update_generator: always clean up staging sstables
compaction: extract incremental_owned_ranges_checker out to dht
This reverts commit ba6186a47f.
Said commit violates the widely held assumption that sstable
generations can be used as sstable identity. One known problem caused by
this is a potential out-of-order partition emitted when reading from sstables
(#11843). We now also have a better fix for #11789 (the bug this commit
was meant to fix): 4aa0b16852. So we can
revert without regressions.
Fixes: #11843
Closes#11886
Wrong access to an uninitialized token instead of the actual
generated string caused the parser to crash. This wasn't
detected by the ANTLR3 compiler because all the temporary
variables defined in the ANTLR3 statements are global in the
generated code. This essentially caused a null dereference.
Tests: 1. The fixed issue scenario from github.
2. Unit tests in release mode.
Fixes#11774
Signed-off-by: Eliran Sinvani <eliransin@scylladb.com>
Message-Id: <20190612133151.20609-1-eliransin@scylladb.com>
Closes#11777
Currently, when replacing a node's ip while keeping the old host,
we might end up with the old endpoint in system.peers
if it is inserted back into the topology by `handle_state_normal`
when on_join is called with the old endpoint.
Then, later on, on_change sees that:
```
if (get_token_metadata().is_member(endpoint)) {
    co_await do_update_system_peers_table(endpoint, state, value);
}
```
As described in #11925.
Fixes#11925
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Currently, when we set a single value, we need
to call broadcast_to_all_shards to let observers on all
shards get notified of the new value.
However, the latter broadcasts all values to all shards,
so it's terribly inefficient.
Instead, add async set_value_on_all_shards functions
to broadcast a single value to all shards.
Use those in system_keyspace for the db_config_table virtual table
and in task_manager_test to update the task_manager ttl.
Refs #7316
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Since they are currently not cleaned up by cleanup compaction,
filter their tokens, processing only tokens owned by the
current node (based on the keyspace replication strategy).
Refs #9559
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
It is currently used by the cleanup_compaction partition filter.
Factor it out so it can be used to filter staging sstables in
the next patch.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
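A simplified sketch of what such an incremental owned-ranges filter can look like (sorted ranges, forward-only scan); this is not the actual dht interface, and the types are stand-ins:
```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

struct token_range { int64_t start, end; };  // half-open (start, end]

class incremental_owned_ranges_checker {
    const std::vector<token_range>& _sorted_owned_ranges;
    std::size_t _idx = 0;  // tokens arrive in sorted order, so scan forward only
public:
    explicit incremental_owned_ranges_checker(const std::vector<token_range>& r)
        : _sorted_owned_ranges(r) {}

    bool belongs_to_current_node(int64_t token) {
        // Skip ranges that lie entirely before this token.
        while (_idx < _sorted_owned_ranges.size() && _sorted_owned_ranges[_idx].end < token) {
            ++_idx;
        }
        return _idx < _sorted_owned_ranges.size()
            && _sorted_owned_ranges[_idx].start < token
            && token <= _sorted_owned_ranges[_idx].end;
    }
};
```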
When a class inherits from multiple virtual base classes, a pointer to
an instance of this class via one of its base classes might point
somewhere into the object, not at its beginning. Therefore, the simple
method currently employed by $downcast_vptr(), casting the provided
pointer to the type extracted from the vtable name, fails. Instead, when
this situation is detected (observable as the symbol name
of the partial vtable not being at an offset of +16, but larger),
$downcast_vptr() will iterate over the base classes, adjusting the
pointer with their offsets, hoping to find the true start of the object.
In the one instance I tested this with, this method worked well.
At the very least, the method will now yield a null pointer when it
fails, instead of a badly casted object with corrupt content (which the
developer might or might not attribute to the bad cast).
Closes#11892
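The underlying C++ behavior is easy to demonstrate: under virtual inheritance, a base-subobject pointer typically does not point at the start of the most-derived object, and recovering the true start requires a pointer adjustment (which dynamic_cast performs, and which the gdb helper has to emulate). A self-contained illustration:
```cpp
#include <cstdio>

struct A { virtual ~A() = default; int a; };
struct B { virtual ~B() = default; int b; };
struct C : virtual A, virtual B { int c; };

int main() {
    C obj;
    B* pb = &obj;  // may point into the middle of obj, not at its start
    std::printf("C at %p, B subobject at %p\n",
                static_cast<void*>(&obj), static_cast<void*>(pb));
    // dynamic_cast adjusts the pointer back to the true start of the object:
    C* pc = dynamic_cast<C*>(pb);
    std::printf("recovered C at %p\n", static_cast<void*>(pc));
}
```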
This PR adds the "ScyllaDB Enterprise" label to highlight the Enterprise-only features on the following pages:
- Encryption at Rest - the label indicates that the entire page is about an Enterprise-only feature.
- Compaction - the labels indicate the sections that are Enterprise-only.
There are more occurrences across the docs that require a similar update. I'll update them in another PR if this PR is approved.
Closes#11918
* github.com:scylladb/scylladb:
doc: fix the links to resolve the warnings
doc: add the Enterprise label on the Compaction page (to a subheading and on a list of strategies) to replace the info box
doc: add the Enterprise label to the Encryption at Rest page (the entire page) to replace the info box
Prior to off-strategy compaction, streaming / repair would place
staging files into the main sstable set, and wait for view building
completion before they could be selected for regular compaction.
The reason for that is that view building relies on the table providing
a mutation source without data in staging files. Had regular compaction
mixed staging data with non-staging data, the table would have a hard time
providing the required mutation source.
After off-strategy compaction, staging files can be compacted
in parallel to view building. If off-strategy completes first, it
will place the output into the main sstable set. So a parallel view
building (on sstables used for off-strategy) may potentially get a
mutation source containing staging data from the off-strategy output.
That will mislead the view builder, as it won't be able to detect
changes to data in the main directory.
To fix it, we'll do what we did before: filter out staging files
from compaction, and trigger the operation only after we're done
with view building. We're piggybacking on the off-strategy timer so
that off-strategy still runs only at the end of the
node operation, to reduce the number of compaction rounds on
the data introduced by repair / streaming.
Fixes#11882.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Closes#11919
create-relocatable-package.py collects shared libraries used by
executables for packaging. It also adds the thread debugging library
(libthread_db) to make debugging possible. However, the versioned file
name it uses has changed in glibc, so packaging fails on Fedora 37.
Switch to the version-agnostic name, libthread_db.so. This happens
to be a symlink, so resolve it.
Closes#11917
The --online-discard option is defined as a string parameter, since it
doesn't specify "action=", but it has a boolean default value (default=True).
This breaks "provisioning in a similar environment", since the code
assumed a boolean value implies "action='store_true'", but it doesn't.
We should change the type of the option to int, and also specify
"choices=[0, 1]", just like --io-setup does.
Fixes#11700
Closes#11831
Whenever a Raft configuration change is performed, `raft::server` calls
`raft_rpc::add_server`/`raft_rpc::remove_server`. Our `raft_rpc`
implementation has a function, `_on_server_update`, passed in the
constructor, which it called in `add_server`/`remove_server`;
that function would update the set of endpoints detected by the
direct failure detector. `_on_server_update` was passed an IP address
and that address was added to / removed from the failure detector set
(there's another translation layer between the IP addresses and internal
failure detector 'endpoint ID's; but we can ignore it for the purposes
of this commit).
Therefore: the failure detector was pinging a certain set of IP
addresses. These IP addresses were updated during Raft configuration
changes.
To implement the `is_alive(raft::server_id)` function (required by
`raft::failure_detector` interface), we would translate the ID using
the Raft address map, which is currently also updated during
configuration changes, to an IP address, and check if that IP address is
alive according to the direct failure detector (which maintained an
`_alive_set` of type `unordered_set<gms::inet_address>`).
This all works well but it assumes that servers can be identified using
IP addresses - it doesn't play well with the fact that servers may
change their IP addresses. The only immutable identifier we have for a
server is `raft::server_id`. In the future, Raft configurations will not
associate IP addresses with Raft servers; instead we will assume that IP
addresses can change at any time, and there will be a different
mechanism that eventually updates the Raft address map with the latest
IP address for each `raft::server_id`.
To prepare us for that future, in this commit we no longer operate in
terms of IP addresses in the failure detector, but in terms of
`raft::server_id`s. Most of the commit is boilerplate, changing
`gms::inet_address` to `raft::server_id` and function/variable names.
The interesting changes are:
- in `is_alive`, we no longer need to translate the `raft::server_id` to
an IP address, because now the stored `_alive_set` already contains
`raft::server_id`s instead of `gms::inet_address`es.
- the `ping` function now takes a `raft::server_id` instead of
`gms::inet_address`. To send the ping message, we need to translate
this to an IP address; we do it via the `raft_address_map` pointer
introduced in an earlier commit.
Thus, there is still a point where we have to translate between
`raft::server_id` and `gms::inet_address`; but observe we now do it at
the last possible moment - just before sending the message. If we
have no translation, we consider the `ping` to have failed - it's
equivalent to a network failure where no route to a given address was
found.
Closes#11759
* github.com:scylladb/scylladb:
direct_failure_detector: get rid of complex `endpoint_id` translations
service/raft: ping `raft::server_id`s, not `gms::inet_address`es
service/raft: store `raft_address_map` reference in `direct_fd_pinger`
gms: gossiper: move `direct_fd_pinger` out to a separate service
gms: gossiper: direct_fd_pinger: extract generation number caching to a separate class
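To make the ID-keyed design concrete, here is a self-contained sketch with simplified stand-ins for the ScyllaDB types (the real code is sharded and asynchronous, which is omitted here):
```cpp
#include <cstddef>
#include <cstdint>
#include <functional>
#include <string>
#include <unordered_map>
#include <unordered_set>

// Simplified stand-ins for raft::server_id and gms::inet_address.
struct server_id { uint64_t uuid; bool operator==(const server_id&) const = default; };
struct server_id_hash { std::size_t operator()(server_id id) const { return std::hash<uint64_t>{}(id.uuid); } };

class failure_detector {
    std::unordered_set<server_id, server_id_hash> _alive_set;            // keyed by ID, not IP
    std::unordered_map<server_id, std::string, server_id_hash> _address_map;
public:
    bool is_alive(server_id id) const { return _alive_set.contains(id); }

    bool ping(server_id id) {
        auto it = _address_map.find(id);
        if (it == _address_map.end()) {
            return false;  // no translation == no route: the ping fails
        }
        return send_echo(it->second);  // translate only at the last moment
    }
private:
    bool send_echo(const std::string& ip) { (void)ip; return true; }  // network stub
};
```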
This PR introduces the following changes to the documentation landing page:
- The " New to ScyllaDB? Start here!" box is added.
- The "Connect your application to Scylla" box is removed.
- Some wording has been improved.
- "Scylla" has been replaced with "ScyllaDB".
Closes#11896
* github.com:scylladb/scylladb:
Update docs/index.rst
doc: replace Scylla with ScyllaDB on the landing page
doc: improve the wording on the landing page
doc: add the link to the ScyllaDB Basics page to the documentation landing page
There were 4 different pages for upgrading Scylla 5.0 to 5.1 (and the
same is true for other version pairs, but I digress) for different
environments:
- "ScyllaDB Image for EC2, GCP, and Azure"
- Ubuntu
- Debian
- RHEL/CentOS
The Ubuntu and Debian pages used a common template:
```
.. include:: /upgrade/_common/upgrade-guide-v5-ubuntu-and-debian-p1.rst
.. include:: /upgrade/_common/upgrade-guide-v5-ubuntu-and-debian-p2.rst
```
with different variable substitutions.
The "Image" page used a similar template, with some extra content in the
middle:
```
.. include:: /upgrade/_common/upgrade-guide-v5-ubuntu-and-debian-p1.rst
.. include:: /upgrade/_common/upgrade-image-opensource.rst
.. include:: /upgrade/_common/upgrade-guide-v5-ubuntu-and-debian-p2.rst
```
The RHEL/CentOS page used a different template:
```
.. include:: /upgrade/_common/upgrade-guide-v4-rpm.rst
```
This was an unmaintainable mess. Most of the content was "the same" for
each of these options. The only content that must actually be different
is the part with package installation instructions (e.g. calls to `yum`
vs `apt-get`). The rest of the content was logically the same - the
differences were mistakes, typos, and updates/fixes to the text that
were made in some of these docs but not others.
In this commit I prepare a single page that covers the upgrade and
rollback procedures for each of these options. The section dependent on
the system was implemented using Sphinx Tabs.
I also fixed and changed some parts:
- In the "Gracefully stop the node" section:
Ubuntu/Debian/Images pages had:
```rst
.. code:: sh

   sudo service scylla-server stop
```
RHEL/CentOS pages had:
```rst
.. code:: sh
.. include:: /rst_include/scylla-commands-stop-index.rst
```
the stop-index file contained this:
```rst
.. tabs::

   .. group-tab:: Supported OS

      .. code-block:: shell

         sudo systemctl stop scylla-server

   .. group-tab:: Docker

      .. code-block:: shell

         docker exec -it some-scylla supervisorctl stop scylla

      (without stopping *some-scylla* container)
```
So the RHEL/CentOS version had two tabs: one for Scylla installed
directly on the system, one for Scylla running in Docker - which is
interesting, because nothing anywhere else in the upgrade documents
mentions Docker. Furthermore, the RHEL/CentOS version used `systemctl`
while the ubuntu/debian/images version used `service` to stop/start
scylla-server. Both work on modern systems.
The Docker option is completely out of place - the rest of the upgrade
procedure does not mention Docker. So I decided it doesn't make sense to
include it. Docker documentation could be added later if we actually
decide to write upgrade documentation when using Docker... Between
`systemctl` and `service` I went with `service` as it's a bit
higher-level.
- Similar change for "Start the node" section, and corresponding
stop/start sections in the Rollback procedure.
- To reuse text for Ubuntu and Debian, when referencing "ScyllaDB deb
repo" in the Debian/Ubuntu tabs, I provide two separate links: to
Debian and Ubuntu repos.
- the link to the rollback procedure in the RPM guide (in the 'Download and
install the new release' section) pointed to the rollback procedure from the
3.0 to 3.1 guide... Fixed to point to the current page's rollback
procedure.
- in the rollback procedure steps summary, the RPM version missed the
"Restore system tables" step.
- in the rollback procedure, the repository links were pointing to the
new versions, while they should point to the old versions.
There are some other pre-existing problems I noticed that need fixing:
- EC2/GCP/Azure option has no corresponding coverage in the rollback
section (Download and install the old release) as it has in the
upgrade section. There is no guide for rolling back 3rd party and OS
packages, only Scylla. I left a TODO in a comment.
- the repository links assume certain Debian and Ubuntu versions (Debian
10 and Ubuntu 20), but there are more available options (e.g. Ubuntu
22). Not sure how to deal with this problem. Maybe a separate section
with links? Or just a generic link without choice of platform/version?
Closes#11891
Flush the memtable before cleaning up the table, so as not to leave any disowned tokens in it,
as they might be resurrected if left in the memtable.
Fixes#1239
Closes#11902
* github.com:scylladb/scylladb:
table: perform_cleanup_compaction: flush memtable
table: add perform_cleanup_compaction
api: storage_service: add logging for compaction operations et al
Coroutines and asan don't mix well on aarch64. This was seen in
22f13e7ca3 ("Revert "Merge 'cql3: select_statement: coroutinize
indexed_table_select_statement::do_execute_base_query()' from Avi
Kivity"") where a routine coroutinization was reverted due to failures
on aarch64 debug mode.
In clang 15 this is even worse: the existing code starts failing.
However, if we disable optimization (-O0 rather than -Og), things
begin to work again. In fact, we can reinstate the patch reverted
above even with clang 12.
Fix (or rather work around) the problem by avoiding -Og on aarch64
debug mode. There's the lingering fear that release mode is
miscompiled too, but all the tests pass on clang 15 in release mode
so it appears related to asan.
Closes#11894
We don't explicitly clean up the memtable, while
it might hold tokens disowned by the current node.
Flush the memtable before performing cleanup compaction
to make sure all tokens in the memtable are cleaned up.
Note that non-owned ranges are invalidated in the cache
in compaction_group::update_main_sstable_list_on_compaction_completion
using desc.ranges_for_cache_invalidation.
Fixes#1239
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Move the integration with compaction_manager
from the api layer to the table class so
it can also make sure the memtable is cleaned up in the next patch.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
The test runs the remove_node command with a background ddl workload.
It was written in an attempt to reproduce scylladb#11228, but seems to
have value on its own.
The if_exists parameter has been added to the add_table
and drop_table functions, since the driver could retry
the request sent to a removed node, but that request
might have already been completed.
Function wait_for_host_known waits until the information
about the node reaches the destination node. Since we add
new nodes at each iteration in main, this can take some time.
A number of abort-related options were added to
SCYLLA_CMDLINE_OPTIONS, as they simplify
nailing down problems.
Closes#11734
The direct failure detector operates on abstract `endpoint_id`s for
pinging. The `pinger` interface is responsible for translating these IDs
to 'real' addresses.
Earlier we used two types of addresses: IP addresses in 'production'
code (`gms::gossiper::direct_fd_pinger`) and `raft::server_id`s in test
code (in `randomized_nemesis_test`). For each of these use cases we
would maintain mappings between `endpoint_id`s and the address type.
In recent commits we switched the 'production' code to also operate on
Raft server IDs, which are UUIDs underneath.
In this commit we switch `endpoint_id` from the `unsigned` type to
`utils::UUID`. Because each use case operates on Raft server IDs, we can
perform a simple translation: `raft_id.uuid()` to get an `endpoint_id`
from a Raft ID, `raft::server_id{ep_id}` to obtain a Raft ID from
an `endpoint_id`. We no longer have to maintain complex sharded data
structures to store the mappings.
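With UUIDs on both sides, the mapping collapses to a pair of trivial conversions. A sketch with simplified stand-in types (the real raft::server_id wraps utils::UUID):
```cpp
#include <cstdint>

struct UUID { uint64_t msb = 0, lsb = 0; }; // stand-in for utils::UUID

namespace raft {
struct server_id {
    UUID id;
    UUID uuid() const { return id; } // as used by the failure detector
};
}

using endpoint_id = UUID; // was `unsigned` before this commit

endpoint_id to_endpoint_id(raft::server_id rid) { return rid.uuid(); }
raft::server_id to_server_id(endpoint_id ep) { return raft::server_id{ep}; }
```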
Whenever a Raft configuration change is performed, `raft::server` calls
`raft_rpc::add_server`/`raft_rpc::remove_server`. Our `raft_rpc`
implementation has a function, `_on_server_update`, passed in the
constructor, which it called in `add_server`/`remove_server`;
that function would update the set of endpoints detected by the
direct failure detector. `_on_server_update` was passed an IP address
and that address was added to / removed from the failure detector set
(there's another translation layer between the IP addresses and internal
failure detector 'endpoint ID's; but we can ignore it for the purposes
of this commit).
Therefore: the failure detector was pinging a certain set of IP
addresses. These IP addresses were updated during Raft configuration
changes.
To implement the `is_alive(raft::server_id)` function (required by
`raft::failure_detector` interface), we would translate the ID using
the Raft address map, which is currently also updated during
configuration changes, to an IP address, and check if that IP address is
alive according to the direct failure detector (which maintained an
`_alive_set` of type `unordered_set<gms::inet_address>`).
This all works well but it assumes that servers can be identified using
IP addresses - it doesn't play well with the fact that servers may
change their IP addresses. The only immutable identifier we have for a
server is `raft::server_id`. In the future, Raft configurations will not
associate IP addresses with Raft servers; instead we will assume that IP
addresses can change at any time, and there will be a different
mechanism that eventually updates the Raft address map with the latest
IP address for each `raft::server_id`.
To prepare us for that future, in this commit we no longer operate in
terms of IP addresses in the failure detector, but in terms of
`raft::server_id`s. Most of the commit is boilerplate, changing
`gms::inet_address` to `raft::server_id` and function/variable names.
The interesting changes are:
- in `is_alive`, we no longer need to translate the `raft::server_id` to
an IP address, because now the stored `_alive_set` already contains
`raft::server_id`s instead of `gms::inet_address`es.
- the `ping` function now takes a `raft::server_id` instead of
`gms::inet_address`. To send the ping message, we need to translate
this to an IP address; we do it via the `raft_address_map` pointer
introduced in an earlier commit.
Thus, there is still a point where we have to translate between
`raft::server_id` and `gms::inet_address`; but observe we now do it at
the last possible moment - just before sending the message. If we
have no translation, we consider the `ping` to have failed - it's
equivalent to a network failure where no route to a given address was
found.
In a later commit, `direct_fd_pinger` will operate in terms of
`raft::server_id`s. Decouple it from `gossiper` since we don't want to
entangle `gossiper` with Raft-specific stuff.
`gms::gossiper::direct_fd_pinger` serves multiple purposes: one of them
is to maintain a mapping between `gms::inet_address`es and
`direct_failure_detector::pinger::endpoint_id`s, another is to cache the
last known gossiper's generation number to use it for sending gossip
echo messages. The latter is the only gossiper-specific thing in this
class.
We want to move `direct_fd_pinger` outside `gossiper`. To do that, split the
gossiper-specific thing -- the generation number management -- into a
smaller class, `echo_pinger`.
`echo_pinger` is a top-level class (not a nested one like
`direct_fd_pinger` was) so we can forward-declare it and pass references
to it without including gms/gossiper.hh header.
* seastar f32ed00954...e0dabb361f (12):
> sstring: define formatter
> file: Dont violate API layering
> Add compile_commands.json to gitignore
> Merge 'Add an allocation failure metric' from Travis Downs
> Use const test objects
> Ragel chunk parser: compilation err, unused var
> build: do not expose Valgrind in SeastarTargets.cmake
> defer: mark deferred_* with [[nodiscard]]
> Log selected reactor backend during startup
> http: mark str with [[maybe_unused]]
> Merge 'reactor: open fd without O_NONBLOCK when using io_uring backend' from Kefu Chai
> reactor: add accept and connect to io_uring backend
Closes#11895
Replicating `raft_address_map` entries is needed for the following use
cases:
- the direct failure detector - currently it assumes a static mapping of
`raft::server_id`s to `gms::inet_address`es, which is obtained on Raft
group 0 configuration changes. To handle dynamic mappings we need to
modify the failure detector so it pings `raft::server_id`s and obtains
the `gms::inet_address` before sending the message from
`raft_address_map`. The failure detector is sharded, so we need the
mappings to be available on all shards.
- in the future we'll have multiple Raft groups running on different
shards. To send messages they'll need `raft_address_map`.
Initially I tried to replicate all entries - expiring and non-expiring.
The implementation turned out to be very complex - we need to handle
dropping expired entries and refreshing expiring entries' timestamps
across shards, and doing this correctly while accounting for possible
races is quite problematic.
Eventually I arrived at the conclusion that replicating only
non-expiring entries, and furthermore allowing non-expiring entries to
be added only on shard 0, is good enough for our use cases:
- The direct failure detector is pinging group 0 members only; group
0 members correspond exactly to the non-expiring entries.
- Group 0 configuration changes are handled on shard 0, so non-expiring
entries are added/removed on shard 0.
- When we have multiple Raft groups, we can reuse a single Raft server
ID for all Raft servers running on a single node belonging to
different groups; they are 'namespaced' by the group IDs. Furthermore,
every node has a server that belongs to group 0. Thus for every Raft
server in every group, it has a corresponding server in group 0 with
the same ID, which has a non-expiring entry in `raft_address_map`,
which is replicated to all shards; so every group will be able to
deliver its messages.
With these assumptions the implementation is short and simple.
We can always complicate it in the future if we find that the
assumptions are too strong.
Closes#11791
* github.com:scylladb/scylladb:
test/raft: raft_address_map_test: add replication test
service/raft: raft_address_map: replicate non-expiring entries to other shards
service/raft: raft_address_map: assert when entry is missing in drop_expired_entries
service/raft: turn raft_address_map into a service
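A sketch of the shard-0-only insertion with replication via invoke_on_all; the names here are hypothetical, not the real raft_address_map interface:
```cpp
#include <seastar/core/sharded.hh>
#include <seastar/core/smp.hh>
#include <cassert>
#include <cstdint>
#include <string>
#include <unordered_map>

struct address_map_service : seastar::peering_sharded_service<address_map_service> {
    std::unordered_map<uint64_t, std::string> _non_expiring;  // server id -> IP

    // Non-expiring entries may only be added on shard 0; from there the
    // entry is replicated to every shard's instance.
    seastar::future<> set_nonexpiring_entry(uint64_t id, std::string ip) {
        assert(seastar::this_shard_id() == 0);
        return container().invoke_on_all([id, ip] (address_map_service& local) {
            local._non_expiring[id] = ip;
        });
    }
};
```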
We capture `key` by reference, but it is in another continuation.
Capture it by value, and avoid the default capture specification.
Found by clang 15 + asan + aarch64.
Closes#11884
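The bug class is easy to reproduce even without Seastar: a stored continuation that captures a local by reference dangles once the enclosing scope ends, which capturing by value (with an explicit capture list) avoids. A minimal illustration:
```cpp
#include <functional>
#include <string>
#include <vector>

std::vector<std::function<void()>> pending;  // stand-in for deferred continuations

void schedule_bad() {
    std::string key = "pk";
    pending.push_back([&] { (void)key.size(); });  // dangling once we return!
}

void schedule_good() {
    std::string key = "pk";
    // Capture by value, and name the capture explicitly instead of using
    // a default capture specification.
    pending.push_back([key] { (void)key.size(); });
}
```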
To fix CVE-2022-24675, we need a binary compiled with golang >= 1.18.1.
The only released version compiled with golang >= 1.18.1 is node_exporter
1.4.0, so we need to update to it.
See scylladb/scylla-enterprise#2317
Closes#11400
[avi: regenerated frozen toolchain]
Closes#11879
Starting from https://github.com/scylladb/scylla-pkg/pull/3035 we
removed all the old tar.gz prefixes from being uploaded to S3 or used by
downstream jobs.
Hence, there is no point in building those tar.gz files anymore
Closes#11865
Fix https://github.com/scylladb/scylla-doc-issues/issues/864
This PR:
- updates the introduction to add information about AArch64 and rewrites the content.
- replaces "Scylla" with "ScyllaDB".
Closes#11778
* github.com:scylladb/scylladb:
Update docs/getting-started/system-requirements.rst
doc: fix the link to the OS Support page
doc: replace Scylla with ScyllaDB
doc: update the info about supported architecture and rewrite the introduction
Replicating `raft_address_map` entries is needed for the following use
cases:
- the direct failure detector - currently it assumes a static mapping of
`raft::server_id`s to `gms::inet_address`es, which is obtained on Raft
group 0 configuration changes. To handle dynamic mappings we need to
modify the failure detector so it pings `raft::server_id`s and obtains
the `gms::inet_address` before sending the message from
`raft_address_map`. The failure detector is sharded, so we need the
mappings to be available on all shards.
- in the future we'll have multiple Raft groups running on different
shards. To send messages they'll need `raft_address_map`.
Initially I tried to replicate all entries - expiring and non-expiring.
The implementation turned out to be very complex - we need to handle
dropping expired entries and refreshing expiring entries' timestamps
across shards, and doing this correctly while accounting for possible
races is quite problematic.
Eventually I arrived at the conclusion that replicating only
non-expiring entries, and furthermore allowing non-expiring entries to
be added only on shard 0, is good enough for our use cases:
- The direct failure detector is pinging group 0 members only; group
0 members correspond exactly to the non-expiring entries.
- Group 0 configuration changes are handled on shard 0, so non-expiring
entries are added/removed on shard 0.
- When we have multiple Raft groups, we can reuse a single Raft server
ID for all Raft servers running on a single node belonging to
different groups; they are 'namespaced' by the group IDs. Furthermore,
every node has a server that belongs to group 0. Thus for every Raft
server in every group, it has a corresponding server in group 0 with
the same ID, which has a non-expiring entry in `raft_address_map`,
which is replicated to all shards; so every group will be able to
deliver its messages.
With these assumptions the implementation is short and simple.
We can always complicate it in the future if we find that the
assumptions are too strong.
Currently, to_string() recursively calls itself for engaged optionals.
Eliminate the recursion. Also, use the std_optional wrapper instead of
accessing std::optional internals directly.
Scylla fiber uses a crude method of scanning inbound and outbound
references to/from other task objects of recognized types. This method
cannot detect user-instantiated promise<> objects. Add a note about this
to the printout, so users are aware of it.
We collect already seen tasks in a set to be able to detect perceived
task loops and stop when one is seen. Initialize this set with the
starting task, so if it forms a loop, we won't repeat it in the trace
before cutting the loop.
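A tiny sketch of the seeding trick: start with the initial task already in the visited set, so a loop back to it is cut immediately rather than traced once more (simplified task type, hypothetical names):
```cpp
#include <unordered_set>
#include <vector>

struct task { task* waiter = nullptr; };

std::vector<task*> trace_fiber(task* start) {
    std::unordered_set<task*> seen{start};  // seed with the starting task
    std::vector<task*> trace;
    // Stop as soon as insert() reports the task was already seen.
    for (task* t = start->waiter; t && seen.insert(t).second; t = t->waiter) {
        trace.push_back(t);
    }
    return trace;
}
```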
Currently there are two loops, plus a separate line printing the starting
task, all duplicating the formatting logic. Define a method for it and
use it in all 3 places instead.
Shard boundaries can be crossed in one direction currently: when looking
for waiters on a task, but not in the other direction (looking for
waited-on tasks). This patch fixes that.
Currently seastar threads end any attempt to follow waited-on futures.
Seastar threads need special handling because they allocate the wake-up
task on their stack. This patch adds this special handling.
scylla_ptr.analyze() switches to the thread the analyzed object lives
on, but forgets to switch back. This was very annoying as any commands
using it (which is a bunch of them) were prone to suddenly and
unexpectedly switching threads.
This patch makes sure that the original thread context is switched back
to after analyzing the pointer.
Rename to scylla tables. Less typing and more up-to-date.
By default it now only lists tables from the local shard. Added flag -a,
which brings back the old behaviour (listing on all shards).
Added -u (only list user tables) and -k (only list tables of the provided
keyspace) filtering options.
Even though the previous patch makes scylla not gossip this as internal_ip,
an extra sanity check may still be useful. E.g. older versions of scylla
may still do it, or this address can be loaded from system_keyspace.
refs: #11502
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Make it call cache_preferred_ip() even when the cache is loaded from
system_keyspace, and move the connection reset there. This is mainly to
prepare for the next patch, but it also makes the code a bit shorter.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Gossiping 0.0.0.0 as the preferred IP may break the peer, as it will
"interpret" this address as <myself>, which is not what the peer expects.
However, g.p.f.s. uses the --listen-address argument as the internal IP,
and it's not prohibited to configure it to be 0.0.0.0.
It's better not to gossip the INTERNAL_IP property at all if the listen
address is such.
fixes: #11502
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
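A minimal sketch of the guard, with hypothetical names in place of the real gossiper application-state code:
```cpp
#include <map>
#include <string>

// Skip advertising INTERNAL_IP when the listen address is 0.0.0.0, since
// peers would "interpret" that address as themselves.
void maybe_gossip_internal_ip(std::map<std::string, std::string>& app_states,
                              const std::string& listen_address) {
    if (listen_address == "0.0.0.0") {
        return;  // do not gossip INTERNAL_IP at all
    }
    app_states["INTERNAL_IP"] = listen_address;
}
```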