release: prepare for 4.1.11

compaction: compaction_writer: destroy shared_sstable after the sstable_writer
sstable_writer may depend on the sstable throughout its whole lifecycle. If the sstable is freed before the sstable_writer we might hit use-after-free as in the following case: ``` std::_Deque_iterator<sstables::compression::segmented_offsets::bucket, sstables::compression::segmented_offsets::bucket&, sstables::compression::segmented_offsets::bucket*>::operator+=(long) at /usr/include/c++/10/bits/stl_deque.h:240 (inlined by) std::operator+(std::_Deque_iterator<sstables::compression::segmented_offsets::bucket, sstables::compression::segmented_offsets::bucket&, sstables::compression::segmented_offsets::bucket*> const&, long) at /usr/include/c++/10/bits/stl_deque.h:378 (inlined by) std::_Deque_iterator<sstables::compression::segmented_offsets::bucket, sstables::compression::segmented_offsets::bucket&, sstables::compression::segmented_offsets::bucket*>::operator[](long) const at /usr/include/c++/10/bits/stl_deque.h:252 (inlined by) std::deque<sstables::compression::segmented_offsets::bucket, std::allocator<sstables::compression::segmented_offsets::bucket> >::operator[](unsigned long) at /usr/include/c++/10/bits/stl_deque.h:1327 (inlined by) sstables::compression::segmented_offsets::push_back(unsigned long, sstables::compression::segmented_offsets::state&) at ./sstables/compress.cc:214 sstables::compression::segmented_offsets::writer::push_back(unsigned long) at ./sstables/compress.hh:123 (inlined by) compressed_file_data_sink_impl<crc32_utils, (compressed_checksum_mode)1>::put(seastar::temporary_buffer<char>) at ./sstables/compress.cc:519 seastar::output_stream<char>::put(seastar::temporary_buffer<char>) at table.cc:? (inlined by) seastar::output_stream<char>::put(seastar::temporary_buffer<char>) at ././seastar/include/seastar/core/iostream-impl.hh:432 seastar::output_stream<char>::flush() at table.cc:? seastar::output_stream<char>::close() at table.cc:? sstables::file_writer::close() at sstables.cc:? sstables::mc::writer::~writer() at writer.cc:? 
(inlined by) sstables::mc::writer::~writer() at ./sstables/mx/writer.cc:790 sstables::mc::writer::~writer() at writer.cc:? flat_mutation_reader::impl::consumer_adapter<stable_flattened_mutations_consumer<compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer> > >::~consumer_adapter() at compaction.cc:? (inlined by) std::_Optional_payload_base<sstables::compaction_writer>::_M_destroy() at /usr/include/c++/10/optional:260 (inlined by) std::_Optional_payload_base<sstables::compaction_writer>::_M_reset() at /usr/include/c++/10/optional:280 (inlined by) std::_Optional_payload<sstables::compaction_writer, false, false, false>::~_Optional_payload() at /usr/include/c++/10/optional:401 (inlined by) std::_Optional_base<sstables::compaction_writer, false, false>::~_Optional_base() at /usr/include/c++/10/optional:474 (inlined by) std::optional<sstables::compaction_writer>::~optional() at /usr/include/c++/10/optional:659 (inlined by) sstables::compacting_sstable_writer::~compacting_sstable_writer() at ./sstables/compaction.cc:229 (inlined by) compact_mutation<(emit_only_live_rows)0, (compact_for_sstables)1, sstables::compacting_sstable_writer, noop_compacted_fragments_consumer>::~compact_mutation() at ././mutation_compactor.hh:468 (inlined by) compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer>::~compact_for_compaction() at ././mutation_compactor.hh:538 (inlined by) std::default_delete<compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer> >::operator()(compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer>*) const at /usr/include/c++/10/bits/unique_ptr.h:85 (inlined by) std::unique_ptr<compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer>, std::default_delete<compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer> > >::~unique_ptr() at 
/usr/include/c++/10/bits/unique_ptr.h:361 (inlined by) stable_flattened_mutations_consumer<compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer> >::~stable_flattened_mutations_consumer() at ././mutation_reader.hh:342 (inlined by) flat_mutation_reader::impl::consumer_adapter<stable_flattened_mutations_consumer<compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer> > >::~consumer_adapter() at ././flat_mutation_reader.hh:201 auto flat_mutation_reader::impl::consume_in_thread<stable_flattened_mutations_consumer<compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer> >, flat_mutation_reader::no_filter>(stable_flattened_mutations_consumer<compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer> >, flat_mutation_reader::no_filter, std::chrono::time_point<seastar::lowres_clock, std::chrono::duration<long, std::ratio<1l, 1000l> > >) at ././flat_mutation_reader.hh:272 (inlined by) auto flat_mutation_reader::consume_in_thread<stable_flattened_mutations_consumer<compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer> >, flat_mutation_reader::no_filter>(stable_flattened_mutations_consumer<compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer> >, flat_mutation_reader::no_filter, std::chrono::time_point<seastar::lowres_clock, std::chrono::duration<long, std::ratio<1l, 1000l> > >) at ././flat_mutation_reader.hh:383 (inlined by) auto flat_mutation_reader::consume_in_thread<stable_flattened_mutations_consumer<compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer> > >(stable_flattened_mutations_consumer<compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer> >, std::chrono::time_point<seastar::lowres_clock, std::chrono::duration<long, std::ratio<1l, 1000l> > >) at 
././flat_mutation_reader.hh:389 (inlined by) seastar::future<void> sstables::compaction::setup<noop_compacted_fragments_consumer>(noop_compacted_fragments_consumer)::{lambda(flat_mutation_reader)#1}::operator()(flat_mutation_reader)::{lambda()#1}::operator()() at ./sstables/compaction.cc:612 ``` What happens here is that: compressed_file_data_sink_impl(output_stream<char> out, sstables::compression* cm, sstables::local_compression lc) : _out(std::move(out)) , _compression_metadata(cm) , _offsets(_compression_metadata->offsets.get_writer()) , _compression(lc) , _full_checksum(ChecksumType::init_checksum()) _compression_metadata points to a buffer held by the sstable object, and _compression_metadata->offsets.get_writer() returns a writer that keeps a reference to the segmented_offsets in the sstables::compression that is used in the ~writer -> close path. Fixes #7821 Signed-off-by: Benny Halevy <bhalevy@scylladb.com> Message-Id: <20201227145726.33319-1-bhalevy@scylladb.com> (cherry picked from commit 8a745a0ee0)
2021-01-05 10:13:34 +02:00 · 2021-01-04 15:12:33 +02:00 · 2020-12-24 12:42:42 +02:00 · 2020-12-16 17:20:32 +02:00 · 2020-12-16 11:59:12 +02:00 · 2020-12-15 16:52:38 +02:00
1755 changed files with 16298 additions and 5236 deletions
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,17 +1,17 @@
 [submodule "seastar"]
 	path = seastar
-	url = ../seastar
+	url = ../scylla-seastar
 	ignore = dirty
 [submodule "swagger-ui"]
 	path = swagger-ui
 	url = ../scylla-swagger-ui
 	ignore = dirty
-[submodule "xxHash"]
-	path = xxHash
-	url = ../xxHash
 [submodule "libdeflate"]
 	path = libdeflate
 	url = ../libdeflate
 [submodule "zstd"]
 	path = zstd
 	url = ../zstd
+[submodule "abseil"]
+	path = abseil
+	url = ../abseil-cpp
--- a/NOTICE.txt
+++ b/NOTICE.txt
@@ -1,5 +1,7 @@
 This project includes code developed by the Apache Software Foundation (http://www.apache.org/),
 especially Apache Cassandra.

-It also includes files from https://github.com/antonblanchard/crc32-vpmsum (author Anton Blanchard <anton@au.ibm.com>, IBM).
+It includes files from https://github.com/antonblanchard/crc32-vpmsum (author Anton Blanchard <anton@au.ibm.com>, IBM).
 These files are located in utils/arch/powerpc/crc32-vpmsum. Their license may be found in licenses/LICENSE-crc32-vpmsum.TXT.
+
+It includes modified code from https://gitbox.apache.org/repos/asf?p=cassandra-dtest.git (owned by The Apache Software Foundation)
--- a/2
+++ b/2
@@ -1,7 +1,7 @@
 #!/bin/sh

 PRODUCT=scylla
-VERSION=666.development
+VERSION=4.1.11

 if test -f version
 then
--- a/1
+++ b/1
--- a/alternator/auth.cc
+++ b/alternator/auth.cc
@@ -66,8 +66,9 @@ static std::string format_time_point(db_clock::time_point tp) {
    time_t time_point_repr = db_clock::to_time_t(tp);
    std::string time_point_str;
    time_point_str.resize(17);
+    ::tm time_buf;
    // strftime prints the terminating null character as well
-    std::strftime(time_point_str.data(), time_point_str.size(), "%Y%m%dT%H%M%SZ", std::gmtime(&time_point_repr));
+    std::strftime(time_point_str.data(), time_point_str.size(), "%Y%m%dT%H%M%SZ", ::gmtime_r(&time_point_repr, &time_buf));
    time_point_str.resize(16);
    return time_point_str;
 }
@@ -128,7 +129,7 @@ future<std::string> get_key_from_roles(cql3::query_processor& qp, std::string us
            auth::meta::roles_table::qualified_name(), auth::meta::roles_table::role_col_name);

    auto cl = auth::password_authenticator::consistency_for_user(username);
-    auto timeout = auth::internal_distributed_timeout_config();
+    auto& timeout = auth::internal_distributed_timeout_config();
    return qp.execute_internal(query, cl, timeout, {sstring(username)}, true).then_wrapped([username = std::move(username)] (future<::shared_ptr<cql3::untyped_result_set>> f) {
        auto res = f.get0();
        auto salted_hash = std::optional<sstring>();
--- a/alternator/conditions.cc
+++ b/alternator/conditions.cc
@@ -141,6 +141,11 @@ struct nonempty : public size_check {

 // Check that array has the expected number of elements
 static void verify_operand_count(const rjson::value* array, const size_check& expected, const rjson::value& op) {
+    if (!array && expected(0)) {
+        // If expected() allows an empty AttributeValueList, it is also fine
+        // that it is missing.
+        return;
+    }
    if (!array || !array->IsArray()) {
        throw api_error("ValidationException", "With ComparisonOperator, AttributeValueList must be given and an array");
    }
@@ -365,31 +370,35 @@ bool check_compare(const rjson::value* v1, const rjson::value& v2, const Compara

 struct cmp_lt {
    template <typename T> bool operator()(const T& lhs, const T& rhs) const { return lhs < rhs; }
+    // We cannot use the normal comparison operators like "<" on the bytes
+    // type, because they treat individual bytes as signed but we need to
+    // compare them as *unsigned*. So we need a specialization for bytes.
+    bool operator()(const bytes& lhs, const bytes& rhs) const { return compare_unsigned(lhs, rhs) < 0; }
    static constexpr const char* diagnostic = "LT operator";
 };

 struct cmp_le {
-    // bytes only has <, so we cannot use <=.
-    template <typename T> bool operator()(const T& lhs, const T& rhs) const { return lhs < rhs || lhs == rhs; }
+    template <typename T> bool operator()(const T& lhs, const T& rhs) const { return lhs <= rhs; }
+    bool operator()(const bytes& lhs, const bytes& rhs) const { return compare_unsigned(lhs, rhs) <= 0; }
    static constexpr const char* diagnostic = "LE operator";
 };

 struct cmp_ge {
-    // bytes only has <, so we cannot use >=.
-    template <typename T> bool operator()(const T& lhs, const T& rhs) const { return rhs < lhs || lhs == rhs; }
+    template <typename T> bool operator()(const T& lhs, const T& rhs) const { return lhs >= rhs; }
+    bool operator()(const bytes& lhs, const bytes& rhs) const { return compare_unsigned(lhs, rhs) >= 0; }
    static constexpr const char* diagnostic = "GE operator";
 };

 struct cmp_gt {
-    // bytes only has <, so we cannot use >.
-    template <typename T> bool operator()(const T& lhs, const T& rhs) const { return rhs < lhs; }
+    template <typename T> bool operator()(const T& lhs, const T& rhs) const { return lhs > rhs; }
+    bool operator()(const bytes& lhs, const bytes& rhs) const { return compare_unsigned(lhs, rhs) > 0; }
    static constexpr const char* diagnostic = "GT operator";
 };

 // True if v is between lb and ub, inclusive.  Throws if lb > ub.
 template <typename T>
 bool check_BETWEEN(const T& v, const T& lb, const T& ub) {
-    if (ub < lb) {
+    if (cmp_lt()(ub, lb)) {
        throw api_error("ValidationException",
                        format("BETWEEN operator requires lower_bound <= upper_bound, but {} > {}", lb, ub));
    }
--- a/alternator/executor.cc
+++ b/alternator/executor.cc
@@ -187,6 +187,25 @@ static schema_ptr get_table(service::storage_proxy& proxy, const rjson::value& r
    }
 }

+static std::tuple<bool, std::string_view, std::string_view> try_get_internal_table(std::string_view table_name) {
+    size_t it = table_name.find(executor::INTERNAL_TABLE_PREFIX);
+    if (it != 0) {
+        return {false, "", ""};
+    }
+    table_name.remove_prefix(executor::INTERNAL_TABLE_PREFIX.size());
+    size_t delim = table_name.find_first_of('.');
+    if (delim == std::string_view::npos) {
+        return {false, "", ""};
+    }
+    std::string_view ks_name = table_name.substr(0, delim);
+    table_name.remove_prefix(ks_name.size() + 1);
+    // Only internal keyspaces can be accessed to avoid leakage
+    if (!is_internal_keyspace(sstring(ks_name))) {
+        return {false, "", ""};
+    }
+    return {true, ks_name, table_name};
+}
+
 // get_table_or_view() is similar to to get_table(), except it returns either
 // a table or a materialized view from which to read, based on the TableName
 // and optional IndexName in the request. Only requests like Query and Scan
@@ -196,6 +215,17 @@ static std::pair<schema_ptr, table_or_view_type>
 get_table_or_view(service::storage_proxy& proxy, const rjson::value& request) {
    table_or_view_type type = table_or_view_type::base;
    std::string table_name = get_table_name(request);
+
+    auto [is_internal_table, internal_ks_name, internal_table_name] = try_get_internal_table(table_name);
+    if (is_internal_table) {
+        try {
+            return { proxy.get_db().local().find_schema(sstring(internal_ks_name), sstring(internal_table_name)), type };
+        } catch (no_such_column_family&) {
+            throw api_error("ResourceNotFoundException",
+                format("Requested resource not found: Internal table: {}.{} not found", internal_ks_name, internal_table_name));
+        }
+    }
+
    std::string keyspace_name = executor::KEYSPACE_NAME_PREFIX + table_name;
    const rjson::value* index_name = rjson::find(request, "IndexName");
    std::string orig_table_name;
@@ -208,12 +238,11 @@ get_table_or_view(service::storage_proxy& proxy, const rjson::value& request) {
            throw api_error("ValidationException",
                    format("Non-string IndexName '{}'", index_name->GetString()));
        }
-    }
-
-    // If no tables for global indexes were found, the index may be local
-    if (!proxy.get_db().local().has_schema(keyspace_name, table_name)) {
-        type = table_or_view_type::lsi;
-        table_name = lsi_name(orig_table_name, index_name->GetString());
+        // If no tables for global indexes were found, the index may be local
+        if (!proxy.get_db().local().has_schema(keyspace_name, table_name)) {
+            type = table_or_view_type::lsi;
+            table_name = lsi_name(orig_table_name, index_name->GetString());
+        }
    }

    try {
@@ -544,29 +573,66 @@ static bool validate_legal_tag_chars(std::string_view tag) {
    return std::all_of(tag.begin(), tag.end(), &is_legal_tag_char);
 }

+static const std::unordered_set<std::string_view> allowed_write_isolation_values = {
+    "f", "forbid", "forbid_rmw",
+    "a", "always", "always_use_lwt",
+    "o", "only_rmw_uses_lwt",
+    "u", "unsafe", "unsafe_rmw",
+};
+
 static void validate_tags(const std::map<sstring, sstring>& tags) {
-    static const std::unordered_set<std::string_view> allowed_values = {
-        "f", "forbid", "forbid_rmw",
-        "a", "always", "always_use_lwt",
-        "o", "only_rmw_uses_lwt",
-        "u", "unsafe", "unsafe_rmw",
-    };
    auto it = tags.find(rmw_operation::WRITE_ISOLATION_TAG_KEY);
    if (it != tags.end()) {
        std::string_view value = it->second;
-        elogger.warn("Allowed values count {} {}", value, allowed_values.count(value));
-        if (allowed_values.count(value) == 0) {
+        if (allowed_write_isolation_values.count(value) == 0) {
            throw api_error("ValidationException",
-                    format("Incorrect write isolation tag {}. Allowed values: {}", value, allowed_values));
+                    format("Incorrect write isolation tag {}. Allowed values: {}", value, allowed_write_isolation_values));
        }
    }
 }

+static rmw_operation::write_isolation parse_write_isolation(std::string_view value) {
+    if (!value.empty()) {
+        switch (value[0]) {
+        case 'f':
+            return rmw_operation::write_isolation::FORBID_RMW;
+        case 'a':
+            return rmw_operation::write_isolation::LWT_ALWAYS;
+        case 'o':
+            return rmw_operation::write_isolation::LWT_RMW_ONLY;
+        case 'u':
+            return rmw_operation::write_isolation::UNSAFE_RMW;
+        }
+    }
+    // Shouldn't happen, as validate_tags() / set_default_write_isolation()
+    // allow only a closed set of values.
+    return rmw_operation::default_write_isolation;
+
+}
+// This default_write_isolation is always overwritten in main.cc, which calls
+// set_default_write_isolation().
+rmw_operation::write_isolation rmw_operation::default_write_isolation =
+        rmw_operation::write_isolation::LWT_ALWAYS;
+void rmw_operation::set_default_write_isolation(std::string_view value) {
+    if (value.empty()) {
+        throw std::runtime_error("When Alternator is enabled, write "
+                "isolation policy must be selected, using the "
+                "'--alternator-write-isolation' option. "
+                "See docs/alternator/alternator.md for instructions.");
+    }
+    if (allowed_write_isolation_values.count(value) == 0) {
+        throw std::runtime_error(format("Invalid --alternator-write-isolation "
+                "setting '{}'. Allowed values: {}.",
+                value, allowed_write_isolation_values));
+    }
+    default_write_isolation = parse_write_isolation(value);
+}
+
 // FIXME: Updating tags currently relies on updating schema, which may be subject
 // to races during concurrent updates of the same table. Once Scylla schema updates
 // are fixed, this issue will automatically get fixed as well.
 enum class update_tags_action { add_tags, delete_tags };
-static future<> update_tags(const rjson::value& tags, schema_ptr schema, std::map<sstring, sstring>&& tags_map, update_tags_action action) {
+static future<> update_tags(service::migration_manager& mm, const rjson::value& tags, schema_ptr schema, std::map<sstring, sstring>&& tags_map, update_tags_action action) {
    if (action == update_tags_action::add_tags) {
        for (auto it = tags.Begin(); it != tags.End(); ++it) {
            const rjson::value& key = (*it)["Key"];
@@ -593,24 +659,12 @@ static future<> update_tags(const rjson::value& tags, schema_ptr schema, std::ma
    }
    validate_tags(tags_map);

-    std::stringstream serialized_tags;
-    serialized_tags << '{';
-    for (auto& tag_entry : tags_map) {
-        serialized_tags << format("'{}':'{}',", tag_entry.first, tag_entry.second);
-    }
-    std::string serialized_tags_str = serialized_tags.str();
-    if (!tags_map.empty()) {
-        serialized_tags_str[serialized_tags_str.size() - 1] = '}'; // trims the last ',' delimiter
-    } else {
-        serialized_tags_str.push_back('}');
-    }
-
-    sstring req = format("ALTER TABLE \"{}\".\"{}\" WITH {} = {}",
-            schema->ks_name(), schema->cf_name(), tags_extension::NAME, serialized_tags_str);
-    return db::execute_cql(std::move(req)).discard_result();
+    schema_builder builder(schema);
+    builder.set_extensions(schema::extensions_map{{sstring(tags_extension::NAME), ::make_shared<tags_extension>(std::move(tags_map))}});
+    return mm.announce_column_family_update(builder.build(), false, std::vector<view_ptr>(), false);
 }

-static future<> add_tags(service::storage_proxy& proxy, schema_ptr schema, rjson::value& request_info) {
+static future<> add_tags(service::migration_manager& mm, service::storage_proxy& proxy, schema_ptr schema, rjson::value& request_info) {
    const rjson::value* tags = rjson::find(request_info, "Tags");
    if (!tags || !tags->IsArray()) {
        return make_exception_future<>(api_error("ValidationException", format("Cannot parse tags")));
@@ -620,7 +674,7 @@ static future<> add_tags(service::storage_proxy& proxy, schema_ptr schema, rjson
    }

    std::map<sstring, sstring> tags_map = get_tags_of_table(schema);
-    return update_tags(rjson::copy(*tags), schema, std::move(tags_map), update_tags_action::add_tags);
+    return update_tags(mm, rjson::copy(*tags), schema, std::move(tags_map), update_tags_action::add_tags);
 }

 future<executor::request_return_type> executor::tag_resource(client_state& client_state, service_permit permit, rjson::value request) {
@@ -632,7 +686,7 @@ future<executor::request_return_type> executor::tag_resource(client_state& clien
            return api_error("AccessDeniedException", "Incorrect resource identifier");
        }
        schema_ptr schema = get_table_from_arn(_proxy, std::string_view(arn->GetString(), arn->GetStringLength()));
-        add_tags(_proxy, schema, request).get();
+        add_tags(_mm, _proxy, schema, request).get();
        return json_string("");
    });
 }
@@ -653,7 +707,7 @@ future<executor::request_return_type> executor::untag_resource(client_state& cli
        schema_ptr schema = get_table_from_arn(_proxy, std::string_view(arn->GetString(), arn->GetStringLength()));

        std::map<sstring, sstring> tags_map = get_tags_of_table(schema);
-        update_tags(*tags, schema, std::move(tags_map), update_tags_action::delete_tags).get();
+        update_tags(_mm, *tags, schema, std::move(tags_map), update_tags_action::delete_tags).get();
        return json_string("");
    });
 }
@@ -681,10 +735,25 @@ future<executor::request_return_type> executor::list_tags_of_resource(client_sta
    return make_ready_future<executor::request_return_type>(make_jsonable(std::move(ret)));
 }

+static future<> wait_for_schema_agreement(db::timeout_clock::time_point deadline) {
+    return do_until([deadline] {
+        if (db::timeout_clock::now() > deadline) {
+            throw std::runtime_error("Unable to reach schema agreement");
+        }
+        return service::get_local_migration_manager().have_schema_agreement();
+    }, [] {
+        return seastar::sleep(500ms);
+    });
+}
+
 future<executor::request_return_type> executor::create_table(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request) {
    _stats.api_operations.create_table++;
    elogger.trace("Creating table {}", request);
    std::string table_name = get_table_name(request);
+    if (table_name.find(INTERNAL_TABLE_PREFIX) == 0) {
+        return make_ready_future<request_return_type>(api_error("ValidationException",
+                format("Prefix {} is reserved for accessing internal tables", INTERNAL_TABLE_PREFIX)));
+    }
    std::string keyspace_name = executor::KEYSPACE_NAME_PREFIX + table_name;
    const rjson::value& attribute_definitions = request["AttributeDefinitions"];

@@ -864,15 +933,17 @@ future<executor::request_return_type> executor::create_table(client_state& clien
    }

    return create_keyspace(keyspace_name).then([this, table_name, request = std::move(request), schema, view_builders = std::move(view_builders)] () mutable {
-        return futurize_apply([&] { return _mm.announce_new_column_family(schema, false); }).then([this, table_info = std::move(request), schema, view_builders = std::move(view_builders)] () mutable {
+        return futurize_invoke([&] { return _mm.announce_new_column_family(schema, false); }).then([this, table_info = std::move(request), schema, view_builders = std::move(view_builders)] () mutable {
            return parallel_for_each(std::move(view_builders), [schema] (schema_builder builder) {
                return service::get_local_migration_manager().announce_new_view(view_ptr(builder.build()));
            }).then([this, table_info = std::move(table_info), schema] () mutable {
                future<> f = make_ready_future<>();
                if (rjson::find(table_info, "Tags")) {
-                    f = add_tags(_proxy, schema, table_info);
+                    f = add_tags(_mm, _proxy, schema, table_info);
                }
-                return f.then([table_info = std::move(table_info), schema] () mutable {
+                return f.then([] {
+                    return wait_for_schema_agreement(db::timeout_clock::now() + 10s);
+                }).then([table_info = std::move(table_info), schema] () mutable {
                    rjson::value status = rjson::empty_object();
                    supplement_table_info(table_info, *schema);
                    rjson::set(status, "TableDescription", std::move(table_info));
@@ -900,15 +971,24 @@ class attribute_collector {
    void add(bytes&& name, atomic_cell&& cell) {
        collected.emplace(std::move(name), std::move(cell));
    }
+    void add(const bytes& name, atomic_cell&& cell) {
+        collected.emplace(name, std::move(cell));
+    }
 public:
    attribute_collector() : collected(attrs_type()->get_keys_type()->as_less_comparator()) { }
-    void put(bytes&& name, bytes&& val, api::timestamp_type ts) {
-        add(std::move(name), atomic_cell::make_live(*bytes_type, ts, std::move(val), atomic_cell::collection_member::yes));
+    void put(bytes&& name, const bytes& val, api::timestamp_type ts) {
+        add(std::move(name), atomic_cell::make_live(*bytes_type, ts, val, atomic_cell::collection_member::yes));

    }
+    void put(const bytes& name, const bytes& val, api::timestamp_type ts) {
+        add(name, atomic_cell::make_live(*bytes_type, ts, val, atomic_cell::collection_member::yes));
+    }
    void del(bytes&& name, api::timestamp_type ts) {
        add(std::move(name), atomic_cell::make_dead(ts, gc_clock::now()));
    }
+    void del(const bytes& name, api::timestamp_type ts) {
+        add(name, atomic_cell::make_dead(ts, gc_clock::now()));
+    }
    collection_mutation_description to_mut() {
        collection_mutation_description ret;
        for (auto&& e : collected) {
@@ -988,7 +1068,7 @@ public:
    put_or_delete_item(const rjson::value& item, schema_ptr schema, put_item);
    // put_or_delete_item doesn't keep a reference to schema (so it can be
    // moved between shards for LWT) so it needs to be given again to build():
-    mutation build(schema_ptr schema, api::timestamp_type ts);
+    mutation build(schema_ptr schema, api::timestamp_type ts) const;
    const partition_key& pk() const { return _pk; }
    const clustering_key& ck() const { return _ck; }
 };
@@ -1017,20 +1097,29 @@ put_or_delete_item::put_or_delete_item(const rjson::value& item, schema_ptr sche
    }
 }

-mutation put_or_delete_item::build(schema_ptr schema, api::timestamp_type ts) {
+mutation put_or_delete_item::build(schema_ptr schema, api::timestamp_type ts) const {
    mutation m(schema, _pk);
-    auto& row = m.partition().clustered_row(*schema, _ck);
+    // If there's no clustering key, a tombstone should be created directly
+    // on a partition, not on a clustering row - otherwise it will look like
+    // an open-ended range tombstone, which will crash on KA/LA sstable format.
+    // Ref: #6035
+    const bool use_partition_tombstone = schema->clustering_key_size() == 0;
    if (!_cells) {
-        // a DeleteItem operation:
-        row.apply(tombstone(ts, gc_clock::now()));
+        if (use_partition_tombstone) {
+            m.partition().apply(tombstone(ts, gc_clock::now()));
+        } else {
+            // a DeleteItem operation:
+            m.partition().clustered_row(*schema, _ck).apply(tombstone(ts, gc_clock::now()));
+        }
        return m;
    }
    // else, a PutItem operation:
+    auto& row = m.partition().clustered_row(*schema, _ck);
    attribute_collector attrs_collector;
    for (auto& c : *_cells) {
        const column_definition* cdef = schema->get_column_definition(c.column_name);
        if (!cdef) {
-            attrs_collector.put(std::move(c.column_name), std::move(c.value), ts);
+            attrs_collector.put(c.column_name, c.value, ts);
        } else {
            row.cells().apply(*cdef, atomic_cell::make_live(*cdef->type, ts, std::move(c.value)));
        }
@@ -1048,7 +1137,11 @@ mutation put_or_delete_item::build(schema_ptr schema, api::timestamp_type ts) {
    // Scylla proper, to implement the operation to replace an entire
    // collection ("UPDATE .. SET x = ..") - see
    // cql3::update_parameters::make_tombstone_just_before().
-    row.apply(tombstone(ts-1, gc_clock::now()));
+    if (use_partition_tombstone) {
+        m.partition().apply(tombstone(ts-1, gc_clock::now()));
+    } else {
+        row.apply(tombstone(ts-1, gc_clock::now()));
+    }
    return m;
 }

@@ -1090,13 +1183,6 @@ static lw_shared_ptr<query::read_command> previous_item_read_command(schema_ptr
    return ::make_lw_shared<query::read_command>(schema->id(), schema->version(), partition_slice, query::max_partitions);
 }

-static lw_shared_ptr<query::read_command> read_nothing_read_command(schema_ptr schema) {
-    // Note that because this read-nothing command has an empty slice,
-    // storage_proxy::query() returns immediately - without any networking.
-    auto partition_slice = query::partition_slice({}, {}, {}, query::partition_slice::option_set());
-    return ::make_lw_shared<query::read_command>(schema->id(), schema->version(), partition_slice, query::max_partitions);
-}
-
 static dht::partition_range_vector to_partition_ranges(const schema& schema, const partition_key& pk) {
    return dht::partition_range_vector{dht::partition_range(dht::decorate_key(schema, pk))};
 }
@@ -1142,10 +1228,10 @@ rmw_operation::rmw_operation(service::storage_proxy& proxy, rjson::value&& reque
    // the request).
 }

-std::optional<mutation> rmw_operation::apply(query::result& qr, const query::partition_slice& slice, api::timestamp_type ts) {
-    if (qr.row_count()) {
+std::optional<mutation> rmw_operation::apply(foreign_ptr<lw_shared_ptr<query::result>> qr, const query::partition_slice& slice, api::timestamp_type ts) {
+    if (qr->row_count()) {
        auto selection = cql3::selection::selection::wildcard(_schema);
-        auto previous_item = describe_item(_schema, slice, *selection, qr, {});
+        auto previous_item = describe_item(_schema, slice, *selection, *qr, {});
        return apply(std::make_unique<rjson::value>(std::move(previous_item)), ts);
    } else {
        return apply(std::unique_ptr<rjson::value>(), ts);
@@ -1156,22 +1242,9 @@ rmw_operation::write_isolation rmw_operation::get_write_isolation_for_schema(sch
    const auto& tags = get_tags_of_table(schema);
    auto it = tags.find(WRITE_ISOLATION_TAG_KEY);
    if (it == tags.end() || it->second.empty()) {
-        // By default, fall back to always enforcing LWT
-        return write_isolation::LWT_ALWAYS;
-    }
-    switch (it->second[0]) {
-    case 'f':
-        return write_isolation::FORBID_RMW;
-    case 'a':
-        return write_isolation::LWT_ALWAYS;
-    case 'o':
-        return write_isolation::LWT_RMW_ONLY;
-    case 'u':
-        return write_isolation::UNSAFE_RMW;
-    default:
-        // In case of an incorrect tag, fall back to the safest option: LWT_ALWAYS
-        return write_isolation::LWT_ALWAYS;
+        return default_write_isolation;
    }
+    return parse_write_isolation(it->second);
 }

 // shard_for_execute() checks whether execute() must be called on a specific
@@ -1192,7 +1265,7 @@ std::optional<shard_id> rmw_operation::shard_for_execute(bool needs_read_before_
    // find the appropriate shard to run it on:
    auto token = dht::get_token(*_schema, _pk);
    auto desired_shard = service::storage_proxy::cas_shard(*_schema, token);
-    if (desired_shard == engine().cpu_id()) {
+    if (desired_shard == this_shard_id()) {
        return {};
    }
    return desired_shard;
@@ -1202,11 +1275,6 @@ std::optional<shard_id> rmw_operation::shard_for_execute(bool needs_read_before_
 // PutItem, DeleteItem). All these return nothing by default, but can
 // optionally return Attributes if requested via the ReturnValues option.
 static future<executor::request_return_type> rmw_operation_return(rjson::value&& attributes) {
-    // As an optimization, in the simple and common case that nothing is to be
-    // returned, quickly return an empty result:
-    if (attributes.IsNull()) {
-        return make_ready_future<executor::request_return_type>(json_string(""));
-    }
    rjson::value ret = rjson::empty_object();
    if (!attributes.IsNull()) {
        rjson::set(ret, "Attributes", std::move(attributes));
@@ -1222,7 +1290,7 @@ future<executor::request_return_type> rmw_operation::execute(service::storage_pr
        stats& stats) {
    if (needs_read_before_write) {
        if (_write_isolation == write_isolation::FORBID_RMW) {
-            throw api_error("ValidationException", "Read-modify-write operations not supported");
+            throw api_error("ValidationException", "Read-modify-write operations are disabled by 'forbid_rmw' write isolation policy. Refer to https://github.com/scylladb/scylla/blob/master/docs/alternator/alternator.md#write-isolation-policies for more information.");
        }
        stats.reads_before_write++;
        if (_write_isolation == write_isolation::UNSAFE_RMW) {
@@ -1252,7 +1320,7 @@ future<executor::request_return_type> rmw_operation::execute(service::storage_pr
    auto selection = cql3::selection::selection::wildcard(schema());
    auto read_command = needs_read_before_write ?
            previous_item_read_command(schema(), _ck, selection) :
-            read_nothing_read_command(schema());
+            nullptr;
    return proxy.cas(schema(), shared_from_this(), read_command, to_partition_ranges(*schema(), _pk),
            {timeout, std::move(permit), client_state, trace_state},
            db::consistency_level::LOCAL_SERIAL, db::consistency_level::LOCAL_QUORUM, timeout, timeout).then([this, read_command] (bool is_applied) mutable {
@@ -1331,7 +1399,7 @@ public:
               check_needs_read_before_write(_condition_expression) ||
               _returnvalues == returnvalues::ALL_OLD;
    }
-    virtual std::optional<mutation> apply(std::unique_ptr<rjson::value> previous_item, api::timestamp_type ts) override {
+    virtual std::optional<mutation> apply(std::unique_ptr<rjson::value> previous_item, api::timestamp_type ts) const override {
        std::unordered_set<std::string> used_attribute_values;
        std::unordered_set<std::string> used_attribute_names;
        if (!verify_expected(_request, previous_item) ||
@@ -1343,6 +1411,7 @@ public:
            // efficient than throwing an exception.
            return {};
        }
+        _return_attributes = {};
        if (_returnvalues == returnvalues::ALL_OLD && previous_item) {
            // previous_item is supposed to have been created with
            // describe_item(), so has the "Item" attribute:
@@ -1409,7 +1478,7 @@ public:
                check_needs_read_before_write(_condition_expression) ||
                _returnvalues == returnvalues::ALL_OLD;
    }
-    virtual std::optional<mutation> apply(std::unique_ptr<rjson::value> previous_item, api::timestamp_type ts) override {
+    virtual std::optional<mutation> apply(std::unique_ptr<rjson::value> previous_item, api::timestamp_type ts) const override {
        std::unordered_set<std::string> used_attribute_values;
        std::unordered_set<std::string> used_attribute_names;
        if (!verify_expected(_request, previous_item) ||
@@ -1421,6 +1490,7 @@ public:
            // efficient than throwing an exception.
            return {};
        }
+        _return_attributes = {};
        if (_returnvalues == returnvalues::ALL_OLD && previous_item) {
            rjson::value* item = rjson::find(*previous_item, "Item");
            if (item) {
@@ -1502,9 +1572,9 @@ public:
    put_or_delete_item_cas_request(schema_ptr s, std::vector<put_or_delete_item>&& b) :
        schema(std::move(s)), _mutation_builders(std::move(b)) { }
    virtual ~put_or_delete_item_cas_request() = default;
-    virtual std::optional<mutation> apply(query::result& qr, const query::partition_slice& slice, api::timestamp_type ts) override {
+    virtual std::optional<mutation> apply(foreign_ptr<lw_shared_ptr<query::result>> qr, const query::partition_slice& slice, api::timestamp_type ts) override {
        std::optional<mutation> ret;
-        for (put_or_delete_item& mutation_builder : _mutation_builders) {
+        for (const put_or_delete_item& mutation_builder : _mutation_builders) {
            // We assume all these builders have the same partition.
            if (ret) {
                ret->apply(mutation_builder.build(schema, ts));
@@ -1519,9 +1589,8 @@ public:
 static future<> cas_write(service::storage_proxy& proxy, schema_ptr schema, dht::decorated_key dk, std::vector<put_or_delete_item>&& mutation_builders,
        service::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit) {
    auto timeout = default_timeout();
-    auto read_command = read_nothing_read_command(schema);
    auto op = seastar::make_shared<put_or_delete_item_cas_request>(schema, std::move(mutation_builders));
-    return proxy.cas(schema, op, read_command, to_partition_ranges(dk),
+    return proxy.cas(schema, op, nullptr, to_partition_ranges(dk),
            {timeout, std::move(permit), client_state, trace_state},
            db::consistency_level::LOCAL_SERIAL, db::consistency_level::LOCAL_QUORUM,
            timeout, timeout).discard_result();
@@ -1597,7 +1666,7 @@ static future<> do_batch_write(service::storage_proxy& proxy,
        return parallel_for_each(std::move(key_builders), [&proxy, &client_state, &stats, trace_state, ssg, permit = std::move(permit)] (auto& e) {
            stats.write_using_lwt++;
            auto desired_shard = service::storage_proxy::cas_shard(*e.first.schema, e.first.dk.token());
-            if (desired_shard == engine().cpu_id()) {
+            if (desired_shard == this_shard_id()) {
                return cas_write(proxy, e.first.schema, e.first.dk, std::move(e.second), client_state, trace_state, permit);
            } else {
                stats.shard_bounce_for_lwt++;
@@ -2329,7 +2398,7 @@ public:

    update_item_operation(service::storage_proxy& proxy, rjson::value&& request);
    virtual ~update_item_operation() = default;
-    virtual std::optional<mutation> apply(std::unique_ptr<rjson::value> previous_item, api::timestamp_type ts) override;
+    virtual std::optional<mutation> apply(std::unique_ptr<rjson::value> previous_item, api::timestamp_type ts) const override;
    bool needs_read_before_write() const;
 };

@@ -2393,7 +2462,7 @@ update_item_operation::needs_read_before_write() const {
 }

 std::optional<mutation>
-update_item_operation::apply(std::unique_ptr<rjson::value> previous_item, api::timestamp_type ts) {
+update_item_operation::apply(std::unique_ptr<rjson::value> previous_item, api::timestamp_type ts) const {
    std::unordered_set<std::string> used_attribute_values;
    std::unordered_set<std::string> used_attribute_names;
    if (!verify_expected(_request, previous_item) ||
@@ -2773,6 +2842,7 @@ future<executor::request_return_type> executor::batch_get_item(client_state& cli
            [] (std::vector<std::tuple<std::string, std::optional<rjson::value>>> responses) {
        rjson::value response = rjson::empty_object();
        rjson::set(response, "Responses", rjson::empty_object());
+        rjson::set(response, "UnprocessedKeys", rjson::empty_object());
        for (auto& t : responses) {
            if (!response["Responses"].HasMember(std::get<0>(t).c_str())) {
                rjson::set_with_string_name(response["Responses"], std::get<0>(t), rjson::empty_array());
@@ -2889,6 +2959,7 @@ static future<executor::request_return_type> do_query(schema_ptr schema,
        uint32_t limit,
        db::consistency_level cl,
        ::shared_ptr<cql3::restrictions::statement_restrictions> filtering_restrictions,
+        query::partition_slice::option_set custom_opts,
        service::client_state& client_state,
        cql3::cql_stats& cql_stats,
        tracing::trace_state_ptr trace_state,
@@ -2908,8 +2979,12 @@ static future<executor::request_return_type> do_query(schema_ptr schema,

    auto regular_columns = boost::copy_range<query::column_id_vector>(
            schema->regular_columns() | boost::adaptors::transformed([] (const column_definition& cdef) { return cdef.id; }));
+    auto static_columns = boost::copy_range<query::column_id_vector>(
+            schema->static_columns() | boost::adaptors::transformed([] (const column_definition& cdef) { return cdef.id; }));
    auto selection = cql3::selection::selection::wildcard(schema);
-    auto partition_slice = query::partition_slice(std::move(ck_bounds), {}, std::move(regular_columns), selection->get_query_options());
+    query::partition_slice::option_set opts = selection->get_query_options();
+    opts.add(custom_opts);
+    auto partition_slice = query::partition_slice(std::move(ck_bounds), std::move(static_columns), std::move(regular_columns), opts);
    auto command = ::make_lw_shared<query::read_command>(schema->id(), schema->version(), partition_slice, query::max_partitions);

    auto query_state_ptr = std::make_unique<service::query_state>(client_state, trace_state, std::move(permit));
@@ -2939,11 +3014,38 @@ static future<executor::request_return_type> do_query(schema_ptr schema,
    });
 }

+// Lower boundary token of `segment`: the minimum int64 token plus segment * (UINT64_MAX / total_segments).
+static dht::token token_for_segment(int segment, int total_segments) {
+    assert(total_segments > 1 && segment >= 0 && segment < total_segments);
+    return dht::token::from_int64(std::numeric_limits<int64_t>::min() + std::numeric_limits<uint64_t>::max() / total_segments * segment);
+}
+
+// Token range scanned by one parallel-scan segment: segment 0 has no lower bound, the last
+// segment has no upper bound, and every upper bound is built with inclusive=false.
+static dht::partition_range get_range_for_segment(int segment, int total_segments) {
+    using bound = dht::partition_range::bound;
+    if (total_segments == 1) {
+        return dht::partition_range::make_open_ended_both_sides();
+    }
+    auto lower = [&] {
+        return bound(dht::ring_position::starting_at(token_for_segment(segment, total_segments)));
+    };
+    auto upper = [&] {
+        return bound(dht::ring_position::ending_at(token_for_segment(segment + 1, total_segments)), false);
+    };
+    if (segment == 0) {
+        return dht::partition_range::make_ending_with(upper());
+    }
+    if (segment == total_segments - 1) {
+        return dht::partition_range::make_starting_with(lower());
+    }
+    return dht::partition_range::make(lower(), upper());
+}
+
 // TODO(sarna):
 // 1. Paging must have 1MB boundary according to the docs. IIRC we do have a replica-side reply size limit though - verify.
 // 2. Filtering - by passing appropriately created restrictions to pager as a last parameter
 // 3. Proper timeouts instead of gc_clock::now() and db::no_timeout
-// 4. Implement parallel scanning via Segments
 future<executor::request_return_type> executor::scan(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request) {
    _stats.api_operations.scan++;
    elogger.trace("Scanning {}", request);
@@ -2954,10 +3056,21 @@ future<executor::request_return_type> executor::scan(client_state& client_state,
        return make_ready_future<request_return_type>(api_error("ValidationException",
                "FilterExpression is not yet implemented in alternator"));
    }
-    if (get_int_attribute(request, "Segment") || get_int_attribute(request, "TotalSegments")) {
-        // FIXME: need to support parallel scan. See issue #5059.
-        return make_ready_future<request_return_type>(api_error("ValidationException",
-                "Scan Segment/TotalSegments is not yet implemented in alternator"));
+    auto segment = get_int_attribute(request, "Segment");
+    auto total_segments = get_int_attribute(request, "TotalSegments");
+    if (segment || total_segments) {
+        if (!segment || !total_segments) {
+            return make_ready_future<request_return_type>(api_error("ValidationException",
+                    "Both Segment and TotalSegments attributes need to be present for a parallel scan"));
+        }
+        if (*segment < 0 || *segment >= *total_segments) {
+            return make_ready_future<request_return_type>(api_error("ValidationException",
+                    "Segment must be non-negative and less than TotalSegments"));
+        }
+        if (*total_segments < 0 || *total_segments > 1000000) {
+            return make_ready_future<request_return_type>(api_error("ValidationException",
+                    "TotalSegments must be non-negative and less or equal to 1000000"));
+        }
    }

    rjson::value* exclusive_start_key = rjson::find(request, "ExclusiveStartKey");
@@ -2976,7 +3089,12 @@ future<executor::request_return_type> executor::scan(client_state& client_state,

    auto attrs_to_get = calculate_attrs_to_get(request);

-    dht::partition_range_vector partition_ranges{dht::partition_range::make_open_ended_both_sides()};
+    dht::partition_range_vector partition_ranges;
+    if (segment) {
+        partition_ranges.push_back(get_range_for_segment(*segment, *total_segments));
+    } else {
+        partition_ranges.push_back(dht::partition_range::make_open_ended_both_sides());
+    }
    std::vector<query::clustering_range> ck_bounds{query::clustering_range::make_open_ended_both_sides()};

    ::shared_ptr<cql3::restrictions::statement_restrictions> filtering_restrictions;
@@ -2986,14 +3104,15 @@ future<executor::request_return_type> executor::scan(client_state& client_state,
        partition_ranges = filtering_restrictions->get_partition_key_ranges(query_options);
        ck_bounds = filtering_restrictions->get_clustering_bounds(query_options);
    }
-    return do_query(schema, exclusive_start_key, std::move(partition_ranges), std::move(ck_bounds), std::move(attrs_to_get), limit, cl, std::move(filtering_restrictions), client_state, _stats.cql_stats, trace_state, std::move(permit));
+    return do_query(schema, exclusive_start_key, std::move(partition_ranges), std::move(ck_bounds), std::move(attrs_to_get), limit, cl,
+            std::move(filtering_restrictions), query::partition_slice::option_set(), client_state, _stats.cql_stats, trace_state, std::move(permit));
 }

 static dht::partition_range calculate_pk_bound(schema_ptr schema, const column_definition& pk_cdef, comparison_operator_type op, const rjson::value& attrs) {
    if (attrs.Size() != 1) {
        throw api_error("ValidationException", format("Only a single attribute is allowed for a hash key restriction: {}", attrs));
    }
-    bytes raw_value = pk_cdef.type->from_string(attrs[0][type_to_string(pk_cdef.type)].GetString());
+    bytes raw_value = get_key_from_typed_value(attrs[0], pk_cdef);
    partition_key pk = partition_key::from_singular(*schema, pk_cdef.type->deserialize(raw_value));
    auto decorated_key = dht::decorate_key(*schema, pk);
    if (op != comparison_operator_type::EQ) {
@@ -3018,7 +3137,7 @@ static query::clustering_range calculate_ck_bound(schema_ptr schema, const colum
    if (attrs.Size() != expected_attrs_size) {
        throw api_error("ValidationException", format("{} arguments expected for a sort key restriction: {}", expected_attrs_size, attrs));
    }
-    bytes raw_value = ck_cdef.type->from_string(attrs[0][type_to_string(ck_cdef.type)].GetString());
+    bytes raw_value = get_key_from_typed_value(attrs[0], ck_cdef);
    clustering_key ck = clustering_key::from_single_value(*schema, raw_value);
    switch (op) {
    case comparison_operator_type::EQ:
@@ -3032,7 +3151,7 @@ static query::clustering_range calculate_ck_bound(schema_ptr schema, const colum
    case comparison_operator_type::GT:
        return query::clustering_range::make_starting_with(query::clustering_range::bound(ck, false));
    case comparison_operator_type::BETWEEN: {
-        bytes raw_upper_limit = ck_cdef.type->from_string(attrs[1][type_to_string(ck_cdef.type)].GetString());
+        bytes raw_upper_limit = get_key_from_typed_value(attrs[1], ck_cdef);
        clustering_key upper_limit = clustering_key::from_single_value(*schema, raw_upper_limit);
        return query::clustering_range::make(query::clustering_range::bound(ck), query::clustering_range::bound(upper_limit));
    }
@@ -3045,9 +3164,7 @@ static query::clustering_range calculate_ck_bound(schema_ptr schema, const colum
        if (!ck_cdef.type->is_compatible_with(*utf8_type)) {
            throw api_error("ValidationException", format("BEGINS_WITH operator cannot be applied to type {}", type_to_string(ck_cdef.type)));
        }
-        std::string raw_upper_limit_str = attrs[0][type_to_string(ck_cdef.type)].GetString();
-        bytes raw_upper_limit = ck_cdef.type->from_string(raw_upper_limit_str);
-        return get_clustering_range_for_begins_with(std::move(raw_upper_limit), ck, schema, ck_cdef.type);
+        return get_clustering_range_for_begins_with(std::move(raw_value), ck, schema, ck_cdef.type);
    }
    default:
        throw api_error("ValidationException", format("Unknown primary key bound passed: {}", int(op)));
@@ -3429,11 +3546,7 @@ future<executor::request_return_type> executor::query(client_state& client_state
    if (rjson::find(request, "FilterExpression")) {
        return make_ready_future<request_return_type>(api_error("ValidationException", "FilterExpression is not yet implemented in alternator"));
    }
-    bool forward = get_bool_attribute(request, "ScanIndexForward", true);
-    if (!forward) {
-        // FIXME: need to support the !forward (i.e., reverse sort order) case. See issue #5153.
-        return make_ready_future<request_return_type>(api_error("ValidationException", "ScanIndexForward=false is not yet implemented in alternator"));
-    }
+    const bool forward = get_bool_attribute(request, "ScanIndexForward", true);

    rjson::value* key_conditions = rjson::find(request, "KeyConditions");
    rjson::value* key_condition_expression = rjson::find(request, "KeyConditionExpression");
@@ -3476,7 +3589,10 @@ future<executor::request_return_type> executor::query(client_state& client_state
    }
    verify_all_are_used(request, "ExpressionAttributeValues", used_attribute_values, "KeyConditionExpression");
    verify_all_are_used(request, "ExpressionAttributeNames", used_attribute_names, "KeyConditionExpression");
-    return do_query(schema, exclusive_start_key, std::move(partition_ranges), std::move(ck_bounds), std::move(attrs_to_get), limit, cl, std::move(filtering_restrictions), client_state, _stats.cql_stats, std::move(trace_state), std::move(permit));
+    query::partition_slice::option_set opts;
+    opts.set_if<query::partition_slice::option::reversed>(!forward);
+    return do_query(schema, exclusive_start_key, std::move(partition_ranges), std::move(ck_bounds), std::move(attrs_to_get), limit, cl,
+            std::move(filtering_restrictions), opts, client_state, _stats.cql_stats, std::move(trace_state), std::move(permit));
 }

 future<executor::request_return_type> executor::list_tables(client_state& client_state, service_permit permit, rjson::value request) {
@@ -3567,12 +3683,12 @@ static std::map<sstring, sstring> get_network_topology_options(int rf) {
 // manually create the keyspace to override this predefined behavior.
 future<> executor::create_keyspace(std::string_view keyspace_name) {
    sstring keyspace_name_str(keyspace_name);
-    return gms::get_up_endpoint_count().then([this, keyspace_name_str = std::move(keyspace_name_str)] (int up_endpoint_count) {
+    return gms::get_all_endpoint_count().then([this, keyspace_name_str = std::move(keyspace_name_str)] (int endpoint_count) {
        int rf = 3;
-        if (up_endpoint_count < rf) {
+        if (endpoint_count < rf) {
            rf = 1;
-            elogger.warn("Creating keyspace '{}' for Alternator with unsafe RF={} because cluster only has {} live nodes.",
-                    keyspace_name_str, rf, up_endpoint_count);
+            elogger.warn("Creating keyspace '{}' for Alternator with unsafe RF={} because cluster only has {} nodes.",
+                    keyspace_name_str, rf, endpoint_count);
        }
        auto opts = get_network_topology_options(rf);
        auto ksm = keyspace_metadata::new_keyspace(keyspace_name_str, "org.apache.cassandra.locator.NetworkTopologyStrategy", std::move(opts), true);
--- a/alternator/executor.hh
+++ b/alternator/executor.hh
@@ -50,6 +50,7 @@ public:
    stats _stats;
    static constexpr auto ATTRS_COLUMN_NAME = ":attrs";
    static constexpr auto KEYSPACE_NAME_PREFIX = "alternator_";
+    static constexpr std::string_view INTERNAL_TABLE_PREFIX = ".scylla.alternator.";

    executor(service::storage_proxy& proxy, service::migration_manager& mm, smp_service_group ssg)
        : _proxy(proxy), _mm(mm), _ssg(ssg) {}
--- a/alternator/rjson.cc
+++ b/alternator/rjson.cc
@@ -123,7 +123,7 @@ protected:

 std::string print(const rjson::value& value) {
    string_buffer buffer;
-    guarded_yieldable_json_handler<writer, false> writer(buffer, 39);
+    guarded_yieldable_json_handler<writer, false> writer(buffer, 78);
    value.Accept(writer);
    return std::string(buffer.GetString());
 }
@@ -133,7 +133,7 @@ rjson::value copy(const rjson::value& value) {
 }

 rjson::value parse(std::string_view str) {
-    guarded_yieldable_json_handler<document, false> d(39);
+    guarded_yieldable_json_handler<document, false> d(78);
    d.Parse(str.data(), str.size());
    if (d.HasParseError()) {
        throw rjson::error(format("Parsing JSON failed: {}", GetParseError_En(d.GetParseError())));
@@ -143,7 +143,7 @@ rjson::value parse(std::string_view str) {
 }

 rjson::value parse_yieldable(std::string_view str) {
-    guarded_yieldable_json_handler<document, true> d(39);
+    guarded_yieldable_json_handler<document, true> d(78);
    d.Parse(str.data(), str.size());
    if (d.HasParseError()) {
        throw rjson::error(format("Parsing JSON failed: {}", GetParseError_En(d.GetParseError())));
--- a/alternator/rmw_operation.hh
+++ b/alternator/rmw_operation.hh
@@ -63,6 +63,10 @@ public:

    static write_isolation get_write_isolation_for_schema(schema_ptr schema);

+    static write_isolation default_write_isolation;
+public:
+    static void set_default_write_isolation(std::string_view mode);
+
 protected:
    // The full request JSON
    rjson::value _request;
@@ -83,7 +87,11 @@ protected:
    // When _returnvalues != NONE, apply() should store here, in JSON form,
    // the values which are to be returned in the "Attributes" field.
    // The default null JSON means do not return an Attributes field at all.
-    rjson::value _return_attributes;
+    // This field is marked "mutable" so that the const apply() can modify
+    // it (see explanation below), but note that because apply() may be
+    // called more than once, if apply() will sometimes set this field it
+    // must set it (even if just to the default empty value) every time.
+    mutable rjson::value _return_attributes;
 public:
    // The constructor of a rmw_operation subclass should parse the request
    // and try to discover as many input errors as it can before really
@@ -96,9 +104,14 @@ public:
    // conditional expression, apply() should return an empty optional.
    // apply() may throw if it encounters input errors not discovered during
    // the constructor.
-    virtual std::optional<mutation> apply(std::unique_ptr<rjson::value> previous_item, api::timestamp_type ts) = 0;
+    // apply() may be called more than once in case of contention, so it must
+    // not change the state saved in the object (issue #7218 was caused by
+    // violating this). We mark apply() "const" to let the compiler validate
+    // this for us. The output-only field _return_attributes is marked
+    // "mutable" above so that apply() can still write to it.
+    virtual std::optional<mutation> apply(std::unique_ptr<rjson::value> previous_item, api::timestamp_type ts) const = 0;
    // Convert the above apply() into the signature needed by cas_request:
-    virtual std::optional<mutation> apply(query::result& qr, const query::partition_slice& slice, api::timestamp_type ts) override;
+    virtual std::optional<mutation> apply(foreign_ptr<lw_shared_ptr<query::result>> qr, const query::partition_slice& slice, api::timestamp_type ts) override;
    virtual ~rmw_operation() = default;
    schema_ptr schema() const { return _schema; }
    const rjson::value& request() const { return _request; }
--- a/alternator/serialization.cc
+++ b/alternator/serialization.cc
@@ -121,7 +121,7 @@ struct to_json_visitor {
    }
    // default
    void operator()(const abstract_type& t) const {
-        rjson::set_with_string_name(deserialized, type_ident, rjson::parse(t.to_string(bytes(bv))));
+        rjson::set_with_string_name(deserialized, type_ident, rjson::parse(to_json_string(t, bytes(bv))));
    }
 };

@@ -153,7 +153,9 @@ std::string type_to_string(data_type type) {
    };
    auto it = types.find(type);
    if (it == types.end()) {
-        throw std::runtime_error(format("Unknown type {}", type->name()));
+        // fall back to string, in order to be able to present
+        // internal Scylla types in a human-readable way
+        return "S";
    }
    return it->second;
 }
@@ -205,8 +207,11 @@ rjson::value json_key_column_value(bytes_view cell, const column_definition& col
        auto s = to_json_string(*decimal_type, bytes(cell));
        return rjson::from_string(s);
    } else {
-        // We shouldn't get here, we shouldn't see such key columns.
-        throw std::runtime_error(format("Unexpected key type: {}", column.type->name()));
+        // Support for arbitrary key types is useful for parsing values of virtual tables,
+        // which can involve any type supported by Scylla.
+        // In order to guarantee that the returned type is parsable by alternator clients,
+        // they are represented simply as strings.
+        return rjson::from_string(column.type->to_string(bytes(cell)));
    }
 }

--- a/alternator/server.cc
+++ b/alternator/server.cc
@@ -69,7 +69,7 @@ class api_handler : public handler_base {
 public:
    api_handler(const std::function<future<executor::request_return_type>(std::unique_ptr<request> req)>& _handle) : _f_handle(
         [this, _handle](std::unique_ptr<request> req, std::unique_ptr<reply> rep) {
-         return seastar::futurize_apply(_handle, std::move(req)).then_wrapped([this, rep = std::move(rep)](future<executor::request_return_type> resf) mutable {
+         return seastar::futurize_invoke(_handle, std::move(req)).then_wrapped([this, rep = std::move(rep)](future<executor::request_return_type> resf) mutable {
             if (resf.failed()) {
                 // Exceptions of type api_error are wrapped as JSON and
                 // returned to the client as expected. Other types of
@@ -409,15 +409,19 @@ future<> server::init(net::inet_address addr, std::optional<uint16_t> port, std:
                _http_server.set_content_length_limit(server::content_length_limit);
                _http_server.listen(socket_address{addr, *port}).get();
                _enabled_servers.push_back(std::ref(_http_server));
-                slogger.info("Alternator HTTP server listening on {} port {}", addr, *port);
            }
            if (https_port) {
                set_routes(_https_server._routes);
                _https_server.set_content_length_limit(server::content_length_limit);
-                _https_server.set_tls_credentials(creds->build_server_credentials());
+                _https_server.set_tls_credentials(creds->build_reloadable_server_credentials([](const std::unordered_set<sstring>& files, std::exception_ptr ep) {
+                    if (ep) {
+                        slogger.warn("Exception loading {}: {}", files, ep);
+                    } else {
+                        slogger.info("Reloaded {}", files);
+                    }
+                }).get0());
                _https_server.listen(socket_address{addr, *https_port}).get();
                _enabled_servers.push_back(std::ref(_https_server));
-                slogger.info("Alternator HTTPS server listening on {} port {}", addr, *https_port);
            }
        } catch (...) {
            slogger.error("Failed to set up Alternator HTTP server on {} port {}, TLS port {}: {}",
--- a/api/api-doc/column_family.json
+++ b/api/api-doc/column_family.json
@@ -380,16 +380,54 @@
         "operations":[
            {
               "method":"GET",
-               "summary":"check if the auto compaction disabled",
+               "summary":"check if the auto_compaction property is enabled for a given table",
               "type":"boolean",
-               "nickname":"is_auto_compaction_disabled",
+               "nickname":"get_auto_compaction",
               "produces":[
                  "application/json"
               ],
               "parameters":[
                  {
                     "name":"name",
-                     "description":"The column family name in keyspace:name format",
+                     "description":"The table name in keyspace:name format",
+                     "required":true,
+                     "allowMultiple":false,
+                     "type":"string",
+                     "paramType":"path"
+                  }
+               ]
+            },
+            {
+               "method":"POST",
+               "summary":"Enable table auto compaction",
+               "type":"void",
+               "nickname":"enable_auto_compaction",
+               "produces":[
+                  "application/json"
+               ],
+               "parameters":[
+                  {
+                     "name":"name",
+                     "description":"The table name in keyspace:name format",
+                     "required":true,
+                     "allowMultiple":false,
+                     "type":"string",
+                     "paramType":"path"
+                  }
+               ]
+            },
+            {
+               "method":"DELETE",
+               "summary":"Disable table auto compaction",
+               "type":"void",
+               "nickname":"disable_auto_compaction",
+               "produces":[
+                  "application/json"
+               ],
+               "parameters":[
+                  {
+                     "name":"name",
+                     "description":"The table name in keyspace:name format",
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
--- a/api/cache_service.cc
+++ b/api/cache_service.cc
@@ -208,9 +208,11 @@ void set_cache_service(http_context& ctx, routes& r) {
    });

    cs::get_row_capacity.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return map_reduce_cf(ctx, uint64_t(0), [](const column_family& cf) {
-            return cf.get_row_cache().get_cache_tracker().region().occupancy().used_space();
-        }, std::plus<uint64_t>());
+        return ctx.db.map_reduce0([](database& db) -> uint64_t {
+            return db.row_cache_tracker().region().occupancy().used_space();
+        }, uint64_t(0), std::plus<uint64_t>()).then([](const int64_t& res) {
+            return make_ready_future<json::json_return_type>(res);
+        });
    });

    cs::get_row_hits.set(r, [&ctx] (std::unique_ptr<request> req) {
@@ -251,15 +253,19 @@ void set_cache_service(http_context& ctx, routes& r) {
    cs::get_row_size.set(r, [&ctx] (std::unique_ptr<request> req) {
        // In origin row size is the weighted size.
        // We currently do not support weights, so we use num entries instead
-        return map_reduce_cf(ctx, 0, [](const column_family& cf) {
-            return cf.get_row_cache().partitions();
-        }, std::plus<uint64_t>());
+        return ctx.db.map_reduce0([](database& db) -> uint64_t {
+            return db.row_cache_tracker().partitions();
+        }, uint64_t(0), std::plus<uint64_t>()).then([](const int64_t& res) {
+            return make_ready_future<json::json_return_type>(res);
+        });
    });

    cs::get_row_entries.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return map_reduce_cf(ctx, 0, [](const column_family& cf) {
-            return cf.get_row_cache().partitions();
-        }, std::plus<uint64_t>());
+        return ctx.db.map_reduce0([](database& db) -> uint64_t {
+            return db.row_cache_tracker().partitions();
+        }, uint64_t(0), std::plus<uint64_t>()).then([](const int64_t& res) {
+            return make_ready_future<json::json_return_type>(res);
+        });
    });

    cs::get_counter_capacity.set(r, [] (std::unique_ptr<request> req) {
--- a/api/column_family.cc
+++ b/api/column_family.cc
@@ -804,14 +804,14 @@ void set_column_family(http_context& ctx, routes& r) {

    cf::get_cas_propose.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, req->param["name"], utils::estimated_histogram(0), [](column_family& cf) {
-            return cf.get_stats().estimated_cas_propose;
+            return cf.get_stats().estimated_cas_accept;
        },
        utils::estimated_histogram_merge, utils_json::estimated_histogram());
    });

    cf::get_cas_commit.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, req->param["name"], utils::estimated_histogram(0), [](column_family& cf) {
-            return cf.get_stats().estimated_cas_commit;
+            return cf.get_stats().estimated_cas_learn;
        },
        utils::estimated_histogram_merge, utils_json::estimated_histogram());
    });
@@ -839,11 +839,26 @@ void set_column_family(http_context& ctx, routes& r) {
        return make_ready_future<json::json_return_type>(res);
    });

-    cf::is_auto_compaction_disabled.set(r, [] (const_req req) {
-        // FIXME
-        // currently auto compaction is disable
-        // it should be changed when it would have an API
-        return true;
+    cf::get_auto_compaction.set(r, [&ctx] (const_req req) {
+        const utils::UUID& uuid = get_uuid(req.param["name"], ctx.db.local());
+        column_family& cf = ctx.db.local().find_column_family(uuid);
+        return !cf.is_auto_compaction_disabled_by_user();
+    });
+
+    cf::enable_auto_compaction.set(r, [&ctx](std::unique_ptr<request> req) {
+        return foreach_column_family(ctx, req->param["name"], [](column_family &cf) {
+            cf.enable_auto_compaction();
+        }).then([] {
+            return make_ready_future<json::json_return_type>(json_void());
+        });
+    });
+
+    cf::disable_auto_compaction.set(r, [&ctx](std::unique_ptr<request> req) {
+        return foreach_column_family(ctx, req->param["name"], [](column_family &cf) {
+            cf.disable_auto_compaction();
+        }).then([] {
+            return make_ready_future<json::json_return_type>(json_void());
+        });
    });

    cf::get_built_indexes.set(r, [&ctx](std::unique_ptr<request> req) {
--- a/api/error_injection.cc
+++ b/api/error_injection.cc
@@ -37,8 +37,9 @@ void set_error_injection(http_context& ctx, routes& r) {
        sstring injection = req->param["injection"];
        bool one_shot = req->get_query_param("one_shot") == "True";
        auto& errinj = utils::get_local_injector();
-        errinj.enable_on_all(injection, one_shot);
-        return make_ready_future<json::json_return_type>(json::json_void());
+        return errinj.enable_on_all(injection, one_shot).then([] {
+            return make_ready_future<json::json_return_type>(json::json_void());
+        });
    });

    hf::get_enabled_injections_on_all.set(r, [](std::unique_ptr<request> req) {
@@ -51,14 +52,16 @@ void set_error_injection(http_context& ctx, routes& r) {
        sstring injection = req->param["injection"];

        auto& errinj = utils::get_local_injector();
-        errinj.disable_on_all(injection);
-        return make_ready_future<json::json_return_type>(json::json_void());
+        return errinj.disable_on_all(injection).then([] {
+            return make_ready_future<json::json_return_type>(json::json_void());
+        });
    });

    hf::disable_on_all.set(r, [](std::unique_ptr<request> req) {
        auto& errinj = utils::get_local_injector();
-        errinj.disable_on_all();
-        return make_ready_future<json::json_return_type>(json::json_void());
+        return errinj.disable_on_all().then([] {
+            return make_ready_future<json::json_return_type>(json::json_void());
+        });
    });

 }
--- a/api/storage_service.cc
+++ b/api/storage_service.cc
@@ -54,26 +54,22 @@ static sstring validate_keyspace(http_context& ctx, const parameters& param) {
    throw bad_param_exception("Keyspace " + param["keyspace"] + " Does not exist");
 }

-static std::vector<ss::token_range> describe_ring(const sstring& keyspace) {
-    std::vector<ss::token_range> res;
-    for (auto d : service::get_local_storage_service().describe_ring(keyspace)) {
-        ss::token_range r;
-        r.start_token = d._start_token;
-        r.end_token = d._end_token;
-        r.endpoints = d._endpoints;
-        r.rpc_endpoints = d._rpc_endpoints;
-        for (auto det : d._endpoint_details) {
-            ss::endpoint_detail ed;
-            ed.host = det._host;
-            ed.datacenter = det._datacenter;
-            if (det._rack != "") {
-                ed.rack = det._rack;
-            }
-            r.endpoint_details.push(ed);
+static ss::token_range token_range_endpoints_to_json(const dht::token_range_endpoints& d) {
+    ss::token_range r;
+    r.start_token = d._start_token;
+    r.end_token = d._end_token;
+    r.endpoints = d._endpoints;
+    r.rpc_endpoints = d._rpc_endpoints;
+    for (auto det : d._endpoint_details) {
+        ss::endpoint_detail ed;
+        ed.host = det._host;
+        ed.datacenter = det._datacenter;
+        if (det._rack != "") {
+            ed.rack = det._rack;
        }
-        res.push_back(r);
+        r.endpoint_details.push(ed);
    }
-    return res;
+    return r;
 }

 using ks_cf_func = std::function<future<json::json_return_type>(http_context&, std::unique_ptr<request>, sstring, std::vector<sstring>)>;
@@ -89,6 +85,23 @@ static auto wrap_ks_cf(http_context &ctx, ks_cf_func f) {
    };
 }

+future<> set_tables_autocompaction(http_context& ctx, const sstring &keyspace, std::vector<sstring> tables, bool enabled) {
+    if (tables.empty()) {
+        tables = map_keys(ctx.db.local().find_keyspace(keyspace).metadata().get()->cf_meta_data());
+    }
+    return ctx.db.invoke_on_all([keyspace, tables, enabled] (database& db) {
+        return parallel_for_each(tables, [&db, keyspace, enabled](const sstring& table) mutable {
+            column_family& cf = db.find_column_family(keyspace, table);
+            if (enabled) {
+                cf.enable_auto_compaction();
+            } else {
+                cf.disable_auto_compaction();
+            }
+            return make_ready_future<>();
+        });
+    });
+}
+
 void set_storage_service(http_context& ctx, routes& r) {
    ss::local_hostid.set(r, [](std::unique_ptr<request> req) {
        return db::system_keyspace::get_local_host_id().then([](const utils::UUID& id) {
@@ -175,13 +188,13 @@ void set_storage_service(http_context& ctx, routes& r) {
        return make_ready_future<json::json_return_type>(res);
    });

-    ss::describe_any_ring.set(r, [&ctx](const_req req) {
-        return describe_ring("");
+    ss::describe_any_ring.set(r, [&ctx](std::unique_ptr<request> req) {
+        return make_ready_future<json::json_return_type>(stream_range_as_array(service::get_local_storage_service().describe_ring(""), token_range_endpoints_to_json));
    });

-    ss::describe_ring.set(r, [&ctx](const_req req) {
-        auto keyspace = validate_keyspace(ctx, req.param);
-        return describe_ring(keyspace);
+    ss::describe_ring.set(r, [&ctx](std::unique_ptr<request> req) {
+        auto keyspace = validate_keyspace(ctx, req->param);
+        return make_ready_future<json::json_return_type>(stream_range_as_array(service::get_local_storage_service().describe_ring(keyspace), token_range_endpoints_to_json));
    });

    ss::get_host_id_map.set(r, [&ctx](const_req req) {
@@ -256,8 +269,8 @@ void set_storage_service(http_context& ctx, routes& r) {
                for (auto cf : column_families) {
                    column_families_vec.push_back(&db.find_column_family(keyspace, cf));
                }
-                return parallel_for_each(column_families_vec, [&cm] (column_family* cf) {
-                    return cm.perform_cleanup(cf);
+                return parallel_for_each(column_families_vec, [&cm, &db] (column_family* cf) {
+                    return cm.perform_cleanup(db, cf);
                });
            }).then([]{
                return make_ready_future<json::json_return_type>(0);
@@ -648,7 +661,7 @@ void set_storage_service(http_context& ctx, routes& r) {

    ss::set_trace_probability.set(r, [](std::unique_ptr<request> req) {
        auto probability = req->get_query_param("probability");
-        return futurize<json::json_return_type>::apply([probability] {
+        return futurize_invoke([probability] {
            double real_prob = std::stod(probability.c_str());
            return tracing::tracing::tracing_instance().invoke_on_all([real_prob] (auto& local_tracing) {
                local_tracing.set_trace_probability(real_prob);
@@ -703,19 +716,19 @@ void set_storage_service(http_context& ctx, routes& r) {
    });

    ss::enable_auto_compaction.set(r, [&ctx](std::unique_ptr<request> req) {
-        //TBD
-        unimplemented();
        auto keyspace = validate_keyspace(ctx, req->param);
-        auto column_family = req->get_query_param("cf");
-        return make_ready_future<json::json_return_type>(json_void());
+        auto tables = split_cf(req->get_query_param("cf"));
+        return set_tables_autocompaction(ctx, keyspace, tables, true).then([]{
+            return make_ready_future<json::json_return_type>(json_void());
+        });
    });

    ss::disable_auto_compaction.set(r, [&ctx](std::unique_ptr<request> req) {
-        //TBD
-        unimplemented();
        auto keyspace = validate_keyspace(ctx, req->param);
-        auto column_family = req->get_query_param("cf");
-        return make_ready_future<json::json_return_type>(json_void());
+        auto tables = split_cf(req->get_query_param("cf"));
+        return set_tables_autocompaction(ctx, keyspace, tables, false).then([]{
+            return make_ready_future<json::json_return_type>(json_void());
+        });
    });

    ss::deliver_hints.set(r, [](std::unique_ptr<request> req) {
@@ -1000,6 +1013,9 @@ void set_snapshot(http_context& ctx, routes& r) {
        if (column_family.empty()) {
            resp = service::get_local_storage_service().take_snapshot(tag, keynames);
        } else {
+            if (keynames.empty()) {
+                throw httpd::bad_param_exception("The keyspace of column families must be specified");
+            }
            if (keynames.size() > 1) {
                throw httpd::bad_param_exception("Only one keyspace allowed when specifying a column family");
            }
--- a/api/system.cc
+++ b/api/system.cc
@@ -22,6 +22,7 @@
 #include "api/api-doc/system.json.hh"
 #include "api/api.hh"

+#include <seastar/core/reactor.hh>
 #include <seastar/http/exception.hh>
 #include "log.hh"

--- a/auth/common.cc
+++ b/auth/common.cc
@@ -65,16 +65,16 @@ static future<> create_metadata_table_if_missing_impl(
        std::string_view cql,
        ::service::migration_manager& mm) {
    static auto ignore_existing = [] (seastar::noncopyable_function<future<>()> func) {
-        return futurize_apply(std::move(func)).handle_exception_type([] (exceptions::already_exists_exception& ignored) { });
+        return futurize_invoke(std::move(func)).handle_exception_type([] (exceptions::already_exists_exception& ignored) { });
    };
    auto& db = qp.db();
-    auto parsed_statement = static_pointer_cast<cql3::statements::raw::cf_statement>(
-            cql3::query_processor::parse_statement(cql));
+    auto parsed_statement = cql3::query_processor::parse_statement(cql);
+    auto& parsed_cf_statement = static_cast<cql3::statements::raw::cf_statement&>(*parsed_statement);

-    parsed_statement->prepare_keyspace(meta::AUTH_KS);
+    parsed_cf_statement.prepare_keyspace(meta::AUTH_KS);

    auto statement = static_pointer_cast<cql3::statements::create_table_statement>(
-            parsed_statement->prepare(db, qp.get_cql_stats())->statement);
+            parsed_cf_statement.prepare(db, qp.get_cql_stats())->statement);

    const auto schema = statement->get_cf_meta_data(qp.db());
    const auto uuid = generate_legacy_id(schema->ks_name(), schema->cf_name());
@@ -92,7 +92,7 @@ future<> create_metadata_table_if_missing(
        cql3::query_processor& qp,
        std::string_view cql,
        ::service::migration_manager& mm) noexcept {
-    return futurize_apply(create_metadata_table_if_missing_impl, table_name, qp, cql, mm);
+    return futurize_invoke(create_metadata_table_if_missing_impl, table_name, qp, cql, mm);
 }

 future<> wait_for_schema_agreement(::service::migration_manager& mm, const database& db, seastar::abort_source& as) {
--- a/auth/common.hh
+++ b/auth/common.hh
@@ -27,9 +27,10 @@
 #include <seastar/core/future.hh>
 #include <seastar/core/abort_source.hh>
 #include <seastar/util/noncopyable_function.hh>
-#include <seastar/core/reactor.hh>
+#include <seastar/core/seastar.hh>
 #include <seastar/core/resource.hh>
 #include <seastar/core/sstring.hh>
+#include <seastar/core/smp.hh>

 #include "log.hh"
 #include "seastarx.hh"
@@ -61,7 +62,7 @@ extern const sstring AUTH_PACKAGE_NAME;

 template <class Task>
 future<> once_among_shards(Task&& f) {
-    if (engine().cpu_id() == 0u) {
+    if (this_shard_id() == 0u) {
        return f();
    }

--- a/auth/default_authorizer.cc
+++ b/auth/default_authorizer.cc
@@ -51,7 +51,7 @@ extern "C" {

 #include <boost/algorithm/string/join.hpp>
 #include <boost/range.hpp>
-#include <seastar/core/reactor.hh>
+#include <seastar/core/seastar.hh>

 #include "auth/authenticated_user.hh"
 #include "auth/common.hh"
--- a/auth/password_authenticator.cc
+++ b/auth/password_authenticator.cc
@@ -48,7 +48,7 @@
 #include <optional>

 #include <boost/algorithm/cxx11/all_of.hpp>
-#include <seastar/core/reactor.hh>
+#include <seastar/core/seastar.hh>

 #include "auth/authenticated_user.hh"
 #include "auth/common.hh"
@@ -230,7 +230,7 @@ future<authenticated_user> password_authenticator::authenticate(
    // obsolete prepared statements pretty quickly.
    // Rely on query processing caching statements instead, and lets assume
    // that a map lookup string->statement is not gonna kill us much.
-    return futurize_apply([this, username, password] {
+    return futurize_invoke([this, username, password] {
        static const sstring query = format("SELECT {} FROM {} WHERE {} = ?",
                SALTED_HASH,
                meta::roles_table::qualified_name(),
--- a/auth/role_manager.hh
+++ b/auth/role_manager.hh
@@ -33,6 +33,7 @@

 #include "auth/resource.hh"
 #include "seastarx.hh"
+#include "exceptions/exceptions.hh"

 namespace auth {

@@ -52,9 +53,9 @@ struct role_config_update final {
 ///
 /// A logical argument error for a role-management operation.
 ///
-class roles_argument_exception : public std::invalid_argument {
+class roles_argument_exception : public exceptions::invalid_request_exception {
 public:
-    using std::invalid_argument::invalid_argument;
+    using exceptions::invalid_request_exception::invalid_request_exception;
 };

 class role_already_exists : public roles_argument_exception {
--- a/auth/service.cc
+++ b/auth/service.cc
@@ -419,7 +419,7 @@ future<> create_role(
            return make_ready_future<>();
        }

-        return futurize_apply(
+        return futurize_invoke(
                &validate_authentication_options_are_supported,
                options,
                ser.underlying_authenticator().supported_options()).then([&ser, name, &options] {
@@ -443,7 +443,7 @@ future<> alter_role(
            return make_ready_future<>();
        }

-        return futurize_apply(
+        return futurize_invoke(
                &validate_authentication_options_are_supported,
                options,
                ser.underlying_authenticator().supported_options()).then([&ser, name, &options] {
--- a/auth/transitional.cc
+++ b/auth/transitional.cc
@@ -158,7 +158,7 @@ public:
            }

            virtual future<authenticated_user> get_authenticated_user() const {
-                return futurize_apply([this] {
+                return futurize_invoke([this] {
                    return _sasl->get_authenticated_user().handle_exception([](auto ep) {
                        try {
                            std::rethrow_exception(ep);
--- a/cache_flat_mutation_reader.hh
+++ b/cache_flat_mutation_reader.hh
@@ -176,7 +176,7 @@ public:
        return make_ready_future<>();
    }
    virtual future<> fast_forward_to(position_range pr, db::timeout_clock::time_point timeout) override {
-        throw std::bad_function_call();
+        return make_exception_future<>(make_backtraced_exception_ptr<std::bad_function_call>());
    }
 };

--- a/cdc/cdc_partitioner.cc
+++ b/cdc/cdc_partitioner.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2020 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "cdc_partitioner.hh"
+#include "dht/token.hh"
+#include "schema.hh"
+#include "sstables/key.hh"
+#include "utils/class_registrator.hh"
+#include "cdc/generation.hh"
+#include "keys.hh"
+
+static const sstring cdc_partitioner_name = "com.scylladb.dht.CDCPartitioner";
+
+namespace cdc {
+
+const sstring cdc_partitioner::name() const {
+    return cdc_partitioner_name;
+}
+
+static dht::token to_token(int64_t value) {
+    return dht::token(dht::token::kind::key, value);
+}
+
+static dht::token to_token(bytes_view key) {
+    // Key should be 16 B long, of which first 8 B are used for token calculation
+    if (key.size() != 2*sizeof(int64_t)) {
+        return dht::minimum_token();
+    }
+    return to_token(stream_id::token_from_bytes(key));
+}
+
+dht::token
+cdc_partitioner::get_token(const sstables::key_view& key) const {
+    return to_token(bytes_view(key));
+}
+
+dht::token
+cdc_partitioner::get_token(const schema& s, partition_key_view key) const {
+    auto exploded_key = key.explode(s);
+    return to_token(exploded_key[0]);
+}
+
+using registry = class_registrator<dht::i_partitioner, cdc_partitioner>;
+static registry registrator(cdc_partitioner_name);
+static registry registrator_short_name("CDCPartitioner");
+
+}
--- a/cdc/cdc_partitioner.hh
+++ b/cdc/cdc_partitioner.hh
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2020 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <seastar/core/sstring.hh>
+
+#include "bytes.hh"
+#include "dht/i_partitioner.hh"
+
+class schema;
+class partition_key_view;
+
+namespace sstables {
+
+class key_view;
+
+}
+
+namespace cdc {
+
+struct cdc_partitioner final : public dht::i_partitioner {
+    cdc_partitioner() = default;
+    virtual const sstring name() const override;
+    virtual dht::token get_token(const schema& s, partition_key_view key) const override;
+    virtual dht::token get_token(const sstables::key_view& key) const override;
+};
+
+
+}
--- a/cdc/generation.cc
+++ b/cdc/generation.cc
@@ -80,7 +80,7 @@ bool stream_id::operator<(const stream_id& o) const {
    return _value < o._value;
 }

-static int64_t bytes_to_int64(const bytes& b, size_t offset) {
+static int64_t bytes_to_int64(bytes_view b, size_t offset) {
    assert(b.size() >= offset + sizeof(int64_t));
    int64_t res;
    std::copy_n(b.begin() + offset, sizeof(int64_t), reinterpret_cast<int8_t *>(&res));
@@ -88,13 +88,17 @@ static int64_t bytes_to_int64(const bytes& b, size_t offset) {
 }

 int64_t stream_id::first() const {
-    return bytes_to_int64(_value, 0);
+    return token_from_bytes(_value);
 }

 int64_t stream_id::second() const {
    return bytes_to_int64(_value, sizeof(int64_t));
 }

+int64_t stream_id::token_from_bytes(bytes_view b) {
+    return bytes_to_int64(b, 0);
+}
+
 const bytes& stream_id::to_bytes() const {
    return _value;
 }
@@ -119,176 +123,110 @@ const std::vector<token_range_description>& topology_description::entries() cons
    return _entries;
 }

-static stream_id make_random_stream_id() {
+static stream_id create_stream_id(dht::token t) {
    static thread_local std::mt19937_64 rand_gen(std::random_device().operator()());
    static thread_local std::uniform_int_distribution<int64_t> rand_dist(std::numeric_limits<int64_t>::min());

-    return {rand_dist(rand_gen), rand_dist(rand_gen)};
+    return {dht::token::to_int64(t), rand_dist(rand_gen)};
 }

-/* Given:
- * 1. a set of tokens which split the token ring into token ranges (vnodes),
- * 2. information on how each token range is distributed among its owning node's shards
- * this function tries to generate a set of CDC stream identifiers such that for each
- * shard and vnode pair there exists a stream whose token falls into this
- * vnode and is owned by this shard.
- *
- * It then builds a cdc::topology_description which maps tokens to these
- * found stream identifiers, such that if token T is owned by shard S in vnode V,
- * it gets mapped to the stream identifier generated for (S, V).
- */
-// Run in seastar::async context.
-topology_description generate_topology_description(
-        const db::config& cfg,
-        const std::unordered_set<dht::token>& bootstrap_tokens,
-        const locator::token_metadata& token_metadata,
-        const gms::gossiper& gossiper) {
-    if (bootstrap_tokens.empty()) {
-        throw std::runtime_error(
-                "cdc: bootstrap tokens is empty in generate_topology_description");
+class topology_description_generator final {
+    const db::config& _cfg;
+    const std::unordered_set<dht::token>& _bootstrap_tokens;
+    const locator::token_metadata& _token_metadata;
+    const gms::gossiper& _gossiper;
+
+    // Compute a set of tokens that split the token ring into vnodes
+    auto get_tokens() const {
+        auto tokens = _token_metadata.sorted_tokens();
+        auto it = tokens.insert(
+                tokens.end(), _bootstrap_tokens.begin(), _bootstrap_tokens.end());
+        std::sort(it, tokens.end());
+        std::inplace_merge(tokens.begin(), it, tokens.end());
+        tokens.erase(std::unique(tokens.begin(), tokens.end()), tokens.end());
+        return tokens;
    }

-    auto tokens = token_metadata.sorted_tokens();
-    tokens.insert(tokens.end(), bootstrap_tokens.begin(), bootstrap_tokens.end());
-    std::sort(tokens.begin(), tokens.end());
-    tokens.erase(std::unique(tokens.begin(), tokens.end()), tokens.end());
-
-    std::vector<token_range_description> entries(tokens.size());
-    int spots_to_fill = 0;
-
-    for (size_t i = 0; i < tokens.size(); ++i) {
-        auto& entry = entries[i];
-        entry.token_range_end = tokens[i];
-
-        if (bootstrap_tokens.count(entry.token_range_end) > 0) {
-            entry.streams.resize(smp::count);
-            entry.sharding_ignore_msb = cfg.murmur3_partitioner_ignore_msb_bits();
+    // Fetch sharding parameters for the node that owns the vnode ending at token `end`.
+    // Returns <shard_count, ignore_msb> pair.
+    std::pair<size_t, uint8_t> get_sharding_info(dht::token end) const {
+        if (_bootstrap_tokens.count(end) > 0) {
+            return {smp::count, _cfg.murmur3_partitioner_ignore_msb_bits()};
        } else {
-            auto endpoint = token_metadata.get_endpoint(entry.token_range_end);
+            auto endpoint = _token_metadata.get_endpoint(end);
            if (!endpoint) {
-                throw std::runtime_error(format("Can't find endpoint for token {}", entry.token_range_end));
-            }
-            auto sc = get_shard_count(*endpoint, gossiper);
-            entry.streams.resize(sc > 0 ? sc : 1);
-            entry.sharding_ignore_msb = get_sharding_ignore_msb(*endpoint, gossiper);
-        }
-
-        spots_to_fill += entry.streams.size();
-    }
-
-    auto schema = schema_builder("fake_ks", "fake_table")
-        .with_column("stream_id", bytes_type, column_kind::partition_key)
-        .build();
-
-    auto quota = std::chrono::seconds(spots_to_fill / 2000 + 1);
-    auto start_time = std::chrono::system_clock::now();
-
-    // For each pair (i, j), 0 <= i < streams.size(), 0 <= j < streams[i].size(),
-    // try to find a stream (stream[i][j]) such that the token of this stream will get mapped to this stream
-    // (refer to the comments above topology_description's definition to understand how it describes the mapping).
-    // We find the streams by randomly generating them and checking into which pairs they get mapped.
-    // NOTE: this algorithm is temporary and will be replaced after per-table-partitioner feature gets merged in.
-    repeat([&] {
-        for (int i = 0; i < 500; ++i) {
-            auto stream_id = make_random_stream_id();
-            auto token = dht::get_token(*schema, stream_id.to_partition_key(*schema));
-
-            // Find the token range into which our stream_id's token landed.
-            auto it = std::lower_bound(tokens.begin(), tokens.end(), token);
-            auto& entry = entries[it != tokens.end() ? std::distance(tokens.begin(), it) : 0];
-
-            auto shard_id = dht::shard_of(entry.streams.size(), entry.sharding_ignore_msb, token);
-            assert(shard_id < entry.streams.size());
-
-            if (!entry.streams[shard_id].is_set()) {
-                --spots_to_fill;
-                entry.streams[shard_id] = stream_id;
-            }
-        }
-
-        if (!spots_to_fill) {
-            return stop_iteration::yes;
-        }
-
-        auto now = std::chrono::system_clock::now();
-        auto passed = std::chrono::duration_cast<std::chrono::seconds>(now - start_time);
-        if (passed > quota) {
-            return stop_iteration::yes;
-        }
-
-        return stop_iteration::no;
-    }).get();
-
-    if (spots_to_fill) {
-        // We were not able to generate stream ids for each (token range, shard) pair.
-
-        // For each range that has a stream, for each shard for this range that doesn't have a stream,
-        // use the stream id of the next shard for this range.
-
-        // For each range that doesn't have any stream,
-        // use streams of the first range to the left which does have a stream.
-
-        cdc_log.warn("Generation of CDC streams failed to create streams for some (vnode, shard) pair."
-                     " This can lead to worse performance.");
-
-        stream_id some_stream;
-        size_t idx = 0;
-        for (; idx < entries.size(); ++idx) {
-            for (auto s: entries[idx].streams) {
-                if (s.is_set()) {
-                    some_stream = s;
-                    break;
-                }
-            }
-            if (some_stream.is_set()) {
-                break;
-            }
-        }
-
-        assert(idx != entries.size() && some_stream.is_set());
-
-        // Iterate over all ranges in the clockwise direction, starting with the one we found a stream for.
-        for (size_t off = 0; off < entries.size(); ++off) {
-            auto& ss = entries[(idx + off) % entries.size()].streams;
-
-            int last_set_stream_idx = ss.size() - 1;
-            while (last_set_stream_idx > -1 && !ss[last_set_stream_idx].is_set()) {
-                --last_set_stream_idx;
-            }
-
-            if (last_set_stream_idx == -1) {
-                cdc_log.warn(
-                        "CDC wasn't able to generate any stream for vnode ({}, {}]. We'll use another vnode's streams"
-                        " instead. This might lead to inconsistencies.",
-                        tokens[(idx + off + entries.size() - 1) % entries.size()], tokens[(idx + off) % entries.size()]);
-
-                ss[0] = some_stream;
-                last_set_stream_idx = 0;
-            }
-
-            some_stream = ss[last_set_stream_idx];
-
-            // Replace 'unset' stream ids with indexes below last_set_stream_idx
-            for (int s_idx = last_set_stream_idx - 1; s_idx > -1; --s_idx) {
-                if (ss[s_idx].is_set()) {
-                    some_stream = ss[s_idx];
-                } else {
-                    ss[s_idx] = some_stream;
-                }
-            }
-            // Replace 'unset' stream ids with indexes above last_set_stream_idx
-            for (int s_idx = ss.size() - 1; s_idx > last_set_stream_idx; --s_idx) {
-                if (ss[s_idx].is_set()) {
-                    some_stream = ss[s_idx];
-                } else {
-                    ss[s_idx] = some_stream;
-                }
+                throw std::runtime_error(
+                        format("Can't find endpoint for token {}", end));
            }
+            auto sc = get_shard_count(*endpoint, _gossiper);
+            return {sc > 0 ? sc : 1, get_sharding_ignore_msb(*endpoint, _gossiper)};
        }
    }

-    return {std::move(entries)};
-}
+    token_range_description create_description(dht::token start, dht::token end) const {
+        token_range_description desc;
+
+        desc.token_range_end = end;
+
+        auto [shard_count, ignore_msb] = get_sharding_info(end);
+        desc.streams.reserve(shard_count);
+        desc.sharding_ignore_msb = ignore_msb;
+
+        dht::sharder sharder(shard_count, ignore_msb);
+        for (size_t shard_idx = 0; shard_idx < shard_count; ++shard_idx) {
+            auto t = dht::find_first_token_for_shard(sharder, start, end, shard_idx);
+            desc.streams.push_back(create_stream_id(t));
+        }
+
+        return desc;
+    }
+public:
+    topology_description_generator(
+            const db::config& cfg,
+            const std::unordered_set<dht::token>& bootstrap_tokens,
+            const locator::token_metadata& token_metadata,
+            const gms::gossiper& gossiper)
+        : _cfg(cfg)
+        , _bootstrap_tokens(bootstrap_tokens)
+        , _token_metadata(token_metadata)
+        , _gossiper(gossiper)
+    {
+        if (_bootstrap_tokens.empty()) {
+            throw std::runtime_error(
+                    "cdc: bootstrap tokens is empty in generate_topology_description");
+        }
+    }
+
+    /*
+     * Generate a set of CDC stream identifiers such that for each shard
+     * and vnode pair there exists a stream whose token falls into this vnode
+     * and is owned by this shard. It is sometimes not possible to generate
+     * a CDC stream identifier for some (vnode, shard) pair because not all
+     * shards have to own tokens in a vnode: a small vnode can be owned
+     * entirely by a single shard. In such a case, a stream identifier that
+     * maps to the end of the vnode is generated.
+     *
+     * Then build a cdc::topology_description which maps tokens to generated
+     * stream identifiers, such that if token T is owned by shard S in vnode V,
+     * it gets mapped to the stream identifier generated for (S, V).
+     */
+    // Run in seastar::async context.
+    topology_description generate() const {
+        const auto tokens = get_tokens();
+
+        std::vector<token_range_description> vnode_descriptions;
+        vnode_descriptions.reserve(tokens.size());
+
+        vnode_descriptions.push_back(
+                create_description(tokens.back(), tokens.front()));
+        for (size_t idx = 1; idx < tokens.size(); ++idx) {
+            vnode_descriptions.push_back(
+                    create_description(tokens[idx - 1], tokens[idx]));
+        }
+
+        return {std::move(vnode_descriptions)};
+    }
+};

 bool should_propose_first_generation(const gms::inet_address& me, const gms::gossiper& g) {
    auto my_host_id = g.get_host_id(me);
@@ -321,7 +259,7 @@ db_clock::time_point make_new_cdc_generation(
        bool for_testing) {
    assert(!bootstrap_tokens.empty());

-    auto gen = generate_topology_description(cfg, bootstrap_tokens, tm, g);
+    auto gen = topology_description_generator(cfg, bootstrap_tokens, tm, g).generate();

    // Begin the race.
    auto ts = db_clock::now() + (
@@ -335,12 +273,7 @@ db_clock::time_point make_new_cdc_generation(
 std::optional<db_clock::time_point> get_streams_timestamp_for(const gms::inet_address& endpoint, const gms::gossiper& g) {
    auto streams_ts_string = g.get_application_state_value(endpoint, gms::application_state::CDC_STREAMS_TIMESTAMP);
    cdc_log.trace("endpoint={}, streams_ts_string={}", endpoint, streams_ts_string);
-
-    if (streams_ts_string.empty()) {
-        return {};
-    }
-
-    return db_clock::time_point(db_clock::duration(std::stoll(streams_ts_string)));
+    return gms::versioned_value::cdc_streams_timestamp_from_string(streams_ts_string);
 }

 // Run inside seastar::async context.
--- a/cdc/generation.hh
+++ b/cdc/generation.hh
@@ -77,6 +77,7 @@ public:
    const bytes& to_bytes() const;

    partition_key to_partition_key(const schema& log_schema) const;
+    static int64_t token_from_bytes(bytes_view);
 };

 /* Describes a mapping of tokens to CDC streams in a token range.
@@ -129,7 +130,7 @@ bool should_propose_first_generation(const gms::inet_address& me, const gms::gos
 */
 future<db_clock::time_point> get_local_streams_timestamp();

-/* Generate a new set of CDC streams and insert it into the distributed cdc_topology_description table.
+/* Generate a new set of CDC streams and insert it into the distributed cdc_generations table.
 * Returns the timestamp of this new generation.
 *
 * Should be called when starting the node for the first time (i.e., joining the ring).
@@ -158,9 +159,9 @@ db_clock::time_point make_new_cdc_generation(
 std::optional<db_clock::time_point> get_streams_timestamp_for(const gms::inet_address& endpoint, const gms::gossiper&);

 /* Inform CDC users about a generation of streams (identified by the given timestamp)
- * by inserting it into the cdc_description table.
+ * by inserting it into the cdc_streams table.
 *
- * Assumes that the cdc_topology_description table contains this generation.
+ * Assumes that the cdc_generations table contains this generation.
 *
 * Returning from this function does not mean that the table update was successful: the function
 * might run an asynchronous task in the background.
--- a/cdc/log.cc
+++ b/cdc/log.cc
@@ -239,7 +239,8 @@ public:
    future<std::tuple<std::vector<mutation>, lw_shared_ptr<cdc::operation_result_tracker>>> augment_mutation_call(
        lowres_clock::time_point timeout,
        std::vector<mutation>&& mutations,
-        tracing::trace_state_ptr tr_state
+        tracing::trace_state_ptr tr_state,
+        db::consistency_level write_cl
    );

    template<typename Iter>
@@ -390,6 +391,7 @@ bytes log_data_column_deleted_elements_name_bytes(const bytes& column_name) {

 static schema_ptr create_log_schema(const schema& s, std::optional<utils::UUID> uuid) {
    schema_builder b(s.ks_name(), log_name(s.cf_name()));
+    b.with_partitioner("com.scylladb.dht.CDCPartitioner");
    b.set_comment(sprint("CDC log for %s.%s", s.ks_name(), s.cf_name()));
    b.with_column(log_meta_column_name_bytes("stream_id"), bytes_type, column_kind::partition_key);
    b.with_column(log_meta_column_name_bytes("time"), timeuuid_type, column_kind::clustering_key);
@@ -399,9 +401,9 @@ static schema_ptr create_log_schema(const schema& s, std::optional<utils::UUID>
    auto add_columns = [&] (const schema::const_iterator_range_type& columns, bool is_data_col = false) {
        for (const auto& column : columns) {
            auto type = column.type;
-            if (is_data_col) {
+            if (is_data_col && type->is_multi_cell()) {
                type = visit(*type, make_visitor(
-                    // lists are represented as map<timeuuid, value_type>. Otherwise we cannot express delta
+                    // non-frozen lists are represented as map<timeuuid, value_type>. Otherwise we cannot express delta
                    [] (const list_type_impl& type) -> data_type {
                        return map_type_impl::get_instance(type.name_comparator(), type.value_comparator(), false);
                    },
@@ -410,7 +412,6 @@ static schema_ptr create_log_schema(const schema& s, std::optional<utils::UUID>
                        return type.freeze();
                    }
                ));
-                type = type->freeze();
            }
            b.with_column(log_data_column_name_bytes(column.name()), type);
            if (is_data_col) {
@@ -715,6 +716,19 @@ private:
    const column_definition& _op_col;
    const column_definition& _ttl_col;
    ttl_opt _cdc_ttl_opt;
+    /**
+     * #6070
+     * When mutation splitting was added, non-atomic column assignments were broken
+     * into two invocations of transform. This means the second one (the actual data
+     * assignment) does not know about the tombstone in the first one -> the postimage
+     * is created as if we were _adding_ to the collection, not replacing it.
+     *
+     * Not pretty, but to handle this we rely on the fact that we always get
+     * invoked in timestamp order -> tombstone first, then assign.
+     * So we simply keep track of non-atomic columns deleted across calls
+     * and filter out their preimage data from then on.
+     */
+    std::unordered_set<const column_definition*> _non_atomic_column_deletes;

    clustering_key set_pk_columns(const partition_key& pk, api::timestamp_type ts, bytes decomposed_tuuid, int batch_no, mutation& m) const {
        const auto log_ck = clustering_key::from_exploded(
@@ -816,18 +830,18 @@ public:

    // TODO: is pre-image data based on query enough. We only have actual column data. Do we need
    // more details like tombstones/ttl? Probably not but keep in mind.
-    std::tuple<mutation, stats::part_type_set> transform(const mutation& m, const cql3::untyped_result_set* rs, api::timestamp_type ts, bytes tuuid, int& batch_no) const {
+    std::tuple<mutation, stats::part_type_set> transform(const mutation& m, const cql3::untyped_result_set* rs, api::timestamp_type ts, bytes tuuid, int& batch_no) {
        auto stream_id = _ctx._cdc_metadata.get_stream(ts, m.token());
        mutation res(_log_schema, stream_id.to_partition_key(*_log_schema));
+        const auto preimage = _schema->cdc_options().preimage();
        const auto postimage = _schema->cdc_options().postimage();
        stats::part_type_set touched_parts;
        auto& p = m.partition();
        if (p.partition_tombstone()) {
            // Partition deletion
            touched_parts.set<stats::part_type::PARTITION_DELETE>();
-            auto log_ck = set_pk_columns(m.key(), ts, tuuid, 0, res);
+            auto log_ck = set_pk_columns(m.key(), ts, tuuid, batch_no++, res);
            set_operation(log_ck, ts, operation::partition_delete, res);
-            ++batch_no;
        } else if (!p.row_tombstones().empty()) {
            // range deletion
            touched_parts.set<stats::part_type::RANGE_TOMBSTONE>();
@@ -849,37 +863,30 @@ public:
                    }
                };
                {
-                    auto log_ck = set_pk_columns(m.key(), ts, tuuid, batch_no, res);
+                    auto log_ck = set_pk_columns(m.key(), ts, tuuid, batch_no++, res);
                    set_bound(log_ck, rt.start);
                    const auto start_operation = rt.start_kind == bound_kind::incl_start
                            ? operation::range_delete_start_inclusive
                            : operation::range_delete_start_exclusive;
                    set_operation(log_ck, ts, start_operation, res);
-                    ++batch_no;
                }
                {
-                    auto log_ck = set_pk_columns(m.key(), ts, tuuid, batch_no, res);
+                    auto log_ck = set_pk_columns(m.key(), ts, tuuid, batch_no++, res);
                    set_bound(log_ck, rt.end);
                    const auto end_operation = rt.end_kind == bound_kind::incl_end
                            ? operation::range_delete_end_inclusive
                            : operation::range_delete_end_exclusive;
                    set_operation(log_ck, ts, end_operation, res);
-                    ++batch_no;
                }
            }
        } else {
            // should be insert, update or deletion
            auto process_cells = [&](const row& r, column_kind ckind, const clustering_key& log_ck, std::optional<clustering_key> pikey, const cql3::untyped_result_set_row* pirow, std::optional<clustering_key> poikey) -> std::optional<gc_clock::duration> {
-                if (postimage && !poikey) {
-                    poikey = set_pk_columns(m.key(), ts, tuuid, ++batch_no, res);
-                    set_operation(*poikey, ts, operation::post_image, res);
-                }
                std::optional<gc_clock::duration> ttl;
                std::unordered_set<column_id> columns_assigned;
                r.for_each_cell([&](column_id id, const atomic_cell_or_collection& cell) {
                    auto& cdef = _schema->column_at(ckind, id);
                    auto* dst = _log_schema->get_column_definition(log_data_column_name_bytes(cdef.name()));
-                    auto has_pirow = pirow && pirow->has(cdef.name_as_text());
                    bool is_column_delete = true;
                    bytes_opt value;
                    bytes_opt deleted_elements = std::nullopt;
@@ -1000,29 +1007,30 @@ public:
                        }
                    }

-                    if (is_column_delete) {
-                        res.set_cell(log_ck, log_data_column_deleted_name_bytes(cdef.name()), data_value(true), ts, _cdc_ttl_opt);
-                    }
-                    if (value) {
-                        res.set_cell(log_ck, *dst, atomic_cell::make_live(*dst->type, ts, *value, _cdc_ttl_opt));
-                    }
+                    bytes_opt prev = get_preimage_col_value(cdef, pirow);

-                    bytes_opt prev;
-
-                    if (has_pirow) {
-                        prev = get_preimage_col_value(cdef, pirow);
+                    if (prev && pikey) {
                        assert(std::addressof(res.partition().clustered_row(*_log_schema, *pikey)) != std::addressof(res.partition().clustered_row(*_log_schema, log_ck)));
                        assert(pikey->explode() != log_ck.explode());
                        res.set_cell(*pikey, *dst, atomic_cell::make_live(*dst->type, ts, *prev, _cdc_ttl_opt));
                    }

-                    if (postimage) {
+                    if (is_column_delete) {
+                        res.set_cell(log_ck, log_data_column_deleted_name_bytes(cdef.name()), data_value(true), ts, _cdc_ttl_opt);
+                        if (!cdef.is_atomic()) {
+                            _non_atomic_column_deletes.insert(&cdef);
+                        }
+                        // don't merge with pre-image iff column delete
+                        prev = std::nullopt;
+                    }
+
+                    if (value) {
+                        res.set_cell(log_ck, *dst, atomic_cell::make_live(*dst->type, ts, *value, _cdc_ttl_opt));
+                    }
+
+                    if (poikey) {
                        // keep track of actually assigning this already
                        columns_assigned.emplace(id);
-                        // don't merge with pre-image iff column delete
-                        if (is_column_delete) {
-                            prev = std::nullopt;
-                        }
                        if (cdef.is_atomic() && !is_column_delete && value) {
                            res.set_cell(*poikey, *dst, atomic_cell::make_live(*dst->type, ts, *value, _cdc_ttl_opt));
                        } else if (!cdef.is_atomic() && (value || (deleted_elements && prev))) {
@@ -1035,10 +1043,10 @@ public:
                });

                // fill in all columns not already processed. Note that column nulls are also marked.
-                if (postimage && pirow) {
+                if (poikey && pirow) {
                    for (auto& cdef : _schema->columns(ckind)) {
                        if (!columns_assigned.count(cdef.id)) {
-                            auto v = pirow->get_view_opt(cdef.name_as_text());
+                            auto v = get_preimage_col_value(cdef, pirow);
                            if (v) {
                                auto dst = _log_schema->get_column_definition(log_data_column_name_bytes(cdef.name()));
                                res.set_cell(*poikey, *dst, atomic_cell::make_live(*dst->type, ts, *v, _cdc_ttl_opt));
@@ -1057,16 +1065,18 @@ public:

                if (rs && !rs->empty()) {
                    // For static rows, only one row from the result set is needed
-                    pikey = set_pk_columns(m.key(), ts, tuuid, batch_no, res);
-                    set_operation(*pikey, ts, operation::pre_image, res);
                    pirow = &rs->front();
-                    ++batch_no;
                }

-                auto log_ck = set_pk_columns(m.key(), ts, tuuid, batch_no, res);
+                if (preimage && pirow) {
+                    pikey = set_pk_columns(m.key(), ts, tuuid, batch_no++, res);
+                    set_operation(*pikey, ts, operation::pre_image, res);
+                }
+
+                auto log_ck = set_pk_columns(m.key(), ts, tuuid, batch_no++, res);

                if (postimage) {
-                     poikey = set_pk_columns(m.key(), ts, tuuid, ++batch_no, res);
+                     poikey = set_pk_columns(m.key(), ts, tuuid, batch_no++, res);
                     set_operation(*poikey, ts, operation::post_image, res);
                }

@@ -1077,7 +1087,6 @@ public:
                if (ttl) {
                    set_ttl(log_ck, ts, *ttl, res);
                }
-                ++batch_no;
            } else {
                touched_parts.set_if<stats::part_type::CLUSTERING_ROW>(!p.clustered_rows().empty());
                for (const rows_entry& r : p.clustered_rows()) {
@@ -1098,19 +1107,21 @@ public:
                                }
                            }
                            if (match) {
-                                pikey = set_pk_columns(m.key(), ts, tuuid, batch_no, res);
-                                set_operation(*pikey, ts, operation::pre_image, res);
                                pirow = &utr;
-                                ++batch_no;
                                break;
                            }
                        }
                    }

-                    auto log_ck = set_pk_columns(m.key(), ts, tuuid, batch_no, res);
+                    if (preimage && pirow) {
+                        pikey = set_pk_columns(m.key(), ts, tuuid, batch_no++, res);
+                        set_operation(*pikey, ts, operation::pre_image, res);
+                    }
+
+                    auto log_ck = set_pk_columns(m.key(), ts, tuuid, batch_no++, res);

                    if (postimage) {
-                        poikey = set_pk_columns(m.key(), ts, tuuid, ++batch_no, res);
+                        poikey = set_pk_columns(m.key(), ts, tuuid, batch_no++, res);
                        set_operation(*poikey, ts, operation::post_image, res);
                    }

@@ -1120,7 +1131,7 @@ public:
                        auto cdef = _log_schema->get_column_definition(log_data_column_name_bytes(column.name()));
                        res.set_cell(log_ck, *cdef, atomic_cell::make_live(*column.type, ts, bytes_view(ck_value[pos]), _cdc_ttl_opt));

-                        if (pirow) {
+                        if (pikey) {
                            assert(pirow->has(column.name_as_text()));
                            res.set_cell(*pikey, *cdef, atomic_cell::make_live(*column.type, ts, bytes_view(ck_value[pos]), _cdc_ttl_opt));
                        }
@@ -1135,12 +1146,12 @@ public:
                    if (r.row().deleted_at()) {
                        touched_parts.set<stats::part_type::ROW_DELETE>();
                        cdc_op = operation::row_delete;
-                        if (pirow) {
+                        if (pirow && pikey) {
                            for (const column_definition& column: _schema->regular_columns()) {
                                assert(pirow->has(column.name_as_text()));
                                auto& cdef = *_log_schema->get_column_definition(log_data_column_name_bytes(column.name()));
-                                auto value = get_preimage_col_value(column, pirow);
-                                res.set_cell(*pikey, cdef, atomic_cell::make_live(*column.type, ts, bytes_view(value), _cdc_ttl_opt));
+                                auto value = get_preimage_col_value(column, pirow);                                
+                                res.set_cell(*pikey, cdef, atomic_cell::make_live(*column.type, ts, bytes_view(*value), _cdc_ttl_opt));
                            }
                        }
                    } else {
@@ -1157,7 +1168,6 @@ public:
                        }
                    }
                    set_operation(log_ck, ts, cdc_op, res);
-                    ++batch_no;
                }
            }
        }
@@ -1165,7 +1175,13 @@ public:
        return std::make_tuple(std::move(res), touched_parts);
    }

-    static bytes get_preimage_col_value(const column_definition& cdef, const cql3::untyped_result_set_row *pirow) {
+    bytes_opt get_preimage_col_value(const column_definition& cdef, const cql3::untyped_result_set_row *pirow) {
+        /**
+         * #6070 - see comment for _non_atomic_column_deletes
+         */
+        if (!pirow || !pirow->has(cdef.name_as_text()) || _non_atomic_column_deletes.count(&cdef)) {
+            return std::nullopt;
+        }
        return cdef.is_atomic()
            ? pirow->get_blob(cdef.name_as_text())
            : visit(*cdef.type, make_visitor(
@@ -1194,7 +1210,7 @@ public:

    future<lw_shared_ptr<cql3::untyped_result_set>> pre_image_select(
            service::client_state& client_state,
-            db::consistency_level cl,
+            db::consistency_level write_cl,
            const mutation& m)
    {
        auto& p = m.partition();
@@ -1275,7 +1291,10 @@ public:
        auto partition_slice = query::partition_slice(std::move(bounds), std::move(static_columns), std::move(regular_columns), std::move(opts));
        auto command = ::make_lw_shared<query::read_command>(_schema->id(), _schema->version(), partition_slice, row_limit);

-        return _ctx._proxy.query(_schema, std::move(command), std::move(partition_ranges), cl, service::storage_proxy::coordinator_query_options(default_timeout(), empty_service_permit(), client_state)).then(
+        const auto select_cl = adjust_cl(write_cl);
+
+      try {
+        return _ctx._proxy.query(_schema, std::move(command), std::move(partition_ranges), select_cl, service::storage_proxy::coordinator_query_options(default_timeout(), empty_service_permit(), client_state)).then(
                [s = _schema, partition_slice = std::move(partition_slice), selection = std::move(selection)] (service::storage_proxy::coordinator_query_result qr) -> lw_shared_ptr<cql3::untyped_result_set> {
                    cql3::selection::result_set_builder builder(*selection, gc_clock::now(), cql_serialization_format::latest());
                    query::result_view::consume(*qr.query_result, partition_slice, cql3::selection::result_set_builder::visitor(builder, *s, *selection));
@@ -1285,6 +1304,25 @@ public:
                    }
                    return make_lw_shared<cql3::untyped_result_set>(*result_set);
        });
+      } catch (exceptions::unavailable_exception& e) {
+        // `query` can throw `unavailable_exception`, which is seen by clients as ~ "NoHostAvailable". 
+        // So, we'll translate it to a `read_failure_exception` with custom message.
+        cdc_log.debug("Preimage: translating a (read) `unavailable_exception` to `request_execution_exception` - {}", e);
+        throw exceptions::read_failure_exception("CDC preimage query could not achieve the CL.",
+                e.consistency, e.alive, 0, e.required, false);
+      }
+    }
+
+    /** For the preimage query use the same CL as for the base write, except that ANY maps to ONE, ALL and SERIAL map to QUORUM, and LOCAL_SERIAL maps to LOCAL_QUORUM. */
+    static db::consistency_level adjust_cl(db::consistency_level write_cl) {
+        if (write_cl == db::consistency_level::ANY) {
+            return db::consistency_level::ONE;
+        } else if (write_cl == db::consistency_level::ALL || write_cl == db::consistency_level::SERIAL) {
+	        return db::consistency_level::QUORUM;
+        } else if (write_cl == db::consistency_level::LOCAL_SERIAL) {
+	        return db::consistency_level::LOCAL_QUORUM;
+        }
+        return write_cl;
    }
 };

@@ -1300,7 +1338,7 @@ transform_mutations(std::vector<mutation>& muts, decltype(muts.size()) batch_siz
 } // namespace cdc

 future<std::tuple<std::vector<mutation>, lw_shared_ptr<cdc::operation_result_tracker>>>
-cdc::cdc_service::impl::augment_mutation_call(lowres_clock::time_point timeout, std::vector<mutation>&& mutations, tracing::trace_state_ptr tr_state) {
+cdc::cdc_service::impl::augment_mutation_call(lowres_clock::time_point timeout, std::vector<mutation>&& mutations, tracing::trace_state_ptr tr_state, db::consistency_level write_cl) {
    // we do all this because in the case of batches, we can have mixed schemas.
    auto e = mutations.end();
    auto i = std::find_if(mutations.begin(), e, [](const mutation& m) {
@@ -1315,8 +1353,8 @@ cdc::cdc_service::impl::augment_mutation_call(lowres_clock::time_point timeout,
    mutations.reserve(2 * mutations.size());

    return do_with(std::move(mutations), service::query_state(service::client_state::for_internal_calls(), empty_service_permit()), operation_details{},
-            [this, timeout, i, tr_state = std::move(tr_state)] (std::vector<mutation>& mutations, service::query_state& qs, operation_details& details) {
-        return transform_mutations(mutations, 1, [this, &mutations, timeout, &qs, tr_state = tr_state, &details] (int idx) mutable {
+            [this, timeout, i, tr_state = std::move(tr_state), write_cl] (std::vector<mutation>& mutations, service::query_state& qs, operation_details& details) {
+        return transform_mutations(mutations, 1, [this, &mutations, timeout, &qs, tr_state = tr_state, &details, write_cl] (int idx) mutable {
            auto& m = mutations[idx];
            auto s = m.schema();

@@ -1332,7 +1370,7 @@ cdc::cdc_service::impl::augment_mutation_call(lowres_clock::time_point timeout,
                // iff a batch contains several modifications to the same table. Otoh, batch is rare(?)
                // so this is premature.
                tracing::trace(tr_state, "CDC: Selecting preimage for {}", m.decorated_key());
-                f = trans.pre_image_select(qs.get_client_state(), db::consistency_level::LOCAL_QUORUM, m).then_wrapped([this] (future<lw_shared_ptr<cql3::untyped_result_set>> f) {
+                f = trans.pre_image_select(qs.get_client_state(), write_cl, m).then_wrapped([this] (future<lw_shared_ptr<cql3::untyped_result_set>> f) {
                    auto& cdc_stats = _ctxt._proxy.get_cdc_stats();
                    cdc_stats.counters_total.preimage_selects++;
                    if (f.failed()) {
@@ -1344,7 +1382,7 @@ cdc::cdc_service::impl::augment_mutation_call(lowres_clock::time_point timeout,
                tracing::trace(tr_state, "CDC: Preimage not enabled for the table, not querying current value of {}", m.decorated_key());
            }

-            return f.then([trans = std::move(trans), &mutations, idx, tr_state = std::move(tr_state), &details] (lw_shared_ptr<cql3::untyped_result_set> rs) {
+            return f.then([trans = std::move(trans), &mutations, idx, tr_state = std::move(tr_state), &details] (lw_shared_ptr<cql3::untyped_result_set> rs) mutable {
                auto& m = mutations[idx];
                auto& s = m.schema();
                details.had_preimage |= s->cdc_options().preimage();
@@ -1389,6 +1427,6 @@ bool cdc::cdc_service::needs_cdc_augmentation(const std::vector<mutation>& mutat
 }

 future<std::tuple<std::vector<mutation>, lw_shared_ptr<cdc::operation_result_tracker>>>
-cdc::cdc_service::augment_mutation_call(lowres_clock::time_point timeout, std::vector<mutation>&& mutations, tracing::trace_state_ptr tr_state) {
-    return _impl->augment_mutation_call(timeout, std::move(mutations), std::move(tr_state));
+cdc::cdc_service::augment_mutation_call(lowres_clock::time_point timeout, std::vector<mutation>&& mutations, tracing::trace_state_ptr tr_state, db::consistency_level write_cl) {
+    return _impl->augment_mutation_call(timeout, std::move(mutations), std::move(tr_state), write_cl);
 }
--- a/cdc/log.hh
+++ b/cdc/log.hh
@@ -91,7 +91,8 @@ public:
    future<std::tuple<std::vector<mutation>, lw_shared_ptr<operation_result_tracker>>> augment_mutation_call(
        lowres_clock::time_point timeout,
        std::vector<mutation>&& mutations,
-        tracing::trace_state_ptr tr_state
+        tracing::trace_state_ptr tr_state,
+        db::consistency_level write_cl
        );
    bool needs_cdc_augmentation(const std::vector<mutation>&) const;
 };
--- a/checked-file-impl.hh
+++ b/checked-file-impl.hh
@@ -22,7 +22,7 @@
 #pragma once

 #include "seastar/core/file.hh"
-#include "seastar/core/reactor.hh"
+#include "seastar/core/seastar.hh"
 #include "utils/disk-error-handler.hh"

 #include "seastarx.hh"
@@ -147,7 +147,7 @@ inline open_checked_directory(const io_error_handler& error_handler,
                              sstring name)
 {
    return do_io_check(error_handler, [&] {
-        return engine().open_directory(name).then([&] (file f) {
+        return open_directory(name).then([&] (file f) {
            return make_ready_future<file>(make_checked_file(error_handler, f));
        });
    });
--- a/clocks-impl.cc
+++ b/clocks-impl.cc
@@ -30,10 +30,12 @@ std::atomic<int64_t> clocks_offset;

 std::ostream& operator<<(std::ostream& os, db_clock::time_point tp) {
    auto t = db_clock::to_time_t(tp);
-    return os << std::put_time(std::gmtime(&t), "%Y/%m/%d %T");
+    ::tm t_buf;
+    return os << std::put_time(::gmtime_r(&t, &t_buf), "%Y/%m/%d %T");
 }

 std::string format_timestamp(api::timestamp_type ts) {
    auto t = std::time_t(std::chrono::duration_cast<std::chrono::seconds>(api::timestamp_clock::duration(ts)).count());
-    return format("{}", std::put_time(std::gmtime(&t), "%Y/%m/%d %T"));
+    ::tm t_buf;
+    return format("{}", std::put_time(::gmtime_r(&t, &t_buf), "%Y/%m/%d %T"));
 }
--- a/compaction_strategy.hh
+++ b/compaction_strategy.hh
@@ -140,6 +140,9 @@ public:
    uint64_t adjust_partition_estimate(const mutation_source_metadata& ms_meta, uint64_t partition_estimate);

    reader_consumer make_interposer_consumer(const mutation_source_metadata& ms_meta, reader_consumer end_consumer);
+
+    // Returns whether or not interposer consumer is used by a given strategy.
+    bool use_interposer_consumer() const;
 };

 // Creates a compaction_strategy object from one of the strategies available.
--- a/compound_compat.hh
+++ b/compound_compat.hh
@@ -27,6 +27,9 @@
 #include "schema.hh"
 #include "sstables/version.hh"

+//FIXME: de-inline methods and define this as static in a .cc file.
+extern logging::logger compound_logger;
+
 //
 // This header provides adaptors between the representation used by our compound_type<>
 // and representation used by Origin.
@@ -337,8 +340,9 @@ public:
    class iterator : public std::iterator<std::input_iterator_tag, const component_view> {
        bytes_view _v;
        component_view _current;
+        bool _strict_mode = true;
    private:
-        void read_current() {
+        void do_read_current() {
            size_type len;
            {
                if (_v.empty()) {
@@ -354,11 +358,23 @@ public:
            _v.remove_prefix(len);
            _current = component_view(std::move(value), to_eoc(read_simple<eoc_type>(_v)));
        }
-    public:
+        void read_current() {
+            try {
+                do_read_current();
+            } catch (marshal_exception&) {
+                if (_strict_mode) {
+                    on_internal_error(compound_logger, std::current_exception());
+                } else {
+                    throw;
+                }
+            }
+        }
+
        struct end_iterator_tag {};

-        iterator(const bytes_view& v, bool is_compound, bool is_static)
-                : _v(v) {
+        // In strict-mode de-serialization errors will invoke `on_internal_error()`.
+        iterator(const bytes_view& v, bool is_compound, bool is_static, bool strict_mode = true)
+                : _v(v), _strict_mode(strict_mode) {
            if (is_static) {
                _v.remove_prefix(2);
            }
@@ -372,6 +388,7 @@ public:

        iterator(end_iterator_tag) : _v(nullptr, 0) {}

+    public:
        iterator& operator++() {
            read_current();
            return *this;
@@ -387,6 +404,9 @@ public:
        const value_type* operator->() const { return &_current; }
        bool operator!=(const iterator& i) const { return _v.begin() != i._v.begin(); }
        bool operator==(const iterator& i) const { return _v.begin() == i._v.begin(); }
+
+        friend class composite;
+        friend class composite_view;
    };

    iterator begin() const {
@@ -555,6 +575,21 @@ public:
        return composite::is_static(_bytes, _is_compound);
    }

+    bool is_valid() const {
+        try {
+            auto it = composite::iterator(_bytes, _is_compound, is_static(), false);
+            const auto end = composite::iterator(composite::iterator::end_iterator_tag());
+            size_t s = 0;
+            for (; it != end; ++it) {
+                auto& c = *it;
+                s += c.first.size() + sizeof(composite::size_type) + sizeof(composite::eoc_type);
+            }
+            return s == _bytes.size();
+        } catch (marshal_exception&) {
+            return false;
+        }
+    }
+
    explicit operator bytes_view() const {
        return _bytes;
    }
--- a/configure.py
+++ b/configure.py
@@ -253,11 +253,11 @@ modes = {
    },
    'release': {
        'cxxflags': '',
-        'cxx_ld_flags': '-O3 -Wstack-usage=%s' % (1024*29),
+        'cxx_ld_flags': '-O3 -Wstack-usage=%s' % (1024*13),
    },
    'dev': {
        'cxxflags': '-DSEASTAR_ENABLE_ALLOC_FAILURE_INJECTION -DSCYLLA_ENABLE_ERROR_INJECTION',
-        'cxx_ld_flags': '-O1 -Wstack-usage=%s' % (1024*29),
+        'cxx_ld_flags': '-O1 -Wstack-usage=%s' % (1024*21),
    },
    'sanitize': {
        'cxxflags': '-DDEBUG -DDEBUG_LSA_SANITIZER -DSCYLLA_ENABLE_ERROR_INJECTION',
@@ -381,6 +381,7 @@ scylla_tests = set([
    'test/boost/view_schema_ckey_test',
    'test/boost/vint_serialization_test',
    'test/boost/virtual_reader_test',
+    'test/boost/stall_free_test',
    'test/manual/ec2_snitch_test',
    'test/manual/gce_snitch_test',
    'test/manual/gossip',
@@ -418,6 +419,7 @@ perf_tests = set([
 apps = set([
    'scylla',
    'test/tools/cql_repl',
+    'tools/scylla_types',
 ])

 tests = scylla_tests | perf_tests
@@ -438,6 +440,7 @@ arg_parser.add_argument('--so', dest='so', action='store_true',
                        help='Build shared object (SO) instead of executable')
 arg_parser.add_argument('--mode', action='append', choices=list(modes.keys()), dest='selected_modes')
 arg_parser.add_argument('--with', dest='artifacts', action='append', choices=all_artifacts, default=[])
+arg_parser.add_argument('--with-seastar', action='store', dest='seastar_path', default='seastar', help='Path to Seastar sources')
 arg_parser.add_argument('--cflags', action='store', dest='user_cflags', default='',
                        help='Extra flags for the C++ compiler')
 arg_parser.add_argument('--ldflags', action='store', dest='user_ldflags', default='',
@@ -468,8 +471,6 @@ arg_parser.add_argument('--tests-debuginfo', action='store', dest='tests_debugin
                        help='Enable(1)/disable(0)compiler debug information generation for tests')
 arg_parser.add_argument('--python', action='store', dest='python', default='python3',
                        help='Python3 path')
-add_tristate(arg_parser, name='hwloc', dest='hwloc', help='hwloc support')
-add_tristate(arg_parser, name='xen', dest='xen', help='Xen support')
 arg_parser.add_argument('--split-dwarf', dest='split_dwarf', action='store_true', default=False,
                        help='use of split dwarf (https://gcc.gnu.org/wiki/DebugFission) to speed up linking')
 arg_parser.add_argument('--enable-gcc6-concepts', dest='gcc6_concepts', action='store_true', default=False,
@@ -540,6 +541,7 @@ scylla_core = (['database.cc',
                'sstables/compaction_strategy.cc',
                'sstables/size_tiered_compaction_strategy.cc',
                'sstables/leveled_compaction_strategy.cc',
+                'sstables/time_window_compaction_strategy.cc',
                'sstables/compaction_manager.cc',
                'sstables/integrity_checked_file_impl.cc',
                'sstables/prepended_input_stream.cc',
@@ -548,6 +550,7 @@ scylla_core = (['database.cc',
                'transport/event_notifier.cc',
                'transport/server.cc',
                'transport/messages/result_message.cc',
+                'cdc/cdc_partitioner.cc',
                'cdc/log.cc',
                'cdc/split.cc',
                'cdc/generation.cc',
@@ -786,6 +789,7 @@ scylla_core = (['database.cc',
                'utils/like_matcher.cc',
                'utils/error_injection.cc',
                'mutation_writer/timestamp_based_splitting_writer.cc',
+                'mutation_writer/shard_based_splitting_writer.cc',
                'lua.cc',
                ] + [Antlr3Grammar('cql3/Cql.g')] + [Thrift('interface/cassandra.thrift', 'Cassandra')]
               )
@@ -897,6 +901,7 @@ scylla_tests_dependencies = scylla_core + idls + scylla_tests_generic_dependenci
    'test/lib/cql_assertions.cc',
    'test/lib/result_set_assertions.cc',
    'test/lib/mutation_source_test.cc',
+    'test/lib/sstable_utils.cc',
    'test/lib/data_model.cc',
    'test/lib/exception_utils.cc',
    'test/lib/random_schema.cc',
@@ -905,6 +910,8 @@ scylla_tests_dependencies = scylla_core + idls + scylla_tests_generic_dependenci
 deps = {
    'scylla': idls + ['main.cc', 'release.cc', 'build_id.cc'] + scylla_core + api + alternator + redis,
    'test/tools/cql_repl': idls + ['test/tools/cql_repl.cc'] + scylla_core + scylla_tests_generic_dependencies,
+    #FIXME: we don't need all of scylla_core here, only the types module, need to modularize scylla_core.
+    'tools/scylla_types': idls + ['tools/scylla_types.cc'] + scylla_core,
 }

 pure_boost_tests = set([
@@ -952,11 +959,9 @@ tests_not_using_seastar_test_framework = set([
    'test/perf/perf_hash',
    'test/perf/perf_mutation',
    'test/perf/perf_row_cache_update',
-    'test/perf/perf_sstable',
    'test/unit/lsa_async_eviction_test',
    'test/unit/lsa_sync_eviction_test',
    'test/unit/row_cache_alloc_stress_test',
-    'test/unit/row_cache_stress_test',
    'test/manual/sstable_scan_footprint_test',
 ]) | pure_boost_tests

@@ -978,13 +983,10 @@ perf_tests_seastar_deps = [
 for t in perf_tests:
    deps[t] = [t + '.cc'] + scylla_tests_dependencies + perf_tests_seastar_deps

-deps['test/boost/sstable_test'] += ['test/lib/sstable_utils.cc', 'test/lib/normalizing_reader.cc']
-deps['test/boost/sstable_datafile_test'] += ['test/lib/sstable_utils.cc', 'test/lib/normalizing_reader.cc']
-deps['test/boost/sstable_resharding_test'] += ['test/lib/sstable_utils.cc' ]
-deps['test/boost/mutation_reader_test'] += ['test/lib/sstable_utils.cc', 'test/lib/dummy_partitioner.cc' ]
-deps['test/boost/multishard_combining_reader_as_mutation_source_test'] += ['test/lib/sstable_utils.cc', 'test/lib/dummy_partitioner.cc' ]
-deps['test/boost/sstable_mutation_test'] += ['test/lib/sstable_utils.cc']
-deps['test/boost/sstable_conforms_to_mutation_source_test'] += ['test/lib/sstable_utils.cc']
+deps['test/boost/sstable_test'] += ['test/lib/normalizing_reader.cc']
+deps['test/boost/sstable_datafile_test'] += ['test/lib/normalizing_reader.cc']
+deps['test/boost/mutation_reader_test'] += ['test/lib/dummy_sharder.cc' ]
+deps['test/boost/multishard_combining_reader_as_mutation_source_test'] += ['test/lib/dummy_sharder.cc' ]

 deps['test/boost/bytes_ostream_test'] = [
    "test/boost/bytes_ostream_test.cc",
@@ -1234,11 +1236,11 @@ def configure_seastar(build_dir, mode):
    if args.alloc_failure_injector:
        seastar_cmake_args += ['-DSeastar_ALLOC_FAILURE_INJECTION=ON']

-    seastar_cmd = ['cmake', '-G', 'Ninja', os.path.relpath('seastar', seastar_build_dir)] + seastar_cmake_args
+    seastar_cmd = ['cmake', '-G', 'Ninja', os.path.relpath(args.seastar_path, seastar_build_dir)] + seastar_cmake_args
    cmake_dir = seastar_build_dir
    if args.dpdk:
        # need to cook first
-        cmake_dir = 'seastar' # required by cooking.sh
+        cmake_dir = args.seastar_path # required by cooking.sh
        relative_seastar_build_dir = os.path.join('..', seastar_build_dir)  # relative to seastar/
        seastar_cmd = ['./cooking.sh', '-i', 'dpdk', '-d', relative_seastar_build_dir, '--'] + seastar_cmd[4:]

@@ -1265,9 +1267,9 @@ def query_seastar_flags(pc_file, link_static_cxx=False):
    return cflags, libs

 for mode in build_modes:
-    seastar_cflags, seastar_libs = query_seastar_flags(pc[mode], link_static_cxx=args.staticcxx)
-    modes[mode]['seastar_cflags'] = seastar_cflags
-    modes[mode]['seastar_libs'] = seastar_libs
+    seastar_pc_cflags, seastar_pc_libs = query_seastar_flags(pc[mode], link_static_cxx=args.staticcxx)
+    modes[mode]['seastar_cflags'] = seastar_pc_cflags
+    modes[mode]['seastar_libs'] = seastar_pc_libs

 # We need to use experimental features of the zstd library (to use our own allocators for the (de)compression context),
 # which are available only when the library is linked statically.
@@ -1288,16 +1290,58 @@ def configure_zstd(build_dir, mode):
    os.makedirs(zstd_build_dir, exist_ok=True)
    subprocess.check_call(zstd_cmd, shell=False, cwd=zstd_build_dir)

+def configure_abseil(build_dir, mode):
+    abseil_build_dir = os.path.join(build_dir, mode, 'abseil')
+
+    abseil_cflags = seastar_cflags + ' ' + modes[mode]['cxx_ld_flags']
+    cmake_mode = MODE_TO_CMAKE_BUILD_TYPE[mode]
+    abseil_cmake_args = [
+        '-DCMAKE_BUILD_TYPE={}'.format(cmake_mode),
+        '-DCMAKE_INSTALL_PREFIX={}'.format(build_dir + '/inst'), # just to avoid a warning from absl
+        '-DCMAKE_C_COMPILER={}'.format(args.cc),
+        '-DCMAKE_CXX_COMPILER={}'.format(args.cxx),
+        '-DCMAKE_CXX_FLAGS_{}={}'.format(cmake_mode.upper(), abseil_cflags),
+    ]
+
+    abseil_cmd = ['cmake', '-G', 'Ninja', os.path.relpath('abseil', abseil_build_dir)] + abseil_cmake_args
+
+    os.makedirs(abseil_build_dir, exist_ok=True)
+    subprocess.check_call(abseil_cmd, shell=False, cwd=abseil_build_dir)
+
+abseil_libs = ['absl/' + lib for lib in [
+    'container/libabsl_hashtablez_sampler.a',
+    'container/libabsl_raw_hash_set.a',
+    'synchronization/libabsl_synchronization.a',
+    'synchronization/libabsl_graphcycles_internal.a',
+    'debugging/libabsl_stacktrace.a',
+    'debugging/libabsl_symbolize.a',
+    'debugging/libabsl_debugging_internal.a',
+    'debugging/libabsl_demangle_internal.a',
+    'time/libabsl_time.a',
+    'time/libabsl_time_zone.a',
+    'numeric/libabsl_int128.a',
+    'hash/libabsl_city.a',
+    'hash/libabsl_hash.a',
+    'base/libabsl_malloc_internal.a',
+    'base/libabsl_spinlock_wait.a',
+    'base/libabsl_base.a',
+    'base/libabsl_dynamic_annotations.a',
+    'base/libabsl_raw_logging_internal.a',
+    'base/libabsl_exponential_biased.a',
+    'base/libabsl_throw_delegate.a']]
+
 args.user_cflags += " " + pkg_config('jsoncpp', '--cflags')
 args.user_cflags += ' -march=' + args.target
 libs = ' '.join([maybe_static(args.staticyamlcpp, '-lyaml-cpp'), '-latomic', '-llz4', '-lz', '-lsnappy', pkg_config('jsoncpp', '--libs'),
                 ' -lstdc++fs', ' -lcrypt', ' -lcryptopp', ' -lpthread',
                 maybe_static(args.staticboost, '-lboost_date_time -lboost_regex -licuuc'), ])

-xxhash_dir = 'xxHash'
+pkgconfig_libs = [
+    'libxxhash',
+]

-if not os.path.exists(xxhash_dir) or not os.listdir(xxhash_dir):
-    raise Exception(xxhash_dir + ' is empty. Run "git submodule update --init".')
+args.user_cflags += ' ' + ' '.join([pkg_config(lib, '--cflags') for lib in pkgconfig_libs])
+libs += ' ' + ' '.join([pkg_config(lib, '--libs') for lib in pkgconfig_libs])

 if not args.staticboost:
    args.user_cflags += ' -DBOOST_TEST_DYN_LINK'
@@ -1316,10 +1360,11 @@ if any(filter(thrift_version.startswith, thrift_boost_versions)):
 for pkg in pkgs:
    args.user_cflags += ' ' + pkg_config(pkg, '--cflags')
    libs += ' ' + pkg_config(pkg, '--libs')
+args.user_cflags += ' -I abseil'
 user_cflags = args.user_cflags + ' -fvisibility=hidden'
 user_ldflags = args.user_ldflags + ' -fvisibility=hidden'
 if args.staticcxx:
-    user_ldflags += " -static-libgcc -static-libstdc++"
+    user_ldflags += " -static-libstdc++"
 if args.staticthrift:
    thrift_libs = "-Wl,-Bstatic -lthrift -Wl,-Bdynamic"
 else:
@@ -1346,6 +1391,9 @@ else:
 for mode in build_modes:
    configure_zstd(outdir, mode)

+for mode in build_modes:
+    configure_abseil(outdir, mode)
+
 # configure.py may run automatically from an already-existing build.ninja.
 # If the user interrupts configure.py in the middle, we need build.ninja
 # to remain in a valid state.  So we write our output to a temporary
@@ -1369,7 +1417,7 @@ with open(buildfile_tmp, 'w') as f:
            command = echo -e $text > $out
            description = GEN $out
        rule swagger
-            command = seastar/scripts/seastar-json2code.py -f $in -o $out
+            command = {args.seastar_path}/scripts/seastar-json2code.py -f $in -o $out
            description = SWAGGER $out
        rule serializer
            command = {python} ./idl-compiler.py --ns ser -f $in -o $out
@@ -1441,9 +1489,12 @@ with open(buildfile_tmp, 'w') as f:
                        build/{mode}/gen/${{stem}}Parser.cpp
                description = ANTLR3 $in
            rule checkhh.{mode}
-              command = $cxx -MD -MT $out -MF $out.d {seastar_cflags} $cxxflags $cxxflags_{mode} $obj_cxxflags -x c++ --include=$in -c -o $out /dev/null
+              command = $cxx -MD -MT $out -MF $out.d {seastar_cflags} $cxxflags $cxxflags_{mode} $obj_cxxflags --include $in -c -o $out build/{mode}/gen/empty.cc
              description = CHECKHH $in
              depfile = $out.d
+            rule test.{mode}
+              command = ./test.py --mode={mode}
+              description = TEST {mode}
            ''').format(mode=mode, antlr3_exec=antlr3_exec, fmt_lib=fmt_lib, **modeval))
        f.write(
            'build {mode}: phony {artifacts}\n'.format(
@@ -1480,6 +1531,8 @@ with open(buildfile_tmp, 'w') as f:
                objs.extend(['$builddir/' + mode + '/' + artifact for artifact in [
                    'libdeflate/libdeflate.a',
                    'zstd/lib/libzstd.a',
+                ] + [
+                    'abseil/' + x for x in abseil_libs
                ]])
                objs.append('$builddir/' + mode + '/gen/utils/gz/crc_combine_table.o')
                if binary in tests:
@@ -1543,6 +1596,17 @@ with open(buildfile_tmp, 'w') as f:
            )
        )

+        f.write(
+            'build {mode}-test: test.{mode} {test_executables} $builddir/{mode}/test/tools/cql_repl\n'.format(
+                mode=mode,
+                test_executables=' '.join(['$builddir/{}/{}'.format(mode, binary) for binary in tests]),
+            )
+        )
+        f.write(
+            'build {mode}-check: phony {mode}-headers {mode}-test\n'.format(
+                mode=mode,
+            )
+        )

        gen_headers = []
        for th in thrifts:
@@ -1561,7 +1625,7 @@ with open(buildfile_tmp, 'w') as f:
                f.write('    cxxflags = {seastar_cflags} $cxxflags $cxxflags_{mode} {extra_cxxflags}\n'.format(mode=mode, extra_cxxflags=extra_cxxflags[src], **modeval))
        for hh in swaggers:
            src = swaggers[hh]
-            f.write('build {}: swagger {} | seastar/scripts/seastar-json2code.py\n'.format(hh, src))
+            f.write('build {}: swagger {} | {}/scripts/seastar-json2code.py\n'.format(hh, src, args.seastar_path))
        for hh in serializers:
            src = serializers[hh]
            f.write('build {}: serializer {} | idl-compiler.py\n'.format(hh, src))
@@ -1587,8 +1651,9 @@ with open(buildfile_tmp, 'w') as f:
                    if has_sanitize_address_use_after_scope:
                        flags += ' -fno-sanitize-address-use-after-scope'
                    f.write('  obj_cxxflags = %s\n' % flags)
+        f.write(f'build build/{mode}/gen/empty.cc: gen\n')
        for hh in headers:
-            f.write('build $builddir/{mode}/{hh}.o: checkhh.{mode} {hh} || {gen_headers_dep}\n'.format(
+            f.write('build $builddir/{mode}/{hh}.o: checkhh.{mode} {hh} | build/{mode}/gen/empty.cc || {gen_headers_dep}\n'.format(
                    mode=mode, hh=hh, gen_headers_dep=gen_headers_dep))

        f.write('build build/{mode}/seastar/libseastar.a: ninja | always\n'
@@ -1621,14 +1686,27 @@ with open(buildfile_tmp, 'w') as f:
        f.write('  subdir = build/{mode}/zstd\n'.format(**locals()))
        f.write('  target = libzstd.a\n'.format(**locals()))

+        for lib in abseil_libs:
+            f.write('build build/{mode}/abseil/{lib}: ninja\n'.format(**locals()))
+            f.write('  pool = submodule_pool\n')
+            f.write('  subdir = build/{mode}/abseil\n'.format(**locals()))
+            f.write('  target = {lib}\n'.format(**locals()))
+
    mode = 'dev' if 'dev' in modes else modes[0]
    f.write('build checkheaders: phony || {}\n'.format(' '.join(['$builddir/{}/{}.o'.format(mode, hh) for hh in headers])))

+    f.write(  # aggregate only the configured modes: '<mode>-test' exists only for build_modes
+            'build test: phony {}\n'.format(' '.join(['{mode}-test'.format(mode=mode) for mode in build_modes]))
+    )
+    f.write(  # likewise, '<mode>-check' is only emitted for build_modes
+            'build check: phony {}\n'.format(' '.join(['{mode}-check'.format(mode=mode) for mode in build_modes]))
+    )
+
    f.write(textwrap.dedent('''\
        rule configure
          command = {python} configure.py $configure_args
          generator = 1
-        build build.ninja: configure | configure.py SCYLLA-VERSION-GEN seastar/CMakeLists.txt
+        build build.ninja: configure | configure.py SCYLLA-VERSION-GEN {args.seastar_path}/CMakeLists.txt
        rule cscope
            command = find -name '*.[chS]' -o -name "*.cc" -o -name "*.hh" | cscope -bq -i-
            description = CSCOPE
--- a/cql3/Cql.g
+++ b/cql3/Cql.g
@@ -105,7 +105,7 @@ options {
 using namespace cql3::statements;
 using namespace cql3::selection;
 using cql3::cql3_type;
-using conditions_type = std::vector<std::pair<::shared_ptr<cql3::column_identifier::raw>,::shared_ptr<cql3::column_condition::raw>>>;
+using conditions_type = std::vector<std::pair<::shared_ptr<cql3::column_identifier::raw>,lw_shared_ptr<cql3::column_condition::raw>>>;
 using operations_type = std::vector<std::pair<::shared_ptr<cql3::column_identifier::raw>,::shared_ptr<cql3::operation::raw_update>>>;

 // ANTLR forces us to define a default-initialized return value
@@ -319,63 +319,63 @@ struct uninitialized {

 /** STATEMENTS **/

-query returns [shared_ptr<raw::parsed_statement> stmnt]
-    : st=cqlStatement (';')* EOF { $stmnt = st; }
+query returns [std::unique_ptr<raw::parsed_statement> stmnt]
+    : st=cqlStatement (';')* EOF { $stmnt = std::move(st); }
    ;

-cqlStatement returns [shared_ptr<raw::parsed_statement> stmt]
+cqlStatement returns [std::unique_ptr<raw::parsed_statement> stmt]
    @after{ if (stmt) { stmt->set_bound_variables(_bind_variables); } }
-    : st1= selectStatement             { $stmt = st1; }
-    | st2= insertStatement             { $stmt = st2; }
-    | st3= updateStatement             { $stmt = st3; }
-    | st4= batchStatement              { $stmt = st4; }
-    | st5= deleteStatement             { $stmt = st5; }
-    | st6= useStatement                { $stmt = st6; }
-    | st7= truncateStatement           { $stmt = st7; }
-    | st8= createKeyspaceStatement     { $stmt = st8; }
-    | st9= createTableStatement        { $stmt = st9; }
-    | st10=createIndexStatement        { $stmt = st10; }
-    | st11=dropKeyspaceStatement       { $stmt = st11; }
-    | st12=dropTableStatement          { $stmt = st12; }
-    | st13=dropIndexStatement          { $stmt = st13; }
-    | st14=alterTableStatement         { $stmt = st14; }
-    | st15=alterKeyspaceStatement      { $stmt = st15; }
-    | st16=grantStatement              { $stmt = st16; }
-    | st17=revokeStatement             { $stmt = st17; }
-    | st18=listPermissionsStatement    { $stmt = st18; }
-    | st19=createUserStatement         { $stmt = st19; }
-    | st20=alterUserStatement          { $stmt = st20; }
-    | st21=dropUserStatement           { $stmt = st21; }
-    | st22=listUsersStatement          { $stmt = st22; }
+    : st1= selectStatement             { $stmt = std::move(st1); }
+    | st2= insertStatement             { $stmt = std::move(st2); }
+    | st3= updateStatement             { $stmt = std::move(st3); }
+    | st4= batchStatement              { $stmt = std::move(st4); }
+    | st5= deleteStatement             { $stmt = std::move(st5); }
+    | st6= useStatement                { $stmt = std::move(st6); }
+    | st7= truncateStatement           { $stmt = std::move(st7); }
+    | st8= createKeyspaceStatement     { $stmt = std::move(st8); }
+    | st9= createTableStatement        { $stmt = std::move(st9); }
+    | st10=createIndexStatement        { $stmt = std::move(st10); }
+    | st11=dropKeyspaceStatement       { $stmt = std::move(st11); }
+    | st12=dropTableStatement          { $stmt = std::move(st12); }
+    | st13=dropIndexStatement          { $stmt = std::move(st13); }
+    | st14=alterTableStatement         { $stmt = std::move(st14); }
+    | st15=alterKeyspaceStatement      { $stmt = std::move(st15); }
+    | st16=grantStatement              { $stmt = std::move(st16); }
+    | st17=revokeStatement             { $stmt = std::move(st17); }
+    | st18=listPermissionsStatement    { $stmt = std::move(st18); }
+    | st19=createUserStatement         { $stmt = std::move(st19); }
+    | st20=alterUserStatement          { $stmt = std::move(st20); }
+    | st21=dropUserStatement           { $stmt = std::move(st21); }
+    | st22=listUsersStatement          { $stmt = std::move(st22); }
 #if 0
    | st23=createTriggerStatement      { $stmt = st23; }
    | st24=dropTriggerStatement        { $stmt = st24; }
 #endif
-    | st25=createTypeStatement         { $stmt = st25; }
-    | st26=alterTypeStatement          { $stmt = st26; }
-    | st27=dropTypeStatement           { $stmt = st27; }
-    | st28=createFunctionStatement     { $stmt = st28; }
-    | st29=dropFunctionStatement       { $stmt = st29; }
+    | st25=createTypeStatement         { $stmt = std::move(st25); }
+    | st26=alterTypeStatement          { $stmt = std::move(st26); }
+    | st27=dropTypeStatement           { $stmt = std::move(st27); }
+    | st28=createFunctionStatement     { $stmt = std::move(st28); }
+    | st29=dropFunctionStatement       { $stmt = std::move(st29); }
 #if 0
    | st30=createAggregateStatement    { $stmt = st30; }
    | st31=dropAggregateStatement      { $stmt = st31; }
 #endif
-    | st32=createViewStatement         { $stmt = st32; }
-    | st33=alterViewStatement          { $stmt = st33; }
-    | st34=dropViewStatement           { $stmt = st34; }
-    | st35=listRolesStatement          { $stmt = st35; }
-    | st36=grantRoleStatement          { $stmt = st36; }
-    | st37=revokeRoleStatement         { $stmt = st37; }
-    | st38=dropRoleStatement           { $stmt = st38; }
-    | st39=createRoleStatement         { $stmt = st39; }
-    | st40=alterRoleStatement          { $stmt = st40; }
+    | st32=createViewStatement         { $stmt = std::move(st32); }
+    | st33=alterViewStatement          { $stmt = std::move(st33); }
+    | st34=dropViewStatement           { $stmt = std::move(st34); }
+    | st35=listRolesStatement          { $stmt = std::move(st35); }
+    | st36=grantRoleStatement          { $stmt = std::move(st36); }
+    | st37=revokeRoleStatement         { $stmt = std::move(st37); }
+    | st38=dropRoleStatement           { $stmt = std::move(st38); }
+    | st39=createRoleStatement         { $stmt = std::move(st39); }
+    | st40=alterRoleStatement          { $stmt = std::move(st40); }
    ;

 /*
 * USE <KEYSPACE>;
 */
-useStatement returns [::shared_ptr<raw::use_statement> stmt]
-    : K_USE ks=keyspaceName { $stmt = ::make_shared<raw::use_statement>(ks); }
+useStatement returns [std::unique_ptr<raw::use_statement> stmt]
+    : K_USE ks=keyspaceName { $stmt = std::make_unique<raw::use_statement>(ks); }
    ;

 /**
@@ -384,7 +384,7 @@ useStatement returns [::shared_ptr<raw::use_statement> stmt]
 * WHERE KEY = "key1" AND COL > 1 AND COL < 100
 * LIMIT <NUMBER>;
 */
-selectStatement returns [shared_ptr<raw::select_statement> expr]
+selectStatement returns [std::unique_ptr<raw::select_statement> expr]
    @init {
        bool is_distinct = false;
        ::shared_ptr<cql3::term::raw> limit;
@@ -409,7 +409,7 @@ selectStatement returns [shared_ptr<raw::select_statement> expr]
      ( K_BYPASS K_CACHE { bypass_cache = true; })?
      {
          auto params = make_lw_shared<raw::select_statement::parameters>(std::move(orderings), is_distinct, allow_filtering, is_json, bypass_cache);
-          $expr = ::make_shared<raw::select_statement>(std::move(cf), std::move(params),
+          $expr = std::make_unique<raw::select_statement>(std::move(cf), std::move(params),
            std::move(sclause), std::move(wclause), std::move(limit), std::move(per_partition_limit),
            std::move(gbcolumns));
      }
@@ -476,7 +476,7 @@ jsonValue returns [::shared_ptr<cql3::term::raw> value]
 * USING TIMESTAMP <long>;
 *
 */
-insertStatement returns [::shared_ptr<raw::modification_statement> expr]
+insertStatement returns [std::unique_ptr<raw::modification_statement> expr]
    @init {
        auto attrs = std::make_unique<cql3::attributes::raw>();
        std::vector<::shared_ptr<cql3::column_identifier::raw>> column_names;
@@ -492,7 +492,7 @@ insertStatement returns [::shared_ptr<raw::modification_statement> expr]
            ( K_IF K_NOT K_EXISTS { if_not_exists = true; } )?
            ( usingClause[attrs] )?
              {
-              $expr = ::make_shared<raw::insert_statement>(std::move(cf),
+              $expr = std::make_unique<raw::insert_statement>(std::move(cf),
                                                       std::move(attrs),
                                                       std::move(column_names),
                                                       std::move(values),
@@ -504,7 +504,7 @@ insertStatement returns [::shared_ptr<raw::modification_statement> expr]
            ( K_IF K_NOT K_EXISTS { if_not_exists = true; } )?
            ( usingClause[attrs] )?
              {
-              $expr = ::make_shared<raw::insert_json_statement>(std::move(cf),
+              $expr = std::make_unique<raw::insert_json_statement>(std::move(cf),
                                                       std::move(attrs),
                                                       std::move(json_value),
                                                       if_not_exists,
@@ -528,7 +528,7 @@ usingClauseObjective[std::unique_ptr<cql3::attributes::raw>& attrs]
 * SET name1 = value1, name2 = value2
 * WHERE key = value;
 */
-updateStatement returns [::shared_ptr<raw::update_statement> expr]
+updateStatement returns [std::unique_ptr<raw::update_statement> expr]
    @init {
        bool if_exists = false;
        auto attrs = std::make_unique<cql3::attributes::raw>();
@@ -540,7 +540,7 @@ updateStatement returns [::shared_ptr<raw::update_statement> expr]
      K_WHERE wclause=whereClause
      ( K_IF (K_EXISTS{ if_exists = true; } | conditions=updateConditions) )?
      {
-          return ::make_shared<raw::update_statement>(std::move(cf),
+          return std::make_unique<raw::update_statement>(std::move(cf),
                                                  std::move(attrs),
                                                  std::move(operations),
                                                  std::move(wclause),
@@ -560,7 +560,7 @@ updateConditions returns [conditions_type conditions]
 * WHERE KEY = keyname
   [IF (EXISTS | name = value, ...)];
 */
-deleteStatement returns [::shared_ptr<raw::delete_statement> expr]
+deleteStatement returns [std::unique_ptr<raw::delete_statement> expr]
    @init {
        auto attrs = std::make_unique<cql3::attributes::raw>();
        std::vector<::shared_ptr<cql3::operation::raw_deletion>> column_deletions;
@@ -572,7 +572,7 @@ deleteStatement returns [::shared_ptr<raw::delete_statement> expr]
      K_WHERE wclause=whereClause
      ( K_IF ( K_EXISTS { if_exists = true; } | conditions=updateConditions ))?
      {
-          return ::make_shared<raw::delete_statement>(cf,
+          return std::make_unique<raw::delete_statement>(cf,
                                            std::move(attrs),
                                            std::move(column_deletions),
                                            std::move(wclause),
@@ -620,11 +620,11 @@ usingClauseDelete[std::unique_ptr<cql3::attributes::raw>& attrs]
 *   ...
 * APPLY BATCH
 */
-batchStatement returns [shared_ptr<cql3::statements::raw::batch_statement> expr]
+batchStatement returns [std::unique_ptr<cql3::statements::raw::batch_statement> expr]
    @init {
        using btype = cql3::statements::raw::batch_statement::type; 
        btype type = btype::LOGGED;
-        std::vector<shared_ptr<cql3::statements::raw::modification_statement>> statements;
+        std::vector<std::unique_ptr<cql3::statements::raw::modification_statement>> statements;
        auto attrs = std::make_unique<cql3::attributes::raw>();
    }
    : K_BEGIN
@@ -633,14 +633,14 @@ batchStatement returns [shared_ptr<cql3::statements::raw::batch_statement> expr]
          ( s=batchStatementObjective ';'? { statements.push_back(std::move(s)); } )*
      K_APPLY K_BATCH
      {
-          $expr = ::make_shared<cql3::statements::raw::batch_statement>(type, std::move(attrs), std::move(statements));
+          $expr = std::make_unique<cql3::statements::raw::batch_statement>(type, std::move(attrs), std::move(statements));
      }
    ;

-batchStatementObjective returns [shared_ptr<cql3::statements::raw::modification_statement> statement]
-    : i=insertStatement  { $statement = i; }
-    | u=updateStatement  { $statement = u; }
-    | d=deleteStatement  { $statement = d; }
+batchStatementObjective returns [std::unique_ptr<cql3::statements::raw::modification_statement> statement]
+    : i=insertStatement  { $statement = std::move(i); }
+    | u=updateStatement  { $statement = std::move(u); }
+    | d=deleteStatement  { $statement = std::move(d); }
    ;

 #if 0
@@ -694,7 +694,7 @@ dropAggregateStatement returns [DropAggregateStatement expr]
    ;
 #endif

-createFunctionStatement returns [shared_ptr<cql3::statements::create_function_statement> expr]
+createFunctionStatement returns [std::unique_ptr<cql3::statements::create_function_statement> expr]
    @init {
        bool or_replace = false;
        bool if_not_exists = false;
@@ -719,10 +719,10 @@ createFunctionStatement returns [shared_ptr<cql3::statements::create_function_st
      K_RETURNS rt = comparatorType
      K_LANGUAGE language = IDENT
      K_AS body = STRING_LITERAL
-      { $expr = ::make_shared<cql3::statements::create_function_statement>(std::move(fn), to_lower($language.text), $body.text, std::move(arg_names), std::move(arg_types), std::move(rt), called_on_null_input, or_replace, if_not_exists); }
+      { $expr = std::make_unique<cql3::statements::create_function_statement>(std::move(fn), to_lower($language.text), $body.text, std::move(arg_names), std::move(arg_types), std::move(rt), called_on_null_input, or_replace, if_not_exists); }
    ;

-dropFunctionStatement returns [shared_ptr<cql3::statements::drop_function_statement> expr]
+dropFunctionStatement returns [std::unique_ptr<cql3::statements::drop_function_statement> expr]
    @init {
        bool if_exists = false;
        std::vector<shared_ptr<cql3_type::raw>> arg_types;
@@ -740,19 +740,19 @@ dropFunctionStatement returns [shared_ptr<cql3::statements::drop_function_statem
        ')'
        { args_present = true; }
      )?
-      { $expr = ::make_shared<cql3::statements::drop_function_statement>(std::move(fn), std::move(arg_types), args_present, if_exists); }
+      { $expr = std::make_unique<cql3::statements::drop_function_statement>(std::move(fn), std::move(arg_types), args_present, if_exists); }
    ;

 /**
 * CREATE KEYSPACE [IF NOT EXISTS] <KEYSPACE> WITH attr1 = value1 AND attr2 = value2;
 */
-createKeyspaceStatement returns [shared_ptr<cql3::statements::create_keyspace_statement> expr]
+createKeyspaceStatement returns [std::unique_ptr<cql3::statements::create_keyspace_statement> expr]
    @init {
        auto attrs = make_shared<cql3::statements::ks_prop_defs>();
        bool if_not_exists = false;
    }
    : K_CREATE K_KEYSPACE (K_IF K_NOT K_EXISTS { if_not_exists = true; } )? ks=keyspaceName
-      K_WITH properties[attrs] { $expr = ::make_shared<cql3::statements::create_keyspace_statement>(ks, attrs, if_not_exists); }
+      K_WITH properties[*attrs] { $expr = std::make_unique<cql3::statements::create_keyspace_statement>(ks, attrs, if_not_exists); }
    ;

 /**
@@ -762,33 +762,33 @@ createKeyspaceStatement returns [shared_ptr<cql3::statements::create_keyspace_st
 *     <name3> <type>
 * ) WITH <property> = <value> AND ...;
 */
-createTableStatement returns [shared_ptr<cql3::statements::create_table_statement::raw_statement> expr]
+createTableStatement returns [std::unique_ptr<cql3::statements::create_table_statement::raw_statement> expr]
    @init { bool if_not_exists = false; }
    : K_CREATE K_COLUMNFAMILY (K_IF K_NOT K_EXISTS { if_not_exists = true; } )?
-      cf=columnFamilyName { $expr = make_shared<cql3::statements::create_table_statement::raw_statement>(cf, if_not_exists); }
-      cfamDefinition[expr]
+      cf=columnFamilyName { $expr = std::make_unique<cql3::statements::create_table_statement::raw_statement>(cf, if_not_exists); }
+      cfamDefinition[*expr]
    ;

-cfamDefinition[shared_ptr<cql3::statements::create_table_statement::raw_statement> expr]
+cfamDefinition[cql3::statements::create_table_statement::raw_statement& expr]
    : '(' cfamColumns[expr] ( ',' cfamColumns[expr]? )* ')'
-      ( K_WITH cfamProperty[$expr->properties()] ( K_AND cfamProperty[$expr->properties()] )*)?
+      ( K_WITH cfamProperty[$expr.properties()] ( K_AND cfamProperty[$expr.properties()] )*)?
    ;

-cfamColumns[shared_ptr<cql3::statements::create_table_statement::raw_statement> expr]
+cfamColumns[cql3::statements::create_table_statement::raw_statement& expr]
    @init { bool is_static=false; }
-    : k=ident v=comparatorType (K_STATIC {is_static = true;})? { $expr->add_definition(k, v, is_static); }
-        (K_PRIMARY K_KEY { $expr->add_key_aliases(std::vector<shared_ptr<cql3::column_identifier>>{k}); })?
-    | K_PRIMARY K_KEY '(' pkDef[expr] (',' c=ident { $expr->add_column_alias(c); } )* ')'
+    : k=ident v=comparatorType (K_STATIC {is_static = true;})? { $expr.add_definition(k, v, is_static); }
+        (K_PRIMARY K_KEY { $expr.add_key_aliases(std::vector<shared_ptr<cql3::column_identifier>>{k}); })?
+    | K_PRIMARY K_KEY '(' pkDef[expr] (',' c=ident { $expr.add_column_alias(c); } )* ')'
    ;

-pkDef[shared_ptr<cql3::statements::create_table_statement::raw_statement> expr]
+pkDef[cql3::statements::create_table_statement::raw_statement& expr]
    @init { std::vector<shared_ptr<cql3::column_identifier>> l; }
-    : k=ident { $expr->add_key_aliases(std::vector<shared_ptr<cql3::column_identifier>>{k}); }
-    | '(' k1=ident { l.push_back(k1); } ( ',' kn=ident { l.push_back(kn); } )* ')' { $expr->add_key_aliases(l); }
+    : k=ident { $expr.add_key_aliases(std::vector<shared_ptr<cql3::column_identifier>>{k}); }
+    | '(' k1=ident { l.push_back(k1); } ( ',' kn=ident { l.push_back(kn); } )* ')' { $expr.add_key_aliases(l); }
    ;

 cfamProperty[cql3::statements::cf_properties& expr]
-    : property[$expr.properties()]
+    : property[*$expr.properties()]
    | K_COMPACT K_STORAGE { $expr.set_compact_storage(); }
    | K_CLUSTERING K_ORDER K_BY '(' cfamOrdering[expr] (',' cfamOrdering[expr])* ')'
    ;
@@ -806,15 +806,15 @@ cfamOrdering[cql3::statements::cf_properties& expr]
 *    ....
 * )
 */
-createTypeStatement returns [::shared_ptr<create_type_statement> expr]
+createTypeStatement returns [std::unique_ptr<create_type_statement> expr]
    @init { bool if_not_exists = false; }
    : K_CREATE K_TYPE (K_IF K_NOT K_EXISTS { if_not_exists = true; } )?
-         tn=userTypeName { $expr = ::make_shared<create_type_statement>(tn, if_not_exists); }
-         '(' typeColumns[expr] ( ',' typeColumns[expr]? )* ')'
+         tn=userTypeName { $expr = std::make_unique<create_type_statement>(tn, if_not_exists); }
+         '(' typeColumns[*expr] ( ',' typeColumns[*expr]? )* ')'
    ;

-typeColumns[::shared_ptr<create_type_statement> expr]
-    : k=ident v=comparatorType { $expr->add_definition(k, v); }
+typeColumns[create_type_statement& expr]
+    : k=ident v=comparatorType { $expr.add_definition(k, v); }
    ;


@@ -822,7 +822,7 @@ typeColumns[::shared_ptr<create_type_statement> expr]
 * CREATE INDEX [IF NOT EXISTS] [indexName] ON <columnFamily> (<columnName>);
 * CREATE CUSTOM INDEX [IF NOT EXISTS] [indexName] ON <columnFamily> (<columnName>) USING <indexClass>;
 */
-createIndexStatement returns [::shared_ptr<create_index_statement> expr]
+createIndexStatement returns [std::unique_ptr<create_index_statement> expr]
    @init {
        auto props = make_shared<index_prop_defs>();
        bool if_not_exists = false;
@@ -830,10 +830,10 @@ createIndexStatement returns [::shared_ptr<create_index_statement> expr]
        std::vector<::shared_ptr<index_target::raw>> targets;
    }
    : K_CREATE (K_CUSTOM { props->is_custom = true; })? K_INDEX (K_IF K_NOT K_EXISTS { if_not_exists = true; } )?
-        (idxName[name])? K_ON cf=columnFamilyName '(' (target1=indexIdent { targets.emplace_back(target1); } (',' target2=indexIdent { targets.emplace_back(target2); } )*)? ')'
+        (idxName[*name])? K_ON cf=columnFamilyName '(' (target1=indexIdent { targets.emplace_back(target1); } (',' target2=indexIdent { targets.emplace_back(target2); } )*)? ')'
        (K_USING cls=STRING_LITERAL { props->custom_class = sstring{$cls.text}; })?
-        (K_WITH properties[props])?
-      { $expr = ::make_shared<create_index_statement>(cf, name, targets, props, if_not_exists); }
+        (K_WITH properties[*props])?
+      { $expr = std::make_unique<create_index_statement>(cf, name, targets, props, if_not_exists); }
    ;

 indexIdent returns [::shared_ptr<index_target::raw> id]
@@ -856,7 +856,7 @@ indexIdent returns [::shared_ptr<index_target::raw> id]
 *  PRIMARY KEY (<pkColumns>)
 *  WITH <property> = <value> AND ...;
 */
-createViewStatement returns [::shared_ptr<create_view_statement> expr]
+createViewStatement returns [std::unique_ptr<create_view_statement> expr]
    @init {
        bool if_not_exists = false;
        std::vector<::shared_ptr<cql3::column_identifier::raw>> partition_keys;
@@ -870,7 +870,7 @@ createViewStatement returns [::shared_ptr<create_view_statement> expr]
    |   '(' k1=cident { partition_keys.push_back(k1); } ( ',' cn=cident { composite_keys.push_back(cn); } )* ')'
        )
        {
-             $expr = ::make_shared<create_view_statement>(
+             $expr = std::make_unique<create_view_statement>(
                std::move(cf),
                std::move(basecf),
                std::move(sclause),
@@ -909,12 +909,12 @@ dropTriggerStatement returns [DropTriggerStatement expr]
 /**
 * ALTER KEYSPACE <KS> WITH <property> = <value>;
 */
-alterKeyspaceStatement returns [shared_ptr<cql3::statements::alter_keyspace_statement> expr]
+alterKeyspaceStatement returns [std::unique_ptr<cql3::statements::alter_keyspace_statement> expr]
    @init {
        auto attrs = make_shared<cql3::statements::ks_prop_defs>();
    }
    : K_ALTER K_KEYSPACE ks=keyspaceName
-        K_WITH properties[attrs] { $expr = ::make_shared<cql3::statements::alter_keyspace_statement>(ks, attrs); }
+        K_WITH properties[*attrs] { $expr = std::make_unique<cql3::statements::alter_keyspace_statement>(ks, attrs); }
    ;

 /**
@@ -924,7 +924,7 @@ alterKeyspaceStatement returns [shared_ptr<cql3::statements::alter_keyspace_stat
 * ALTER COLUMN FAMILY <CF> WITH <property> = <value>;
 * ALTER COLUMN FAMILY <CF> RENAME <column> TO <column>;
 */
-alterTableStatement returns [shared_ptr<alter_table_statement> expr]
+alterTableStatement returns [std::unique_ptr<alter_table_statement> expr]
    @init {
        alter_table_statement::type type;
        auto props = make_shared<cql3::statements::cf_prop_defs>();
@@ -943,13 +943,13 @@ alterTableStatement returns [shared_ptr<alter_table_statement> expr]
            | '('     id1=cident { column_changes.emplace_back(alter_table_statement::column_change{id1}); }
                 (',' idn=cident { column_changes.emplace_back(alter_table_statement::column_change{idn}); } )* ')'
            )
-          | K_WITH  properties[props]                 { type = alter_table_statement::type::opts; }
+          | K_WITH  properties[*props]                 { type = alter_table_statement::type::opts; }
          | K_RENAME                                  { type = alter_table_statement::type::rename; }
               id1=cident K_TO toId1=cident { renames.emplace_back(id1, toId1); }
               ( K_AND idn=cident K_TO toIdn=cident { renames.emplace_back(idn, toIdn); } )*
          )
    {
-        $expr = ::make_shared<alter_table_statement>(std::move(cf), type, std::move(column_changes), std::move(props), std::move(renames));
+        $expr = std::make_unique<alter_table_statement>(std::move(cf), type, std::move(column_changes), std::move(props), std::move(renames));
    }
    ;

@@ -968,126 +968,126 @@ cfisStatic returns [bool isStaticColumn]
 * ALTER TYPE <name> ADD <field> <newtype>;
 * ALTER TYPE <name> RENAME <field> TO <newtype> AND ...;
 */
-alterTypeStatement returns [::shared_ptr<alter_type_statement> expr]
+alterTypeStatement returns [std::unique_ptr<alter_type_statement> expr]
    : K_ALTER K_TYPE name=userTypeName
-          ( K_ALTER f=ident K_TYPE v=comparatorType { $expr = ::make_shared<alter_type_statement::add_or_alter>(name, false, f, v); }
-          | K_ADD   f=ident v=comparatorType        { $expr = ::make_shared<alter_type_statement::add_or_alter>(name, true, f, v); }
+          ( K_ALTER f=ident K_TYPE v=comparatorType { $expr = std::make_unique<alter_type_statement::add_or_alter>(name, false, f, v); }
+          | K_ADD   f=ident v=comparatorType        { $expr = std::make_unique<alter_type_statement::add_or_alter>(name, true, f, v); }
          | K_RENAME
-               { $expr = ::make_shared<alter_type_statement::renames>(name); }
-               renames[{ static_pointer_cast<alter_type_statement::renames>($expr) }]
+               { $expr = std::make_unique<alter_type_statement::renames>(name); }
+               renames[{ static_cast<alter_type_statement::renames&>(*$expr) }]
          )
    ;

 /**
 * ALTER MATERIALIZED VIEW <CF> WITH <property> = <value>;
 */
-alterViewStatement returns [::shared_ptr<alter_view_statement> expr]
+alterViewStatement returns [std::unique_ptr<alter_view_statement> expr]
    @init {
        auto props = make_shared<cql3::statements::cf_prop_defs>();
    }
-    : K_ALTER K_MATERIALIZED K_VIEW cf=columnFamilyName K_WITH properties[props]
+    : K_ALTER K_MATERIALIZED K_VIEW cf=columnFamilyName K_WITH properties[*props]
    {
-        $expr = ::make_shared<alter_view_statement>(std::move(cf), std::move(props));
+        $expr = std::make_unique<alter_view_statement>(std::move(cf), std::move(props));
    }
    ;

-renames[::shared_ptr<alter_type_statement::renames> expr]
-    : fromId=ident K_TO toId=ident { $expr->add_rename(fromId, toId); }
+renames[alter_type_statement::renames& expr]
+    : fromId=ident K_TO toId=ident { $expr.add_rename(fromId, toId); }
      ( K_AND renames[$expr] )?
    ;

 /**
 * DROP KEYSPACE [IF EXISTS] <KSP>;
 */
-dropKeyspaceStatement returns [::shared_ptr<drop_keyspace_statement> ksp]
+dropKeyspaceStatement returns [std::unique_ptr<drop_keyspace_statement> ksp]
    @init { bool if_exists = false; }
-    : K_DROP K_KEYSPACE (K_IF K_EXISTS { if_exists = true; } )? ks=keyspaceName { $ksp = ::make_shared<drop_keyspace_statement>(ks, if_exists); }
+    : K_DROP K_KEYSPACE (K_IF K_EXISTS { if_exists = true; } )? ks=keyspaceName { $ksp = std::make_unique<drop_keyspace_statement>(ks, if_exists); }
    ;

 /**
 * DROP COLUMNFAMILY [IF EXISTS] <CF>;
 */
-dropTableStatement returns [::shared_ptr<drop_table_statement> stmt]
+dropTableStatement returns [std::unique_ptr<drop_table_statement> stmt]
    @init { bool if_exists = false; }
-    : K_DROP K_COLUMNFAMILY (K_IF K_EXISTS { if_exists = true; } )? cf=columnFamilyName { $stmt = ::make_shared<drop_table_statement>(cf, if_exists); }
+    : K_DROP K_COLUMNFAMILY (K_IF K_EXISTS { if_exists = true; } )? cf=columnFamilyName { $stmt = std::make_unique<drop_table_statement>(cf, if_exists); }
    ;

 /**
 * DROP TYPE <name>;
 */
-dropTypeStatement returns [::shared_ptr<drop_type_statement> stmt]
+dropTypeStatement returns [std::unique_ptr<drop_type_statement> stmt]
    @init { bool if_exists = false; }
-    : K_DROP K_TYPE (K_IF K_EXISTS { if_exists = true; } )? name=userTypeName { $stmt = ::make_shared<drop_type_statement>(name, if_exists); }
+    : K_DROP K_TYPE (K_IF K_EXISTS { if_exists = true; } )? name=userTypeName { $stmt = std::make_unique<drop_type_statement>(name, if_exists); }
    ;

 /**
 * DROP MATERIALIZED VIEW [IF EXISTS] <view_name>
 */
-dropViewStatement returns [::shared_ptr<drop_view_statement> stmt]
+dropViewStatement returns [std::unique_ptr<drop_view_statement> stmt]
    @init { bool if_exists = false; }
    : K_DROP K_MATERIALIZED K_VIEW (K_IF K_EXISTS { if_exists = true; } )? cf=columnFamilyName
-      { $stmt = ::make_shared<drop_view_statement>(cf, if_exists); }
+      { $stmt = std::make_unique<drop_view_statement>(cf, if_exists); }
    ;

 /**
 * DROP INDEX [IF EXISTS] <INDEX_NAME>
 */
-dropIndexStatement returns [::shared_ptr<drop_index_statement> expr]
+dropIndexStatement returns [std::unique_ptr<drop_index_statement> expr]
    @init { bool if_exists = false; }
    : K_DROP K_INDEX (K_IF K_EXISTS { if_exists = true; } )? index=indexName
-      { $expr = ::make_shared<drop_index_statement>(index, if_exists); }
+      { $expr = std::make_unique<drop_index_statement>(index, if_exists); }
    ;

 /**
  * TRUNCATE <CF>;
  */
-truncateStatement returns [::shared_ptr<truncate_statement> stmt]
-    : K_TRUNCATE (K_COLUMNFAMILY)? cf=columnFamilyName { $stmt = ::make_shared<truncate_statement>(cf); }
+truncateStatement returns [std::unique_ptr<truncate_statement> stmt]
+    : K_TRUNCATE (K_COLUMNFAMILY)? cf=columnFamilyName { $stmt = std::make_unique<truncate_statement>(cf); }
    ;

 /**
 * GRANT <permission> ON <resource> TO <grantee>
 */
-grantStatement returns [::shared_ptr<grant_statement> stmt]
+grantStatement returns [std::unique_ptr<grant_statement> stmt]
    : K_GRANT
          permissionOrAll
      K_ON
          resource
      K_TO
          grantee=userOrRoleName
-      { $stmt = ::make_shared<grant_statement>($permissionOrAll.perms, $resource.res, std::move(grantee)); } 
+      { $stmt = std::make_unique<grant_statement>($permissionOrAll.perms, $resource.res, std::move(grantee)); }
    ;

 /**
 * REVOKE <permission> ON <resource> FROM <revokee>
 */
-revokeStatement returns [::shared_ptr<revoke_statement> stmt]
+revokeStatement returns [std::unique_ptr<revoke_statement> stmt]
    : K_REVOKE
          permissionOrAll
      K_ON
          resource
      K_FROM
          revokee=userOrRoleName
-      { $stmt = ::make_shared<revoke_statement>($permissionOrAll.perms, $resource.res, std::move(revokee)); } 
+      { $stmt = std::make_unique<revoke_statement>($permissionOrAll.perms, $resource.res, std::move(revokee)); }
    ;

 /**
 * GRANT <rolename> to <grantee>
 */
-grantRoleStatement returns [::shared_ptr<grant_role_statement> stmt]
+grantRoleStatement returns [std::unique_ptr<grant_role_statement> stmt]
    : K_GRANT role=userOrRoleName K_TO grantee=userOrRoleName
-      { $stmt = ::make_shared<grant_role_statement>(std::move(role), std::move(grantee));  }
+      { $stmt = std::make_unique<grant_role_statement>(std::move(role), std::move(grantee));  }
    ;

 /**
 * REVOKE <rolename> FROM <revokee>
 */
-revokeRoleStatement returns [::shared_ptr<revoke_role_statement> stmt]
+revokeRoleStatement returns [std::unique_ptr<revoke_role_statement> stmt]
    : K_REVOKE role=userOrRoleName K_FROM revokee=userOrRoleName
-      { $stmt = ::make_shared<revoke_role_statement>(std::move(role), std::move(revokee)); }
+      { $stmt = std::make_unique<revoke_role_statement>(std::move(role), std::move(revokee)); }
    ;

-listPermissionsStatement returns [::shared_ptr<list_permissions_statement> stmt]
+listPermissionsStatement returns [std::unique_ptr<list_permissions_statement> stmt]
    @init {
 		std::optional<auth::resource> r;
 		std::optional<sstring> role;
@@ -1098,7 +1098,7 @@ listPermissionsStatement returns [::shared_ptr<list_permissions_statement> stmt]
      ( K_ON resource { r = $resource.res; } )?
      ( K_OF rn=userOrRoleName { role = sstring(static_cast<cql3::role_name>(rn).to_string()); } )?
      ( K_NORECURSIVE { recursive = false; } )?
-      { $stmt = ::make_shared<list_permissions_statement>($permissionOrAll.perms, std::move(r), std::move(role), recursive); } 
+      { $stmt = std::make_unique<list_permissions_statement>($permissionOrAll.perms, std::move(r), std::move(role), recursive); }
    ;

 permission returns [auth::permission perm]
@@ -1131,7 +1131,7 @@ roleResource returns [uninitialized<auth::resource> res]
 /**
 * CREATE USER [IF NOT EXISTS] <username> [WITH PASSWORD <password>] [SUPERUSER|NOSUPERUSER]
 */
-createUserStatement returns [::shared_ptr<create_role_statement> stmt]
+createUserStatement returns [std::unique_ptr<create_role_statement> stmt]
    @init {
        cql3::role_options opts;
        opts.is_superuser = false;
@@ -1142,42 +1142,42 @@ createUserStatement returns [::shared_ptr<create_role_statement> stmt]
    : K_CREATE K_USER (K_IF K_NOT K_EXISTS { ifNotExists = true; })? u=username
      ( K_WITH K_PASSWORD v=STRING_LITERAL { opts.password = $v.text; })?
      ( K_SUPERUSER { opts.is_superuser = true; } | K_NOSUPERUSER { opts.is_superuser = false; } )?
-      { $stmt = ::make_shared<create_role_statement>(cql3::role_name(u, cql3::preserve_role_case::yes), std::move(opts), ifNotExists); }
+      { $stmt = std::make_unique<create_role_statement>(cql3::role_name(u, cql3::preserve_role_case::yes), std::move(opts), ifNotExists); }
    ;

 /**
 * ALTER USER <username> [WITH PASSWORD <password>] [SUPERUSER|NOSUPERUSER]
 */
-alterUserStatement returns [::shared_ptr<alter_role_statement> stmt]
+alterUserStatement returns [std::unique_ptr<alter_role_statement> stmt]
    @init {
        cql3::role_options opts;
    }
    : K_ALTER K_USER u=username
      ( K_WITH K_PASSWORD v=STRING_LITERAL { opts.password = $v.text; })?
      ( K_SUPERUSER { opts.is_superuser = true; } | K_NOSUPERUSER { opts.is_superuser = false; } )?
-      { $stmt = ::make_shared<alter_role_statement>(cql3::role_name(u, cql3::preserve_role_case::yes), std::move(opts)); }
+      { $stmt = std::make_unique<alter_role_statement>(cql3::role_name(u, cql3::preserve_role_case::yes), std::move(opts)); }
    ;

 /**
 * DROP USER [IF EXISTS] <username>
 */
-dropUserStatement returns [::shared_ptr<drop_role_statement> stmt]
+dropUserStatement returns [std::unique_ptr<drop_role_statement> stmt]
    @init { bool ifExists = false; }
    : K_DROP K_USER (K_IF K_EXISTS { ifExists = true; })? u=username
-      { $stmt = ::make_shared<drop_role_statement>(cql3::role_name(u, cql3::preserve_role_case::yes), ifExists); }
+      { $stmt = std::make_unique<drop_role_statement>(cql3::role_name(u, cql3::preserve_role_case::yes), ifExists); }
    ;

 /**
 * LIST USERS
 */
-listUsersStatement returns [::shared_ptr<list_users_statement> stmt]
-    : K_LIST K_USERS { $stmt = ::make_shared<list_users_statement>(); }
+listUsersStatement returns [std::unique_ptr<list_users_statement> stmt]
+    : K_LIST K_USERS { $stmt = std::make_unique<list_users_statement>(); }
    ;

 /**
 * CREATE ROLE [IF NOT EXISTS] <role_name> [WITH <roleOption> [AND <roleOption>]*]
 */
-createRoleStatement returns [::shared_ptr<create_role_statement> stmt]
+createRoleStatement returns [std::unique_ptr<create_role_statement> stmt]
    @init {
        cql3::role_options opts;
        opts.is_superuser = false;
@@ -1186,36 +1186,36 @@ createRoleStatement returns [::shared_ptr<create_role_statement> stmt]
    }
    : K_CREATE K_ROLE (K_IF K_NOT K_EXISTS { if_not_exists = true; })? name=userOrRoleName
      (K_WITH roleOptions[opts])?
-      { $stmt = ::make_shared<create_role_statement>(name, std::move(opts), if_not_exists); }
+      { $stmt = std::make_unique<create_role_statement>(name, std::move(opts), if_not_exists); }
    ;

 /**
 * ALTER ROLE <rolename> [WITH <roleOption> [AND <roleOption>]*]
 */
-alterRoleStatement returns [::shared_ptr<alter_role_statement> stmt]
+alterRoleStatement returns [std::unique_ptr<alter_role_statement> stmt]
    @init {
        cql3::role_options opts;
    }
    : K_ALTER K_ROLE name=userOrRoleName
      (K_WITH roleOptions[opts])?
-      { $stmt = ::make_shared<alter_role_statement>(name, std::move(opts)); }
+      { $stmt = std::make_unique<alter_role_statement>(name, std::move(opts)); }
    ;

 /**
 * DROP ROLE [IF EXISTS] <rolename>
 */
-dropRoleStatement returns [::shared_ptr<drop_role_statement> stmt]
+dropRoleStatement returns [std::unique_ptr<drop_role_statement> stmt]
    @init {
        bool if_exists = false;
    }
    : K_DROP K_ROLE (K_IF K_EXISTS { if_exists = true; })? name=userOrRoleName
-      { $stmt = ::make_shared<drop_role_statement>(name, if_exists); }
+      { $stmt = std::make_unique<drop_role_statement>(name, if_exists); }
    ;

 /**
 * LIST ROLES [OF <rolename>] [NORECURSIVE]
 */
-listRolesStatement returns [::shared_ptr<list_roles_statement> stmt]
+listRolesStatement returns [std::unique_ptr<list_roles_statement> stmt]
    @init {
        bool recursive = true;
        std::optional<cql3::role_name> grantee;
@@ -1223,7 +1223,7 @@ listRolesStatement returns [::shared_ptr<list_roles_statement> stmt]
    : K_LIST K_ROLES
        (K_OF g=userOrRoleName { grantee = std::move(g); })?
        (K_NORECURSIVE { recursive = false; })?
-        { $stmt = ::make_shared<list_roles_statement>(grantee, recursive); }
+        { $stmt = std::make_unique<list_roles_statement>(grantee, recursive); }
    ;

 roleOptions[cql3::role_options& opts]
@@ -1258,17 +1258,17 @@ ident returns [shared_ptr<cql3::column_identifier> id]
 // Keyspace & Column family names
 keyspaceName returns [sstring id]
    @init { auto name = make_shared<cql3::cf_name>(); }
-    : ksName[name] { $id = name->get_keyspace(); }
+    : ksName[*name] { $id = name->get_keyspace(); }
    ;

 indexName returns [::shared_ptr<cql3::index_name> name]
    @init { $name = ::make_shared<cql3::index_name>(); }
-    : (ksName[name] '.')? idxName[name]
+    : (ksName[*name] '.')? idxName[*name]
    ;

 columnFamilyName returns [::shared_ptr<cql3::cf_name> name]
    @init { $name = ::make_shared<cql3::cf_name>(); }
-    : (ksName[name] '.')? cfName[name]
+    : (ksName[*name] '.')? cfName[*name]
    ;

 userTypeName returns [uninitialized<cql3::ut_name> name]
@@ -1283,24 +1283,24 @@ userOrRoleName returns [uninitialized<cql3::role_name> name]
    | QMARK {add_recognition_error("Bind variables cannot be used for role names");}
    ;

-ksName[::shared_ptr<cql3::keyspace_element_name> name]
-    : t=IDENT              { $name->set_keyspace($t.text, false);}
-    | t=QUOTED_NAME        { $name->set_keyspace($t.text, true);}
-    | k=unreserved_keyword { $name->set_keyspace(k, false);}
+ksName[cql3::keyspace_element_name& name]
+    : t=IDENT              { $name.set_keyspace($t.text, false);}
+    | t=QUOTED_NAME        { $name.set_keyspace($t.text, true);}
+    | k=unreserved_keyword { $name.set_keyspace(k, false);}
    | QMARK {add_recognition_error("Bind variables cannot be used for keyspace names");}
    ;

-cfName[::shared_ptr<cql3::cf_name> name]
-    : t=IDENT              { $name->set_column_family($t.text, false); }
-    | t=QUOTED_NAME        { $name->set_column_family($t.text, true); }
-    | k=unreserved_keyword { $name->set_column_family(k, false); }
+cfName[cql3::cf_name& name]
+    : t=IDENT              { $name.set_column_family($t.text, false); }
+    | t=QUOTED_NAME        { $name.set_column_family($t.text, true); }
+    | k=unreserved_keyword { $name.set_column_family(k, false); }
    | QMARK {add_recognition_error("Bind variables cannot be used for table names");}
    ;

-idxName[::shared_ptr<cql3::index_name> name]
-    : t=IDENT              { $name->set_index($t.text, false); }
-    | t=QUOTED_NAME        { $name->set_index($t.text, true);}
-    | k=unreserved_keyword { $name->set_index(k, false); }
+idxName[cql3::index_name& name]
+    : t=IDENT              { $name.set_index($t.text, false); }
+    | t=QUOTED_NAME        { $name.set_index($t.text, true);}
+    | k=unreserved_keyword { $name.set_index(k, false); }
    | QMARK {add_recognition_error("Bind variables cannot be used for index names");}
    ;

@@ -1489,13 +1489,13 @@ columnCondition[conditions_type& conditions]
        )
    ;

-properties[::shared_ptr<cql3::statements::property_definitions> props]
+properties[cql3::statements::property_definitions& props]
    : property[props] (K_AND property[props])*
    ;

-property[::shared_ptr<cql3::statements::property_definitions> props]
-    : k=ident '=' simple=propertyValue { try { $props->add_property(k->to_string(), simple); } catch (exceptions::syntax_exception e) { add_recognition_error(e.what()); } }
-    | k=ident '=' map=mapLiteral { try { $props->add_property(k->to_string(), convert_property_map(map)); } catch (exceptions::syntax_exception e) { add_recognition_error(e.what()); } }
+property[cql3::statements::property_definitions& props]
+    : k=ident '=' simple=propertyValue { try { $props.add_property(k->to_string(), simple); } catch (exceptions::syntax_exception e) { add_recognition_error(e.what()); } }
+    | k=ident '=' map=mapLiteral { try { $props.add_property(k->to_string(), convert_property_map(map)); } catch (exceptions::syntax_exception e) { add_recognition_error(e.what()); } }
    ;

 propertyValue returns [sstring str]
--- a/cql3/abstract_marker.cc
+++ b/cql3/abstract_marker.cc
@@ -50,7 +50,7 @@

 namespace cql3 {

-abstract_marker::abstract_marker(int32_t bind_index, ::shared_ptr<column_specification>&& receiver)
+abstract_marker::abstract_marker(int32_t bind_index, lw_shared_ptr<column_specification>&& receiver)
    : _bind_index{bind_index}
    , _receiver{std::move(receiver)}
 { }
@@ -67,7 +67,7 @@ abstract_marker::raw::raw(int32_t bind_index)
    : _bind_index{bind_index}
 { }

-::shared_ptr<term> abstract_marker::raw::prepare(database& db, const sstring& keyspace, ::shared_ptr<column_specification> receiver) const
+::shared_ptr<term> abstract_marker::raw::prepare(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const
 {
    if (receiver->type->is_collection()) {
        if (receiver->type->get_kind() == abstract_type::kind::list) {
@@ -87,7 +87,7 @@ abstract_marker::raw::raw(int32_t bind_index)
    return ::make_shared<constants::marker>(_bind_index, receiver);
 }

-assignment_testable::test_result abstract_marker::raw::test_assignment(database& db, const sstring& keyspace, ::shared_ptr<column_specification> receiver) const {
+assignment_testable::test_result abstract_marker::raw::test_assignment(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const {
    return assignment_testable::test_result::WEAKLY_ASSIGNABLE;
 }

@@ -99,13 +99,13 @@ abstract_marker::in_raw::in_raw(int32_t bind_index)
    : raw{bind_index}
 { }

-::shared_ptr<column_specification> abstract_marker::in_raw::make_in_receiver(::shared_ptr<column_specification> receiver) {
-    auto in_name = ::make_shared<column_identifier>(sstring("in(") + receiver->name->to_string() + sstring(")"), true);
-    return ::make_shared<column_specification>(receiver->ks_name, receiver->cf_name, in_name, list_type_impl::get_instance(receiver->type, false));
+lw_shared_ptr<column_specification> abstract_marker::in_raw::make_in_receiver(const column_specification& receiver) {
+    auto in_name = ::make_shared<column_identifier>(sstring("in(") + receiver.name->to_string() + sstring(")"), true);
+    return make_lw_shared<column_specification>(receiver.ks_name, receiver.cf_name, in_name, list_type_impl::get_instance(receiver.type, false));
 }

-::shared_ptr<term> abstract_marker::in_raw::prepare(database& db, const sstring& keyspace, ::shared_ptr<column_specification> receiver) const {
-    return ::make_shared<lists::marker>(_bind_index, make_in_receiver(receiver));
+::shared_ptr<term> abstract_marker::in_raw::prepare(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const {
+    return ::make_shared<lists::marker>(_bind_index, make_in_receiver(*receiver));
 }

 }
--- a/cql3/abstract_marker.hh
+++ b/cql3/abstract_marker.hh
@@ -53,9 +53,9 @@ namespace cql3 {
 class abstract_marker : public non_terminal {
 protected:
    const int32_t _bind_index;
-    const ::shared_ptr<column_specification> _receiver;
+    const lw_shared_ptr<column_specification> _receiver;
 public:
-    abstract_marker(int32_t bind_index, ::shared_ptr<column_specification>&& receiver);
+    abstract_marker(int32_t bind_index, lw_shared_ptr<column_specification>&& receiver);

    virtual void collect_marker_specification(variable_specifications& bound_names) const override;

@@ -70,9 +70,9 @@ public:
    public:
        raw(int32_t bind_index);

-        virtual ::shared_ptr<term> prepare(database& db, const sstring& keyspace, ::shared_ptr<column_specification> receiver) const override;
+        virtual ::shared_ptr<term> prepare(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const override;

-        virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, ::shared_ptr<column_specification> receiver) const override;
+        virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const override;

        virtual sstring to_string() const override;
    };
@@ -87,9 +87,9 @@ public:
    public:
        in_raw(int32_t bind_index);
    private:
-        static ::shared_ptr<column_specification> make_in_receiver(::shared_ptr<column_specification> receiver);
+        static lw_shared_ptr<column_specification> make_in_receiver(const column_specification& receiver);
    public:
-        virtual ::shared_ptr<term> prepare(database& db, const sstring& keyspace, ::shared_ptr<column_specification> receiver) const override;
+        virtual ::shared_ptr<term> prepare(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const override;
    };
 };

--- a/cql3/assignment_testable.hh
+++ b/cql3/assignment_testable.hh
@@ -70,7 +70,7 @@ public:
    // Test all elements of toTest for assignment. If all are exact match, return exact match. If any is not assignable,
    // return not assignable. Otherwise, return weakly assignable.
    template <typename AssignmentTestablePtrRange>
-    static test_result test_all(database& db, const sstring& keyspace, ::shared_ptr<column_specification> receiver,
+    static test_result test_all(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver,
                AssignmentTestablePtrRange&& to_test) {
        test_result res = test_result::EXACT_MATCH;
        for (auto&& rt : to_test) {
@@ -99,7 +99,7 @@ public:
     * Most caller should just call the isAssignable() method on the result, though functions have a use for
     * testing "strong" equality to decide the most precise overload to pick when multiple could match.
     */
-    virtual test_result test_assignment(database& db, const sstring& keyspace, ::shared_ptr<column_specification> receiver) const = 0;
+    virtual test_result test_assignment(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const = 0;

    // for error reporting
    virtual sstring assignment_testable_source_context() const = 0;
--- a/cql3/attributes.cc
+++ b/cql3/attributes.cc
@@ -135,12 +135,12 @@ std::unique_ptr<attributes> attributes::raw::prepare(database& db, const sstring
    return std::unique_ptr<attributes>{new attributes{std::move(ts), std::move(ttl)}};
 }

-::shared_ptr<column_specification> attributes::raw::timestamp_receiver(const sstring& ks_name, const sstring& cf_name) const {
-    return ::make_shared<column_specification>(ks_name, cf_name, ::make_shared<column_identifier>("[timestamp]", true), data_type_for<int64_t>());
+lw_shared_ptr<column_specification> attributes::raw::timestamp_receiver(const sstring& ks_name, const sstring& cf_name) const {
+    return make_lw_shared<column_specification>(ks_name, cf_name, ::make_shared<column_identifier>("[timestamp]", true), data_type_for<int64_t>());
 }

-::shared_ptr<column_specification> attributes::raw::time_to_live_receiver(const sstring& ks_name, const sstring& cf_name) const {
-    return ::make_shared<column_specification>(ks_name, cf_name, ::make_shared<column_identifier>("[ttl]", true), data_type_for<int32_t>());
+lw_shared_ptr<column_specification> attributes::raw::time_to_live_receiver(const sstring& ks_name, const sstring& cf_name) const {
+    return make_lw_shared<column_specification>(ks_name, cf_name, ::make_shared<column_identifier>("[ttl]", true), data_type_for<int32_t>());
 }

 }
--- a/cql3/attributes.hh
+++ b/cql3/attributes.hh
@@ -78,9 +78,9 @@ public:

        std::unique_ptr<attributes> prepare(database& db, const sstring& ks_name, const sstring& cf_name) const;
    private:
-        ::shared_ptr<column_specification> timestamp_receiver(const sstring& ks_name, const sstring& cf_name) const;
+        lw_shared_ptr<column_specification> timestamp_receiver(const sstring& ks_name, const sstring& cf_name) const;

-        ::shared_ptr<column_specification> time_to_live_receiver(const sstring& ks_name, const sstring& cf_name) const;
+        lw_shared_ptr<column_specification> time_to_live_receiver(const sstring& ks_name, const sstring& cf_name) const;
    };
 };

--- a/cql3/column_condition.cc
+++ b/cql3/column_condition.cc
@@ -291,13 +291,13 @@ bool column_condition::applies_to(const data_value* cell_value, const query_opti
    }
 }

-::shared_ptr<column_condition>
+lw_shared_ptr<column_condition>
 column_condition::raw::prepare(database& db, const sstring& keyspace, const column_definition& receiver) const {
    if (receiver.type->is_counter()) {
        throw exceptions::invalid_request_exception("Conditions on counters are not supported");
    }
    shared_ptr<term> collection_element_term;
-    shared_ptr<column_specification> value_spec = receiver.column_specification;
+    lw_shared_ptr<column_specification> value_spec = receiver.column_specification;

    if (_collection_element) {
        if (!receiver.type->is_collection()) {
@@ -306,7 +306,7 @@ column_condition::raw::prepare(database& db, const sstring& keyspace, const colu
        }
        // Pass  a correct type specification to the collection_element->prepare(), so that it can
        // later be used to validate the parameter type is compatible with receiver type.
-        shared_ptr<column_specification> element_spec;
+        lw_shared_ptr<column_specification> element_spec;
        auto ctype = static_cast<const collection_type_impl*>(receiver.type.get());
        const column_specification& recv_column_spec = *receiver.column_specification;
        if (ctype->get_kind() == abstract_type::kind::list) {
--- a/cql3/column_condition.hh
+++ b/cql3/column_condition.hh
@@ -104,16 +104,16 @@ public:
     * "IF col = 'foo'"
     * "IF col LIKE <pattern>"
     */
-    static ::shared_ptr<column_condition> condition(const column_definition& def, ::shared_ptr<term> collection_element,
+    static lw_shared_ptr<column_condition> condition(const column_definition& def, ::shared_ptr<term> collection_element,
            ::shared_ptr<term> value, std::unique_ptr<like_matcher> matcher, const operator_type& op) {
-        return ::make_shared<column_condition>(def, std::move(collection_element), std::move(value),
+        return make_lw_shared<column_condition>(def, std::move(collection_element), std::move(value),
            std::vector<::shared_ptr<term>>{}, std::move(matcher), op);
    }

    // Helper constructor wrapper for  "IF col IN ... and IF col['key'] IN ... */
-    static ::shared_ptr<column_condition> in_condition(const column_definition& def, ::shared_ptr<term> collection_element,
+    static lw_shared_ptr<column_condition> in_condition(const column_definition& def, ::shared_ptr<term> collection_element,
            ::shared_ptr<term> in_marker, std::vector<::shared_ptr<term>> in_values) {
-        return ::make_shared<column_condition>(def, std::move(collection_element), std::move(in_marker),
+        return make_lw_shared<column_condition>(def, std::move(collection_element), std::move(in_marker),
            std::move(in_values), nullptr, operator_type::IN);
    }

@@ -146,9 +146,9 @@ public:
         * "IF col = 'foo'"
         * "IF col LIKE 'foo%'"
         */
-        static ::shared_ptr<raw> simple_condition(::shared_ptr<term::raw> value, ::shared_ptr<term::raw> collection_element,
+        static lw_shared_ptr<raw> simple_condition(::shared_ptr<term::raw> value, ::shared_ptr<term::raw> collection_element,
                const operator_type& op) {
-            return ::make_shared<raw>(std::move(value), std::vector<::shared_ptr<term::raw>>{},
+            return make_lw_shared<raw>(std::move(value), std::vector<::shared_ptr<term::raw>>{},
                    ::shared_ptr<abstract_marker::in_raw>{}, std::move(collection_element), op);
        }

@@ -160,13 +160,13 @@ public:
         * "IF col['key'] IN * ('foo', 'bar', ...)"
         * "IF col['key'] IN ?"
         */
-        static ::shared_ptr<raw> in_condition(::shared_ptr<term::raw> collection_element,
+        static lw_shared_ptr<raw> in_condition(::shared_ptr<term::raw> collection_element,
                ::shared_ptr<abstract_marker::in_raw> in_marker, std::vector<::shared_ptr<term::raw>> in_values) {
-            return ::make_shared<raw>(::shared_ptr<term::raw>{}, std::move(in_values), std::move(in_marker),
+            return make_lw_shared<raw>(::shared_ptr<term::raw>{}, std::move(in_values), std::move(in_marker),
                    std::move(collection_element), operator_type::IN);
        }

-        ::shared_ptr<column_condition> prepare(database& db, const sstring& keyspace, const column_definition& receiver) const;
+        lw_shared_ptr<column_condition> prepare(database& db, const sstring& keyspace, const column_definition& receiver) const;
    };
 };

--- a/cql3/column_specification.cc
+++ b/cql3/column_specification.cc
@@ -51,7 +51,7 @@ column_specification::column_specification(std::string_view ks_name_, std::strin
    { }


-bool column_specification::all_in_same_table(const std::vector<::shared_ptr<column_specification>>& names)
+bool column_specification::all_in_same_table(const std::vector<lw_shared_ptr<column_specification>>& names)
 {
    assert(!names.empty());

--- a/cql3/column_specification.hh
+++ b/cql3/column_specification.hh
@@ -45,7 +45,6 @@

 namespace cql3 {

-class column_specification;
 class column_identifier;

 class column_specification final {
@@ -63,15 +62,15 @@ public:
     * @param alias the column alias
     * @return a new <code>ColumnSpecification</code> for the same column but with the specified alias.
     */
-    ::shared_ptr<column_specification> with_alias(::shared_ptr<column_identifier> alias) {
-        return ::make_shared<column_specification>(ks_name, cf_name, alias, type);
+    lw_shared_ptr<column_specification> with_alias(::shared_ptr<column_identifier> alias) {
+        return make_lw_shared<column_specification>(ks_name, cf_name, alias, type);
    }
    
    bool is_reversed_type() const {
        return ::dynamic_pointer_cast<const reversed_type_impl>(type) != nullptr;
    }

-    static bool all_in_same_table(const std::vector<::shared_ptr<column_specification>>& names);
+    static bool all_in_same_table(const std::vector<lw_shared_ptr<column_specification>>& names);
 };

 }
--- a/cql3/constants.cc
+++ b/cql3/constants.cc
@@ -82,7 +82,7 @@ constants::literal::parsed_value(data_type validator) const
 }

 assignment_testable::test_result
-constants::literal::test_assignment(database& db, const sstring& keyspace, ::shared_ptr<column_specification> receiver) const
+constants::literal::test_assignment(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const
 {
    auto receiver_type = receiver->type->as_cql3_type();
    if (receiver_type.is_collection() || receiver_type.is_user_type()) {
@@ -155,7 +155,7 @@ constants::literal::test_assignment(database& db, const sstring& keyspace, ::sha
 }

 ::shared_ptr<term>
-constants::literal::prepare(database& db, const sstring& keyspace, ::shared_ptr<column_specification> receiver) const
+constants::literal::prepare(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const
 {
    if (!is_assignable(test_assignment(db, keyspace, receiver))) {
        throw exceptions::invalid_request_exception(format("Invalid {} constant ({}) for \"{}\" of type {}",
--- a/cql3/constants.hh
+++ b/cql3/constants.hh
@@ -87,7 +87,7 @@ public:
        };
    public:
        static thread_local const ::shared_ptr<terminal> NULL_VALUE;
-        virtual ::shared_ptr<term> prepare(database& db, const sstring& keyspace, ::shared_ptr<column_specification> receiver) const override {
+        virtual ::shared_ptr<term> prepare(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const override {
            if (!is_assignable(test_assignment(db, keyspace, receiver))) {
                throw exceptions::invalid_request_exception("Invalid null value for counter increment/decrement");
            }
@@ -96,7 +96,7 @@ public:

        virtual assignment_testable::test_result test_assignment(database& db,
            const sstring& keyspace,
-            ::shared_ptr<column_specification> receiver) const override {
+            lw_shared_ptr<column_specification> receiver) const override {
                return receiver->type->is_counter()
                    ? assignment_testable::test_result::NOT_ASSIGNABLE
                    : assignment_testable::test_result::WEAKLY_ASSIGNABLE;
@@ -153,7 +153,7 @@ public:
            return ::make_shared<literal>(type::DURATION, text);
        }

-        virtual ::shared_ptr<term> prepare(database& db, const sstring& keyspace, ::shared_ptr<column_specification> receiver) const override;
+        virtual ::shared_ptr<term> prepare(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const override;
    private:
        bytes parsed_value(data_type validator) const;
    public:
@@ -161,7 +161,7 @@ public:
            return _text;
        }

-        virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, ::shared_ptr<column_specification> receiver) const;
+        virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const;

        virtual sstring to_string() const override {
            return _type == type::STRING ? sstring(format("'{}'", _text)) : _text;
@@ -170,7 +170,7 @@ public:

    class marker : public abstract_marker {
    public:
-        marker(int32_t bind_index, ::shared_ptr<column_specification> receiver)
+        marker(int32_t bind_index, lw_shared_ptr<column_specification> receiver)
            : abstract_marker{bind_index, std::move(receiver)}
        {
            assert(!_receiver->type->is_collection() && !_receiver->type->is_user_type());
--- a/cql3/functions/aggregate_fcts.cc
+++ b/cql3/functions/aggregate_fcts.cc
@@ -267,10 +267,13 @@ public:
    }
 };

-/// The same as `impl_max_function_for' but without knowledge of `Type'.
+/// The same as `impl_max_function_for' but without compile-time dependency on `Type'.
 class impl_max_dynamic_function final : public aggregate_function::aggregate {
+    data_type _io_type;
    opt_bytes _max;
 public:
+    impl_max_dynamic_function(data_type io_type) : _io_type(std::move(io_type)) {}
+
    virtual void reset() override {
        _max = {};
    }
@@ -278,12 +281,11 @@ public:
        return _max.value_or(bytes{});
    }
    virtual void add_input(cql_serialization_format sf, const std::vector<opt_bytes>& values) override {
-        if (!values[0]) {
+        if (values.empty() || !values[0]) {
            return;
        }
-        const auto val = *values[0];
-        if (!_max || *_max < val) {
-            _max = val;
+        if (!_max || _io_type->less(*_max, *values[0])) {
+            _max = values[0];
        }
    }
 };
@@ -298,10 +300,13 @@ public:
 };

 class max_dynamic_function final : public native_aggregate_function {
+    data_type _io_type;
 public:
-    max_dynamic_function(data_type io_type) : native_aggregate_function("max", io_type, { io_type }) {}
+    max_dynamic_function(data_type io_type)
+            : native_aggregate_function("max", io_type, { io_type })
+            , _io_type(std::move(io_type)) {}
    virtual std::unique_ptr<aggregate> new_aggregate() override {
-        return std::make_unique<impl_max_dynamic_function>();
+        return std::make_unique<impl_max_dynamic_function>(_io_type);
    }
 };

@@ -358,10 +363,13 @@ public:
    }
 };

-/// The same as `impl_min_function_for' but without knowledge of `Type'.
+/// The same as `impl_min_function_for' but without compile-time dependency on `Type'.
 class impl_min_dynamic_function final : public aggregate_function::aggregate {
+    data_type _io_type;
    opt_bytes _min;
 public:
+    impl_min_dynamic_function(data_type io_type) : _io_type(std::move(io_type)) {}
+
    virtual void reset() override {
        _min = {};
    }
@@ -369,12 +377,11 @@ public:
        return _min.value_or(bytes{});
    }
    virtual void add_input(cql_serialization_format sf, const std::vector<opt_bytes>& values) override {
-        if (!values[0]) {
+        if (values.empty() || !values[0]) {
            return;
        }
-        const auto val = *values[0];
-        if (!_min || val < *_min) {
-            _min = val;
+        if (!_min || _io_type->less(*values[0], *_min)) {
+            _min = values[0];
        }
    }
 };
@@ -389,10 +396,13 @@ public:
 };

 class min_dynamic_function final : public native_aggregate_function {
+    data_type _io_type;
 public:
-    min_dynamic_function(data_type io_type) : native_aggregate_function("min", io_type, { io_type }) {}
+    min_dynamic_function(data_type io_type)
+            : native_aggregate_function("min", io_type, { io_type })
+            , _io_type(std::move(io_type)) {}
    virtual std::unique_ptr<aggregate> new_aggregate() override {
-        return std::make_unique<impl_min_dynamic_function>();
+        return std::make_unique<impl_min_dynamic_function>(_io_type);
    }
 };

--- a/cql3/functions/castas_fcts.cc
+++ b/cql3/functions/castas_fcts.cc
@@ -63,7 +63,7 @@ public:
 };

 shared_ptr<function> make_castas_function(data_type to_type, data_type from_type, castas_fctn func) {
-    return ::make_shared<castas_function_for>(std::move(to_type), std::move(from_type), std::move(func));
+    return ::make_shared<castas_function_for>(std::move(to_type), std::move(from_type), func);
 }

 } /* Anonymous Namespace */
@@ -73,88 +73,69 @@ shared_ptr<function> make_castas_function(data_type to_type, data_type from_type
 */
 namespace {

+static data_value identity_castas_fctn(data_value val) {
+    return val;
+}
+
 using bytes_opt = std::optional<bytes>;

 template<typename ToType, typename FromType>
-std::function<data_value(data_value)> make_castas_fctn_simple() {
-    return [](data_value from) -> data_value {
-        auto val_from = value_cast<FromType>(from);
-        return static_cast<ToType>(val_from);
-    };
+static data_value castas_fctn_simple(data_value from) {
+    auto val_from = value_cast<FromType>(from);
+    return static_cast<ToType>(val_from);
 }

 template<typename ToType>
-std::function<data_value(data_value)> make_castas_fctn_from_decimal_to_float() {
-    return [](data_value from) -> data_value {
-        auto val_from = value_cast<big_decimal>(from);
-        boost::multiprecision::cpp_int ten(10);
-        boost::multiprecision::cpp_rational r = val_from.unscaled_value();
-        r /= boost::multiprecision::pow(ten, val_from.scale());
-        return static_cast<ToType>(r);
-    };
+static data_value castas_fctn_from_decimal_to_float(data_value from) {
+    auto val_from = value_cast<big_decimal>(from);
+    return static_cast<ToType>(val_from.as_rational());
 }

 static utils::multiprecision_int from_decimal_to_cppint(const data_value& from) {
    const auto& val_from = value_cast<big_decimal>(from);
-    boost::multiprecision::cpp_int ten(10);
-    return boost::multiprecision::cpp_int(val_from.unscaled_value() / boost::multiprecision::pow(ten, val_from.scale()));
+    auto r = val_from.as_rational();
+    return utils::multiprecision_int(numerator(r)/denominator(r));
 }

 template<typename ToType>
-std::function<data_value(data_value)> make_castas_fctn_from_varint_to_integer() {
-    return [](data_value from) -> data_value {
-        const auto& varint = value_cast<utils::multiprecision_int>(from);
-        return static_cast<ToType>(from_varint_to_integer(varint));
-    };
+static data_value castas_fctn_from_varint_to_integer(data_value from) {
+    const auto& varint = value_cast<utils::multiprecision_int>(from);
+    return static_cast<ToType>(from_varint_to_integer(varint));
 }

 template<typename ToType>
-std::function<data_value(data_value)> make_castas_fctn_from_decimal_to_integer() {
-    return [](data_value from) -> data_value {
-        auto varint = from_decimal_to_cppint(from);
-        return static_cast<ToType>(from_varint_to_integer(varint));
-    };
+static data_value castas_fctn_from_decimal_to_integer(data_value from) {
+    auto varint = from_decimal_to_cppint(from);
+    return static_cast<ToType>(from_varint_to_integer(varint));
 }

-std::function<data_value(data_value)> make_castas_fctn_from_decimal_to_varint() {
-    return [](data_value from) -> data_value {
-        return from_decimal_to_cppint(from);
-    };
+static data_value castas_fctn_from_decimal_to_varint(data_value from) {
+    return from_decimal_to_cppint(from);
 }

 template<typename FromType>
-std::function<data_value(data_value)> make_castas_fctn_from_integer_to_decimal() {
-    return [](data_value from) -> data_value {
-        auto val_from = value_cast<FromType>(from);
-        return big_decimal(1, 10*static_cast<boost::multiprecision::cpp_int>(val_from));
-    };
+static data_value castas_fctn_from_integer_to_decimal(data_value from) {
+    auto val_from = value_cast<FromType>(from);
+    return big_decimal(1, 10*static_cast<boost::multiprecision::cpp_int>(val_from));
 }

 template<typename FromType>
-std::function<data_value(data_value)> make_castas_fctn_from_float_to_decimal() {
-    return [](data_value from) -> data_value {
-        auto val_from = value_cast<FromType>(from);
-        return big_decimal(boost::lexical_cast<std::string>(val_from));
-    };
+static data_value castas_fctn_from_float_to_decimal(data_value from) {
+    auto val_from = value_cast<FromType>(from);
+    return big_decimal(boost::lexical_cast<std::string>(val_from));
 }

 template<typename FromType>
-std::function<data_value(data_value)> make_castas_fctn_to_string() {
-    return [](data_value from) -> data_value {
-        return to_sstring(value_cast<FromType>(from));
-    };
+static data_value castas_fctn_to_string(data_value from) {
+    return to_sstring(value_cast<FromType>(from));
 }

-std::function<data_value(data_value)> make_castas_fctn_from_varint_to_string() {
-    return [](data_value from) -> data_value {
-        return to_sstring(value_cast<utils::multiprecision_int>(from).str());
-    };
+static data_value castas_fctn_from_varint_to_string(data_value from) {
+    return to_sstring(value_cast<utils::multiprecision_int>(from).str());
 }

-std::function<data_value(data_value)> make_castas_fctn_from_decimal_to_string() {
-    return [](data_value from) -> data_value {
-        return value_cast<big_decimal>(from).to_string();
-    };
+static data_value castas_fctn_from_decimal_to_string(data_value from) {
+    return value_cast<big_decimal>(from).to_string();
 }

 db_clock::time_point millis_to_time_point(const int64_t millis) {
@@ -177,178 +158,237 @@ db_clock::time_point date_to_time_point(const uint32_t date) {
    return db_clock::time_point(std::chrono::duration_cast<db_clock::duration>(millis));
 }

-std::function<data_value(data_value)> make_castas_fctn_from_timestamp_to_date() {
-    return [](data_value from) -> data_value {
-        const auto val_from = value_cast<db_clock::time_point>(from);
-        return time_point_to_date(val_from);
-    };
+static data_value castas_fctn_from_timestamp_to_date(data_value from) {
+    const auto val_from = value_cast<db_clock::time_point>(from);
+    return time_point_to_date(val_from);
 }

-std::function<data_value(data_value)> make_castas_fctn_from_date_to_timestamp() {
-    return [](data_value from) -> data_value {
-        const auto val_from = value_cast<uint32_t>(from);
-        return date_to_time_point(val_from);
-    };
+static data_value castas_fctn_from_date_to_timestamp(data_value from) {
+    const auto val_from = value_cast<uint32_t>(from);
+    return date_to_time_point(val_from);
 }

-std::function<data_value(data_value)> make_castas_fctn_from_timeuuid_to_timestamp() {
-    return [](data_value from) -> data_value {
-        const auto val_from = value_cast<utils::UUID>(from);
-        return db_clock::time_point{db_clock::duration{utils::UUID_gen::unix_timestamp(val_from)}};
-    };
+static data_value castas_fctn_from_timeuuid_to_timestamp(data_value from) {
+    const auto val_from = value_cast<utils::UUID>(from);
+    return db_clock::time_point{db_clock::duration{utils::UUID_gen::unix_timestamp(val_from)}};
 }

-std::function<data_value(data_value)> make_castas_fctn_from_timeuuid_to_date() {
-    return [](data_value from) -> data_value {
-        const auto val_from = value_cast<utils::UUID>(from);
-        return time_point_to_date(millis_to_time_point(utils::UUID_gen::unix_timestamp(val_from)));
-    };
+static data_value castas_fctn_from_timeuuid_to_date(data_value from) {
+    const auto val_from = value_cast<utils::UUID>(from);
+    return time_point_to_date(millis_to_time_point(utils::UUID_gen::unix_timestamp(val_from)));
 }

-static std::function<data_value(data_value)> make_castas_fctn_from_dv_to_string() {
-    return [](data_value from) -> data_value {
-        return from.type()->to_string_impl(from);
-    };
+static data_value castas_fctn_from_dv_to_string(data_value from) {
+    return from.type()->to_string_impl(from);
 }

 // FIXME: Add conversions for counters, after they are fully implemented...

-// Map <ToType, FromType> -> castas_fctn
-using castas_fctn_key = std::pair<data_type, data_type>;
-struct castas_fctn_hash {
-    std::size_t operator()(const castas_fctn_key& x) const noexcept {
-        return boost::hash_value(x);
+static constexpr unsigned next_power_of_2(unsigned val) {
+    unsigned ret = 1;
+    while (ret <= val) {
+        ret *= 2;
    }
-};
-using castas_fctns_map = std::unordered_map<castas_fctn_key, castas_fctn, castas_fctn_hash>;
-
-// List of supported castas functions...
-thread_local castas_fctns_map castas_fctns {
-    { {byte_type, byte_type}, make_castas_fctn_simple<int8_t, int8_t>() },
-    { {byte_type, short_type}, make_castas_fctn_simple<int8_t, int16_t>() },
-    { {byte_type, int32_type}, make_castas_fctn_simple<int8_t, int32_t>() },
-    { {byte_type, long_type}, make_castas_fctn_simple<int8_t, int64_t>() },
-    { {byte_type, float_type}, make_castas_fctn_simple<int8_t, float>() },
-    { {byte_type, double_type}, make_castas_fctn_simple<int8_t, double>() },
-    { {byte_type, varint_type}, make_castas_fctn_from_varint_to_integer<int8_t>() },
-    { {byte_type, decimal_type}, make_castas_fctn_from_decimal_to_integer<int8_t>() },
-
-    { {short_type, byte_type}, make_castas_fctn_simple<int16_t, int8_t>() },
-    { {short_type, short_type}, make_castas_fctn_simple<int16_t, int16_t>() },
-    { {short_type, int32_type}, make_castas_fctn_simple<int16_t, int32_t>() },
-    { {short_type, long_type}, make_castas_fctn_simple<int16_t, int64_t>() },
-    { {short_type, float_type}, make_castas_fctn_simple<int16_t, float>() },
-    { {short_type, double_type}, make_castas_fctn_simple<int16_t, double>() },
-    { {short_type, varint_type}, make_castas_fctn_from_varint_to_integer<int16_t>() },
-    { {short_type, decimal_type}, make_castas_fctn_from_decimal_to_integer<int16_t>() },
-
-    { {int32_type, byte_type}, make_castas_fctn_simple<int32_t, int8_t>() },
-    { {int32_type, short_type}, make_castas_fctn_simple<int32_t, int16_t>() },
-    { {int32_type, int32_type}, make_castas_fctn_simple<int32_t, int32_t>() },
-    { {int32_type, long_type}, make_castas_fctn_simple<int32_t, int64_t>() },
-    { {int32_type, float_type}, make_castas_fctn_simple<int32_t, float>() },
-    { {int32_type, double_type}, make_castas_fctn_simple<int32_t, double>() },
-    { {int32_type, varint_type}, make_castas_fctn_from_varint_to_integer<int32_t>() },
-    { {int32_type, decimal_type}, make_castas_fctn_from_decimal_to_integer<int32_t>() },
-
-    { {long_type, byte_type}, make_castas_fctn_simple<int64_t, int8_t>() },
-    { {long_type, short_type}, make_castas_fctn_simple<int64_t, int16_t>() },
-    { {long_type, int32_type}, make_castas_fctn_simple<int64_t, int32_t>() },
-    { {long_type, long_type}, make_castas_fctn_simple<int64_t, int64_t>() },
-    { {long_type, float_type}, make_castas_fctn_simple<int64_t, float>() },
-    { {long_type, double_type}, make_castas_fctn_simple<int64_t, double>() },
-    { {long_type, varint_type}, make_castas_fctn_from_varint_to_integer<int64_t>() },
-    { {long_type, decimal_type}, make_castas_fctn_from_decimal_to_integer<int64_t>() },
-
-    { {float_type, byte_type}, make_castas_fctn_simple<float, int8_t>() },
-    { {float_type, short_type}, make_castas_fctn_simple<float, int16_t>() },
-    { {float_type, int32_type}, make_castas_fctn_simple<float, int32_t>() },
-    { {float_type, long_type}, make_castas_fctn_simple<float, int64_t>() },
-    { {float_type, float_type}, make_castas_fctn_simple<float, float>() },
-    { {float_type, double_type}, make_castas_fctn_simple<float, double>() },
-    { {float_type, varint_type}, make_castas_fctn_simple<float, utils::multiprecision_int>() },
-    { {float_type, decimal_type}, make_castas_fctn_from_decimal_to_float<float>() },
-
-    { {double_type, byte_type}, make_castas_fctn_simple<double, int8_t>() },
-    { {double_type, short_type}, make_castas_fctn_simple<double, int16_t>() },
-    { {double_type, int32_type}, make_castas_fctn_simple<double, int32_t>() },
-    { {double_type, long_type}, make_castas_fctn_simple<double, int64_t>() },
-    { {double_type, float_type}, make_castas_fctn_simple<double, float>() },
-    { {double_type, double_type}, make_castas_fctn_simple<double, double>() },
-    { {double_type, varint_type}, make_castas_fctn_simple<double, utils::multiprecision_int>() },
-    { {double_type, decimal_type}, make_castas_fctn_from_decimal_to_float<double>() },
-
-    { {varint_type, byte_type}, make_castas_fctn_simple<utils::multiprecision_int, int8_t>() },
-    { {varint_type, short_type}, make_castas_fctn_simple<utils::multiprecision_int, int16_t>() },
-    { {varint_type, int32_type}, make_castas_fctn_simple<utils::multiprecision_int, int32_t>() },
-    { {varint_type, long_type}, make_castas_fctn_simple<utils::multiprecision_int, int64_t>() },
-    { {varint_type, float_type}, make_castas_fctn_simple<utils::multiprecision_int, float>() },
-    { {varint_type, double_type}, make_castas_fctn_simple<utils::multiprecision_int, double>() },
-    { {varint_type, varint_type}, make_castas_fctn_simple<utils::multiprecision_int, utils::multiprecision_int>() },
-    { {varint_type, decimal_type}, make_castas_fctn_from_decimal_to_varint() },
-
-    { {decimal_type, byte_type}, make_castas_fctn_from_integer_to_decimal<int8_t>() },
-    { {decimal_type, short_type}, make_castas_fctn_from_integer_to_decimal<int16_t>() },
-    { {decimal_type, int32_type}, make_castas_fctn_from_integer_to_decimal<int32_t>() },
-    { {decimal_type, long_type}, make_castas_fctn_from_integer_to_decimal<int64_t>() },
-    { {decimal_type, float_type}, make_castas_fctn_from_float_to_decimal<float>() },
-    { {decimal_type, double_type}, make_castas_fctn_from_float_to_decimal<double>() },
-    { {decimal_type, varint_type}, make_castas_fctn_from_integer_to_decimal<utils::multiprecision_int>() },
-    { {decimal_type, decimal_type}, make_castas_fctn_simple<big_decimal, big_decimal>() },
-
-    { {ascii_type, byte_type}, make_castas_fctn_to_string<int8_t>() },
-    { {ascii_type, short_type}, make_castas_fctn_to_string<int16_t>() },
-    { {ascii_type, int32_type}, make_castas_fctn_to_string<int32_t>() },
-    { {ascii_type, long_type}, make_castas_fctn_to_string<int64_t>() },
-    { {ascii_type, float_type}, make_castas_fctn_to_string<float>() },
-    { {ascii_type, double_type}, make_castas_fctn_to_string<double>() },
-    { {ascii_type, varint_type}, make_castas_fctn_from_varint_to_string() },
-    { {ascii_type, decimal_type}, make_castas_fctn_from_decimal_to_string() },
-
-    { {utf8_type, byte_type}, make_castas_fctn_to_string<int8_t>() },
-    { {utf8_type, short_type}, make_castas_fctn_to_string<int16_t>() },
-    { {utf8_type, int32_type}, make_castas_fctn_to_string<int32_t>() },
-    { {utf8_type, long_type}, make_castas_fctn_to_string<int64_t>() },
-    { {utf8_type, float_type}, make_castas_fctn_to_string<float>() },
-    { {utf8_type, double_type}, make_castas_fctn_to_string<double>() },
-    { {utf8_type, varint_type}, make_castas_fctn_from_varint_to_string() },
-    { {utf8_type, decimal_type}, make_castas_fctn_from_decimal_to_string() },
-
-    { {simple_date_type, timestamp_type}, make_castas_fctn_from_timestamp_to_date() },
-    { {simple_date_type, timeuuid_type}, make_castas_fctn_from_timeuuid_to_date() },
-
-    { {timestamp_type, simple_date_type}, make_castas_fctn_from_date_to_timestamp() },
-    { {timestamp_type, timeuuid_type}, make_castas_fctn_from_timeuuid_to_timestamp() },
-
-    { {ascii_type, timestamp_type}, make_castas_fctn_from_dv_to_string() },
-    { {ascii_type, simple_date_type}, make_castas_fctn_from_dv_to_string() },
-    { {ascii_type, time_type}, make_castas_fctn_from_dv_to_string() },
-    { {ascii_type, timeuuid_type}, make_castas_fctn_from_dv_to_string() },
-    { {ascii_type, uuid_type}, make_castas_fctn_from_dv_to_string() },
-    { {ascii_type, boolean_type}, make_castas_fctn_from_dv_to_string() },
-    { {ascii_type, inet_addr_type}, make_castas_fctn_from_dv_to_string() },
-    { {ascii_type, ascii_type}, make_castas_fctn_simple<sstring, sstring>() },
-
-    { {utf8_type, timestamp_type}, make_castas_fctn_from_dv_to_string() },
-    { {utf8_type, simple_date_type}, make_castas_fctn_from_dv_to_string() },
-    { {utf8_type, time_type}, make_castas_fctn_from_dv_to_string() },
-    { {utf8_type, timeuuid_type}, make_castas_fctn_from_dv_to_string() },
-    { {utf8_type, uuid_type}, make_castas_fctn_from_dv_to_string() },
-    { {utf8_type, boolean_type}, make_castas_fctn_from_dv_to_string() },
-    { {utf8_type, inet_addr_type}, make_castas_fctn_from_dv_to_string() },
-    { {utf8_type, ascii_type}, make_castas_fctn_simple<sstring, sstring>() },
-    { {utf8_type, utf8_type}, make_castas_fctn_simple<sstring, sstring>() },
-};
+    return ret;
+}

+static constexpr unsigned next_kind_power_of_2 = next_power_of_2(static_cast<unsigned>(abstract_type::kind::last));
+static constexpr unsigned cast_switch_case_val(abstract_type::kind A, abstract_type::kind B) {
+    return static_cast<unsigned>(A) * next_kind_power_of_2 + static_cast<unsigned>(B);
+}
 } /* Anonymous Namespace */

 castas_fctn get_castas_fctn(data_type to_type, data_type from_type) {
-    auto it_candidate = castas_fctns.find(castas_fctn_key{to_type, from_type});
-    if (it_candidate == castas_fctns.end()) {
-        throw exceptions::invalid_request_exception(format("{} cannot be cast to {}", from_type->name(), to_type->name()));
+    if (from_type == to_type) {
+        // Casting any type to itself doesn't make sense, but it is
+        // harmless so allow it instead of reporting a confusing error
+        // message about TypeX not being castable to TypeX.
+        return identity_castas_fctn;
    }

-    return it_candidate->second;
+    using kind = abstract_type::kind;
+    switch(cast_switch_case_val(to_type->get_kind(), from_type->get_kind())) {
+    case cast_switch_case_val(kind::byte, kind::short_kind):
+        return castas_fctn_simple<int8_t, int16_t>;
+    case cast_switch_case_val(kind::byte, kind::int32):
+        return castas_fctn_simple<int8_t, int32_t>;
+    case cast_switch_case_val(kind::byte, kind::long_kind):
+        return castas_fctn_simple<int8_t, int64_t>;
+    case cast_switch_case_val(kind::byte, kind::float_kind):
+        return castas_fctn_simple<int8_t, float>;
+    case cast_switch_case_val(kind::byte, kind::double_kind):
+        return castas_fctn_simple<int8_t, double>;
+    case cast_switch_case_val(kind::byte, kind::varint):
+        return castas_fctn_from_varint_to_integer<int8_t>;
+    case cast_switch_case_val(kind::byte, kind::decimal):
+        return castas_fctn_from_decimal_to_integer<int8_t>;
+
+    case cast_switch_case_val(kind::short_kind, kind::byte):
+        return castas_fctn_simple<int16_t, int8_t>;
+    case cast_switch_case_val(kind::short_kind, kind::int32):
+        return castas_fctn_simple<int16_t, int32_t>;
+    case cast_switch_case_val(kind::short_kind, kind::long_kind):
+        return castas_fctn_simple<int16_t, int64_t>;
+    case cast_switch_case_val(kind::short_kind, kind::float_kind):
+        return castas_fctn_simple<int16_t, float>;
+    case cast_switch_case_val(kind::short_kind, kind::double_kind):
+        return castas_fctn_simple<int16_t, double>;
+    case cast_switch_case_val(kind::short_kind, kind::varint):
+        return castas_fctn_from_varint_to_integer<int16_t>;
+    case cast_switch_case_val(kind::short_kind, kind::decimal):
+        return castas_fctn_from_decimal_to_integer<int16_t>;
+
+    case cast_switch_case_val(kind::int32, kind::byte):
+        return castas_fctn_simple<int32_t, int8_t>;
+    case cast_switch_case_val(kind::int32, kind::short_kind):
+        return castas_fctn_simple<int32_t, int16_t>;
+    case cast_switch_case_val(kind::int32, kind::long_kind):
+        return castas_fctn_simple<int32_t, int64_t>;
+    case cast_switch_case_val(kind::int32, kind::float_kind):
+        return castas_fctn_simple<int32_t, float>;
+    case cast_switch_case_val(kind::int32, kind::double_kind):
+        return castas_fctn_simple<int32_t, double>;
+    case cast_switch_case_val(kind::int32, kind::varint):
+        return castas_fctn_from_varint_to_integer<int32_t>;
+    case cast_switch_case_val(kind::int32, kind::decimal):
+        return castas_fctn_from_decimal_to_integer<int32_t>;
+
+    case cast_switch_case_val(kind::long_kind, kind::byte):
+        return castas_fctn_simple<int64_t, int8_t>;
+    case cast_switch_case_val(kind::long_kind, kind::short_kind):
+        return castas_fctn_simple<int64_t, int16_t>;
+    case cast_switch_case_val(kind::long_kind, kind::int32):
+        return castas_fctn_simple<int64_t, int32_t>;
+    case cast_switch_case_val(kind::long_kind, kind::float_kind):
+        return castas_fctn_simple<int64_t, float>;
+    case cast_switch_case_val(kind::long_kind, kind::double_kind):
+        return castas_fctn_simple<int64_t, double>;
+    case cast_switch_case_val(kind::long_kind, kind::varint):
+        return castas_fctn_from_varint_to_integer<int64_t>;
+    case cast_switch_case_val(kind::long_kind, kind::decimal):
+        return castas_fctn_from_decimal_to_integer<int64_t>;
+
+    case cast_switch_case_val(kind::float_kind, kind::byte):
+        return castas_fctn_simple<float, int8_t>;
+    case cast_switch_case_val(kind::float_kind, kind::short_kind):
+        return castas_fctn_simple<float, int16_t>;
+    case cast_switch_case_val(kind::float_kind, kind::int32):
+        return castas_fctn_simple<float, int32_t>;
+    case cast_switch_case_val(kind::float_kind, kind::long_kind):
+        return castas_fctn_simple<float, int64_t>;
+    case cast_switch_case_val(kind::float_kind, kind::double_kind):
+        return castas_fctn_simple<float, double>;
+    case cast_switch_case_val(kind::float_kind, kind::varint):
+        return castas_fctn_simple<float, utils::multiprecision_int>;
+    case cast_switch_case_val(kind::float_kind, kind::decimal):
+        return castas_fctn_from_decimal_to_float<float>;
+
+    case cast_switch_case_val(kind::double_kind, kind::byte):
+        return castas_fctn_simple<double, int8_t>;
+    case cast_switch_case_val(kind::double_kind, kind::short_kind):
+        return castas_fctn_simple<double, int16_t>;
+    case cast_switch_case_val(kind::double_kind, kind::int32):
+        return castas_fctn_simple<double, int32_t>;
+    case cast_switch_case_val(kind::double_kind, kind::long_kind):
+        return castas_fctn_simple<double, int64_t>;
+    case cast_switch_case_val(kind::double_kind, kind::float_kind):
+        return castas_fctn_simple<double, float>;
+    case cast_switch_case_val(kind::double_kind, kind::varint):
+        return castas_fctn_simple<double, utils::multiprecision_int>;
+    case cast_switch_case_val(kind::double_kind, kind::decimal):
+        return castas_fctn_from_decimal_to_float<double>;
+
+    case cast_switch_case_val(kind::varint, kind::byte):
+        return castas_fctn_simple<utils::multiprecision_int, int8_t>;
+    case cast_switch_case_val(kind::varint, kind::short_kind):
+        return castas_fctn_simple<utils::multiprecision_int, int16_t>;
+    case cast_switch_case_val(kind::varint, kind::int32):
+        return castas_fctn_simple<utils::multiprecision_int, int32_t>;
+    case cast_switch_case_val(kind::varint, kind::long_kind):
+        return castas_fctn_simple<utils::multiprecision_int, int64_t>;
+    case cast_switch_case_val(kind::varint, kind::float_kind):
+        return castas_fctn_simple<utils::multiprecision_int, float>;
+    case cast_switch_case_val(kind::varint, kind::double_kind):
+        return castas_fctn_simple<utils::multiprecision_int, double>;
+    case cast_switch_case_val(kind::varint, kind::decimal):
+        return castas_fctn_from_decimal_to_varint;
+
+    case cast_switch_case_val(kind::decimal, kind::byte):
+        return castas_fctn_from_integer_to_decimal<int8_t>;
+    case cast_switch_case_val(kind::decimal, kind::short_kind):
+        return castas_fctn_from_integer_to_decimal<int16_t>;
+    case cast_switch_case_val(kind::decimal, kind::int32):
+        return castas_fctn_from_integer_to_decimal<int32_t>;
+    case cast_switch_case_val(kind::decimal, kind::long_kind):
+        return castas_fctn_from_integer_to_decimal<int64_t>;
+    case cast_switch_case_val(kind::decimal, kind::float_kind):
+        return castas_fctn_from_float_to_decimal<float>;
+    case cast_switch_case_val(kind::decimal, kind::double_kind):
+        return castas_fctn_from_float_to_decimal<double>;
+    case cast_switch_case_val(kind::decimal, kind::varint):
+        return castas_fctn_from_integer_to_decimal<utils::multiprecision_int>;
+
+    case cast_switch_case_val(kind::ascii, kind::byte):
+    case cast_switch_case_val(kind::utf8, kind::byte):
+        return castas_fctn_to_string<int8_t>;
+
+    case cast_switch_case_val(kind::ascii, kind::short_kind):
+    case cast_switch_case_val(kind::utf8, kind::short_kind):
+        return castas_fctn_to_string<int16_t>;
+
+    case cast_switch_case_val(kind::ascii, kind::int32):
+    case cast_switch_case_val(kind::utf8, kind::int32):
+        return castas_fctn_to_string<int32_t>;
+
+    case cast_switch_case_val(kind::ascii, kind::long_kind):
+    case cast_switch_case_val(kind::utf8, kind::long_kind):
+        return castas_fctn_to_string<int64_t>;
+
+    case cast_switch_case_val(kind::ascii, kind::float_kind):
+    case cast_switch_case_val(kind::utf8, kind::float_kind):
+        return castas_fctn_to_string<float>;
+
+    case cast_switch_case_val(kind::ascii, kind::double_kind):
+    case cast_switch_case_val(kind::utf8, kind::double_kind):
+        return castas_fctn_to_string<double>;
+
+    case cast_switch_case_val(kind::ascii, kind::varint):
+    case cast_switch_case_val(kind::utf8, kind::varint):
+        return castas_fctn_from_varint_to_string;
+
+    case cast_switch_case_val(kind::ascii, kind::decimal):
+    case cast_switch_case_val(kind::utf8, kind::decimal):
+        return castas_fctn_from_decimal_to_string;
+
+    case cast_switch_case_val(kind::simple_date, kind::timestamp):
+        return castas_fctn_from_timestamp_to_date;
+    case cast_switch_case_val(kind::simple_date, kind::timeuuid):
+        return castas_fctn_from_timeuuid_to_date;
+
+    case cast_switch_case_val(kind::timestamp, kind::simple_date):
+        return castas_fctn_from_date_to_timestamp;
+    case cast_switch_case_val(kind::timestamp, kind::timeuuid):
+        return castas_fctn_from_timeuuid_to_timestamp;
+
+    case cast_switch_case_val(kind::ascii, kind::timestamp):
+    case cast_switch_case_val(kind::ascii, kind::simple_date):
+    case cast_switch_case_val(kind::ascii, kind::time):
+    case cast_switch_case_val(kind::ascii, kind::timeuuid):
+    case cast_switch_case_val(kind::ascii, kind::uuid):
+    case cast_switch_case_val(kind::ascii, kind::boolean):
+    case cast_switch_case_val(kind::ascii, kind::inet):
+    case cast_switch_case_val(kind::utf8, kind::timestamp):
+    case cast_switch_case_val(kind::utf8, kind::simple_date):
+    case cast_switch_case_val(kind::utf8, kind::time):
+    case cast_switch_case_val(kind::utf8, kind::timeuuid):
+    case cast_switch_case_val(kind::utf8, kind::uuid):
+    case cast_switch_case_val(kind::utf8, kind::boolean):
+    case cast_switch_case_val(kind::utf8, kind::inet):
+        return castas_fctn_from_dv_to_string;
+    case cast_switch_case_val(kind::utf8, kind::ascii):
+        return castas_fctn_simple<sstring, sstring>;
+    }
+    throw exceptions::invalid_request_exception(format("{} cannot be cast to {}", from_type->name(), to_type->name()));
 }

 shared_ptr<function> castas_functions::get(data_type to_type, const std::vector<shared_ptr<cql3::selection::selector>>& provided_args) {
--- a/cql3/functions/castas_fcts.hh
+++ b/cql3/functions/castas_fcts.hh
@@ -58,7 +58,7 @@ namespace functions {
 * Support for CAST(. AS .) functions.
 */

-using castas_fctn = std::function<data_value(data_value)>;
+using castas_fctn = data_value(*)(data_value); // plain function pointer (replaces std::function): only capture-less callables convert to it

 castas_fctn get_castas_fctn(data_type to_type, data_type from_type);

--- a/cql3/functions/function_call.hh
+++ b/cql3/functions/function_call.hh
@@ -74,12 +74,12 @@ public:
        raw(function_name name, std::vector<shared_ptr<term::raw>> terms)
            : _name(std::move(name)), _terms(std::move(terms)) {
        }
-        virtual ::shared_ptr<term> prepare(database& db, const sstring& keyspace, ::shared_ptr<column_specification> receiver) const override;
+        virtual ::shared_ptr<term> prepare(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const override;
    private:
        // All parameters must be terminal
        static bytes_opt execute(scalar_function& fun, std::vector<shared_ptr<term>> parameters);
    public:
-        virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, shared_ptr<column_specification> receiver) const override;
+        virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const override;
        virtual sstring to_string() const override;
    };
 };
--- a/cql3/functions/functions.cc
+++ b/cql3/functions/functions.cc
@@ -141,12 +141,12 @@ void functions::remove_function(const function_name& name, const std::vector<dat
    with_udf_iter(name, arg_types, [] (functions::declared_t::iterator i) { _declared.erase(i); });
 }

-shared_ptr<column_specification>
+lw_shared_ptr<column_specification>
 functions::make_arg_spec(const sstring& receiver_ks, const sstring& receiver_cf,
        const function& fun, size_t i) {
    auto&& name = boost::lexical_cast<std::string>(fun.name());
    std::transform(name.begin(), name.end(), name.begin(), ::tolower);
-    return ::make_shared<column_specification>(receiver_ks,
+    return make_lw_shared<column_specification>(receiver_ks,
                                   receiver_cf,
                                   ::make_shared<column_identifier>(format("arg{:d}({})", i, name), true),
                                   fun.arg_types()[i]);
@@ -187,7 +187,7 @@ functions::get(database& db,
        const std::vector<shared_ptr<assignment_testable>>& provided_args,
        const sstring& receiver_ks,
        const sstring& receiver_cf,
-        shared_ptr<column_specification> receiver) {
+        lw_shared_ptr<column_specification> receiver) {

    static const function_name TOKEN_FUNCTION_NAME = function_name::native_function("token");
    static const function_name TO_JSON_FUNCTION_NAME = function_name::native_function("tojson");
@@ -507,7 +507,7 @@ function_call::make_terminal(shared_ptr<function> fun, cql3::raw_value result, c
 }

 ::shared_ptr<term>
-function_call::raw::prepare(database& db, const sstring& keyspace, ::shared_ptr<column_specification> receiver) const {
+function_call::raw::prepare(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const {
    std::vector<shared_ptr<assignment_testable>> args;
    args.reserve(_terms.size());
    std::transform(_terms.begin(), _terms.end(), std::back_inserter(args),
@@ -572,7 +572,7 @@ function_call::raw::execute(scalar_function& fun, std::vector<shared_ptr<term>>
 }

 assignment_testable::test_result
-function_call::raw::test_assignment(database& db, const sstring& keyspace, shared_ptr<column_specification> receiver) const {
+function_call::raw::test_assignment(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const {
    // Note: Functions.get() will return null if the function doesn't exist, or throw is no function matching
    // the arguments can be found. We may get one of those if an undefined/wrong function is used as argument
    // of another, existing, function. In that case, we return true here because we'll throw a proper exception
--- a/cql3/functions/functions.hh
+++ b/cql3/functions/functions.hh
@@ -67,7 +67,7 @@ class functions {
 private:
    static std::unordered_multimap<function_name, shared_ptr<function>> init();
 public:
-    static shared_ptr<column_specification> make_arg_spec(const sstring& receiver_ks, const sstring& receiver_cf,
+    static lw_shared_ptr<column_specification> make_arg_spec(const sstring& receiver_ks, const sstring& receiver_cf,
            const function& fun, size_t i);
    static int get_overload_count(const function_name& name);
 public:
@@ -77,7 +77,7 @@ public:
                                    const std::vector<shared_ptr<assignment_testable>>& provided_args,
                                    const sstring& receiver_ks,
                                    const sstring& receiver_cf,
-                                    ::shared_ptr<column_specification> receiver = nullptr);
+                                    lw_shared_ptr<column_specification> receiver = nullptr);
    template <typename AssignmentTestablePtrRange>
    static shared_ptr<function> get(database& db,
                                    const sstring& keyspace,
@@ -85,7 +85,7 @@ public:
                                    AssignmentTestablePtrRange&& provided_args,
                                    const sstring& receiver_ks,
                                    const sstring& receiver_cf,
-                                    ::shared_ptr<column_specification> receiver = nullptr) {
+                                    lw_shared_ptr<column_specification> receiver = nullptr) {
        const std::vector<shared_ptr<assignment_testable>> args(std::begin(provided_args), std::end(provided_args));
        return get(db, keyspace, name, args, receiver_ks, receiver_cf, receiver);
    }
--- a/cql3/lists.cc
+++ b/cql3/lists.cc
@@ -30,28 +30,28 @@

 namespace cql3 {

-shared_ptr<column_specification>
+lw_shared_ptr<column_specification> // builds a spec named "idx(<column>)" of type int32_type, in the column's keyspace/table
 lists::index_spec_of(const column_specification& column) {
-    return ::make_shared<column_specification>(column.ks_name, column.cf_name,
+    return make_lw_shared<column_specification>(column.ks_name, column.cf_name,
            ::make_shared<column_identifier>(format("idx({})", *column.name), true), int32_type);
 }

-shared_ptr<column_specification>
+lw_shared_ptr<column_specification> // builds a spec named "value(<column>)" typed as the list's element type
 lists::value_spec_of(const column_specification& column) {
-    return ::make_shared<column_specification>(column.ks_name, column.cf_name,
+    return make_lw_shared<column_specification>(column.ks_name, column.cf_name, // NOTE(review): the list_type_impl cast below is dereferenced unchecked; presumably callers pass a list column - confirm
            ::make_shared<column_identifier>(format("value({})", *column.name), true),
                dynamic_pointer_cast<const list_type_impl>(column.type)->get_elements_type());
 }

-shared_ptr<column_specification>
+lw_shared_ptr<column_specification> // builds a spec named "uuid_idx(<column>)" of type uuid_type, in the column's keyspace/table
 lists::uuid_index_spec_of(const column_specification& column) {
-    return ::make_shared<column_specification>(column.ks_name, column.cf_name,
+    return make_lw_shared<column_specification>(column.ks_name, column.cf_name,
            ::make_shared<column_identifier>(format("uuid_idx({})", *column.name), true), uuid_type);
 }


 shared_ptr<term>
-lists::literal::prepare(database& db, const sstring& keyspace, shared_ptr<column_specification> receiver) const {
+lists::literal::prepare(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const {
    validate_assignable_to(db, keyspace, *receiver);

    // In Cassandra, an empty (unfrozen) map/set/list is equivalent to the column being null. In
@@ -101,7 +101,7 @@ lists::literal::validate_assignable_to(database& db, const sstring keyspace, con
 }

 assignment_testable::test_result
-lists::literal::test_assignment(database& db, const sstring& keyspace, shared_ptr<column_specification> receiver) const {
+lists::literal::test_assignment(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const {
    if (!dynamic_pointer_cast<const list_type_impl>(receiver->type)) {
        return assignment_testable::test_result::NOT_ASSIGNABLE;
    }
@@ -357,7 +357,12 @@ lists::setter_by_uuid::execute(mutation& m, const clustering_key_prefix& prefix,

    collection_mutation_description mut;
    mut.cells.reserve(1);
-    mut.cells.emplace_back(to_bytes(*index), params.make_cell(*ltype->value_comparator(), *value, atomic_cell::collection_member::yes));
+
+    if (!value) {
+        mut.cells.emplace_back(to_bytes(*index), params.make_dead_cell());
+    } else {
+        mut.cells.emplace_back(to_bytes(*index), params.make_cell(*ltype->value_comparator(), *value, atomic_cell::collection_member::yes));
+    }

    m.set_cell(prefix, column, mut.serialize(*ltype));
 }
--- a/cql3/lists.hh
+++ b/cql3/lists.hh
@@ -54,9 +54,9 @@ namespace cql3 {
 class lists {
    lists() = delete;
 public:
-    static shared_ptr<column_specification> index_spec_of(const column_specification&);
-    static shared_ptr<column_specification> value_spec_of(const column_specification&);
-    static shared_ptr<column_specification> uuid_index_spec_of(const column_specification&);
+    static lw_shared_ptr<column_specification> index_spec_of(const column_specification&);
+    static lw_shared_ptr<column_specification> value_spec_of(const column_specification&);
+    static lw_shared_ptr<column_specification> uuid_index_spec_of(const column_specification&);

    class literal : public term::raw {
        const std::vector<shared_ptr<term::raw>> _elements;
@@ -64,11 +64,11 @@ public:
        explicit literal(std::vector<shared_ptr<term::raw>> elements)
            : _elements(std::move(elements)) {
        }
-        virtual shared_ptr<term> prepare(database& db, const sstring& keyspace, shared_ptr<column_specification> receiver) const override;
+        virtual shared_ptr<term> prepare(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const override;
    private:
        void validate_assignable_to(database& db, const sstring keyspace, const column_specification& receiver) const;
    public:
-        virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, shared_ptr<column_specification> receiver) const override;
+        virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const override;
        virtual sstring to_string() const override;
    };

@@ -113,7 +113,7 @@ public:
     */
    class marker : public abstract_marker {
    public:
-        marker(int32_t bind_index, ::shared_ptr<column_specification> receiver)
+        marker(int32_t bind_index, lw_shared_ptr<column_specification> receiver)
            : abstract_marker{bind_index, std::move(receiver)}
        { }
        virtual ::shared_ptr<terminal> bind(const query_options& options) override;
--- a/cql3/maps.cc
+++ b/cql3/maps.cc
@@ -51,22 +51,22 @@

 namespace cql3 {

-shared_ptr<column_specification>
+lw_shared_ptr<column_specification> // builds a spec named "key(<column>)" typed as the map's key type
 maps::key_spec_of(const column_specification& column) {
-    return ::make_shared<column_specification>(column.ks_name, column.cf_name,
+    return make_lw_shared<column_specification>(column.ks_name, column.cf_name, // NOTE(review): the map_type_impl cast below is dereferenced unchecked; presumably callers pass a map column - confirm
                ::make_shared<column_identifier>(format("key({})", *column.name), true),
                 dynamic_pointer_cast<const map_type_impl>(column.type)->get_keys_type());
 }

-shared_ptr<column_specification>
+lw_shared_ptr<column_specification> // builds a spec named "value(<column>)" typed as the map's value type
 maps::value_spec_of(const column_specification& column) {
-    return ::make_shared<column_specification>(column.ks_name, column.cf_name,
+    return make_lw_shared<column_specification>(column.ks_name, column.cf_name, // NOTE(review): the map_type_impl cast below is dereferenced unchecked; presumably callers pass a map column - confirm
                ::make_shared<column_identifier>(format("value({})", *column.name), true),
                 dynamic_pointer_cast<const map_type_impl>(column.type)->get_values_type());
 }

 ::shared_ptr<term>
-maps::literal::prepare(database& db, const sstring& keyspace, ::shared_ptr<column_specification> receiver) const {
+maps::literal::prepare(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const {
    validate_assignable_to(db, keyspace, *receiver);

    auto key_spec = maps::key_spec_of(*receiver);
@@ -114,7 +114,7 @@ maps::literal::validate_assignable_to(database& db, const sstring& keyspace, con
 }

 assignment_testable::test_result
-maps::literal::test_assignment(database& db, const sstring& keyspace, ::shared_ptr<column_specification> receiver) const {
+maps::literal::test_assignment(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const {
    if (!dynamic_pointer_cast<const map_type_impl>(receiver->type)) {
        return assignment_testable::test_result::NOT_ASSIGNABLE;
    }
--- a/cql3/maps.hh
+++ b/cql3/maps.hh
@@ -56,8 +56,8 @@ class maps {
 private:
    maps() = delete;
 public:
-    static shared_ptr<column_specification> key_spec_of(const column_specification& column);
-    static shared_ptr<column_specification> value_spec_of(const column_specification& column);
+    static lw_shared_ptr<column_specification> key_spec_of(const column_specification& column);
+    static lw_shared_ptr<column_specification> value_spec_of(const column_specification& column);

    class literal : public term::raw {
    public:
@@ -66,11 +66,11 @@ public:
        literal(const std::vector<std::pair<::shared_ptr<term::raw>, ::shared_ptr<term::raw>>>& entries_)
            : entries{entries_}
        { }
-        virtual ::shared_ptr<term> prepare(database& db, const sstring& keyspace, ::shared_ptr<column_specification> receiver) const override;
+        virtual ::shared_ptr<term> prepare(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const override;
    private:
        void validate_assignable_to(database& db, const sstring& keyspace, const column_specification& receiver) const;
    public:
-        virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, ::shared_ptr<column_specification> receiver) const override;
+        virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const override;
        virtual sstring to_string() const override;
    };

@@ -104,7 +104,7 @@ public:

    class marker : public abstract_marker {
    public:
-        marker(int32_t bind_index, ::shared_ptr<column_specification> receiver)
+        marker(int32_t bind_index, lw_shared_ptr<column_specification> receiver)
            : abstract_marker{bind_index, std::move(receiver)}
        { }
        virtual ::shared_ptr<terminal> bind(const query_options& options) override;
--- a/cql3/multi_column_relation.hh
+++ b/cql3/multi_column_relation.hh
@@ -140,7 +140,7 @@ protected:
    virtual shared_ptr<restrictions::restriction> new_EQ_restriction(database& db, schema_ptr schema,
                                                                     variable_specifications& bound_names) override {
        auto rs = receivers(db, *schema);
-        std::vector<::shared_ptr<column_specification>> col_specs(rs.size());
+        std::vector<lw_shared_ptr<column_specification>> col_specs(rs.size());
        std::transform(rs.begin(), rs.end(), col_specs.begin(), [] (auto cs) {
            return cs->column_specification;
        });
@@ -151,7 +151,7 @@ protected:
    virtual shared_ptr<restrictions::restriction> new_IN_restriction(database& db, schema_ptr schema,
                                                                     variable_specifications& bound_names) override {
        auto rs = receivers(db, *schema);
-        std::vector<::shared_ptr<column_specification>> col_specs(rs.size());
+        std::vector<lw_shared_ptr<column_specification>> col_specs(rs.size());
        std::transform(rs.begin(), rs.end(), col_specs.begin(), [] (auto cs) {
            return cs->column_specification;
        });
@@ -175,7 +175,7 @@ protected:
                                                                        variable_specifications& bound_names,
                                                                        statements::bound bound, bool inclusive) override {
        auto rs = receivers(db, *schema);
-        std::vector<::shared_ptr<column_specification>> col_specs(rs.size());
+        std::vector<lw_shared_ptr<column_specification>> col_specs(rs.size());
        std::transform(rs.begin(), rs.end(), col_specs.begin(), [] (auto cs) {
            return cs->column_specification;
        });
@@ -200,7 +200,7 @@ protected:
        return ::make_shared(multi_column_relation(std::move(new_entities), _relation_type, _values_or_marker, _in_values, _in_marker));
    }

-    virtual shared_ptr<term> to_term(const std::vector<shared_ptr<column_specification>>& receivers,
+    virtual shared_ptr<term> to_term(const std::vector<lw_shared_ptr<column_specification>>& receivers,
                                     const term::raw& raw, database& db, const sstring& keyspace,
                                     variable_specifications& bound_names) const override {
        const auto& as_multi_column_raw = dynamic_cast<const term::multi_column_raw&>(raw);
--- a/cql3/operation.cc
+++ b/cql3/operation.cc
@@ -216,7 +216,7 @@ operation::subtraction::prepare(database& db, const sstring& keyspace, const col
    } else if (ctype->get_kind() == abstract_type::kind::map) {
        auto&& mtype = dynamic_pointer_cast<const map_type_impl>(ctype);
        // The value for a map subtraction is actually a set
-        auto&& vr = ::make_shared<column_specification>(
+        auto&& vr = make_lw_shared<column_specification>(
                receiver.column_specification->ks_name,
                receiver.column_specification->cf_name,
                receiver.column_specification->name,
@@ -294,7 +294,7 @@ operation::set_counter_value_from_tuple_list::prepare(database& db, const sstrin

    // We need to fake a column of list<tuple<...>> to prepare the value term
    auto & os = receiver.column_specification;
-    auto spec = ::make_shared<cql3::column_specification>(os->ks_name, os->cf_name, os->name, counter_tuple_list_type);
+    auto spec = make_lw_shared<cql3::column_specification>(os->ks_name, os->cf_name, os->name, counter_tuple_list_type);
    auto v = _value->prepare(db, keyspace, spec);

    // Will not be used elsewhere, so make it local.
--- a/cql3/query_options.cc
+++ b/cql3/query_options.cc
@@ -189,7 +189,7 @@ bytes_view query_options::linearize(fragmented_temporary_buffer::view view) cons
    }
 }

-void query_options::prepare(const std::vector<::shared_ptr<column_specification>>& specs)
+void query_options::prepare(const std::vector<lw_shared_ptr<column_specification>>& specs)
 {
    if (!_names) {
        return;
--- a/cql3/query_options.hh
+++ b/cql3/query_options.hh
@@ -245,7 +245,7 @@ public:
        return _cql_config;
    }

-    void prepare(const std::vector<::shared_ptr<column_specification>>& specs);
+    void prepare(const std::vector<lw_shared_ptr<column_specification>>& specs);
 private:
    void fill_value_views();
 };
--- a/cql3/query_processor.cc
+++ b/cql3/query_processor.cc
@@ -510,7 +510,7 @@ query_processor::execute_prepared(
    if (needs_authorization) {
        fut = statement->check_access(_proxy, query_state.get_client_state()).then([this, &query_state, prepared = std::move(prepared), cache_key = std::move(cache_key)] () mutable {
            return _authorized_prepared_cache.insert(*query_state.get_client_state().user(), std::move(cache_key), std::move(prepared)).handle_exception([this] (auto eptr) {
-                log.error("failed to cache the entry", eptr);
+                log.error("failed to cache the entry: {}", eptr);
            });
        });
    }
@@ -607,10 +607,10 @@ prepared_cache_key_type query_processor::compute_thrift_id(

 std::unique_ptr<prepared_statement>
 query_processor::get_statement(const sstring_view& query, const service::client_state& client_state) {
-    ::shared_ptr<raw::parsed_statement> statement = parse_statement(query);
+    std::unique_ptr<raw::parsed_statement> statement = parse_statement(query);

    // Set keyspace for statement that require login
-    auto cf_stmt = dynamic_pointer_cast<raw::cf_statement>(statement);
+    auto cf_stmt = dynamic_cast<raw::cf_statement*>(statement.get());
    if (cf_stmt) {
        cf_stmt->prepare_keyspace(client_state);
    }
@@ -620,7 +620,7 @@ query_processor::get_statement(const sstring_view& query, const service::client_
    return p;
 }

-::shared_ptr<raw::parsed_statement>
+std::unique_ptr<raw::parsed_statement>
 query_processor::parse_statement(const sstring_view& query) {
    try {
        auto statement = util::do_with_parser(query,  std::mem_fn(&cql3_parser::CqlParser::query));
@@ -853,7 +853,7 @@ query_processor::execute_batch(
    return batch->check_access(_proxy, query_state.get_client_state()).then([this, &query_state, &options, batch, pending_authorization_entries = std::move(pending_authorization_entries)] () mutable {
        return parallel_for_each(pending_authorization_entries, [this, &query_state] (auto& e) {
            return _authorized_prepared_cache.insert(*query_state.get_client_state().user(), e.first, std::move(e.second)).handle_exception([this] (auto eptr) {
-                log.error("failed to cache the entry", eptr);
+                log.error("failed to cache the entry: {}", eptr);
            });
        }).then([this, &query_state, &options, batch] {
            batch->validate();
--- a/cql3/query_processor.hh
+++ b/cql3/query_processor.hh
@@ -147,7 +147,7 @@ public:
            const std::string_view& query_string,
            const sstring& keyspace);

-    static ::shared_ptr<statements::raw::parsed_statement> parse_statement(const std::string_view& query);
+    static std::unique_ptr<statements::raw::parsed_statement> parse_statement(const std::string_view& query);

    query_processor(service::storage_proxy& proxy, database& db, service::migration_notifier& mn, memory_config mcfg, cql_config& cql_cfg);

--- a/cql3/relation.cc
+++ b/cql3/relation.cc
@@ -49,7 +49,7 @@ relation::to_column_definition(const schema& schema, const column_identifier::ra
    auto id = entity.prepare_column_identifier(schema);
    auto def = get_column_definition(schema, *id);
    if (!def || def->is_hidden_from_cql()) {
-        throw exceptions::unrecognized_entity_exception(id, shared_from_this());
+        throw exceptions::unrecognized_entity_exception(*id, to_string());
    }
    return *def;
 }
--- a/cql3/relation.hh
+++ b/cql3/relation.hh
@@ -249,7 +249,7 @@ protected:
     * @return the <code>Term</code> corresponding to the specified <code>Raw</code>
     * @throws InvalidRequestException if the <code>Raw</code> term is not valid
     */
-    virtual ::shared_ptr<term> to_term(const std::vector<::shared_ptr<column_specification>>& receivers,
+    virtual ::shared_ptr<term> to_term(const std::vector<lw_shared_ptr<column_specification>>& receivers,
                                       const term::raw& raw,
                                       database& db,
                                       const sstring& keyspace,
@@ -265,7 +265,7 @@ protected:
     * @return the <code>Term</code>s corresponding to the specified <code>Raw</code> terms
     * @throws InvalidRequestException if the <code>Raw</code> terms are not valid
     */
-    std::vector<::shared_ptr<term>> to_terms(const std::vector<::shared_ptr<column_specification>>& receivers,
+    std::vector<::shared_ptr<term>> to_terms(const std::vector<lw_shared_ptr<column_specification>>& receivers,
                                             const std::vector<::shared_ptr<term::raw>>& raws,
                                             database& db,
                                             const sstring& keyspace,
--- a/cql3/restrictions/statement_restrictions.cc
+++ b/cql3/restrictions/statement_restrictions.cc
@@ -417,7 +417,7 @@ std::vector<const column_definition*> statement_restrictions::get_column_defs_fo
                    _clustering_columns_restrictions->num_prefix_columns_that_need_not_be_filtered();
            for (auto&& cdef : _clustering_columns_restrictions->get_column_defs()) {
                ::shared_ptr<single_column_restriction> restr;
-                if (single_pk_restrs) {
+                if (single_ck_restrs) {
                    auto it = single_ck_restrs->restrictions().find(cdef);
                    if (it != single_ck_restrs->restrictions().end()) {
                        restr = dynamic_pointer_cast<single_column_restriction>(it->second);
@@ -624,9 +624,6 @@ bool single_column_restriction::EQ::is_satisfied_by(const schema& schema,
        const row& cells,
        const query_options& options,
        gc_clock::time_point now) const {
-    if (_column_def.type->is_counter()) {
-        fail(unimplemented::cause::COUNTERS);
-    }
    auto operand = value(options);
    if (operand) {
        auto cell_value = get_value(schema, key, ckey, cells, now);
@@ -641,11 +638,11 @@ bool single_column_restriction::EQ::is_satisfied_by(const schema& schema,
 }

 bool single_column_restriction::EQ::is_satisfied_by(bytes_view data, const query_options& options) const {
-    if (_column_def.type->is_counter()) {
-        fail(unimplemented::cause::COUNTERS);
-    }
    auto operand = value(options);
-    return operand && _column_def.type->compare(*operand, data) == 0;
+    if (!operand) {
+        throw exceptions::invalid_request_exception(format("Invalid null value for {}", _column_def.name_as_text()));
+    }
+    return _column_def.type->compare(*operand, data) == 0;
 }

 bool single_column_restriction::IN::is_satisfied_by(const schema& schema,
@@ -654,9 +651,6 @@ bool single_column_restriction::IN::is_satisfied_by(const schema& schema,
        const row& cells,
        const query_options& options,
        gc_clock::time_point now) const {
-    if (_column_def.type->is_counter()) {
-        fail(unimplemented::cause::COUNTERS);
-    }
    auto cell_value = get_value(schema, key, ckey, cells, now);
    if (!cell_value) {
        return false;
@@ -670,16 +664,13 @@ bool single_column_restriction::IN::is_satisfied_by(const schema& schema,
 }

 bool single_column_restriction::IN::is_satisfied_by(bytes_view data, const query_options& options) const {
-    if (_column_def.type->is_counter()) {
-        fail(unimplemented::cause::COUNTERS);
-    }
    auto operands = values(options);
    return boost::algorithm::any_of(operands, [this, &data] (const bytes_opt& operand) {
        return operand && _column_def.type->compare(*operand, data) == 0;
    });
 }

-static query::range<bytes_view> to_range(const term_slice& slice, const query_options& options) {
+static query::range<bytes_view> to_range(const term_slice& slice, const query_options& options, const sstring& name) {
    using range_type = query::range<bytes_view>;
    auto extract_bound = [&] (statements::bound bound) -> std::optional<range_type::bound> {
        if (!slice.has_bound(bound)) {
@@ -687,7 +678,7 @@ static query::range<bytes_view> to_range(const term_slice& slice, const query_op
        }
        auto value = slice.bound(bound)->bind_and_get(options);
        if (!value) {
-            return { };
+            throw exceptions::invalid_request_exception(format("Invalid null bound for {}", name));
        }
        auto value_view = options.linearize(*value);
        return { range_type::bound(value_view, slice.is_inclusive(bound)) };
@@ -697,6 +688,11 @@ static query::range<bytes_view> to_range(const term_slice& slice, const query_op
        extract_bound(statements::bound::END));
 }

+static bool contains_without_wraparound(
+        const query::range<bytes_view>& range, bytes_view value, const serialized_tri_compare& cmp) {
+    return !range.is_wrap_around(cmp) && range.contains(value, cmp);
+}
+
 bool single_column_restriction::slice::is_satisfied_by(const schema& schema,
        const partition_key& key,
        const clustering_key_prefix& ckey,
@@ -711,15 +707,14 @@ bool single_column_restriction::slice::is_satisfied_by(const schema& schema,
        return false;
    }
    return cell_value->with_linearized([&] (bytes_view cell_value_bv) {
-        return to_range(_slice, options).contains(cell_value_bv, _column_def.type->as_tri_comparator());
+        return contains_without_wraparound(to_range(_slice, options, _column_def.name_as_text()),
+                cell_value_bv, _column_def.type->as_tri_comparator());
    });
 }

 bool single_column_restriction::slice::is_satisfied_by(bytes_view data, const query_options& options) const {
-    if (_column_def.type->is_counter()) {
-        fail(unimplemented::cause::COUNTERS);
-    }
-    return to_range(_slice, options).contains(data, _column_def.type->underlying_type()->as_tri_comparator());
+    return contains_without_wraparound(to_range(_slice, options, _column_def.name_as_text()),
+            data, _column_def.type->underlying_type()->as_tri_comparator());
 }

 bool single_column_restriction::contains::is_satisfied_by(const schema& schema,
@@ -728,9 +723,6 @@ bool single_column_restriction::contains::is_satisfied_by(const schema& schema,
        const row& cells,
        const query_options& options,
        gc_clock::time_point now) const {
-    if (_column_def.type->is_counter()) {
-        fail(unimplemented::cause::COUNTERS);
-    }
    if (!_column_def.type->is_collection()) {
        return false;
    }
@@ -881,7 +873,9 @@ bool single_column_restriction::contains::is_satisfied_by(bytes_view collection_
            auto map_key = _entry_keys[i]->bind_and_get(options);
            auto map_value = _entry_values[i]->bind_and_get(options);
            if (!map_key || !map_value) {
-                continue;
+                throw exceptions::invalid_request_exception(
+                        format("Unsupported null map {} for column {}",
+                               map_key ? "key" : "value", _column_def.name_as_text()));
            }
            auto found = with_linearized(*map_key, [&] (bytes_view map_key_bv) {
              return std::find_if(data_map.begin(), data_map.end(), [&] (auto&& element) {
@@ -929,7 +923,7 @@ bool token_restriction::slice::is_satisfied_by(const schema& schema,
        const query_options& options,
        gc_clock::time_point now) const {
    bool satisfied = false;
-    auto range = to_range(_slice, options);
+    auto range = to_range(_slice, options, "token");
    for (auto* cdef : _column_definitions) {
        auto cell_value = do_get_value(schema, *cdef, key, ckey, cells, now);
        if (!cell_value) {
--- a/cql3/result_set.cc
+++ b/cql3/result_set.cc
@@ -43,12 +43,12 @@

 namespace cql3 {

-metadata::metadata(std::vector<::shared_ptr<column_specification>> names_)
+metadata::metadata(std::vector<lw_shared_ptr<column_specification>> names_)
        : _flags(flag_enum_set())
        , _column_info(make_lw_shared<column_info>(std::move(names_)))
 { }

-metadata::metadata(flag_enum_set flags, std::vector<::shared_ptr<column_specification>> names_, uint32_t column_count,
+metadata::metadata(flag_enum_set flags, std::vector<lw_shared_ptr<column_specification>> names_, uint32_t column_count,
        lw_shared_ptr<const service::pager::paging_state> paging_state)
    : _flags(flags)
    , _column_info(make_lw_shared<column_info>(std::move(names_), column_count))
@@ -60,7 +60,7 @@ uint32_t metadata::value_count() const {
    return _flags.contains<flag::NO_METADATA>() ? _column_info->_column_count : _column_info->_names.size();
 }

-void metadata::add_non_serialized_column(::shared_ptr<column_specification> name) {
+void metadata::add_non_serialized_column(lw_shared_ptr<column_specification> name) {
    // See comment above. Because columnCount doesn't account the newly added name, it
    // won't be serialized.
    _column_info->_names.emplace_back(std::move(name));
@@ -101,7 +101,7 @@ lw_shared_ptr<const service::pager::paging_state> metadata::paging_state() const
    return _paging_state;
 }

-prepared_metadata::prepared_metadata(const std::vector<::shared_ptr<column_specification>>& names,
+prepared_metadata::prepared_metadata(const std::vector<lw_shared_ptr<column_specification>>& names,
                                     const std::vector<uint16_t>& partition_key_bind_indices)
    : _names{names}
    , _partition_key_bind_indices{partition_key_bind_indices}
@@ -115,7 +115,7 @@ prepared_metadata::flag_enum_set prepared_metadata::flags() const {
    return _flags;
 }

-const std::vector<::shared_ptr<column_specification>>& prepared_metadata::names() const {
+const std::vector<lw_shared_ptr<column_specification>>& prepared_metadata::names() const {
    return _names;
 }

@@ -123,7 +123,7 @@ const std::vector<uint16_t>& prepared_metadata::partition_key_bind_indices() con
    return _partition_key_bind_indices;
 }

-result_set::result_set(std::vector<::shared_ptr<column_specification>> metadata_)
+result_set::result_set(std::vector<lw_shared_ptr<column_specification>> metadata_)
    : _metadata(::make_shared<metadata>(std::move(metadata_)))
 { }

@@ -179,7 +179,7 @@ const std::deque<std::vector<bytes_opt>>& result_set::rows() const {
 shared_ptr<const cql3::metadata>
 make_empty_metadata() {
    static thread_local shared_ptr<const metadata> empty_metadata_cache = [] {
-        auto result = ::make_shared<metadata>(std::vector<::shared_ptr<cql3::column_specification>>{});
+        auto result = ::make_shared<metadata>(std::vector<lw_shared_ptr<cql3::column_specification>>{});
        result->set_skip_metadata();
        return result;
    }();
--- a/cql3/result_set.hh
+++ b/cql3/result_set.hh
@@ -74,15 +74,15 @@ public:
    // used to include columns in the resultSet that we need to do post-query re-orderings
    // (SelectStatement.orderResults) but that shouldn't be sent to the user as they haven't been requested
    // (CASSANDRA-4911). So the serialization code will exclude any columns in name whose index is >= columnCount.
-        std::vector<::shared_ptr<column_specification>> _names;
+        std::vector<lw_shared_ptr<column_specification>> _names;
        uint32_t _column_count;

-        column_info(std::vector<::shared_ptr<column_specification>> names, uint32_t column_count)
+        column_info(std::vector<lw_shared_ptr<column_specification>> names, uint32_t column_count)
            : _names(std::move(names))
            , _column_count(column_count)
        { }

-        explicit column_info(std::vector<::shared_ptr<column_specification>> names)
+        explicit column_info(std::vector<lw_shared_ptr<column_specification>> names)
            : _names(std::move(names))
            , _column_count(_names.size())
        { }
@@ -95,15 +95,15 @@ private:
    lw_shared_ptr<const service::pager::paging_state> _paging_state;

 public:
-    metadata(std::vector<::shared_ptr<column_specification>> names_);
+    metadata(std::vector<lw_shared_ptr<column_specification>> names_);

-    metadata(flag_enum_set flags, std::vector<::shared_ptr<column_specification>> names_, uint32_t column_count,
+    metadata(flag_enum_set flags, std::vector<lw_shared_ptr<column_specification>> names_, uint32_t column_count,
            lw_shared_ptr<const service::pager::paging_state> paging_state);

    // The maximum number of values that the ResultSet can hold. This can be bigger than columnCount due to CASSANDRA-4911
    uint32_t value_count() const;

-    void add_non_serialized_column(::shared_ptr<column_specification> name);
+    void add_non_serialized_column(lw_shared_ptr<column_specification> name);

 private:
    bool all_in_same_cf() const;
@@ -120,7 +120,7 @@ public:

    lw_shared_ptr<const service::pager::paging_state> paging_state() const;

-    const std::vector<::shared_ptr<column_specification>>& get_names() const {
+    const std::vector<lw_shared_ptr<column_specification>>& get_names() const {
        return _column_info->_names;
    }
 };
@@ -139,14 +139,14 @@ public:
    using flag_enum_set = enum_set<flag_enum>;
 private:
    flag_enum_set _flags;
-    std::vector<::shared_ptr<column_specification>> _names;
+    std::vector<lw_shared_ptr<column_specification>> _names;
    std::vector<uint16_t> _partition_key_bind_indices;
 public:
-    prepared_metadata(const std::vector<::shared_ptr<column_specification>>& names,
+    prepared_metadata(const std::vector<lw_shared_ptr<column_specification>>& names,
                      const std::vector<uint16_t>& partition_key_bind_indices);

    flag_enum_set flags() const;
-    const std::vector<::shared_ptr<column_specification>>& names() const;
+    const std::vector<lw_shared_ptr<column_specification>>& names() const;
    const std::vector<uint16_t>& partition_key_bind_indices() const;
 };

@@ -167,7 +167,7 @@ class result_set {

    friend class result;
 public:
-    result_set(std::vector<::shared_ptr<column_specification>> metadata_);
+    result_set(std::vector<lw_shared_ptr<column_specification>> metadata_);

    result_set(::shared_ptr<metadata> metadata);

--- a/cql3/selection/selection.cc
+++ b/cql3/selection/selection.cc
@@ -56,7 +56,7 @@ namespace selection {

 selection::selection(schema_ptr schema,
    std::vector<const column_definition*> columns,
-    std::vector<::shared_ptr<column_specification>> metadata_,
+    std::vector<lw_shared_ptr<column_specification>> metadata_,
    bool collect_timestamps,
    bool collect_TTLs,
    trivial is_trivial)
@@ -92,7 +92,7 @@ private:
    const bool _is_wildcard;
 public:
    static ::shared_ptr<simple_selection> make(schema_ptr schema, std::vector<const column_definition*> columns, bool is_wildcard) {
-        std::vector<::shared_ptr<column_specification>> metadata;
+        std::vector<lw_shared_ptr<column_specification>> metadata;
        metadata.reserve(columns.size());
        for (auto&& col : columns) {
            metadata.emplace_back(col->column_specification);
@@ -106,7 +106,7 @@ public:
     * get much duplicate in practice, it's more efficient not to bother.
     */
    simple_selection(schema_ptr schema, std::vector<const column_definition*> columns,
-        std::vector<::shared_ptr<column_specification>> metadata, bool is_wildcard)
+        std::vector<lw_shared_ptr<column_specification>> metadata, bool is_wildcard)
            : selection(schema, std::move(columns), std::move(metadata), false, false, trivial::yes)
            , _is_wildcard(is_wildcard)
    { }
@@ -155,7 +155,7 @@ private:
    ::shared_ptr<selector_factories> _factories;
 public:
    selection_with_processing(schema_ptr schema, std::vector<const column_definition*> columns,
-            std::vector<::shared_ptr<column_specification>> metadata, ::shared_ptr<selector_factories> factories)
+            std::vector<lw_shared_ptr<column_specification>> metadata, ::shared_ptr<selector_factories> factories)
        : selection(schema, std::move(columns), std::move(metadata),
            factories->contains_write_time_selector_factory(),
            factories->contains_ttl_selector_factory())
@@ -264,14 +264,14 @@ uint32_t selection::add_column_for_post_processing(const column_definition& c) {
    }
 }

-std::vector<::shared_ptr<column_specification>>
+std::vector<lw_shared_ptr<column_specification>>
 selection::collect_metadata(const schema& schema, const std::vector<::shared_ptr<raw_selector>>& raw_selectors,
        const selector_factories& factories) {
-    std::vector<::shared_ptr<column_specification>> r;
+    std::vector<lw_shared_ptr<column_specification>> r;
    r.reserve(raw_selectors.size());
    auto i = raw_selectors.begin();
    for (auto&& factory : factories) {
-        ::shared_ptr<column_specification> col_spec = factory->get_column_specification(schema);
+        lw_shared_ptr<column_specification> col_spec = factory->get_column_specification(schema);
        ::shared_ptr<column_identifier> alias = (*i++)->alias;
        r.push_back(alias ? col_spec->with_alias(alias) : col_spec);
    }
--- a/cql3/selection/selection.hh
+++ b/cql3/selection/selection.hh
@@ -99,7 +99,7 @@ protected:

    selection(schema_ptr schema,
        std::vector<const column_definition*> columns,
-        std::vector<::shared_ptr<column_specification>> metadata_,
+        std::vector<lw_shared_ptr<column_specification>> metadata_,
        bool collect_timestamps,
        bool collect_TTLs, trivial is_trivial = trivial::no);

@@ -197,7 +197,7 @@ private:
            [] (auto&& s) { return s->processes_selection(); });
    }

-    static std::vector<::shared_ptr<column_specification>> collect_metadata(const schema& schema,
+    static std::vector<lw_shared_ptr<column_specification>> collect_metadata(const schema& schema,
        const std::vector<::shared_ptr<raw_selector>>& raw_selectors, const selector_factories& factories);
 public:
    static ::shared_ptr<selection> from_selectors(database& db, schema_ptr schema, const std::vector<::shared_ptr<raw_selector>>& raw_selectors);
@@ -268,7 +268,7 @@ public:
        if (_selectors->requires_thread()) {
            return async(std::move(func));
        } else {
-            return futurize_apply(std::move(func));
+            return futurize_invoke(std::move(func));
        }
    }

--- a/cql3/selection/selector.cc
+++ b/cql3/selection/selector.cc
@@ -26,9 +26,9 @@ namespace cql3 {

 namespace selection {

-::shared_ptr<column_specification>
+lw_shared_ptr<column_specification>
 selector::factory::get_column_specification(const schema& schema) const {
-    return ::make_shared<column_specification>(schema.ks_name(),
+    return make_lw_shared<column_specification>(schema.ks_name(),
        schema.cf_name(),
        ::make_shared<column_identifier>(column_name(), true),
        get_return_type());
--- a/cql3/selection/selector.hh
+++ b/cql3/selection/selector.hh
@@ -107,7 +107,7 @@ public:
     */
    virtual void reset() = 0;

-    virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, ::shared_ptr<column_specification> receiver) const override {
+    virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const override {
        auto t1 = receiver->type->underlying_type();
        auto t2 = get_type()->underlying_type();
        // We want columns of `counter_type' to be served by underlying type's overloads
@@ -142,7 +142,7 @@ public:
     * @param schema the column family schema
     * @return a column specification
     */
-    ::shared_ptr<column_specification> get_column_specification(const schema& schema) const;
+    lw_shared_ptr<column_specification> get_column_specification(const schema& schema) const;

    /**
     * Creates a new <code>selector</code> instance.
--- a/cql3/sets.cc
+++ b/cql3/sets.cc
@@ -27,15 +27,15 @@

 namespace cql3 {

-shared_ptr<column_specification>
+lw_shared_ptr<column_specification>
 sets::value_spec_of(const column_specification& column) {
-    return ::make_shared<column_specification>(column.ks_name, column.cf_name,
+    return make_lw_shared<column_specification>(column.ks_name, column.cf_name,
            ::make_shared<column_identifier>(format("value({})", *column.name), true),
            dynamic_pointer_cast<const set_type_impl>(column.type)->get_elements_type());
 }

 shared_ptr<term>
-sets::literal::prepare(database& db, const sstring& keyspace, shared_ptr<column_specification> receiver) const {
+sets::literal::prepare(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const {
    validate_assignable_to(db, keyspace, *receiver);

    if (_elements.empty()) {
@@ -105,7 +105,7 @@ sets::literal::validate_assignable_to(database& db, const sstring& keyspace, con
 }

 assignment_testable::test_result
-sets::literal::test_assignment(database& db, const sstring& keyspace, shared_ptr<column_specification> receiver) const {
+sets::literal::test_assignment(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const {
    if (!dynamic_pointer_cast<const set_type_impl>(receiver->type)) {
        // We've parsed empty maps as a set literal to break the ambiguity so handle that case now
        if (dynamic_pointer_cast<const map_type_impl>(receiver->type) && _elements.empty()) {
@@ -224,7 +224,7 @@ sets::delayed_value::bind(const query_options& options) {
 }


-sets::marker::marker(int32_t bind_index, ::shared_ptr<column_specification> receiver)
+sets::marker::marker(int32_t bind_index, lw_shared_ptr<column_specification> receiver)
    : abstract_marker{bind_index, std::move(receiver)} {
        assert(dynamic_cast<const set_type_impl*>(_receiver->type.get()));
    }
--- a/cql3/sets.hh
+++ b/cql3/sets.hh
@@ -56,7 +56,7 @@ namespace cql3 {
 class sets {
    sets() = delete;
 public:
-    static shared_ptr<column_specification> value_spec_of(const column_specification& column);
+    static lw_shared_ptr<column_specification> value_spec_of(const column_specification& column);

    class literal : public term::raw {
        std::vector<shared_ptr<term::raw>> _elements;
@@ -64,10 +64,10 @@ public:
        explicit literal(std::vector<shared_ptr<term::raw>> elements)
                : _elements(std::move(elements)) {
        }
-        virtual shared_ptr<term> prepare(database& db, const sstring& keyspace, shared_ptr<column_specification> receiver) const override;
+        virtual shared_ptr<term> prepare(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const override;
        void validate_assignable_to(database& db, const sstring& keyspace, const column_specification& receiver) const;
        assignment_testable::test_result
-        test_assignment(database& db, const sstring& keyspace, shared_ptr<column_specification> receiver) const;
+        test_assignment(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const;
        virtual sstring to_string() const override;
    };

@@ -100,7 +100,7 @@ public:

    class marker : public abstract_marker {
    public:
-        marker(int32_t bind_index, ::shared_ptr<column_specification> receiver);
+        marker(int32_t bind_index, lw_shared_ptr<column_specification> receiver);
        virtual ::shared_ptr<terminal> bind(const query_options& options) override;
    };

--- a/cql3/single_column_relation.cc
+++ b/cql3/single_column_relation.cc
@@ -53,7 +53,7 @@ using namespace cql3::restrictions;
 namespace cql3 {

 ::shared_ptr<term>
-single_column_relation::to_term(const std::vector<::shared_ptr<column_specification>>& receivers,
+single_column_relation::to_term(const std::vector<lw_shared_ptr<column_specification>>& receivers,
                                const term::raw& raw,
                                database& db,
                                const sstring& keyspace,
@@ -107,7 +107,7 @@ single_column_relation::new_LIKE_restriction(
    return ::make_shared<single_column_restriction::LIKE>(column_def, std::move(term));
 }

-std::vector<::shared_ptr<column_specification>>
+std::vector<lw_shared_ptr<column_specification>>
 single_column_relation::to_receivers(const schema& schema, const column_definition& column_def) const
 {
    using namespace statements::request_validations;
--- a/cql3/single_column_relation.hh
+++ b/cql3/single_column_relation.hh
@@ -117,7 +117,7 @@ public:
    }

 protected:
-    virtual ::shared_ptr<term> to_term(const std::vector<::shared_ptr<column_specification>>& receivers,
+    virtual ::shared_ptr<term> to_term(const std::vector<lw_shared_ptr<column_specification>>& receivers,
                          const term::raw& raw, database& db, const sstring& keyspace,
                          variable_specifications& bound_names) const override;

@@ -202,9 +202,9 @@ private:
     * @return the receivers for the specified relation.
     * @throws exceptions::invalid_request_exception if the relation is invalid
     */
-    std::vector<::shared_ptr<column_specification>> to_receivers(const schema& schema, const column_definition& column_def) const;
+    std::vector<lw_shared_ptr<column_specification>> to_receivers(const schema& schema, const column_definition& column_def) const;

-    static shared_ptr<column_specification> make_collection_receiver(shared_ptr<column_specification> receiver, bool for_key) {
+    static lw_shared_ptr<column_specification> make_collection_receiver(lw_shared_ptr<column_specification> receiver, bool for_key) {
        return static_cast<const collection_type_impl*>(receiver->type.get())->make_collection_receiver(*receiver, for_key);
    }

--- a/cql3/statements/alter_role_statement.hh
+++ b/cql3/statements/alter_role_statement.hh
@@ -62,6 +62,8 @@ public:
                , _options(std::move(options)) {
    }

+    std::unique_ptr<prepared_statement> prepare(database& db, cql_stats& stats) override;
+
    void validate(service::storage_proxy&, const service::client_state&) const override;

    virtual future<> check_access(service::storage_proxy& proxy, const service::client_state&) const override;
--- a/cql3/statements/alter_table_statement.cc
+++ b/cql3/statements/alter_table_statement.cc
@@ -207,6 +207,9 @@ void alter_table_statement::add_column(const schema& schema, const table& cf, sc
                "because a collection with the same name and a different type has already been used in the past", column_name));
        }
    }
+    if (type->is_counter() && !schema.is_counter()) {
+        throw exceptions::configuration_exception(format("Cannot add a counter column ({}) in a non counter column family", column_name));
+    }

    cfm.with_column(column_name.name(), type, is_static ? column_kind::static_column : column_kind::regular_column);

@@ -222,7 +225,7 @@ void alter_table_statement::add_column(const schema& schema, const table& cf, sc
            schema_builder builder(view);
            if (view->view_info()->include_all_columns()) {
                builder.with_column(column_name.name(), type);
-            } else if (view->view_info()->base_non_pk_columns_in_view_pk().empty()) {
+            } else if (!view->view_info()->has_base_non_pk_columns_in_view_pk()) {
                db::view::create_virtual_column(builder, column_name.name(), type);
            }
            view_updates.push_back(view_ptr(builder.build()));
--- a/cql3/statements/authentication_statement.cc
+++ b/cql3/statements/authentication_statement.cc
@@ -46,11 +46,6 @@ uint32_t cql3::statements::authentication_statement::get_bound_terms() const {
    return 0;
 }

-std::unique_ptr<cql3::statements::prepared_statement> cql3::statements::authentication_statement::prepare(
-                database& db, cql_stats& stats) {
-    return std::make_unique<prepared_statement>(this->shared_from_this());
-}
-
 bool cql3::statements::authentication_statement::uses_function(
                const sstring& ks_name, const sstring& function_name) const {
    return parsed_statement::uses_function(ks_name, function_name);
--- a/cql3/statements/authentication_statement.hh
+++ b/cql3/statements/authentication_statement.hh
@@ -50,14 +50,12 @@ namespace cql3 {

 namespace statements {

-class authentication_statement : public raw::parsed_statement, public cql_statement_no_metadata, public ::enable_shared_from_this<authentication_statement> {
+class authentication_statement : public raw::parsed_statement, public cql_statement_no_metadata {
 public:
    authentication_statement() : cql_statement_no_metadata(&timeout_config::other_timeout) {}

    uint32_t get_bound_terms() const override;

-    std::unique_ptr<prepared_statement> prepare(database& db, cql_stats& stats) override;
-
    bool uses_function(const sstring& ks_name, const sstring& function_name) const override;

    bool depends_on_keyspace(const sstring& ks_name) const override;
--- a/cql3/statements/authorization_statement.cc
+++ b/cql3/statements/authorization_statement.cc
@@ -46,11 +46,6 @@ uint32_t cql3::statements::authorization_statement::get_bound_terms() const {
    return 0;
 }

-std::unique_ptr<cql3::statements::prepared_statement> cql3::statements::authorization_statement::prepare(
-                database& db, cql_stats& stats) {
-    return std::make_unique<prepared_statement>(this->shared_from_this());
-}
-
 bool cql3::statements::authorization_statement::uses_function(
                const sstring& ks_name, const sstring& function_name) const {
    return parsed_statement::uses_function(ks_name, function_name);
--- a/cql3/statements/authorization_statement.hh
+++ b/cql3/statements/authorization_statement.hh
@@ -54,14 +54,12 @@ namespace cql3 {

 namespace statements {

-class authorization_statement : public raw::parsed_statement, public cql_statement_no_metadata, public ::enable_shared_from_this<authorization_statement> {
+class authorization_statement : public raw::parsed_statement, public cql_statement_no_metadata {
 public:
    authorization_statement() : cql_statement_no_metadata(&timeout_config::other_timeout) {}

    uint32_t get_bound_terms() const override;

-    std::unique_ptr<prepared_statement> prepare(database& db, cql_stats& stats) override;
-
    bool uses_function(const sstring& ks_name, const sstring& function_name) const override;

    bool depends_on_keyspace(const sstring& ks_name) const override;
--- a/cql3/statements/batch_statement.cc
+++ b/cql3/statements/batch_statement.cc
@@ -68,6 +68,7 @@ batch_statement::batch_statement(int bound_terms, type type_,
    , _has_conditions(boost::algorithm::any_of(_statements, [] (auto&& s) { return s.statement->has_conditions(); }))
    , _stats(stats)
 {
+    validate();
    if (has_conditions()) {
        // A batch can be created not only by raw::batch_statement::prepare, but also by
        // cql_server::connection::process_batch, which doesn't call any methods of
@@ -340,7 +341,7 @@ future<shared_ptr<cql_transport::messages::result_message>> batch_statement::exe
        const query_options& options,
        service::query_state& qs) const {

-    auto cl_for_commit = options.get_consistency();
+    auto cl_for_learn = options.get_consistency();
    auto cl_for_paxos = options.check_serial_consistency();
    seastar::shared_ptr<cas_request> request;
    schema_ptr schema;
@@ -378,7 +379,7 @@ future<shared_ptr<cql_transport::messages::result_message>> batch_statement::exe
    }

    auto shard = service::storage_proxy::cas_shard(*_statements[0].statement->s, request->key()[0].start()->value().as_decorated_key().token());
-    if (shard != engine().cpu_id()) {
+    if (shard != this_shard_id()) {
        proxy.get_stats().replica_cross_shard_ops++;
        return make_ready_future<shared_ptr<cql_transport::messages::result_message>>(
                make_shared<cql_transport::messages::result_message::bounce_to_shard>(shard));
@@ -386,7 +387,7 @@ future<shared_ptr<cql_transport::messages::result_message>> batch_statement::exe

    return proxy.cas(schema, request, request->read_command(), request->key(),
            {read_timeout, qs.get_permit(), qs.get_client_state(), qs.get_trace_state()},
-            cl_for_paxos, cl_for_commit, batch_timeout, cas_timeout).then([this, request] (bool is_applied) {
+            cl_for_paxos, cl_for_learn, batch_timeout, cas_timeout).then([this, request] (bool is_applied) {
        return modification_statement::build_cas_result_set(_metadata, _columns_of_cas_result_set, is_applied, request->rows());
    });
 }
@@ -400,9 +401,9 @@ void batch_statement::build_cas_result_set_metadata() {
    _columns_of_cas_result_set.resize(schema.all_columns_count());

    // Add the mandatory [applied] column to result set metadata
-    std::vector<shared_ptr<column_specification>> columns;
+    std::vector<lw_shared_ptr<column_specification>> columns;

-    auto applied = ::make_shared<cql3::column_specification>(schema.ks_name(), schema.cf_name(),
+    auto applied = make_lw_shared<cql3::column_specification>(schema.ks_name(), schema.cf_name(),
            ::make_shared<cql3::column_identifier>("[applied]", false), boolean_type);
    columns.push_back(applied);

@@ -448,7 +449,6 @@ batch_statement::prepare(database& db, cql_stats& stats) {
    prep_attrs->collect_marker_specification(bound_names);

    cql3::statements::batch_statement batch_statement_(bound_names.size(), _type, std::move(statements), std::move(prep_attrs), stats);
-    batch_statement_.validate();

    std::vector<uint16_t> partition_key_bind_indices;
    if (!have_multiple_cfs && batch_statement_.get_statements().size() > 0) {
--- a/cql3/statements/cas_request.cc
+++ b/cql3/statements/cas_request.cc
@@ -175,9 +175,9 @@ bool cas_request::applies_to() const {
    return applies;
 }

-std::optional<mutation> cas_request::apply(query::result& qr,
+std::optional<mutation> cas_request::apply(foreign_ptr<lw_shared_ptr<query::result>> qr,
        const query::partition_slice& slice, api::timestamp_type ts) {
-    _rows = update_parameters::build_prefetch_data(_schema, qr, slice);
+    _rows = update_parameters::build_prefetch_data(_schema, *qr, slice);
    if (applies_to()) {
        return apply_updates(ts);
    } else {
--- a/cql3/statements/cas_request.hh
+++ b/cql3/statements/cas_request.hh
@@ -95,7 +95,7 @@ public:
    void add_row_update(const modification_statement& stmt_arg, std::vector<query::clustering_range> ranges_arg,
        modification_statement::json_cache_opt json_cache_arg, const query_options& options_arg);

-    virtual std::optional<mutation> apply(query::result& qr,
+    virtual std::optional<mutation> apply(foreign_ptr<lw_shared_ptr<query::result>> qr,
            const query::partition_slice& slice, api::timestamp_type ts) override;

 private:
--- a/cql3/statements/cf_prop_defs.cc
+++ b/cql3/statements/cf_prop_defs.cc
@@ -255,7 +255,9 @@ void cf_prop_defs::apply_to_builder(schema_builder& builder, schema::extensions_
        }
    }

-    builder.set_default_time_to_live(gc_clock::duration(get_int(KW_DEFAULT_TIME_TO_LIVE, DEFAULT_DEFAULT_TIME_TO_LIVE)));
+    if (has_property(KW_DEFAULT_TIME_TO_LIVE)) {
+        builder.set_default_time_to_live(gc_clock::duration(get_int(KW_DEFAULT_TIME_TO_LIVE, DEFAULT_DEFAULT_TIME_TO_LIVE)));
+    }

    if (has_property(KW_SPECULATIVE_RETRY)) {
        builder.set_speculative_retry(get_string(KW_SPECULATIVE_RETRY, builder.get_speculative_retry().to_sstring()));
--- a/cql3/statements/create_role_statement.hh
+++ b/cql3/statements/create_role_statement.hh
@@ -54,10 +54,6 @@ namespace statements {
 class create_role_statement final : public authentication_statement {
    sstring _role;

-    bool _is_superuser;
-
-    bool _can_login;
-
    role_options _options;

    bool _if_not_exists;
@@ -70,6 +66,8 @@ public:
                , _if_not_exists(if_not_exists) {
    }

+    std::unique_ptr<prepared_statement> prepare(database& db, cql_stats& stats) override;
+
    future<> grant_permissions_to_creator(const service::client_state&) const;

    void validate(service::storage_proxy&, const service::client_state&) const override;
--- a/cql3/statements/create_table_statement.cc
+++ b/cql3/statements/create_table_statement.cc
@@ -135,7 +135,7 @@ schema_ptr create_table_statement::get_cf_meta_data(const database& db) const {
 void create_table_statement::apply_properties_to(schema_builder& builder, const database& db) const {
    auto&& columns = get_columns();
    for (auto&& column : columns) {
-        builder.with_column(column);
+        builder.with_column_ordered(column);
    }
 #if 0
    cfmd.defaultValidator(defaultValidator)
--- a/cql3/statements/drop_role_statement.hh
+++ b/cql3/statements/drop_role_statement.hh
@@ -60,6 +60,8 @@ public:
    drop_role_statement(const cql3::role_name& name, bool if_exists) : _role(name.to_string()), _if_exists(if_exists) {
    }

+    std::unique_ptr<prepared_statement> prepare(database& db, cql_stats& stats) override;
+
    virtual void validate(service::storage_proxy&, const service::client_state&) const override;

    virtual future<> check_access(service::storage_proxy& proxy, const service::client_state&) const override;
--- a/cql3/statements/grant_role_statement.hh
+++ b/cql3/statements/grant_role_statement.hh
@@ -61,6 +61,8 @@ public:
        : _role(name.to_string()), _grantee(grantee.to_string()) {
    }

+    std::unique_ptr<prepared_statement> prepare(database& db, cql_stats& stats) override;
+
    virtual future<> check_access(service::storage_proxy& proxy, const service::client_state&) const override;

    virtual future<::shared_ptr<cql_transport::messages::result_message>>
--- a/cql3/statements/grant_statement.cc
+++ b/cql3/statements/grant_statement.cc
@@ -42,6 +42,11 @@
 #include "grant_statement.hh"
 #include "auth/authorizer.hh"

+std::unique_ptr<cql3::statements::prepared_statement> cql3::statements::grant_statement::prepare(
+                database& db, cql_stats& stats) {
+    return std::make_unique<prepared_statement>(::make_shared<grant_statement>(*this));
+}
+
 future<::shared_ptr<cql_transport::messages::result_message>>
 cql3::statements::grant_statement::execute(service::storage_proxy& proxy, service::query_state& state, const query_options& options) const {
    auto& auth_service = *state.get_client_state().get_auth_service();
--- a/cql3/statements/grant_statement.hh
+++ b/cql3/statements/grant_statement.hh
@@ -51,6 +51,8 @@ class grant_statement : public permission_altering_statement {
 public:
    using permission_altering_statement::permission_altering_statement;

+    std::unique_ptr<prepared_statement> prepare(database& db, cql_stats& stats) override;
+
    future<::shared_ptr<cql_transport::messages::result_message>> execute(service::storage_proxy&
                    , service::query_state&
                    , const query_options&) const override;
--- a/cql3/statements/list_permissions_statement.cc
+++ b/cql3/statements/list_permissions_statement.cc
@@ -58,6 +58,11 @@ cql3::statements::list_permissions_statement::list_permissions_statement(
            , _recursive(recursive) {
 }

+std::unique_ptr<cql3::statements::prepared_statement> cql3::statements::list_permissions_statement::prepare(
+                database& db, cql_stats& stats) {
+    return std::make_unique<prepared_statement>(::make_shared<list_permissions_statement>(*this));
+}
+
 void cql3::statements::list_permissions_statement::validate(
        service::storage_proxy& proxy,
        const service::client_state& state) const {
@@ -105,14 +110,14 @@ cql3::statements::list_permissions_statement::execute(
        service::query_state& state,
        const query_options& options) const {
    static auto make_column = [](sstring name) {
-        return ::make_shared<column_specification>(
+        return make_lw_shared<column_specification>(
                auth::meta::AUTH_KS,
                "permissions",
                ::make_shared<column_identifier>(std::move(name), true),
                utf8_type);
    };

-    static thread_local const std::vector<::shared_ptr<column_specification>> metadata({
+    static thread_local const std::vector<lw_shared_ptr<column_specification>> metadata({
        make_column("role"), make_column("username"), make_column("resource"), make_column("permission")
    });

--- a/cql3/statements/list_permissions_statement.hh
+++ b/cql3/statements/list_permissions_statement.hh
@@ -61,6 +61,8 @@ private:
 public:
    list_permissions_statement(auth::permission_set, std::optional<auth::resource>, std::optional<sstring>, bool);

+    std::unique_ptr<prepared_statement> prepare(database& db, cql_stats& stats) override;
+
    void validate(service::storage_proxy&, const service::client_state&) const override;

    future<> check_access(service::storage_proxy& proxy, const service::client_state&) const override;
--- a/cql3/statements/list_roles_statement.hh
+++ b/cql3/statements/list_roles_statement.hh
@@ -62,6 +62,8 @@ public:
    list_roles_statement(const std::optional<role_name>& grantee, bool recursive)
        : _grantee(grantee ? sstring(grantee->to_string()) : std::optional<sstring>()), _recursive(recursive) {}

+    std::unique_ptr<prepared_statement> prepare(database& db, cql_stats& stats) override;
+
    virtual future<> check_access(service::storage_proxy& proxy, const service::client_state&) const override;

    virtual future<::shared_ptr<cql_transport::messages::result_message>>
--- a/cql3/statements/list_users_statement.cc
+++ b/cql3/statements/list_users_statement.cc
@@ -45,6 +45,11 @@
 #include "auth/common.hh"
 #include "transport/messages/result_message.hh"

+std::unique_ptr<cql3::statements::prepared_statement> cql3::statements::list_users_statement::prepare(
+                database& db, cql_stats& stats) {
+    return std::make_unique<prepared_statement>(::make_shared<list_users_statement>(*this));
+}
+
 void cql3::statements::list_users_statement::validate(service::storage_proxy& proxy, const service::client_state& state) const {
 }

@@ -58,7 +63,7 @@ cql3::statements::list_users_statement::execute(service::storage_proxy& proxy, s
    static const sstring virtual_table_name("users");

    static const auto make_column_spec = [](const sstring& name, const ::shared_ptr<const abstract_type>& ty) {
-        return ::make_shared<column_specification>(
+        return make_lw_shared<column_specification>(
            auth::meta::AUTH_KS,
            virtual_table_name,
            ::make_shared<column_identifier>(name, true),
@@ -66,7 +71,7 @@ cql3::statements::list_users_statement::execute(service::storage_proxy& proxy, s
    };

    static thread_local const auto metadata = ::make_shared<cql3::metadata>(
-        std::vector<::shared_ptr<column_specification>>{
+        std::vector<lw_shared_ptr<column_specification>>{
                make_column_spec("name", utf8_type),
                make_column_spec("super", boolean_type)});

--- a/cql3/statements/list_users_statement.hh
+++ b/cql3/statements/list_users_statement.hh
@@ -49,6 +49,9 @@ namespace statements {

 class list_users_statement : public authentication_statement {
 public:
+
+    std::unique_ptr<prepared_statement> prepare(database& db, cql_stats& stats) override;
+
    void validate(service::storage_proxy&, const service::client_state&) const override;
    future<> check_access(service::storage_proxy& proxy, const service::client_state&) const override;
    future<::shared_ptr<cql_transport::messages::result_message>> execute(service::storage_proxy&
--- a/cql3/statements/modification_statement.cc
+++ b/cql3/statements/modification_statement.cc
@@ -213,7 +213,7 @@ bool modification_statement::applies_to(const update_parameters::prefetch_data::
        return row == nullptr;
    }

-    auto condition_applies = [&row, &options](const shared_ptr<column_condition>& cond) {
+    auto condition_applies = [&row, &options](const lw_shared_ptr<column_condition>& cond) {
        const data_value* value = nullptr;
        if (row != nullptr) {
            auto it = row->cells.find(cond->column.ordinal_id);
@@ -322,7 +322,7 @@ modification_statement::execute_without_condition(service::storage_proxy& proxy,
 future<::shared_ptr<cql_transport::messages::result_message>>
 modification_statement::execute_with_condition(service::storage_proxy& proxy, service::query_state& qs, const query_options& options) const {

-    auto cl_for_commit = options.get_consistency();
+    auto cl_for_learn = options.get_consistency();
    auto cl_for_paxos = options.check_serial_consistency();
    db::timeout_clock::time_point now = db::timeout_clock::now();
    const timeout_config& cfg = options.get_timeout_config();
@@ -346,7 +346,7 @@ modification_statement::execute_with_condition(service::storage_proxy& proxy, se
    request->add_row_update(*this, std::move(ranges), std::move(json_cache), options);

    auto shard = service::storage_proxy::cas_shard(*s, request->key()[0].start()->value().as_decorated_key().token());
-    if (shard != engine().cpu_id()) {
+    if (shard != this_shard_id()) {
        proxy.get_stats().replica_cross_shard_ops++;
        return make_ready_future<shared_ptr<cql_transport::messages::result_message>>(
                make_shared<cql_transport::messages::result_message::bounce_to_shard>(shard));
@@ -354,7 +354,7 @@ modification_statement::execute_with_condition(service::storage_proxy& proxy, se

    return proxy.cas(s, request, request->read_command(), request->key(),
            {read_timeout, qs.get_permit(), qs.get_client_state(), qs.get_trace_state()},
-            cl_for_paxos, cl_for_commit, statement_timeout, cas_timeout).then([this, request] (bool is_applied) {
+            cl_for_paxos, cl_for_learn, statement_timeout, cas_timeout).then([this, request] (bool is_applied) {
        return build_cas_result_set(_metadata, _columns_of_cas_result_set, is_applied, request->rows());
    });
 }
@@ -408,9 +408,9 @@ modification_statement::build_cas_result_set(seastar::shared_ptr<cql3::metadata>

 void modification_statement::build_cas_result_set_metadata() {

-    std::vector<shared_ptr<column_specification>> columns;
+    std::vector<lw_shared_ptr<column_specification>> columns;
    // Add the mandatory [applied] column to result set metadata
-    auto applied = seastar::make_shared<cql3::column_specification>(s->ks_name(), s->cf_name(),
+    auto applied = make_lw_shared<cql3::column_specification>(s->ks_name(), s->cf_name(),
            make_shared<cql3::column_identifier>("[applied]", false), boolean_type);

    columns.push_back(applied);
@@ -647,7 +647,7 @@ void modification_statement::inc_cql_stats(bool is_internal) const {
    ++_stats.query_cnt(src_sel, _ks_sel, cond_sel, type);
 }

-void modification_statement::add_condition(::shared_ptr<column_condition> cond) {
+void modification_statement::add_condition(lw_shared_ptr<column_condition> cond) {
    if (cond->column.is_static()) {
        _has_static_column_conditions = true;
        _static_conditions.emplace_back(std::move(cond));
--- a/Show More
+++ b/Show More