release: prepare for 4.3.2

Update seastar submodule
* seastar 69f8394742...b70b444924 (1):
  > io_queue: Fix "delay" metrics

Fixes #8166.
2021-03-01 22:04:21 +02:00 · 2021-03-01 13:57:57 +02:00 · 2021-03-01 12:20:57 +02:00 · 2021-03-01 12:19:40 +02:00 · 2021-02-28 16:42:43 +02:00 · 2021-02-23 12:13:51 +02:00
83 changed files with 1756 additions and 428 deletions
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,6 +1,6 @@
 [submodule "seastar"]
 	path = seastar
-	url = ../seastar
+	url = ../scylla-seastar
 	ignore = dirty
 [submodule "swagger-ui"]
 	path = swagger-ui
--- a/2
+++ b/2
@@ -1,7 +1,7 @@
 #!/bin/sh

 PRODUCT=scylla
-VERSION=666.development
+VERSION=4.3.2

 if test -f version
 then
--- a/alternator/conditions.cc
+++ b/alternator/conditions.cc
@@ -159,23 +159,40 @@ static bool check_NE(const rjson::value* v1, const rjson::value& v2) {
 }

 // Check if two JSON-encoded values match with the BEGINS_WITH relation
-static bool check_BEGINS_WITH(const rjson::value* v1, const rjson::value& v2) {
-    // BEGINS_WITH requires that its single operand (v2) be a string or
-    // binary - otherwise it's a validation error. However, problems with
-    // the stored attribute (v1) will just return false (no match).
-    if (!v2.IsObject() || v2.MemberCount() != 1) {
-        throw api_error::validation(format("BEGINS_WITH operator encountered malformed AttributeValue: {}", v2));
-    }
-    auto it2 = v2.MemberBegin();
-    if (it2->name != "S" && it2->name != "B") {
-        throw api_error::validation(format("BEGINS_WITH operator requires String or Binary type in AttributeValue, got {}", it2->name));
-    }
-
-
+bool check_BEGINS_WITH(const rjson::value* v1, const rjson::value& v2,
+                       bool v1_from_query, bool v2_from_query) {
+    bool bad = false;
    if (!v1 || !v1->IsObject() || v1->MemberCount() != 1) {
+        if (v1_from_query) {
+            throw api_error::validation("begins_with() encountered malformed argument");
+        } else {
+            bad = true;
+        }
+    } else if (v1->MemberBegin()->name != "S" && v1->MemberBegin()->name != "B") {
+        if (v1_from_query) {
+            throw api_error::validation(format("begins_with supports only string or binary type, got: {}", *v1));
+        } else {
+            bad = true;
+        }
+    }
+    if (!v2.IsObject() || v2.MemberCount() != 1) {
+        if (v2_from_query) {
+            throw api_error::validation("begins_with() encountered malformed argument");
+        } else {
+            bad = true;
+        }
+    } else if (v2.MemberBegin()->name != "S" && v2.MemberBegin()->name != "B") {
+        if (v2_from_query) {
+            throw api_error::validation(format("begins_with() supports only string or binary type, got: {}", v2));
+        } else {
+            bad = true;
+        }
+    }
+    if (bad) {
        return false;
    }
    auto it1 = v1->MemberBegin();
+    auto it2 = v2.MemberBegin();
    if (it1->name != it2->name) {
        return false;
    }
@@ -279,24 +296,38 @@ static bool check_NOT_NULL(const rjson::value* val) {
    return val != nullptr;
 }

+// Only types S, N or B (string, number or bytes) may be compared by the
+// various comparison operators - lt, le, gt, ge, and between.
+static bool check_comparable_type(const rjson::value& v) {
+    if (!v.IsObject() || v.MemberCount() != 1) {
+        return false;
+    }
+    const rjson::value& type = v.MemberBegin()->name;
+    return type == "S" || type == "N" || type == "B";
+}
+
 // Check if two JSON-encoded values match with cmp.
 template <typename Comparator>
-bool check_compare(const rjson::value* v1, const rjson::value& v2, const Comparator& cmp) {
-    if (!v2.IsObject() || v2.MemberCount() != 1) {
-        throw api_error::validation(
-                        format("{} requires a single AttributeValue of type String, Number, or Binary",
-                               cmp.diagnostic));
+bool check_compare(const rjson::value* v1, const rjson::value& v2, const Comparator& cmp,
+                   bool v1_from_query, bool v2_from_query) {
+    bool bad = false;
+    if (!v1 || !check_comparable_type(*v1)) {
+        if (v1_from_query) {
+            throw api_error::validation(format("{} allow only the types String, Number, or Binary", cmp.diagnostic));
+        }
+        bad = true;
    }
-    const auto& kv2 = *v2.MemberBegin();
-    if (kv2.name != "S" && kv2.name != "N" && kv2.name != "B") {
-        throw api_error::validation(
-                        format("{} requires a single AttributeValue of type String, Number, or Binary",
-                               cmp.diagnostic));
+    if (!check_comparable_type(v2)) {
+        if (v2_from_query) {
+            throw api_error::validation(format("{} allow only the types String, Number, or Binary", cmp.diagnostic));
+        }
+        bad = true;
    }
-    if (!v1 || !v1->IsObject() || v1->MemberCount() != 1) {
+    if (bad) {
        return false;
    }
    const auto& kv1 = *v1->MemberBegin();
+    const auto& kv2 = *v2.MemberBegin();
    if (kv1.name != kv2.name) {
        return false;
    }
@@ -310,7 +341,8 @@ bool check_compare(const rjson::value* v1, const rjson::value& v2, const Compara
    if (kv1.name == "B") {
        return cmp(base64_decode(kv1.value), base64_decode(kv2.value));
    }
-    clogger.error("check_compare panic: LHS type equals RHS type, but one is in {N,S,B} while the other isn't");
+    // cannot reach here, as check_comparable_type() verifies the type is one
+    // of the above options.
    return false;
 }

@@ -341,56 +373,71 @@ struct cmp_gt {
    static constexpr const char* diagnostic = "GT operator";
 };

-// True if v is between lb and ub, inclusive.  Throws if lb > ub.
+// True if v is between lb and ub, inclusive.  Throws or returns false
+// (depending on bounds_from_query parameter) if lb > ub.
 template <typename T>
-static bool check_BETWEEN(const T& v, const T& lb, const T& ub) {
+static bool check_BETWEEN(const T& v, const T& lb, const T& ub, bool bounds_from_query) {
    if (cmp_lt()(ub, lb)) {
-        throw api_error::validation(
-                        format("BETWEEN operator requires lower_bound <= upper_bound, but {} > {}", lb, ub));
+        if (bounds_from_query) {
+            throw api_error::validation(
+                format("BETWEEN operator requires lower_bound <= upper_bound, but {} > {}", lb, ub));
+        } else {
+            return false;
+        }
    }
    return cmp_ge()(v, lb) && cmp_le()(v, ub);
 }

-static bool check_BETWEEN(const rjson::value* v, const rjson::value& lb, const rjson::value& ub) {
-    if (!v) {
+static bool check_BETWEEN(const rjson::value* v, const rjson::value& lb, const rjson::value& ub,
+                          bool v_from_query, bool lb_from_query, bool ub_from_query) {
+    if ((v && v_from_query && !check_comparable_type(*v)) ||
+        (lb_from_query && !check_comparable_type(lb)) ||
+        (ub_from_query && !check_comparable_type(ub))) {
+        throw api_error::validation("between allow only the types String, Number, or Binary");
+
+    }
+    if (!v || !v->IsObject() || v->MemberCount() != 1 ||
+        !lb.IsObject() || lb.MemberCount() != 1 ||
+        !ub.IsObject() || ub.MemberCount() != 1) {
        return false;
    }
-    if (!v->IsObject() || v->MemberCount() != 1) {
-        throw api_error::validation(format("BETWEEN operator encountered malformed AttributeValue: {}", *v));
-    }
-    if (!lb.IsObject() || lb.MemberCount() != 1) {
-        throw api_error::validation(format("BETWEEN operator encountered malformed AttributeValue: {}", lb));
-    }
-    if (!ub.IsObject() || ub.MemberCount() != 1) {
-        throw api_error::validation(format("BETWEEN operator encountered malformed AttributeValue: {}", ub));
-    }

    const auto& kv_v = *v->MemberBegin();
    const auto& kv_lb = *lb.MemberBegin();
    const auto& kv_ub = *ub.MemberBegin();
+    bool bounds_from_query = lb_from_query && ub_from_query;
    if (kv_lb.name != kv_ub.name) {
-        throw api_error::validation(
+        if (bounds_from_query) {
+           throw api_error::validation(
                format("BETWEEN operator requires the same type for lower and upper bound; instead got {} and {}",
                       kv_lb.name, kv_ub.name));
+        } else {
+            return false;
+        }
    }
    if (kv_v.name != kv_lb.name) { // Cannot compare different types, so v is NOT between lb and ub.
        return false;
    }
    if (kv_v.name == "N") {
        const char* diag = "BETWEEN operator";
-        return check_BETWEEN(unwrap_number(*v, diag), unwrap_number(lb, diag), unwrap_number(ub, diag));
+        return check_BETWEEN(unwrap_number(*v, diag), unwrap_number(lb, diag), unwrap_number(ub, diag), bounds_from_query);
    }
    if (kv_v.name == "S") {
        return check_BETWEEN(std::string_view(kv_v.value.GetString(), kv_v.value.GetStringLength()),
                             std::string_view(kv_lb.value.GetString(), kv_lb.value.GetStringLength()),
-                             std::string_view(kv_ub.value.GetString(), kv_ub.value.GetStringLength()));
+                             std::string_view(kv_ub.value.GetString(), kv_ub.value.GetStringLength()),
+                             bounds_from_query);
    }
    if (kv_v.name == "B") {
-        return check_BETWEEN(base64_decode(kv_v.value), base64_decode(kv_lb.value), base64_decode(kv_ub.value));
+        return check_BETWEEN(base64_decode(kv_v.value), base64_decode(kv_lb.value), base64_decode(kv_ub.value), bounds_from_query);
    }
-    throw api_error::validation(
-        format("BETWEEN operator requires AttributeValueList elements to be of type String, Number, or Binary; instead got {}",
+    if (v_from_query) {
+        throw api_error::validation(
+            format("BETWEEN operator requires AttributeValueList elements to be of type String, Number, or Binary; instead got {}",
               kv_lb.name));
+    } else {
+        return false;
+    }
 }

 // Verify one Expect condition on one attribute (whose content is "got")
@@ -437,19 +484,19 @@ static bool verify_expected_one(const rjson::value& condition, const rjson::valu
            return check_NE(got, (*attribute_value_list)[0]);
        case comparison_operator_type::LT:
            verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
-            return check_compare(got, (*attribute_value_list)[0], cmp_lt{});
+            return check_compare(got, (*attribute_value_list)[0], cmp_lt{}, false, true);
        case comparison_operator_type::LE:
            verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
-            return check_compare(got, (*attribute_value_list)[0], cmp_le{});
+            return check_compare(got, (*attribute_value_list)[0], cmp_le{}, false, true);
        case comparison_operator_type::GT:
            verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
-            return check_compare(got, (*attribute_value_list)[0], cmp_gt{});
+            return check_compare(got, (*attribute_value_list)[0], cmp_gt{}, false, true);
        case comparison_operator_type::GE:
            verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
-            return check_compare(got, (*attribute_value_list)[0], cmp_ge{});
+            return check_compare(got, (*attribute_value_list)[0], cmp_ge{}, false, true);
        case comparison_operator_type::BEGINS_WITH:
            verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
-            return check_BEGINS_WITH(got, (*attribute_value_list)[0]);
+            return check_BEGINS_WITH(got, (*attribute_value_list)[0], false, true);
        case comparison_operator_type::IN:
            verify_operand_count(attribute_value_list, nonempty(), *comparison_operator);
            return check_IN(got, *attribute_value_list);
@@ -461,7 +508,8 @@ static bool verify_expected_one(const rjson::value& condition, const rjson::valu
            return check_NOT_NULL(got);
        case comparison_operator_type::BETWEEN:
            verify_operand_count(attribute_value_list, exact_size(2), *comparison_operator);
-            return check_BETWEEN(got, (*attribute_value_list)[0], (*attribute_value_list)[1]);
+            return check_BETWEEN(got, (*attribute_value_list)[0], (*attribute_value_list)[1],
+                                 false, true, true);
        case comparison_operator_type::CONTAINS:
            {
                verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
@@ -573,7 +621,8 @@ static bool calculate_primitive_condition(const parsed::primitive_condition& con
            // Shouldn't happen unless we have a bug in the parser
            throw std::logic_error(format("Wrong number of values {} in BETWEEN primitive_condition", cond._values.size()));
        }
-        return check_BETWEEN(&calculated_values[0], calculated_values[1], calculated_values[2]);
+        return check_BETWEEN(&calculated_values[0], calculated_values[1], calculated_values[2],
+                             cond._values[0].is_constant(), cond._values[1].is_constant(), cond._values[2].is_constant());
    case parsed::primitive_condition::type::IN:
        return check_IN(calculated_values);
    case parsed::primitive_condition::type::VALUE:
@@ -604,13 +653,17 @@ static bool calculate_primitive_condition(const parsed::primitive_condition& con
    case parsed::primitive_condition::type::NE:
        return check_NE(&calculated_values[0], calculated_values[1]);
    case parsed::primitive_condition::type::GT:
-        return check_compare(&calculated_values[0], calculated_values[1], cmp_gt{});
+        return check_compare(&calculated_values[0], calculated_values[1], cmp_gt{},
+            cond._values[0].is_constant(), cond._values[1].is_constant());
    case parsed::primitive_condition::type::GE:
-        return check_compare(&calculated_values[0], calculated_values[1], cmp_ge{});
+        return check_compare(&calculated_values[0], calculated_values[1], cmp_ge{},
+            cond._values[0].is_constant(), cond._values[1].is_constant());
    case parsed::primitive_condition::type::LT:
-        return check_compare(&calculated_values[0], calculated_values[1], cmp_lt{});
+        return check_compare(&calculated_values[0], calculated_values[1], cmp_lt{},
+            cond._values[0].is_constant(), cond._values[1].is_constant());
    case parsed::primitive_condition::type::LE:
-        return check_compare(&calculated_values[0], calculated_values[1], cmp_le{});
+        return check_compare(&calculated_values[0], calculated_values[1], cmp_le{},
+            cond._values[0].is_constant(), cond._values[1].is_constant());
    default:
        // Shouldn't happen unless we have a bug in the parser
        throw std::logic_error(format("Unknown type {} in primitive_condition object", (int)(cond._op)));
--- a/alternator/conditions.hh
+++ b/alternator/conditions.hh
@@ -52,6 +52,7 @@ bool verify_expected(const rjson::value& req, const rjson::value* previous_item)
 bool verify_condition(const rjson::value& condition, bool require_all, const rjson::value* previous_item);

 bool check_CONTAINS(const rjson::value* v1, const rjson::value& v2);
+bool check_BEGINS_WITH(const rjson::value* v1, const rjson::value& v2, bool v1_from_query, bool v2_from_query);

 bool verify_condition_expression(
        const parsed::condition_expression& condition_expression,
--- a/alternator/executor.cc
+++ b/alternator/executor.cc
@@ -1881,7 +1881,8 @@ static std::string get_item_type_string(const rjson::value& v) {

 // calculate_attrs_to_get() takes either AttributesToGet or
 // ProjectionExpression parameters (having both is *not* allowed),
-// and returns the list of cells we need to read.
+// and returns the list of cells we need to read, or an empty set when
+// *all* attributes are to be returned.
 // In our current implementation, only top-level attributes are stored
 // as cells, and nested documents are stored serialized as JSON.
 // So this function currently returns only the the top-level attributes
@@ -2243,19 +2244,30 @@ update_item_operation::apply(std::unique_ptr<rjson::value> previous_item, api::t
                    rjson::value v1 = calculate_value(base, calculate_value_caller::UpdateExpression, previous_item.get());
                    rjson::value v2 = calculate_value(addition, calculate_value_caller::UpdateExpression, previous_item.get());
                    rjson::value result;
-                    std::string v1_type = get_item_type_string(v1);
-                    if (v1_type == "N") {
-                        if (get_item_type_string(v2) != "N") {
-                            throw api_error::validation(format("Incorrect operand type for operator or function. Expected {}: {}", v1_type, rjson::print(v2)));
+                    // An ADD can be used to create a new attribute (when
+                    // v1.IsNull()) or to add to a pre-existing attribute:
+                    if (v1.IsNull()) {
+                        std::string v2_type = get_item_type_string(v2);
+                        if (v2_type == "N" || v2_type == "SS" || v2_type == "NS" || v2_type == "BS") {
+                            result = v2;
+                        } else {
+                            throw api_error::validation(format("An operand in the update expression has an incorrect data type: {}", v2));
                        }
-                        result = number_add(v1, v2);
-                    } else if (v1_type == "SS" || v1_type == "NS" || v1_type == "BS") {
-                        if (get_item_type_string(v2) != v1_type) {
-                            throw api_error::validation(format("Incorrect operand type for operator or function. Expected {}: {}", v1_type, rjson::print(v2)));
-                        }
-                        result = set_sum(v1, v2);
                    } else {
-                        throw api_error::validation(format("An operand in the update expression has an incorrect data type: {}", v1));
+                        std::string v1_type = get_item_type_string(v1);
+                        if (v1_type == "N") {
+                            if (get_item_type_string(v2) != "N") {
+                                throw api_error::validation(format("Incorrect operand type for operator or function. Expected {}: {}", v1_type, rjson::print(v2)));
+                            }
+                            result = number_add(v1, v2);
+                        } else if (v1_type == "SS" || v1_type == "NS" || v1_type == "BS") {
+                            if (get_item_type_string(v2) != v1_type) {
+                                throw api_error::validation(format("Incorrect operand type for operator or function. Expected {}: {}", v1_type, rjson::print(v2)));
+                            }
+                            result = set_sum(v1, v2);
+                        } else {
+                            throw api_error::validation(format("An operand in the update expression has an incorrect data type: {}", v1));
+                        }
                    }
                    do_update(to_bytes(column_name), result);
                },
@@ -2571,6 +2583,10 @@ public:
            std::unordered_set<std::string>& used_attribute_values);
    bool check(const rjson::value& item) const;
    bool filters_on(std::string_view attribute) const;
+    // for_filters_on() runs the given function on the attributes that the
+    // filter works on. It may run for the same attribute more than once if
+    // used more than once in the filter.
+    void for_filters_on(const noncopyable_function<void(std::string_view)>& func) const;
    operator bool() const { return bool(_imp); }
 };

@@ -2651,10 +2667,26 @@ bool filter::filters_on(std::string_view attribute) const {
    }, *_imp);
 }

+void filter::for_filters_on(const noncopyable_function<void(std::string_view)>& func) const {
+    if (_imp) {
+        std::visit(overloaded_functor {
+            [&] (const conditions_filter& f) -> void {
+                for (auto it = f.conditions.MemberBegin(); it != f.conditions.MemberEnd(); ++it) {
+                    func(rjson::to_string_view(it->name));
+                }
+            },
+            [&] (const expression_filter& f) -> void {
+                return for_condition_expression_on(f.expression, func);
+            }
+        }, *_imp);
+    }
+}
+
 class describe_items_visitor {
    typedef std::vector<const column_definition*> columns_t;
    const columns_t& _columns;
    const std::unordered_set<std::string>& _attrs_to_get;
+    std::unordered_set<std::string> _extra_filter_attrs;
    const filter& _filter;
    typename columns_t::const_iterator _column_it;
    rjson::value _item;
@@ -2670,7 +2702,20 @@ public:
            , _item(rjson::empty_object())
            , _items(rjson::empty_array())
            , _scanned_count(0)
-    { }
+    {
+        // _filter.check() may need additional attributes not listed in
+        // _attrs_to_get (i.e., not requested as part of the output).
+        // We list those in _extra_filter_attrs. We will include them in
+        // the JSON but take them out before finally returning the JSON.
+        if (!_attrs_to_get.empty()) {
+            _filter.for_filters_on([&] (std::string_view attr) {
+                std::string a(attr); // no heterogenous maps searches :-(
+                if (!_attrs_to_get.contains(a)) {
+                    _extra_filter_attrs.emplace(std::move(a));
+                }
+            });
+        }
+    }

    void start_row() {
        _column_it = _columns.begin();
@@ -2684,7 +2729,7 @@ public:
        result_bytes_view->with_linearized([this] (bytes_view bv) {
            std::string column_name = (*_column_it)->name_as_text();
            if (column_name != executor::ATTRS_COLUMN_NAME) {
-                if (_attrs_to_get.empty() || _attrs_to_get.contains(column_name)) {
+                if (_attrs_to_get.empty() || _attrs_to_get.contains(column_name) || _extra_filter_attrs.contains(column_name)) {
                    if (!_item.HasMember(column_name.c_str())) {
                        rjson::set_with_string_name(_item, column_name, rjson::empty_object());
                    }
@@ -2696,7 +2741,7 @@ public:
                auto keys_and_values = value_cast<map_type_impl::native_type>(deserialized);
                for (auto entry : keys_and_values) {
                    std::string attr_name = value_cast<sstring>(entry.first);
-                    if (_attrs_to_get.empty() || _attrs_to_get.contains(attr_name)) {
+                    if (_attrs_to_get.empty() || _attrs_to_get.contains(attr_name) || _extra_filter_attrs.contains(attr_name)) {
                        bytes value = value_cast<bytes>(entry.second);
                        rjson::set_with_string_name(_item, attr_name, deserialize_item(value));
                    }
@@ -2708,6 +2753,11 @@ public:

    void end_row() {
        if (_filter.check(_item)) {
+            // Remove the extra attributes in _extra_filter_attrs, which we had
+            // to add just for the filter but were not requested to be returned:
+            for (const auto& attr : _extra_filter_attrs) {
+                rjson::remove_member(_item, attr);
+            }
            rjson::push_back(_items, std::move(_item));
        }
        _item = rjson::empty_object();
@@ -2742,7 +2792,7 @@ static rjson::value encode_paging_state(const schema& schema, const service::pag
    for (const column_definition& cdef : schema.partition_key_columns()) {
        rjson::set_with_string_name(last_evaluated_key, std::string_view(cdef.name_as_text()), rjson::empty_object());
        rjson::value& key_entry = last_evaluated_key[cdef.name_as_text()];
-        rjson::set_with_string_name(key_entry, type_to_string(cdef.type), rjson::parse(to_json_string(*cdef.type, *exploded_pk_it)));
+        rjson::set_with_string_name(key_entry, type_to_string(cdef.type), json_key_column_value(*exploded_pk_it, cdef));
        ++exploded_pk_it;
    }
    auto ck = paging_state.get_clustering_key();
@@ -2752,7 +2802,7 @@ static rjson::value encode_paging_state(const schema& schema, const service::pag
        for (const column_definition& cdef : schema.clustering_key_columns()) {
            rjson::set_with_string_name(last_evaluated_key, std::string_view(cdef.name_as_text()), rjson::empty_object());
            rjson::value& key_entry = last_evaluated_key[cdef.name_as_text()];
-            rjson::set_with_string_name(key_entry, type_to_string(cdef.type), rjson::parse(to_json_string(*cdef.type, *exploded_ck_it)));
+            rjson::set_with_string_name(key_entry, type_to_string(cdef.type), json_key_column_value(*exploded_ck_it, cdef));
            ++exploded_ck_it;
        }
    }
--- a/alternator/expressions.cc
+++ b/alternator/expressions.cc
@@ -348,6 +348,39 @@ bool condition_expression_on(const parsed::condition_expression& ce, std::string
    }, ce._expression);
 }

+// for_condition_expression_on() runs a given function over all the attributes
+// mentioned in the expression. If the same attribute is mentioned more than
+// once, the function will be called more than once for the same attribute.
+
+static void for_value_on(const parsed::value& v, const noncopyable_function<void(std::string_view)>& func) {
+    std::visit(overloaded_functor {
+        [&] (const parsed::constant& c) { },
+        [&] (const parsed::value::function_call& f) {
+            for (const parsed::value& value : f._parameters) {
+                for_value_on(value, func);
+            }
+        },
+        [&] (const parsed::path& p) {
+            func(p.root());
+        }
+    }, v._value);
+}
+
+void for_condition_expression_on(const parsed::condition_expression& ce, const noncopyable_function<void(std::string_view)>& func) {
+    std::visit(overloaded_functor {
+        [&] (const parsed::primitive_condition& cond) {
+            for (const parsed::value& value : cond._values) {
+                for_value_on(value, func);
+            }
+        },
+        [&] (const parsed::condition_expression::condition_list& list) {
+            for (const parsed::condition_expression& cond : list.conditions) {
+                for_condition_expression_on(cond, func);
+            }
+        }
+    }, ce._expression);
+}
+
 // The following calculate_value() functions calculate, or evaluate, a parsed
 // expression. The parsed expression is assumed to have been "resolved", with
 // the matching resolve_* function.
@@ -570,52 +603,8 @@ std::unordered_map<std::string_view, function_handler_type*> function_handlers {
            }
            rjson::value v1 = calculate_value(f._parameters[0], caller, previous_item);
            rjson::value v2 = calculate_value(f._parameters[1], caller, previous_item);
-            // TODO: There's duplication here with check_BEGINS_WITH().
-            // But unfortunately, the two functions differ a bit.
-
-            // If one of v1 or v2 is malformed or has an unsupported type
-            // (not B or S), what we do depends on whether it came from
-            // the user's query (is_constant()), or the item. Unsupported
-            // values in the query result in an error, but if they are in
-            // the item, we silently return false (no match).
-            bool bad = false;
-            if (!v1.IsObject() || v1.MemberCount() != 1) {
-                bad = true;
-                if (f._parameters[0].is_constant()) {
-                    throw api_error::validation(format("{}: begins_with() encountered malformed AttributeValue: {}", caller, v1));
-                }
-            } else if (v1.MemberBegin()->name != "S" && v1.MemberBegin()->name != "B") {
-                bad = true;
-                if (f._parameters[0].is_constant()) {
-                    throw api_error::validation(format("{}: begins_with() supports only string or binary in AttributeValue: {}", caller, v1));
-                }
-            }
-            if (!v2.IsObject() || v2.MemberCount() != 1) {
-                bad = true;
-                if (f._parameters[1].is_constant()) {
-                    throw api_error::validation(format("{}: begins_with() encountered malformed AttributeValue: {}", caller, v2));
-                }
-            } else if (v2.MemberBegin()->name != "S" && v2.MemberBegin()->name != "B") {
-                bad = true;
-                if (f._parameters[1].is_constant()) {
-                    throw api_error::validation(format("{}: begins_with() supports only string or binary in AttributeValue: {}", caller, v2));
-                }
-            }
-            bool ret = false;
-            if (!bad) {
-                auto it1 = v1.MemberBegin();
-                auto it2 = v2.MemberBegin();
-                if (it1->name == it2->name) {
-                    if (it2->name == "S") {
-                        std::string_view val1 = rjson::to_string_view(it1->value);
-                        std::string_view val2 = rjson::to_string_view(it2->value);
-                        ret = val1.starts_with(val2);
-                    } else /* it2->name == "B" */ {
-                        ret = base64_begins_with(rjson::to_string_view(it1->value), rjson::to_string_view(it2->value));
-                    }
-                }
-            }
-            return to_bool_json(ret);
+            return to_bool_json(check_BEGINS_WITH(v1.IsNull() ? nullptr : &v1,  v2,
+                                    f._parameters[0].is_constant(), f._parameters[1].is_constant()));
        }
    },
    {"contains", [] (calculate_value_caller caller, const rjson::value* previous_item, const parsed::value::function_call& f) {
--- a/alternator/expressions.hh
+++ b/alternator/expressions.hh
@@ -27,6 +27,8 @@
 #include <unordered_set>
 #include <string_view>

+#include <seastar/util/noncopyable_function.hh>
+
 #include "expressions_types.hh"
 #include "utils/rjson.hh"

@@ -59,6 +61,11 @@ void validate_value(const rjson::value& v, const char* caller);

 bool condition_expression_on(const parsed::condition_expression& ce, std::string_view attribute);

+// for_condition_expression_on() runs the given function on the attributes
+// that the expression uses. It may run for the same attribute more than once
+// if the same attribute is used more than once in the expression.
+void for_condition_expression_on(const parsed::condition_expression& ce, const noncopyable_function<void(std::string_view)>& func);
+
 // calculate_value() behaves slightly different (especially, different
 // functions supported) when used in different types of expressions, as
 // enumerated in this enum:
--- a/alternator/streams.cc
+++ b/alternator/streams.cc
@@ -849,6 +849,7 @@ future<executor::request_return_type> executor::get_records(client_state& client

    static const bytes timestamp_column_name = cdc::log_meta_column_name_bytes("time");
    static const bytes op_column_name = cdc::log_meta_column_name_bytes("operation");
+    static const bytes eor_column_name = cdc::log_meta_column_name_bytes("end_of_batch");

    auto key_names = boost::copy_range<std::unordered_set<std::string>>(
        boost::range::join(std::move(base->partition_key_columns()), std::move(base->clustering_key_columns()))
@@ -872,7 +873,7 @@ future<executor::request_return_type> executor::get_records(client_state& client
    std::transform(cks.begin(), cks.end(), std::back_inserter(columns), [](auto& c) { return &c; });

    auto regular_columns = boost::copy_range<query::column_id_vector>(schema->regular_columns() 
-        | boost::adaptors::filtered([](const column_definition& cdef) { return cdef.name() == op_column_name || !cdc::is_cdc_metacolumn_name(cdef.name_as_text()); })
+        | boost::adaptors::filtered([](const column_definition& cdef) { return cdef.name() == op_column_name || cdef.name() == eor_column_name || !cdc::is_cdc_metacolumn_name(cdef.name_as_text()); })
        | boost::adaptors::transformed([&] (const column_definition& cdef) { columns.emplace_back(&cdef); return cdef.id; })
    );

@@ -905,6 +906,11 @@ future<executor::request_return_type> executor::get_records(client_state& client
                return cdef->name->name() == timestamp_column_name;
            })
        );
+        auto eor_index = std::distance(metadata.get_names().begin(), 
+            std::find_if(metadata.get_names().begin(), metadata.get_names().end(), [](const lw_shared_ptr<cql3::column_specification>& cdef) {
+                return cdef->name->name() == eor_column_name;
+            })
+        );

        std::optional<utils::UUID> timestamp;
        auto dynamodb = rjson::empty_object();
@@ -930,15 +936,7 @@ future<executor::request_return_type> executor::get_records(client_state& client
        for (auto& row : result_set->rows()) {
            auto op = static_cast<cdc::operation>(value_cast<op_utype>(data_type_for<op_utype>()->deserialize(*row[op_index])));
            auto ts = value_cast<utils::UUID>(data_type_for<utils::UUID>()->deserialize(*row[ts_index]));
-
-            if (timestamp && timestamp != ts) {
-                maybe_add_record();
-                if (limit == 0) {
-                    break;
-                }
-            }
-
-            timestamp = ts;
+            auto eor = row[eor_index].has_value() ? value_cast<bool>(boolean_type->deserialize(*row[eor_index])) : false;

            if (!dynamodb.HasMember("Keys")) {
                auto keys = rjson::empty_object();
@@ -991,9 +989,13 @@ future<executor::request_return_type> executor::get_records(client_state& client
                rjson::set(record, "eventName", "REMOVE");
                break;
            }
-        }
-        if (limit > 0 && timestamp) {
-            maybe_add_record();
+            if (eor) {
+                maybe_add_record();
+                timestamp = ts;
+                if (limit == 0) {
+                    break;
+                }
+            }
        }

        auto ret = rjson::empty_object();
@@ -1047,6 +1049,9 @@ void executor::add_stream_options(const rjson::value& stream_specification, sche
        if (!db.features().cluster_supports_cdc()) {
            throw api_error::validation("StreamSpecification: streams (CDC) feature not enabled in cluster.");
        }
+        if (!db.features().cluster_supports_alternator_streams()) {
+            throw api_error::validation("StreamSpecification: alternator streams feature not enabled in cluster.");
+        }

        cdc::options opts;
        opts.enabled(true);
--- a/api/column_family.cc
+++ b/api/column_family.cc
@@ -656,7 +656,7 @@ void set_column_family(http_context& ctx, routes& r) {
    cf::get_bloom_filter_disk_space_used.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (column_family& cf) {
            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
-                return sst->filter_size();
+                return s + sst->filter_size();
            });
        }, std::plus<uint64_t>());
    });
@@ -664,7 +664,7 @@ void set_column_family(http_context& ctx, routes& r) {
    cf::get_all_bloom_filter_disk_space_used.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, uint64_t(0), [] (column_family& cf) {
            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
-                return sst->filter_size();
+                return s + sst->filter_size();
            });
        }, std::plus<uint64_t>());
    });
@@ -672,7 +672,7 @@ void set_column_family(http_context& ctx, routes& r) {
    cf::get_bloom_filter_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (column_family& cf) {
            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
-                return sst->filter_memory_size();
+                return s + sst->filter_memory_size();
            });
        }, std::plus<uint64_t>());
    });
@@ -680,7 +680,7 @@ void set_column_family(http_context& ctx, routes& r) {
    cf::get_all_bloom_filter_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, uint64_t(0), [] (column_family& cf) {
            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
-                return sst->filter_memory_size();
+                return s + sst->filter_memory_size();
            });
        }, std::plus<uint64_t>());
    });
@@ -688,7 +688,7 @@ void set_column_family(http_context& ctx, routes& r) {
    cf::get_index_summary_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (column_family& cf) {
            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
-                return sst->get_summary().memory_footprint();
+                return s + sst->get_summary().memory_footprint();
            });
        }, std::plus<uint64_t>());
    });
@@ -696,7 +696,7 @@ void set_column_family(http_context& ctx, routes& r) {
    cf::get_all_index_summary_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, uint64_t(0), [] (column_family& cf) {
            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
-                return sst->get_summary().memory_footprint();
+                return s + sst->get_summary().memory_footprint();
            });
        }, std::plus<uint64_t>());
    });
--- a/cdc/cdc_extension.hh
+++ b/cdc/cdc_extension.hh
@@ -20,10 +20,16 @@

 #pragma once

+#include <map>
+
+#include <seastar/core/sstring.hh>
+
+#include "bytes.hh"
 #include "serializer.hh"
 #include "db/extensions.hh"
 #include "cdc/cdc_options.hh"
 #include "schema.hh"
+#include "serializer_impl.hh"

 namespace cdc {

--- a/cdc/generation.cc
+++ b/cdc/generation.cc
@@ -23,6 +23,7 @@
 #include <random>
 #include <unordered_set>
 #include <seastar/core/sleep.hh>
+#include <algorithm>

 #include "keys.hh"
 #include "schema_builder.hh"
@@ -174,10 +175,29 @@ bool topology_description::operator==(const topology_description& o) const {
    return _entries == o._entries;
 }

-const std::vector<token_range_description>& topology_description::entries() const {
+const std::vector<token_range_description>& topology_description::entries() const& {
    return _entries;
 }

+std::vector<token_range_description>&& topology_description::entries() && {
+    return std::move(_entries);
+}
+
+static std::vector<stream_id> create_stream_ids(
+        size_t index, dht::token start, dht::token end, size_t shard_count, uint8_t ignore_msb) {
+    std::vector<stream_id> result;
+    result.reserve(shard_count);
+    dht::sharder sharder(shard_count, ignore_msb);
+    for (size_t shard_idx = 0; shard_idx < shard_count; ++shard_idx) {
+        auto t = dht::find_first_token_for_shard(sharder, start, end, shard_idx);
+        // compose the id from token and the "index" of the range end owning vnode
+        // as defined by token sort order. Basically grouping within this
+        // shard set.
+        result.emplace_back(stream_id(t, index));
+    }
+    return result;
+}
+
 class topology_description_generator final {
    const db::config& _cfg;
    const std::unordered_set<dht::token>& _bootstrap_tokens;
@@ -217,18 +237,9 @@ class topology_description_generator final {
        desc.token_range_end = end;

        auto [shard_count, ignore_msb] = get_sharding_info(end);
-        desc.streams.reserve(shard_count);
+        desc.streams = create_stream_ids(index, start, end, shard_count, ignore_msb);
        desc.sharding_ignore_msb = ignore_msb;

-        dht::sharder sharder(shard_count, ignore_msb);
-        for (size_t shard_idx = 0; shard_idx < shard_count; ++shard_idx) {
-            auto t = dht::find_first_token_for_shard(sharder, start, end, shard_idx);
-            // compose the id from token and the "index" of the range end owning vnode
-            // as defined by token sort order. Basically grouping within this
-            // shard set.
-            desc.streams.emplace_back(stream_id(t, index));
-        }
-
        return desc;
    }
 public:
@@ -294,6 +305,38 @@ future<db_clock::time_point> get_local_streams_timestamp() {
    });
 }

+// non-static for testing
+size_t limit_of_streams_in_topology_description() {
+    // Each stream takes 16B and we don't want to exceed 4MB, so we can have
+    // at most 262144 streams. Callers still keep at least 1 stream per vnode.
+    return 4 * 1024 * 1024 / 16;
+}
+
+// non-static for testing
+topology_description limit_number_of_streams_if_needed(topology_description&& desc) {
+    int64_t streams_count = 0;
+    for (auto& tr_desc : desc.entries()) {
+        streams_count += tr_desc.streams.size();
+    }
+
+    size_t limit = std::max(limit_of_streams_in_topology_description(), desc.entries().size());
+    if (limit >= size_t(streams_count)) {
+        return std::move(desc);
+    }
+    size_t streams_per_vnode_limit = limit / desc.entries().size();
+    auto entries = std::move(desc).entries();
+    auto start = entries.back().token_range_end;
+    for (size_t idx = 0; idx < entries.size(); ++idx) {
+        auto end = entries[idx].token_range_end;
+        if (entries[idx].streams.size() > streams_per_vnode_limit) {
+            entries[idx].streams =
+                create_stream_ids(idx, start, end, streams_per_vnode_limit, entries[idx].sharding_ignore_msb);
+        }
+        start = end;
+    }
+    return topology_description(std::move(entries));
+}
+
 // Run inside seastar::async context.
 db_clock::time_point make_new_cdc_generation(
        const db::config& cfg,
@@ -306,6 +349,18 @@ db_clock::time_point make_new_cdc_generation(
    using namespace std::chrono;
    auto gen = topology_description_generator(cfg, bootstrap_tokens, tm, g).generate();

+    // If the cluster is large we may end up with a generation that contains
+    // a large number of streams. This is problematic because we store the
+    // generation in a single row. For a generation with a large number of
+    // streams this will lead to a row that can be as big as 32MB. This is much
+    // more than the limit imposed by commitlog_segment_size_in_mb. If the size
+    // of the row that describes a new generation grows above
+    // commitlog_segment_size_in_mb, the write will fail and the new node won't
+    // be able to join. To avoid this problem we make sure that such a row is
+    // always smaller than 4MB. We do that by removing some CDC streams from
+    // each vnode if the total number of streams is too large.
+    gen = limit_number_of_streams_if_needed(std::move(gen));
+
    // Begin the race.
    auto ts = db_clock::now() + (
            (for_testing || ring_delay == milliseconds(0)) ? milliseconds(0) : (
--- a/cdc/generation.hh
+++ b/cdc/generation.hh
@@ -68,6 +68,7 @@ public:

    stream_id() = default;
    stream_id(bytes);
+    stream_id(dht::token, size_t);

    bool is_set() const;
    bool operator==(const stream_id&) const;
@@ -81,9 +82,6 @@ public:

    partition_key to_partition_key(const schema& log_schema) const;
    static int64_t token_from_bytes(bytes_view);
-private:
-    friend class topology_description_generator;
-    stream_id(dht::token, size_t);
 };

 /* Describes a mapping of tokens to CDC streams in a token range.
@@ -116,7 +114,8 @@ public:
    topology_description(std::vector<token_range_description> entries);
    bool operator==(const topology_description&) const;

-    const std::vector<token_range_description>& entries() const;
+    const std::vector<token_range_description>& entries() const&;
+    std::vector<token_range_description>&& entries() &&;
 };

 /**
@@ -154,7 +153,7 @@ bool should_propose_first_generation(const gms::inet_address& me, const gms::gos
 future<db_clock::time_point> get_local_streams_timestamp();

 /* Generate a new set of CDC streams and insert it into the distributed cdc_generation_descriptions table.
- * Returns the timestamp of this new generation.
+ * Returns the timestamp of this new generation
 *
 * Should be called when starting the node for the first time (i.e., joining the ring).
 *
--- a/cdc/log.cc
+++ b/cdc/log.cc
@@ -519,6 +519,7 @@ static schema_ptr create_log_schema(const schema& s, std::optional<utils::UUID>
    b.with_column(log_meta_column_name_bytes("batch_seq_no"), int32_type, column_kind::clustering_key);
    b.with_column(log_meta_column_name_bytes("operation"), data_type_for<operation_native_type>());
    b.with_column(log_meta_column_name_bytes("ttl"), long_type);
+    b.with_column(log_meta_column_name_bytes("end_of_batch"), boolean_type);
    b.set_caching_options(caching_options::get_disabled_caching_options());
    auto add_columns = [&] (const schema::const_iterator_range_type& columns, bool is_data_col = false) {
        for (const auto& column : columns) {
@@ -880,14 +881,26 @@ public:
        return _base_schema;
    }

+    clustering_key create_ck(int batch) const {
+        return clustering_key::from_exploded(_log_schema, { _tuuid, int32_type->decompose(batch) });
+    }
+
    // Creates a new clustering row in the mutation, assigning it the next `cdc$batch_seq_no`.
    // The numbering of batch sequence numbers starts from 0.
    clustering_key allocate_new_log_row() {
-        auto log_ck = clustering_key::from_exploded(_log_schema, { _tuuid, int32_type->decompose(_batch_no++) });
+        auto log_ck = create_ck(_batch_no++);
        set_key_columns(log_ck, _base_schema.partition_key_columns(), _base_pk);
        return log_ck;
    }

+    bool has_rows() const {
+        return _batch_no != 0;
+    }
+
+    clustering_key last_row_key() const {
+        return create_ck(_batch_no - 1);
+    }
+
    // A common pattern is to allocate a row and then immediately set its `cdc$operation` column.
    clustering_key allocate_new_log_row(operation op) {
        auto log_ck = allocate_new_log_row();
@@ -944,6 +957,11 @@ public:
        _log_mut.set_cell(log_ck, log_cdef, atomic_cell::make_live(*log_cdef.type, _ts, deleted_elements, _ttl));
    }

+    void end_record() {
+        if (has_rows()) {
+            _log_mut.set_cell(last_row_key(), log_meta_column_name_bytes("end_of_batch"), data_value(true), _ts, _ttl);
+        }
+    }
 private:
    void set_key_columns(const clustering_key& log_ck, schema::const_iterator_range_type columns, const std::vector<bytes>& key) {
        size_t pos = 0;
@@ -1272,6 +1290,13 @@ struct process_change_visitor {
                _clustering_row_states, _generate_delta_values);
        visit_row_cells(v);

+        if (_enable_updating_state) {
+            // #7716: if there are no regular columns, our visitor would not have visited any cells,
+            // hence it would not have created a row_state for this row. In effect, postimage wouldn't be produced.
+            // Ensure that the row state exists.
+            _clustering_row_states.try_emplace(ckey);
+        }
+
        _builder.set_operation(log_ck, v._cdc_op);
        _builder.set_ttl(log_ck, v._ttl_column);
    }
@@ -1519,6 +1544,11 @@ public:
        cdc::inspect_mutation(m, v);
    }

+    void end_record() override {
+        assert(_builder);
+        _builder->end_record();
+    }
+
    // Takes and returns generated cdc log mutations and associated statistics about parts touched during transformer's lifetime.
    // The `transformer` object on which this method was called on should not be used anymore.
    std::tuple<std::vector<mutation>, stats::part_type_set> finish() && {
--- a/cdc/metadata.cc
+++ b/cdc/metadata.cc
@@ -51,7 +51,8 @@ static cdc::stream_id get_stream(
    return entry.streams[shard_id];
 }

-static cdc::stream_id get_stream(
+// non-static for testing
+cdc::stream_id get_stream(
        const std::vector<cdc::token_range_description>& entries,
        dht::token tok) {
    if (entries.empty()) {
--- a/cdc/split.cc
+++ b/cdc/split.cc
@@ -684,6 +684,8 @@ void process_changes_with_splitting(const mutation& base_mutation, change_proces
                processor.produce_postimage(&ck);
            }
        }
+
+        processor.end_record();
    }
 }

@@ -731,6 +733,8 @@ void process_changes_without_splitting(const mutation& base_mutation, change_pro
            processor.produce_postimage(&cr.key());
        }
    }
+
+    processor.end_record();
 }

 } // namespace cdc
--- a/cdc/split.hh
+++ b/cdc/split.hh
@@ -77,6 +77,10 @@ public:
    // both columns have different timestamp or TTL set.
    //   m - the small mutation to be converted into CDC log rows.
    virtual void process_change(const mutation& m) = 0;
+
+    // Tells processor we have reached end of record - last part
+    // of a given timestamp batch
+    virtual void end_record() = 0;
 };

 bool should_split(const mutation& base_mutation);
--- a/configure.py
+++ b/configure.py
@@ -275,6 +275,7 @@ modes = {

 scylla_tests = set([
    'test/boost/UUID_test',
+    'test/boost/cdc_generation_test',
    'test/boost/aggregate_fcts_test',
    'test/boost/allocation_strategy_test',
    'test/boost/alternator_base64_test',
--- a/cql3/statements/select_statement.cc
+++ b/cql3/statements/select_statement.cc
@@ -59,6 +59,7 @@
 #include "db/timeout_clock.hh"
 #include "db/consistency_level_validations.hh"
 #include "database.hh"
+#include "test/lib/select_statement_utils.hh"
 #include <boost/algorithm/cxx11/any_of.hpp>

 bool is_system_keyspace(const sstring& name);
@@ -67,6 +68,8 @@ namespace cql3 {

 namespace statements {

+static constexpr int DEFAULT_INTERNAL_PAGING_SIZE = select_statement::DEFAULT_COUNT_PAGE_SIZE;
+thread_local int internal_paging_size = DEFAULT_INTERNAL_PAGING_SIZE;
 thread_local const lw_shared_ptr<const select_statement::parameters> select_statement::_default_parameters = make_lw_shared<select_statement::parameters>();

 select_statement::parameters::parameters()
@@ -338,7 +341,7 @@ select_statement::do_execute(service::storage_proxy& proxy,
    const bool aggregate = _selection->is_aggregate() || has_group_by();
    const bool nonpaged_filtering = restrictions_need_filtering && page_size <= 0;
    if (aggregate || nonpaged_filtering) {
-        page_size = DEFAULT_COUNT_PAGE_SIZE;
+        page_size = internal_paging_size;
    }

    auto key_ranges = _restrictions->get_partition_key_ranges(options);
@@ -541,13 +544,29 @@ indexed_table_select_statement::do_execute_base_query(
            if (old_paging_state && concurrency == 1) {
                auto base_pk = generate_base_key_from_index_pk<partition_key>(old_paging_state->get_partition_key(),
                        old_paging_state->get_clustering_key(), *_schema, *_view_schema);
+                auto row_ranges = command->slice.default_row_ranges();
                if (old_paging_state->get_clustering_key() && _schema->clustering_key_size() > 0) {
                    auto base_ck = generate_base_key_from_index_pk<clustering_key>(old_paging_state->get_partition_key(),
                            old_paging_state->get_clustering_key(), *_schema, *_view_schema);
-                    command->slice.set_range(*_schema, base_pk,
-                            std::vector<query::clustering_range>{query::clustering_range::make_starting_with(range_bound<clustering_key>(base_ck, false))});
+
+                    query::trim_clustering_row_ranges_to(*_schema, row_ranges, base_ck, false);
+                    command->slice.set_range(*_schema, base_pk, row_ranges);
                } else {
-                    command->slice.set_range(*_schema, base_pk, std::vector<query::clustering_range>{query::clustering_range::make_open_ended_both_sides()});
+                    // There is no clustering key in old_paging_state and/or no clustering key in
+                    // _schema, therefore read an entire partition (whole clustering range).
+                    //
+                    // The only exception to applying no restrictions on clustering key
+                    // is a case when we have a secondary index on the first column
+                    // of clustering key. In such a case we should not read the
+                    // entire clustering range - only a range in which first column
+                    // of clustering key has the correct value.
+                    //
+                    // This means that we should not set an open_ended_both_sides
+                    // clustering range on base_pk, instead intersect it with
+                    // row_ranges (which contains the restrictions necessary for the
+                    // case described above). The result of such intersection is just
+                    // row_ranges, which we explicitly set on base_pk.
+                    command->slice.set_range(*_schema, base_pk, row_ranges);
                }
            }
            concurrency *= 2;
@@ -992,12 +1011,16 @@ indexed_table_select_statement::do_execute(service::storage_proxy& proxy,
    const bool aggregate = _selection->is_aggregate() || has_group_by();
    if (aggregate) {
        const bool restrictions_need_filtering = _restrictions->need_filtering();
-        return do_with(cql3::selection::result_set_builder(*_selection, now, options.get_cql_serialization_format()), std::make_unique<cql3::query_options>(cql3::query_options(options)),
+        return do_with(cql3::selection::result_set_builder(*_selection, now, options.get_cql_serialization_format(), *_group_by_cell_indices), std::make_unique<cql3::query_options>(cql3::query_options(options)),
                [this, &options, &proxy, &state, now, whole_partitions, partition_slices, restrictions_need_filtering] (cql3::selection::result_set_builder& builder, std::unique_ptr<cql3::query_options>& internal_options) {
            // page size is set to the internal count page size, regardless of the user-provided value
-            internal_options.reset(new cql3::query_options(std::move(internal_options), options.get_paging_state(), DEFAULT_COUNT_PAGE_SIZE));
+            internal_options.reset(new cql3::query_options(std::move(internal_options), options.get_paging_state(), internal_paging_size));
            return repeat([this, &builder, &options, &internal_options, &proxy, &state, now, whole_partitions, partition_slices, restrictions_need_filtering] () {
-                auto consume_results = [this, &builder, &options, &internal_options, restrictions_need_filtering] (foreign_ptr<lw_shared_ptr<query::result>> results, lw_shared_ptr<query::read_command> cmd) {
+                auto consume_results = [this, &builder, &options, &internal_options, &proxy, &state, restrictions_need_filtering] (foreign_ptr<lw_shared_ptr<query::result>> results, lw_shared_ptr<query::read_command> cmd, lw_shared_ptr<const service::pager::paging_state> paging_state) {
+                    if (paging_state) {
+                        paging_state = generate_view_paging_state_from_base_query_results(paging_state, results, proxy, state, options);
+                    }
+                    internal_options.reset(new cql3::query_options(std::move(internal_options), paging_state ? make_lw_shared<service::pager::paging_state>(*paging_state) : nullptr));
                    if (restrictions_need_filtering) {
                        _stats.filtered_rows_read_total += *results->row_count();
                        query::result_view::consume(*results, cmd->slice, cql3::selection::result_set_builder::visitor(builder, *_schema, *_selection,
@@ -1005,24 +1028,24 @@ indexed_table_select_statement::do_execute(service::storage_proxy& proxy,
                    } else {
                        query::result_view::consume(*results, cmd->slice, cql3::selection::result_set_builder::visitor(builder, *_schema, *_selection));
                    }
+                    bool has_more_pages = paging_state && paging_state->get_remaining() > 0;
+                    return stop_iteration(!has_more_pages);
                };

                if (whole_partitions || partition_slices) {
                    return find_index_partition_ranges(proxy, state, *internal_options).then_unpack(
                            [this, now, &state, &internal_options, &proxy, consume_results = std::move(consume_results)] (dht::partition_range_vector partition_ranges, lw_shared_ptr<const service::pager::paging_state> paging_state) {
-                        bool has_more_pages = paging_state && paging_state->get_remaining() > 0;
-                        internal_options.reset(new cql3::query_options(std::move(internal_options), paging_state ? make_lw_shared<service::pager::paging_state>(*paging_state) : nullptr));
-                        return do_execute_base_query(proxy, std::move(partition_ranges), state, *internal_options, now, std::move(paging_state)).then_unpack(consume_results).then([has_more_pages] {
-                            return stop_iteration(!has_more_pages);
+                        return do_execute_base_query(proxy, std::move(partition_ranges), state, *internal_options, now, paging_state)
+                        .then_unpack([paging_state, consume_results = std::move(consume_results)](foreign_ptr<lw_shared_ptr<query::result>> results, lw_shared_ptr<query::read_command> cmd) {
+                            return consume_results(std::move(results), std::move(cmd), std::move(paging_state));
                        });
                    });
                } else {
                    return find_index_clustering_rows(proxy, state, *internal_options).then_unpack(
                            [this, now, &state, &internal_options, &proxy, consume_results = std::move(consume_results)] (std::vector<primary_key> primary_keys, lw_shared_ptr<const service::pager::paging_state> paging_state) {
-                        bool has_more_pages = paging_state && paging_state->get_remaining() > 0;
-                        internal_options.reset(new cql3::query_options(std::move(internal_options), paging_state ? make_lw_shared<service::pager::paging_state>(*paging_state) : nullptr));
-                        return this->do_execute_base_query(proxy, std::move(primary_keys), state, *internal_options, now, std::move(paging_state)).then_unpack(consume_results).then([has_more_pages] {
-                            return stop_iteration(!has_more_pages);
+                        return this->do_execute_base_query(proxy, std::move(primary_keys), state, *internal_options, now, paging_state)
+                        .then_unpack([paging_state, consume_results = std::move(consume_results)](foreign_ptr<lw_shared_ptr<query::result>> results, lw_shared_ptr<query::read_command> cmd) {
+                            return consume_results(std::move(results), std::move(cmd), std::move(paging_state));
                        });
                    });
                }
@@ -1687,6 +1710,16 @@ std::vector<size_t> select_statement::prepare_group_by(const schema& schema, sel

 }

+future<> set_internal_paging_size(int paging_size) {
+    return seastar::smp::invoke_on_all([paging_size] {
+        internal_paging_size = paging_size;
+    });
+}
+
+future<> reset_internal_paging_size() {
+    return set_internal_paging_size(DEFAULT_INTERNAL_PAGING_SIZE);
+}
+
 }

 namespace util {
--- a/database.cc
+++ b/database.cc
@@ -801,7 +801,7 @@ future<> database::drop_column_family(const sstring& ks_name, const sstring& cf_
    remove(*cf);
    cf->clear_views();
    auto& ks = find_keyspace(ks_name);
-    return when_all_succeed(cf->await_pending_writes(), cf->await_pending_reads()).then_unpack([this, &ks, cf, tsf = std::move(tsf), snapshot] {
+    return cf->await_pending_ops().then([this, &ks, cf, tsf = std::move(tsf), snapshot] {
        return truncate(ks, *cf, std::move(tsf), snapshot).finally([this, cf] {
            return cf->stop();
        });
--- a/database.hh
+++ b/database.hh
@@ -505,6 +505,8 @@ private:
    utils::phased_barrier _pending_reads_phaser;
    // Corresponding phaser for in-progress streams
    utils::phased_barrier _pending_streams_phaser;
+    // Corresponding phaser for in-progress flushes
+    utils::phased_barrier _pending_flushes_phaser;

    // This field cashes the last truncation time for the table.
    // The master resides in system.truncated table
@@ -932,6 +934,14 @@ public:
        return _pending_streams_phaser.advance_and_await();
    }

+    future<> await_pending_flushes() {
+        return _pending_flushes_phaser.advance_and_await();
+    }
+
+    future<> await_pending_ops() {
+        return when_all(await_pending_reads(), await_pending_writes(), await_pending_streams(), await_pending_flushes()).discard_result();
+    }
+
    void add_or_update_view(view_ptr v);
    void remove_view(view_ptr v);
    void clear_views();
--- a/db/config.cc
+++ b/db/config.cc
@@ -31,6 +31,7 @@
 #include <seastar/core/print.hh>
 #include <seastar/util/log.hh>

+#include "cdc/cdc_extension.hh"
 #include "config.hh"
 #include "extensions.hh"
 #include "log.hh"
@@ -694,7 +695,7 @@ db::config::config(std::shared_ptr<db::extensions> exts)
    , replace_address(this, "replace_address", value_status::Used, "", "The listen_address or broadcast_address of the dead node to replace. Same as -Dcassandra.replace_address.")
    , replace_address_first_boot(this, "replace_address_first_boot", value_status::Used, "", "Like replace_address option, but if the node has been bootstrapped successfully it will be ignored. Same as -Dcassandra.replace_address_first_boot.")
    , override_decommission(this, "override_decommission", value_status::Used, false, "Set true to force a decommissioned node to join the cluster")
-    , enable_repair_based_node_ops(this, "enable_repair_based_node_ops", liveness::LiveUpdate, value_status::Used, true, "Set true to use enable repair based node operations instead of streaming based")
+    , enable_repair_based_node_ops(this, "enable_repair_based_node_ops", liveness::LiveUpdate, value_status::Used, false, "Set true to use enable repair based node operations instead of streaming based")
    , ring_delay_ms(this, "ring_delay_ms", value_status::Used, 30 * 1000, "Time a node waits to hear from other nodes before joining the ring in milliseconds. Same as -Dcassandra.ring_delay_ms in cassandra.")
    , shadow_round_ms(this, "shadow_round_ms", value_status::Used, 300 * 1000, "The maximum gossip shadow round time. Can be used to reduce the gossip feature check time during node boot up.")
    , fd_max_interval_ms(this, "fd_max_interval_ms", value_status::Used, 2 * 1000, "The maximum failure_detector interval time in milliseconds. Interval larger than the maximum will be ignored. Larger cluster may need to increase the default.")
@@ -792,6 +793,10 @@ db::config::config()
 db::config::~config()
 {}

+void db::config::add_cdc_extension() {
+    _extensions->add_schema_extension<cdc::cdc_extension>(cdc::cdc_extension::NAME);
+}
+
 void db::config::setup_directories() {
    maybe_in_workdir(commitlog_directory, "commitlog");
    maybe_in_workdir(data_file_directories, "data");
@@ -874,7 +879,7 @@ db::fs::path db::config::get_conf_sub(db::fs::path sub) {
 }

 bool db::config::check_experimental(experimental_features_t::feature f) const {
-    if (experimental() && f != experimental_features_t::UNUSED) {
+    if (experimental() && f != experimental_features_t::UNUSED && f != experimental_features_t::UNUSED_CDC) {
        return true;
    }
    const auto& optval = experimental_features();
@@ -928,11 +933,13 @@ std::unordered_map<sstring, db::experimental_features_t::feature> db::experiment
    // https://github.com/scylladb/scylla/pull/5369#discussion_r353614807
    // Lightweight transactions are no longer experimental. Map them
    // to UNUSED switch for a while, then remove altogether.
-    return {{"lwt", UNUSED}, {"udf", UDF}, {"cdc", CDC}};
+    // Change Data Capture is no longer experimental. Map it
+    // to UNUSED_CDC switch for a while, then remove altogether.
+    return {{"lwt", UNUSED}, {"udf", UDF}, {"cdc", UNUSED_CDC}, {"alternator-streams", ALTERNATOR_STREAMS}};
 }

 std::vector<enum_option<db::experimental_features_t>> db::experimental_features_t::all() {
-    return {UDF, CDC};
+    return {UDF, ALTERNATOR_STREAMS};
 }

 template struct utils::config_file::named_value<seastar::log_level>;
--- a/db/config.hh
+++ b/db/config.hh
@@ -81,7 +81,7 @@ namespace db {

 /// Enumeration of all valid values for the `experimental` config entry.
 struct experimental_features_t {
-    enum feature { UNUSED, UDF, CDC };
+    enum feature { UNUSED, UDF, UNUSED_CDC, ALTERNATOR_STREAMS };
    static std::unordered_map<sstring, feature> map(); // See enum_option.
    static std::vector<enum_option<experimental_features_t>> all();
 };
@@ -92,6 +92,9 @@ public:
    config(std::shared_ptr<db::extensions>);
    ~config();

+    // For testing only
+    void add_cdc_extension();
+
    /// True iff the feature is enabled.
    bool check_experimental(experimental_features_t::feature f) const;

--- a/db/large_data_handler.cc
+++ b/db/large_data_handler.cc
@@ -113,7 +113,7 @@ future<> cql_table_large_data_handler::record_large_cells(const sstables::sstabl
        auto ck_str = key_to_str(*clustering_key, s);
        return try_record("cell", sst, partition_key, int64_t(cell_size), cell_type, format("{} {}", ck_str, column_name), extra_fields, ck_str, column_name);
    } else {
-        return try_record("cell", sst, partition_key, int64_t(cell_size), cell_type, column_name, extra_fields, nullptr, column_name);
+        return try_record("cell", sst, partition_key, int64_t(cell_size), cell_type, column_name, extra_fields, data_value::make_null(utf8_type), column_name);
    }
 }

@@ -125,7 +125,7 @@ future<> cql_table_large_data_handler::record_large_rows(const sstables::sstable
        std::string ck_str = key_to_str(*clustering_key, s);
        return try_record("row", sst, partition_key, int64_t(row_size), "row", ck_str, extra_fields,  ck_str);
    } else {
-        return try_record("row", sst, partition_key, int64_t(row_size), "static row", "", extra_fields, nullptr);
+        return try_record("row", sst, partition_key, int64_t(row_size), "static row", "", extra_fields, data_value::make_null(utf8_type));
    }
 }

--- a/db/large_data_handler.hh
+++ b/db/large_data_handler.hh
@@ -111,27 +111,12 @@ public:
        return make_ready_future<>();
    }

-    future<> maybe_delete_large_data_entries(const schema& s, sstring filename, uint64_t data_size) {
+    future<> maybe_delete_large_data_entries(const schema& /*s*/, sstring /*filename*/, uint64_t /*data_size*/) {
        assert(running());
-        future<> large_partitions = make_ready_future<>();
-        if (__builtin_expect(data_size > _partition_threshold_bytes, false)) {
-            large_partitions = with_sem([&s, filename, this] () mutable {
-                return delete_large_data_entries(s, std::move(filename), db::system_keyspace::LARGE_PARTITIONS);
-            });
-        }
-        future<> large_rows = make_ready_future<>();
-        if (__builtin_expect(data_size > _row_threshold_bytes, false)) {
-            large_rows = with_sem([&s, filename, this] () mutable {
-                return delete_large_data_entries(s, std::move(filename), db::system_keyspace::LARGE_ROWS);
-            });
-        }
-        future<> large_cells = make_ready_future<>();
-        if (__builtin_expect(data_size > _cell_threshold_bytes, false)) {
-            large_cells = with_sem([&s, filename, this] () mutable {
-                return delete_large_data_entries(s, std::move(filename), db::system_keyspace::LARGE_CELLS);
-            });
-        }
-        return when_all(std::move(large_partitions), std::move(large_rows), std::move(large_cells)).discard_result();
+
+        // Deletion of large data entries is disabled due to #7668
+        // They will eventually expire based on the 30-day TTL.
+        return make_ready_future<>();
    }

    const large_data_handler::stats& stats() const { return _stats; }
--- a/db/schema_tables.cc
+++ b/db/schema_tables.cc
@@ -58,6 +58,7 @@
 #include "schema_registry.hh"
 #include "mutation_query.hh"
 #include "system_keyspace.hh"
+#include "system_distributed_keyspace.hh"
 #include "cql3/cql3_type.hh"
 #include "cql3/functions/functions.hh"
 #include "cql3/util.hh"
@@ -104,6 +105,11 @@ using namespace std::chrono_literals;

 static logging::logger diff_logger("schema_diff");

+static bool is_extra_durable(const sstring& ks_name, const sstring& cf_name) {
+    return (is_system_keyspace(ks_name) && db::system_keyspace::is_extra_durable(cf_name))
+        || (ks_name == db::system_distributed_keyspace::NAME && db::system_distributed_keyspace::is_extra_durable(cf_name));
+}
+

 /** system.schema_* tables used to store keyspace/table/type attributes prior to C* 3.0 */
 namespace db {
@@ -2499,7 +2505,7 @@ schema_ptr create_table_from_mutations(const schema_ctxt& ctxt, schema_mutations
        builder.with_sharder(smp::count, ctxt.murmur3_partitioner_ignore_msb_bits());
    }

-    if (is_system_keyspace(ks_name) && is_extra_durable(cf_name)) {
+    if (is_extra_durable(ks_name, cf_name)) {
        builder.set_wait_for_sync_to_commitlog(true);
    }

@@ -3035,10 +3041,6 @@ future<> maybe_update_legacy_secondary_index_mv_schema(service::migration_manage
    // format, where "token" is not marked as computed. Once we're sure that all indexes have their
    // columns marked as computed (because they were either created on a node that supports computed
    // columns or were fixed by this utility function), it's safe to remove this function altogether.
-    if (!db.features().cluster_supports_computed_columns()) {
-        return make_ready_future<>();
-    }
-
    if (v->clustering_key_size() == 0) {
        return make_ready_future<>();
    }
--- a/db/size_estimates_virtual_reader.cc
+++ b/db/size_estimates_virtual_reader.cc
@@ -201,10 +201,10 @@ static future<std::vector<token_range>> get_local_ranges(database& db) {
        // All queries will be on that table, where all entries are text and there's no notion of
        // token ranges form the CQL point of view.
        auto left_inf = boost::find_if(ranges, [] (auto&& r) {
-            return !r.start() || r.start()->value() == dht::minimum_token();
+            return r.end() && (!r.start() || r.start()->value() == dht::minimum_token());
        });
        auto right_inf = boost::find_if(ranges, [] (auto&& r) {
-            return !r.end() || r.start()->value() == dht::maximum_token();
+            return r.start() && (!r.end() || r.end()->value() == dht::maximum_token());
        });
        if (left_inf != right_inf && left_inf != ranges.end() && right_inf != ranges.end()) {
            local_ranges.push_back(token_range{to_bytes(right_inf->start()), to_bytes(left_inf->end())});
--- a/db/system_distributed_keyspace.cc
+++ b/db/system_distributed_keyspace.cc
@@ -113,6 +113,10 @@ static std::vector<schema_ptr> all_tables() {
    };
 }

+bool system_distributed_keyspace::is_extra_durable(const sstring& cf_name) {
+    return cf_name == CDC_TOPOLOGY_DESCRIPTION;
+}
+
 system_distributed_keyspace::system_distributed_keyspace(cql3::query_processor& qp, service::migration_manager& mm)
        : _qp(qp)
        , _mm(mm) {
--- a/db/system_distributed_keyspace.hh
+++ b/db/system_distributed_keyspace.hh
@@ -64,6 +64,10 @@ private:
    service::migration_manager& _mm;

 public:
+    /* Should writes to the given table always be synchronized by commitlog (flushed to disk)
+     * before being acknowledged? */
+    static bool is_extra_durable(const sstring& cf_name);
+
    system_distributed_keyspace(cql3::query_processor&, service::migration_manager&);

    future<> start();
--- a/db/view/view.cc
+++ b/db/view/view.cc
@@ -1241,6 +1241,14 @@ future<> mutate_MV(
                }
            }
        }
+        // It's still possible that a target endpoint is duplicated in the remote endpoints list,
+        // so let's get rid of the duplicate if it exists
+        if (target_endpoint) {
+            auto remote_it = std::find(remote_endpoints.begin(), remote_endpoints.end(), *target_endpoint);
+            if (remote_it != remote_endpoints.end()) {
+                remote_endpoints.erase(remote_it);
+            }
+        }

        if (target_endpoint && *target_endpoint == my_address) {
            ++stats.view_updates_pushed_local;
--- a/dist/common/scripts/scylla_io_setup
+++ b/dist/common/scripts/scylla_io_setup
@@ -244,12 +244,12 @@ if __name__ == "__main__":
                # and https://cloud.google.com/compute/docs/disks/local-ssd#nvme
                # note that scylla iotune might measure more, this is GCP recommended
                mbs=1024*1024
-                if nr_disks >= 1 & nr_disks < 4:
+                if nr_disks >= 1 and nr_disks < 4:
                    disk_properties["read_iops"] = 170000 * nr_disks
                    disk_properties["read_bandwidth"] = 660 * mbs * nr_disks
                    disk_properties["write_iops"] = 90000 * nr_disks
                    disk_properties["write_bandwidth"] = 350 * mbs * nr_disks
-                elif nr_disks >= 4 & nr_disks <= 8:
+                elif nr_disks >= 4 and nr_disks <= 8:
                    disk_properties["read_iops"] = 680000
                    disk_properties["read_bandwidth"] = 2650 * mbs
                    disk_properties["write_iops"] = 360000
--- a/dist/common/scripts/scylla_raid_setup
+++ b/dist/common/scripts/scylla_raid_setup
@@ -36,7 +36,7 @@ if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Configure RAID volume for Scylla.')
    parser.add_argument('--disks', required=True,
                        help='specify disks for RAID')
-    parser.add_argument('--raiddev', default='/dev/md0',
+    parser.add_argument('--raiddev',
                        help='MD device name for RAID')
    parser.add_argument('--enable-on-nextboot', '--update-fstab', action='store_true', default=False,
                        help='mount RAID on next boot')
@@ -73,9 +73,25 @@ if __name__ == '__main__':
            print('{} is busy'.format(disk))
            sys.exit(1)

-    if os.path.exists(args.raiddev):
-        print('{} is already using'.format(args.raiddev))
-        sys.exit(1)
+    if len(disks) == 1 and not args.force_raid:
+        raid = False
+        fsdev = disks[0]
+    else:
+        raid = True
+        if args.raiddev is None:
+            raiddevs_to_try = [f'/dev/md{i}' for i in range(10)]
+        else:
+            raiddevs_to_try = [args.raiddev, ]
+        for fsdev in raiddevs_to_try:
+            raiddevname = os.path.basename(fsdev)
+            if not os.path.exists(f'/sys/block/{raiddevname}/md/array_state'):
+                break
+            print(f'{fsdev} is already using')
+        else:
+            if args.raiddev is None:
+                print("Can't find unused /dev/mdX")
+            sys.exit(1)
+        print(f'{fsdev} will be used to setup a RAID')

    if os.path.ismount(mount_at):
        print('{} is already mounted'.format(mount_at))
@@ -94,13 +110,6 @@ if __name__ == '__main__':
    except SystemdException:
        md_service = systemd_unit('mdadm.service')

-    if len(disks) == 1 and not args.force_raid:
-        raid = False
-        fsdev = disks[0]
-    else:
-        raid = True
-        fsdev = args.raiddev
-
    print('Creating {type} for scylla using {nr_disk} disk(s): {disks}'.format(type='RAID0' if raid else 'XFS volume', nr_disk=len(disks), disks=args.disks))
    if distro.name() == 'Ubuntu' and distro.version() == '14.04':
        if raid:
--- a/dist/common/scripts/scylla_util.py
+++ b/dist/common/scripts/scylla_util.py
@@ -92,7 +92,7 @@ def scyllabindir():


 # @param headers dict of k:v
-def curl(url, headers=None, byte=False, timeout=3, max_retries=5):
+def curl(url, headers=None, byte=False, timeout=3, max_retries=5, retry_interval=5):
    retries = 0
    while True:
        try:
@@ -102,9 +102,8 @@ def curl(url, headers=None, byte=False, timeout=3, max_retries=5):
                    return res.read()
                else:
                    return res.read().decode('utf-8')
-        except urllib.error.HTTPError:
-            logging.warning("Failed to grab %s..." % url)
-            time.sleep(5)
+        except urllib.error.URLError:
+            time.sleep(retry_interval)
            retries += 1
            if retries >= max_retries:
                raise
@@ -188,7 +187,7 @@ class gcp_instance:
        """get list of nvme disks from metadata server"""
        import json
        try:
-            disksREST=self.__instance_metadata("disks")
+            disksREST=self.__instance_metadata("disks", True)
            disksobj=json.loads(disksREST)
            nvmedisks=list(filter(self.isNVME, disksobj))
        except Exception as e:
@@ -236,7 +235,8 @@ class gcp_instance:

    def instance_size(self):
        """Returns the size of the instance we are running in. i.e.: 2"""
-        return self.instancetype.split("-")[2]
+        instancetypesplit = self.instancetype.split("-")
+        return instancetypesplit[2] if len(instancetypesplit)>2 else 0

    def instance_class(self):
        """Returns the class of the instance we are running in. i.e.: n2"""
@@ -298,22 +298,30 @@ class gcp_instance:
        return self.__firstNvmeSize

    def is_recommended_instance(self):
-        if self.is_recommended_instance_size() and not self.is_unsupported_instance_class() and self.is_supported_instance_class():
+        if not self.is_unsupported_instance_class() and self.is_supported_instance_class() and self.is_recommended_instance_size():
            # at least 1:2GB cpu:ram ratio , GCP is at 1:4, so this should be fine
            if self.cpu/self.memoryGB < 0.5:
-              # 30:1 Disk/RAM ratio must be kept at least(AWS), we relax this a little bit
-              # on GCP we are OK with 50:1 , n1-standard-2 can cope with 1 disk, not more
-              diskCount = self.nvmeDiskCount
-              # to reach max performance for > 16 disks we mandate 32 or more vcpus
-              # https://cloud.google.com/compute/docs/disks/local-ssd#performance
-              if diskCount >= 16 and self.cpu < 32:
-                  return False
-              diskSize= self.firstNvmeSize
-              if diskCount < 1:
-                  return False
-              disktoramratio = (diskCount*diskSize)/self.memoryGB
-              if (disktoramratio <= 50) and (disktoramratio > 0):
-                  return True
+                diskCount = self.nvmeDiskCount
+                # to reach max performance for >= 16 disks we mandate 32 or more vcpus
+                # https://cloud.google.com/compute/docs/disks/local-ssd#performance
+                if diskCount >= 16 and self.cpu < 32:
+                    logging.warning(
+                        "This machine doesn't have enough CPUs for allocated number of NVMEs (at least 32 cpus for >=16 disks). Performance will suffer.")
+                    return False
+                diskSize = self.firstNvmeSize
+                if diskCount < 1:
+                    return False
+                max_disktoramratio = 105
+                # 30:1 Disk/RAM ratio must be kept at least (AWS); we relax this a little bit:
+                # on GCP we are OK with max_disktoramratio:1. n1-standard-2 can cope with 1 disk, not more.
+                disktoramratio = (diskCount * diskSize) / self.memoryGB
+                if (disktoramratio > max_disktoramratio):
+                    logging.warning(
+                        f"Instance disk-to-RAM ratio is {disktoramratio}, which is higher than the recommended ratio {max_disktoramratio}. Performance may suffer.")
+                    return False
+                return True
+            else:
+                logging.warning("At least 2G of RAM per CPU is needed. Performance will suffer.")
        return False

    def private_ipv4(self):
@@ -365,6 +373,8 @@ class aws_instance:
            raise Exception("found more than one disk mounted at root'".format(root_dev_candidates))

        root_dev = root_dev_candidates[0].device
+        if root_dev == '/dev/root':
+            root_dev = run('findmnt -n -o SOURCE /', shell=True, check=True, capture_output=True, encoding='utf-8').stdout.strip()
        nvmes_present = list(filter(nvme_re.match, os.listdir("/dev")))
        return {"root": [ root_dev ], "ephemeral": [ x for x in nvmes_present if not root_dev.startswith(os.path.join("/dev/", x)) ] }

@@ -398,7 +408,7 @@ class aws_instance:
    def is_aws_instance(cls):
        """Check if it's AWS instance via query to metadata server."""
        try:
-            curl(cls.META_DATA_BASE_URL, max_retries=2)
+            curl(cls.META_DATA_BASE_URL, max_retries=2, retry_interval=1)
            return True
        except (urllib.error.URLError, urllib.error.HTTPError):
            return False
@@ -462,7 +472,7 @@ class aws_instance:

    def ebs_disks(self):
        """Returns all EBS disks"""
-        return set(self._disks["ephemeral"])
+        return set(self._disks["ebs"])

    def public_ipv4(self):
        """Returns the public IPv4 address of this instance"""
@@ -490,9 +500,7 @@ class aws_instance:
        return curl(self.META_DATA_BASE_URL + "user-data")


-# When a CLI tool is not installed, use relocatable CLI tool provided by Scylla
 scylla_env = os.environ.copy()
-scylla_env['PATH'] =  '{}:{}'.format(scyllabindir(), scylla_env['PATH'])
 scylla_env['DEBIAN_FRONTEND'] = 'noninteractive'

 def run(cmd, shell=False, silent=False, exception=True):
--- a/dist/common/sysctl.d/99-scylla-inotify.conf
+++ b/dist/common/sysctl.d/99-scylla-inotify.conf
@@ -0,0 +1,4 @@
+# allocate enough inotify instances for large machines
+# each tls instance needs 1 inotify instance, and there can be
+# multiple tls instances per shard.
+fs.inotify.max_user_instances = 1200
--- a/dist/debian/debian/scylla-kernel-conf.postinst
+++ b/dist/debian/debian/scylla-kernel-conf.postinst
@@ -11,6 +11,7 @@ else
    sysctl -p/usr/lib/sysctl.d/99-scylla-sched.conf || :
    sysctl -p/usr/lib/sysctl.d/99-scylla-aio.conf || :
    sysctl -p/usr/lib/sysctl.d/99-scylla-vm.conf || :
+    sysctl -p/usr/lib/sysctl.d/99-scylla-inotify.conf || :
 fi

 #DEBHELPER#
--- a/dist/docker/redhat/Dockerfile
+++ b/dist/docker/redhat/Dockerfile
@@ -5,8 +5,8 @@ MAINTAINER Avi Kivity <avi@cloudius-systems.com>
 ENV container docker

 # The SCYLLA_REPO_URL argument specifies the URL to the RPM repository this Docker image uses to install Scylla. The default value is the Scylla's unstable RPM repository, which contains the daily build.
-ARG SCYLLA_REPO_URL=http://downloads.scylladb.com/rpm/unstable/centos/master/latest/scylla.repo
-ARG VERSION=666.development
+ARG SCYLLA_REPO_URL=http://downloads.scylladb.com/rpm/unstable/centos/scylla-4.3/latest/scylla.repo
+ARG VERSION=4.3.rc0

 ADD scylla_bashrc /scylla_bashrc

--- a/dist/redhat/scylla.spec
+++ b/dist/redhat/scylla.spec
@@ -129,10 +129,9 @@ rm -rf $RPM_BUILD_ROOT
 %attr(0755,scylla,scylla) %dir %{_sharedstatedir}/scylla-housekeeping
 %ghost /etc/systemd/system/scylla-helper.slice.d/
 %ghost /etc/systemd/system/scylla-helper.slice.d/memory.conf
-%ghost /etc/systemd/system/scylla-server.service.d/
 %ghost /etc/systemd/system/scylla-server.service.d/capabilities.conf
 %ghost /etc/systemd/system/scylla-server.service.d/mounts.conf
-%ghost /etc/systemd/system/scylla-server.service.d/dependencies.conf
+/etc/systemd/system/scylla-server.service.d/dependencies.conf
 %ghost /etc/systemd/system/var-lib-systemd-coredump.mount
 %ghost /etc/systemd/system/scylla-cpupower.service
 %ghost /etc/systemd/system/var-lib-scylla.mount
@@ -190,6 +189,8 @@ Summary:        Scylla configuration package for the Linux kernel
 License:        AGPLv3
 URL:            http://www.scylladb.com/
 Requires:       kmod
+# tuned overwrites our sysctl settings
+Obsoletes:	tuned

 %description kernel-conf
 This package contains Linux kernel configuration changes for the Scylla database.  Install this package
@@ -201,6 +202,7 @@ if Scylla is the main application on your server and you wish to optimize its la
 /usr/lib/systemd/systemd-sysctl 99-scylla-sched.conf >/dev/null 2>&1 || :
 /usr/lib/systemd/systemd-sysctl 99-scylla-aio.conf >/dev/null 2>&1 || :
 /usr/lib/systemd/systemd-sysctl 99-scylla-vm.conf >/dev/null 2>&1 || :
+/usr/lib/systemd/systemd-sysctl 99-scylla-inotify.conf >/dev/null 2>&1 || :

 %files kernel-conf
 %defattr(-,root,root)
--- a/gms/feature.hh
+++ b/gms/feature.hh
@@ -143,6 +143,7 @@ extern const std::string_view LWT;
 extern const std::string_view PER_TABLE_PARTITIONERS;
 extern const std::string_view PER_TABLE_CACHING;
 extern const std::string_view DIGEST_FOR_NULL_VALUES;
+extern const std::string_view ALTERNATOR_STREAMS;

 }

--- a/gms/feature_service.cc
+++ b/gms/feature_service.cc
@@ -62,6 +62,7 @@ constexpr std::string_view features::LWT = "LWT";
 constexpr std::string_view features::PER_TABLE_PARTITIONERS = "PER_TABLE_PARTITIONERS";
 constexpr std::string_view features::PER_TABLE_CACHING = "PER_TABLE_CACHING";
 constexpr std::string_view features::DIGEST_FOR_NULL_VALUES = "DIGEST_FOR_NULL_VALUES";
+constexpr std::string_view features::ALTERNATOR_STREAMS = "ALTERNATOR_STREAMS";

 static logging::logger logger("features");

@@ -86,6 +87,7 @@ feature_service::feature_service(feature_config cfg) : _config(cfg)
        , _per_table_partitioners_feature(*this, features::PER_TABLE_PARTITIONERS)
        , _per_table_caching_feature(*this, features::PER_TABLE_CACHING)
        , _digest_for_null_values_feature(*this, features::DIGEST_FOR_NULL_VALUES)
+        , _alternator_streams_feature(*this, features::ALTERNATOR_STREAMS)
 {}

 feature_config feature_config_from_db_config(db::config& cfg, std::set<sstring> disabled) {
@@ -116,8 +118,8 @@ feature_config feature_config_from_db_config(db::config& cfg, std::set<sstring>
        }
    }

-    if (!cfg.check_experimental(db::experimental_features_t::CDC)) {
-        fcfg._disabled_features.insert(sstring(gms::features::CDC));
+    if (!cfg.check_experimental(db::experimental_features_t::ALTERNATOR_STREAMS)) {
+        fcfg._disabled_features.insert(sstring(gms::features::ALTERNATOR_STREAMS));
    }

    return fcfg;
@@ -187,6 +189,7 @@ std::set<std::string_view> feature_service::known_feature_set() {
        gms::features::UDF,
        gms::features::CDC,
        gms::features::DIGEST_FOR_NULL_VALUES,
+        gms::features::ALTERNATOR_STREAMS,
    };

    for (const sstring& s : _config._disabled_features) {
@@ -266,6 +269,7 @@ void feature_service::enable(const std::set<std::string_view>& list) {
        std::ref(_per_table_partitioners_feature),
        std::ref(_per_table_caching_feature),
        std::ref(_digest_for_null_values_feature),
+        std::ref(_alternator_streams_feature),
    })
    {
        if (list.contains(f.name())) {
--- a/gms/feature_service.hh
+++ b/gms/feature_service.hh
@@ -92,6 +92,7 @@ private:
    gms::feature _per_table_partitioners_feature;
    gms::feature _per_table_caching_feature;
    gms::feature _digest_for_null_values_feature;
+    gms::feature _alternator_streams_feature;

 public:
    bool cluster_supports_user_defined_functions() const {
@@ -160,6 +161,10 @@ public:
    bool cluster_supports_lwt() const {
        return bool(_lwt_feature);
    }
+
+    bool cluster_supports_alternator_streams() const {
+        return bool(_alternator_streams_feature);
+    }
 };

 } // namespace gms
--- a/install.sh
+++ b/install.sh
@@ -142,7 +142,7 @@ DEBIAN_SSL_CERT_FILE="/etc/ssl/certs/ca-certificates.crt"
 if [ -f "\${DEBIAN_SSL_CERT_FILE}" ]; then
  c=\${DEBIAN_SSL_CERT_FILE}
 fi
-PYTHONPATH="\${d}:\${d}/libexec:\$PYTHONPATH" PATH="\${d}/$pythonpath:\${PATH}" SSL_CERT_FILE="\${c}" exec -a "\$0" "\${d}/libexec/\${b}" "\$@"
+PYTHONPATH="\${d}:\${d}/libexec:\$PYTHONPATH" PATH="\${d}/../bin:\${d}/$pythonpath:\${PATH}" SSL_CERT_FILE="\${c}" exec -a "\$0" "\${d}/libexec/\${b}" "\$@"
 EOF
    chmod +x "$install"
 }
@@ -412,6 +412,10 @@ elif ! $packaging; then
    chown -R scylla:scylla $rdata
    chown -R scylla:scylla $rhkdata

+    for file in dist/common/sysctl.d/*.conf; do
+        bn=$(basename "$file")
+        sysctl -p "$rusr"/lib/sysctl.d/"$bn"
+    done
    $rprefix/scripts/scylla_post_install.sh
    echo "Scylla offline install completed."
 fi
--- a/main.cc
+++ b/main.cc
@@ -1023,8 +1023,7 @@ int main(int ac, char** av) {
            proxy.invoke_on_all([] (service::storage_proxy& local_proxy) {
                auto& ss = service::get_local_storage_service();
                ss.register_subscriber(&local_proxy);
-                //FIXME: discarded future
-                (void)local_proxy.start_hints_manager(gms::get_local_gossiper().shared_from_this(), ss.shared_from_this());
+                return local_proxy.start_hints_manager(gms::get_local_gossiper().shared_from_this(), ss.shared_from_this());
            }).get();

            supervisor::notify("starting messaging service");
--- a/mutation_reader.cc
+++ b/mutation_reader.cc
@@ -2044,11 +2044,13 @@ public:
        }
    }
    void abort(std::exception_ptr ep) {
-        _end_of_stream = true;
        _ex = std::move(ep);
        if (_full) {
            _full->set_exception(_ex);
            _full.reset();
+        } else if (_not_full) {
+            _not_full->set_exception(_ex);
+            _not_full.reset();
        }
    }
 };
--- a/mutation_writer/feed_writers.hh
+++ b/mutation_writer/feed_writers.hh
@@ -36,8 +36,14 @@ future<> feed_writer(flat_mutation_reader&& rd, Writer&& wr) {
                auto f2 = rd.is_buffer_empty() ? rd.fill_buffer(db::no_timeout) : make_ready_future<>();
                return when_all_succeed(std::move(f1), std::move(f2)).discard_result();
            });
-        }).finally([&wr] {
-            return wr.consume_end_of_stream();
+        }).then_wrapped([&wr] (future<> f) {
+            if (f.failed()) {
+                auto ex = f.get_exception();
+                wr.abort(ex);
+                return make_exception_future<>(ex);
+            } else {
+                return wr.consume_end_of_stream();
+            }
        });
    });
 }
--- a/mutation_writer/shard_based_splitting_writer.cc
+++ b/mutation_writer/shard_based_splitting_writer.cc
@@ -57,6 +57,9 @@ class shard_based_splitting_mutation_writer {
            }
            return std::move(_consume_fut);
        }
+        void abort(std::exception_ptr ep) {
+            _handle.abort(ep);
+        }
    };

 private:
@@ -110,6 +113,13 @@ public:
            return shard->consume_end_of_stream();
        });
    }
+    void abort(std::exception_ptr ep) {
+        for (auto&& shard : _shards) {
+            if (shard) {
+                shard->abort(ep);
+            }
+        }
+    }
 };

 future<> segregate_by_shard(flat_mutation_reader producer, reader_consumer consumer) {
--- a/mutation_writer/timestamp_based_splitting_writer.cc
+++ b/mutation_writer/timestamp_based_splitting_writer.cc
@@ -144,6 +144,9 @@ class timestamp_based_splitting_mutation_writer {
            }
            return std::move(_consume_fut);
        }
+        void abort(std::exception_ptr ep) {
+            _handle.abort(ep);
+        }
    };

 private:
@@ -188,6 +191,11 @@ public:
            return bucket.second.consume_end_of_stream();
        });
    }
+    void abort(std::exception_ptr ep) {
+        for (auto&& b : _buckets) {
+            b.second.abort(ep);
+        }
+    }
 };

 future<> timestamp_based_splitting_mutation_writer::write_to_bucket(bucket_id bucket, mutation_fragment&& mf) {
--- a/partition_version.cc
+++ b/partition_version.cc
@@ -542,12 +542,12 @@ partition_snapshot_ptr partition_entry::read(logalloc::region& r,
    return partition_snapshot_ptr(std::move(snp));
 }

-std::vector<range_tombstone>
+partition_snapshot::range_tombstone_result
 partition_snapshot::range_tombstones(position_in_partition_view start, position_in_partition_view end)
 {
    partition_version* v = &*version();
    if (!v->next()) {
-        return boost::copy_range<std::vector<range_tombstone>>(
+        return boost::copy_range<range_tombstone_result>(
            v->partition().row_tombstones().slice(*_schema, start, end));
    }
    range_tombstone_list list(*_schema);
@@ -557,10 +557,10 @@ partition_snapshot::range_tombstones(position_in_partition_view start, position_
        }
        v = v->next();
    }
-    return boost::copy_range<std::vector<range_tombstone>>(list.slice(*_schema, start, end));
+    return boost::copy_range<range_tombstone_result>(list.slice(*_schema, start, end));
 }

-std::vector<range_tombstone>
+partition_snapshot::range_tombstone_result
 partition_snapshot::range_tombstones()
 {
    return range_tombstones(
--- a/partition_version.hh
+++ b/partition_version.hh
@@ -26,6 +26,7 @@
 #include "utils/anchorless_list.hh"
 #include "utils/logalloc.hh"
 #include "utils/coroutine.hh"
+#include "utils/chunked_vector.hh"

 #include <boost/intrusive/parent_from_member.hpp>
 #include <boost/intrusive/slist.hpp>
@@ -400,10 +401,13 @@ public:
    ::static_row static_row(bool digest_requested) const;
    bool static_row_continuous() const;
    mutation_partition squashed() const;
+
+    using range_tombstone_result = utils::chunked_vector<range_tombstone>;
+
    // Returns range tombstones overlapping with [start, end)
-    std::vector<range_tombstone> range_tombstones(position_in_partition_view start, position_in_partition_view end);
+    range_tombstone_result range_tombstones(position_in_partition_view start, position_in_partition_view end);
    // Returns all range tombstones
-    std::vector<range_tombstone> range_tombstones();
+    range_tombstone_result range_tombstones();
 };

 class partition_snapshot_ptr {
--- a/repair/row_level.cc
+++ b/repair/row_level.cc
@@ -509,7 +509,7 @@ public:
    }
 };

-class repair_writer {
+class repair_writer : public enable_lw_shared_from_this<repair_writer> {
    schema_ptr _schema;
    reader_permit _permit;
    uint64_t _estimated_partitions;
@@ -569,8 +569,9 @@ public:
        table& t = db.local().find_column_family(_schema->id());
        auto [queue_reader, queue_handle] = make_queue_reader(_schema, _permit);
        _mq[node_idx] = std::move(queue_handle);
+        auto writer = shared_from_this();
        _writer_done[node_idx] = mutation_writer::distribute_reader_and_consume_on_shards(_schema, std::move(queue_reader),
-                [&db, reason = this->_reason, estimated_partitions = this->_estimated_partitions] (flat_mutation_reader reader) {
+                [&db, reason = this->_reason, estimated_partitions = this->_estimated_partitions, writer] (flat_mutation_reader reader) {
            auto& t = db.local().find_column_family(reader.schema());
            return db::view::check_needs_view_update_path(_sys_dist_ks->local(), t, reason).then([t = t.shared_from_this(), estimated_partitions, reader = std::move(reader)] (bool use_view_update_path) mutable {
                //FIXME: for better estimations this should be transmitted from remote
@@ -598,13 +599,13 @@ public:
                return consumer(std::move(reader));
            });
        },
-        t.stream_in_progress()).then([this, node_idx] (uint64_t partitions) {
+        t.stream_in_progress()).then([node_idx, writer] (uint64_t partitions) {
            rlogger.debug("repair_writer: keyspace={}, table={}, managed to write partitions={} to sstable",
-                _schema->ks_name(), _schema->cf_name(), partitions);
-        }).handle_exception([this, node_idx] (std::exception_ptr ep) {
+                writer->_schema->ks_name(), writer->_schema->cf_name(), partitions);
+        }).handle_exception([node_idx, writer] (std::exception_ptr ep) {
            rlogger.warn("repair_writer: keyspace={}, table={}, multishard_writer failed: {}",
-                    _schema->ks_name(), _schema->cf_name(), ep);
-            _mq[node_idx]->abort(ep);
+                    writer->_schema->ks_name(), writer->_schema->cf_name(), ep);
+            writer->_mq[node_idx]->abort(ep);
            return make_exception_future<>(std::move(ep));
        });
    }
@@ -718,7 +719,7 @@ private:
    size_t _nr_peer_nodes= 1;
    repair_stats _stats;
    repair_reader _repair_reader;
-    repair_writer _repair_writer;
+    lw_shared_ptr<repair_writer> _repair_writer;
    // Contains rows read from disk
    std::list<repair_row> _row_buf;
    // Contains rows we are working on to sync between peers
@@ -822,7 +823,7 @@ public:
                    _seed,
                    repair_reader::is_local_reader(_repair_master || _same_sharding_config)
              )
-            , _repair_writer(_schema, _permit, _estimated_partitions, _nr_peer_nodes, _reason)
+            , _repair_writer(make_lw_shared<repair_writer>(_schema, _permit, _estimated_partitions, _nr_peer_nodes, _reason))
            , _sink_source_for_get_full_row_hashes(_repair_meta_id, _nr_peer_nodes,
                    [&ms] (uint32_t repair_meta_id, netw::messaging_service::msg_addr addr) {
                        return ms.local().make_sink_and_source_for_repair_get_full_row_hashes_with_rpc_stream(repair_meta_id, addr);
@@ -855,7 +856,7 @@ public:
        auto f2 = _sink_source_for_get_row_diff.close();
        auto f3 = _sink_source_for_put_row_diff.close();
        return when_all_succeed(std::move(gate_future), std::move(f1), std::move(f2), std::move(f3)).discard_result().finally([this] {
-            return _repair_writer.wait_for_writer_done();
+            return _repair_writer->wait_for_writer_done();
        });
    }

@@ -1340,8 +1341,8 @@ private:

    future<> do_apply_rows(std::list<repair_row>&& row_diff, unsigned node_idx, update_working_row_buf update_buf) {
        return do_with(std::move(row_diff), [this, node_idx, update_buf] (std::list<repair_row>& row_diff) {
-            return with_semaphore(_repair_writer.sem(), 1, [this, node_idx, update_buf, &row_diff] {
-                _repair_writer.create_writer(_db, node_idx);
+            return with_semaphore(_repair_writer->sem(), 1, [this, node_idx, update_buf, &row_diff] {
+                _repair_writer->create_writer(_db, node_idx);
                return repeat([this, node_idx, update_buf, &row_diff] () mutable {
                    if (row_diff.empty()) {
                        return make_ready_future<stop_iteration>(stop_iteration::yes);
@@ -1355,7 +1356,7 @@ private:
                    // to_repair_rows_list above where the repair_row is created.
                    mutation_fragment mf = std::move(r.get_mutation_fragment());
                    auto dk_with_hash = r.get_dk_with_hash();
-                    return _repair_writer.do_write(node_idx, std::move(dk_with_hash), std::move(mf)).then([&row_diff] {
+                    return _repair_writer->do_write(node_idx, std::move(dk_with_hash), std::move(mf)).then([&row_diff] {
                        row_diff.pop_front();
                        return make_ready_future<stop_iteration>(stop_iteration::no);
                    });
--- a/row_cache.cc
+++ b/row_cache.cc
@@ -1263,7 +1263,9 @@ flat_mutation_reader cache_entry::read(row_cache& rc, read_context& reader, row_
 // Assumes reader is in the corresponding partition
 flat_mutation_reader cache_entry::do_read(row_cache& rc, read_context& reader) {
    auto snp = _pe.read(rc._tracker.region(), rc._tracker.cleaner(), _schema, &rc._tracker, reader.phase());
-    auto ckr = query::clustering_key_filter_ranges::get_ranges(*_schema, reader.slice(), _key.key());
+    auto ckr = with_linearized_managed_bytes([&] {
+        return query::clustering_key_filter_ranges::get_ranges(*_schema, reader.slice(), _key.key());
+    });
    auto r = make_cache_flat_mutation_reader(_schema, _key, std::move(ckr), rc, reader.shared_from_this(), std::move(snp));
    r.upgrade_schema(rc.schema());
    r.upgrade_schema(reader.schema());
--- a/2
+++ b/2
--- a/service/storage_proxy.cc
+++ b/service/storage_proxy.cc
@@ -4933,10 +4933,12 @@ void storage_proxy::init_messaging_service() {
            tracing::trace(trace_state_ptr, "read_data: message received from /{}", src_addr.addr);
        }
        auto da = oda.value_or(query::digest_algorithm::MD5);
+        auto sp = get_local_shared_storage_proxy();
        if (!cmd.max_result_size) {
-            cmd.max_result_size.emplace(cinfo.retrieve_auxiliary<uint64_t>("max_result_size"));
+            auto& cfg = sp->_db.local().get_config();
+            cmd.max_result_size.emplace(cfg.max_memory_for_unlimited_query_soft_limit(), cfg.max_memory_for_unlimited_query_hard_limit());
        }
-        return do_with(std::move(pr), get_local_shared_storage_proxy(), std::move(trace_state_ptr), [&cinfo, cmd = make_lw_shared<query::read_command>(std::move(cmd)), src_addr = std::move(src_addr), da, t] (::compat::wrapping_partition_range& pr, shared_ptr<storage_proxy>& p, tracing::trace_state_ptr& trace_state_ptr) mutable {
+        return do_with(std::move(pr), std::move(sp), std::move(trace_state_ptr), [&cinfo, cmd = make_lw_shared<query::read_command>(std::move(cmd)), src_addr = std::move(src_addr), da, t] (::compat::wrapping_partition_range& pr, shared_ptr<storage_proxy>& p, tracing::trace_state_ptr& trace_state_ptr) mutable {
            p->get_stats().replica_data_reads++;
            auto src_ip = src_addr.addr;
            return get_schema_for_read(cmd->schema_version, std::move(src_addr), p->_messaging).then([cmd, da, &pr, &p, &trace_state_ptr, t] (schema_ptr s) {
--- a/service/storage_service.cc
+++ b/service/storage_service.cc
@@ -298,7 +298,7 @@ void storage_service::prepare_to_join(
        _token_metadata.update_normal_tokens(my_tokens, get_broadcast_address());

        _cdc_streams_ts = db::system_keyspace::get_saved_cdc_streams_timestamp().get0();
-        if (!_cdc_streams_ts && db().local().get_config().check_experimental(db::experimental_features_t::CDC)) {
+        if (!_cdc_streams_ts) {
            // We could not have completed joining if we didn't generate and persist a CDC streams timestamp,
            // unless we are restarting after upgrading from non-CDC supported version.
            // In that case we won't begin a CDC generation: it should be done by one of the nodes
@@ -550,7 +550,7 @@ void storage_service::join_token_ring(int delay) {
        assert(should_bootstrap() || db().local().is_replacing() || !_cdc_streams_ts);
    }

-    if (!_cdc_streams_ts && db().local().get_config().check_experimental(db::experimental_features_t::CDC)) {
+    if (!_cdc_streams_ts) {
        // If we didn't choose a CDC streams timestamp at this point, then either
        // 1. we're replacing a node which didn't gossip a CDC streams timestamp for whatever reason,
        // 2. we've already bootstrapped, but are upgrading from a non-CDC version,
@@ -570,10 +570,15 @@ void storage_service::join_token_ring(int delay) {
        if (!db().local().is_replacing()
                && (!db::system_keyspace::bootstrap_complete()
                    || cdc::should_propose_first_generation(get_broadcast_address(), _gossiper))) {
-
-            _cdc_streams_ts = cdc::make_new_cdc_generation(db().local().get_config(),
-                    _bootstrap_tokens, _token_metadata, _gossiper,
-                    _sys_dist_ks.local(), get_ring_delay(), _for_testing);
+            try {
+                _cdc_streams_ts = cdc::make_new_cdc_generation(db().local().get_config(),
+                        _bootstrap_tokens, _token_metadata, _gossiper,
+                        _sys_dist_ks.local(), get_ring_delay(), _for_testing);
+            } catch (...) {
+                cdc_log.warn(
+                    "Could not create a new CDC generation: {}. This may make it impossible to use CDC. Use nodetool checkAndRepairCdcStreams to fix CDC generation",
+                    std::current_exception());
+            }
        }
    }

@@ -893,24 +898,18 @@ void storage_service::bootstrap() {
        // It doesn't hurt: other nodes will (potentially) just do more generation switches.
        // We do this because with this new attempt at bootstrapping we picked a different set of tokens.

-        if (db().local().get_config().check_experimental(db::experimental_features_t::CDC)) {
-            // Update pending ranges now, so we correctly count ourselves as a pending replica
-            // when inserting the new CDC generation.
-            _token_metadata.add_bootstrap_tokens(_bootstrap_tokens, get_broadcast_address());
-            update_pending_ranges().get();
+        // Update pending ranges now, so we correctly count ourselves as a pending replica
+        // when inserting the new CDC generation.
+        _token_metadata.add_bootstrap_tokens(_bootstrap_tokens, get_broadcast_address());
+        update_pending_ranges().get();

-            // After we pick a generation timestamp, we start gossiping it, and we stick with it.
-            // We don't do any other generation switches (unless we crash before complecting bootstrap).
-            assert(!_cdc_streams_ts);
+        // After we pick a generation timestamp, we start gossiping it, and we stick with it.
+        // We don't do any other generation switches (unless we crash before completing bootstrap).
+        assert(!_cdc_streams_ts);

-            _cdc_streams_ts = cdc::make_new_cdc_generation(db().local().get_config(),
-                    _bootstrap_tokens, _token_metadata, _gossiper,
-                    _sys_dist_ks.local(), get_ring_delay(), _for_testing);
-        } else {
-            // We should not be able to join the cluster if other nodes support CDC but we don't.
-            // The check should have been made somewhere in prepare_to_join (`check_knows_remote_features`).
-            assert(!_feature_service.cluster_supports_cdc());
-        }
+        _cdc_streams_ts = cdc::make_new_cdc_generation(db().local().get_config(),
+                _bootstrap_tokens, _token_metadata, _gossiper,
+                _sys_dist_ks.local(), get_ring_delay(), _for_testing);

        _gossiper.add_local_application_state({
            // Order is important: both the CDC streams timestamp and tokens must be known when a node handles our status.
@@ -2036,9 +2035,8 @@ future<> storage_service::start_gossiping(bind_messaging_port do_bind) {
        return seastar::async([&ss, do_bind] {
            if (!ss._initialized) {
                slogger.warn("Starting gossip by operator request");
-                bool cdc_enabled = ss.db().local().get_config().check_experimental(db::experimental_features_t::CDC);
                ss.set_gossip_tokens(db::system_keyspace::get_local_tokens().get0(),
-                        cdc_enabled ? std::make_optional(cdc::get_local_streams_timestamp().get0()) : std::nullopt);
+                        std::make_optional(cdc::get_local_streams_timestamp().get0()));
                ss._gossiper.force_newer_generation();
                ss._gossiper.start_gossiping(utils::get_generation_number(), gms::bind_messaging_port(bool(do_bind))).then([&ss] {
                    ss._initialized = true;
--- a/sstables/compaction.cc
+++ b/sstables/compaction.cc
@@ -212,16 +212,18 @@ public:
 };

 struct compaction_writer {
+    shared_sstable sst;
    // We use a ptr for pointer stability and so that it can be null
    // when using a noop monitor.
    sstable_writer writer;
    // The order in here is important. A monitor must be destroyed before the writer it is monitoring since it has a
    // periodic timer that checks the writer.
+    // The writer must be destroyed before the shared_sstable since it may depend on the sstable
+    // (as in the mx::writer over compressed_file_data_sink_impl case that depends on sstables::compression).
    std::unique_ptr<compaction_write_monitor> monitor;
-    shared_sstable sst;

    compaction_writer(std::unique_ptr<compaction_write_monitor> monitor, sstable_writer writer, shared_sstable sst)
-        : writer(std::move(writer)), monitor(std::move(monitor)), sst(std::move(sst)) {}
+        : sst(std::move(sst)), writer(std::move(writer)), monitor(std::move(monitor)) {}
    compaction_writer(sstable_writer writer, shared_sstable sst)
        : compaction_writer(nullptr, std::move(writer), std::move(sst)) {}
 };
@@ -609,10 +611,12 @@ private:
                                         std::move(gc_consumer));

            return seastar::async([cfc = std::move(cfc), reader = std::move(reader), this] () mutable {
-                reader.consume_in_thread(std::move(cfc), make_partition_filter(), db::no_timeout);
+                reader.consume_in_thread(std::move(cfc), db::no_timeout);
            });
        });
-        return consumer(make_sstable_reader());
+        // producer will filter out a partition before it reaches the consumer(s)
+        auto producer = make_filtering_reader(make_sstable_reader(), make_partition_filter());
+        return consumer(std::move(producer));
    }

    virtual reader_consumer make_interposer_consumer(reader_consumer end_consumer) {
--- a/sstables/mp_row_consumer.hh
+++ b/sstables/mp_row_consumer.hh
@@ -378,6 +378,7 @@ private:
        _fwd_end = _fwd ? position_in_partition::before_all_clustered_rows() : position_in_partition::after_all_clustered_rows();
        _out_of_range = false;
        _range_tombstones.reset();
+        _ready = {};
        _first_row_encountered = false;
    }
 public:
--- a/sstables/time_window_compaction_strategy.cc
+++ b/sstables/time_window_compaction_strategy.cc
@@ -162,7 +162,7 @@ time_window_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> i
    for (auto& pair : all_buckets.first) {
        auto ssts = std::move(pair.second);
        if (ssts.size() > offstrategy_threshold) {
-            ssts.resize(std::min(multi_window.size(), max_sstables));
+            ssts.resize(std::min(ssts.size(), max_sstables));
            compaction_descriptor desc(std::move(ssts), std::optional<sstables::sstable_set>(), iop);
            desc.options = compaction_options::make_reshape();
            return desc;
--- a/streaming/stream_session.cc
+++ b/streaming/stream_session.cc
@@ -403,7 +403,7 @@ future<prepare_message> stream_session::prepare(std::vector<stream_request> requ
            try {
                db.find_column_family(ks, cf);
            } catch (no_such_column_family&) {
-                auto err = format("[Stream #{{}}] prepare requested ks={{}} cf={{}} does not exist", ks, cf);
+                auto err = format("[Stream #{{}}] prepare requested ks={{}} cf={{}} does not exist", plan_id, ks, cf);
                sslog.warn(err.c_str());
                throw std::runtime_error(err);
            }
--- a/table.cc
+++ b/table.cc
@@ -832,7 +832,7 @@ table::stop() {
        return make_ready_future<>();
    }
    return _async_gate.close().then([this] {
-        return when_all(await_pending_writes(), await_pending_reads(), await_pending_streams()).discard_result().finally([this] {
+        return await_pending_ops().finally([this] {
            return _memtables->request_flush().finally([this] {
                return _compaction_manager.remove(this).then([this] {
                    // Nest, instead of using when_all, so we don't lose any exceptions.
@@ -1532,7 +1532,8 @@ future<std::unordered_map<sstring, table::snapshot_details>> table::get_snapshot
 }

 future<> table::flush() {
-    return _memtables->request_flush();
+    auto op = _pending_flushes_phaser.start();
+    return _memtables->request_flush().then([op = std::move(op)] {});
 }

 // FIXME: We can do much better than this in terms of cache management. Right
--- a/test/alternator/run
+++ b/test/alternator/run
@@ -86,7 +86,7 @@ ln -s "$SCYLLA" "$SCYLLA_LINK"
        --alternator-write-isolation=always_use_lwt \
        --alternator-streams-time-window-s=0 \
        --developer-mode=1 \
-        --experimental-features=cdc \
+        --experimental-features=alternator-streams \
        --ring-delay-ms 0 --collectd 0 \
        --smp 2 -m 1G \
        --overprovisioned --unsafe-bypass-fsync 1 \
--- a/test/alternator/test_condition_expression.py
+++ b/test/alternator/test_condition_expression.py
@@ -136,7 +136,7 @@ def test_update_condition_eq_different(test_table_s):
                        ConditionExpression='a = :val2',
                        ExpressionAttributeValues={':val1': val1, ':val2': val2})

-# Also check an actual case of same time, but inequality.
+# Also check an actual case of same type, but inequality.
 def test_update_condition_eq_unequal(test_table_s):
    p = random_string()
    test_table_s.update_item(Key={'p': p},
@@ -146,6 +146,13 @@ def test_update_condition_eq_unequal(test_table_s):
            UpdateExpression='SET a = :val1',
            ConditionExpression='a = :oldval',
            ExpressionAttributeValues={':val1': 3, ':oldval': 2})
+    # If the attribute being compared doesn't exist, it's considered a failed
+    # condition, not an error:
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET a = :val1',
+            ConditionExpression='q = :oldval',
+            ExpressionAttributeValues={':val1': 3, ':oldval': 2})

 # Check that set equality is checked correctly. Unlike string equality (for
 # example), it cannot be done with just naive string comparison of the JSON
@@ -269,15 +276,44 @@ def test_update_condition_lt(test_table_s):
            UpdateExpression='SET z = :newval',
            ConditionExpression='a < :oldval',
            ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
-    # Trying to compare an unsupported type - e.g., in the following test
-    # a boolean, is unfortunately caught by boto3 and cannot be tested here...
-    #test_table_s.update_item(Key={'p': p},
-    #    AttributeUpdates={'d': {'Value': False, 'Action': 'PUT'}})
-    #with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
-    #    test_table_s.update_item(Key={'p': p},
-    #        UpdateExpression='SET z = :newval',
-    #        ConditionExpression='d < :oldval',
-    #        ExpressionAttributeValues={':newval': 2, ':oldval': True})
+    # If the attribute being compared doesn't even exist, this is also
+    # considered as a false condition - not an error.
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression='q < :oldval',
+            ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression=':oldval < q',
+            ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
+    # If a comparison parameter comes from a constant specified in the query,
+    # and it has a type not supported by the comparison (e.g., a list), it's
+    # not just a failed comparison - it is considered a ValidationException
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression='a < :oldval',
+            ExpressionAttributeValues={':newval': 2, ':oldval': [1,2]})
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression=':oldval < a',
+            ExpressionAttributeValues={':newval': 2, ':oldval': [1,2]})
+    # However, when the wrong type comes from an item attribute, not the
+    # query, the comparison is simply false - not a ValidationException.
+    test_table_s.update_item(Key={'p': p}, AttributeUpdates={'x': {'Value': [1,2,3], 'Action': 'PUT'}})
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression='x < :oldval',
+            ExpressionAttributeValues={':newval': 2, ':oldval': 1})
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression=':oldval < x',
+            ExpressionAttributeValues={':newval': 2, ':oldval': 1})
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 4

 # Test for ConditionExpression with operator "<="
@@ -341,6 +377,44 @@ def test_update_condition_le(test_table_s):
            UpdateExpression='SET z = :newval',
            ConditionExpression='a <= :oldval',
            ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
+    # If the attribute being compared doesn't even exist, this is also
+    # considered as a false condition - not an error.
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression='q <= :oldval',
+            ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression=':oldval <= q',
+            ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
+    # If a comparison parameter comes from a constant specified in the query,
+    # and it has a type not supported by the comparison (e.g., a list), it's
+    # not just a failed comparison - it is considered a ValidationException
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression='a <= :oldval',
+            ExpressionAttributeValues={':newval': 2, ':oldval': [1,2]})
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression=':oldval <= a',
+            ExpressionAttributeValues={':newval': 2, ':oldval': [1,2]})
+    # However, when the wrong type comes from an item attribute, not the
+    # query, the comparison is simply false - not a ValidationException.
+    test_table_s.update_item(Key={'p': p}, AttributeUpdates={'x': {'Value': [1,2,3], 'Action': 'PUT'}})
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression='x <= :oldval',
+            ExpressionAttributeValues={':newval': 2, ':oldval': 1})
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression=':oldval <= x',
+            ExpressionAttributeValues={':newval': 2, ':oldval': 1})
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 7

 # Test for ConditionExpression with operator ">"
@@ -404,6 +478,44 @@ def test_update_condition_gt(test_table_s):
            UpdateExpression='SET z = :newval',
            ConditionExpression='a > :oldval',
            ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
+    # If the attribute being compared doesn't even exist, this is also
+    # considered as a false condition - not an error.
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression='q > :oldval',
+            ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression=':oldval > q',
+            ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
+    # If a comparison parameter comes from a constant specified in the query,
+    # and it has a type not supported by the comparison (e.g., a list), it's
+    # not just a failed comparison - it is considered a ValidationException
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression='a > :oldval',
+            ExpressionAttributeValues={':newval': 2, ':oldval': [1,2]})
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression=':oldval > a',
+            ExpressionAttributeValues={':newval': 2, ':oldval': [1,2]})
+    # However, when the wrong type comes from an item attribute, not the
+    # query, the comparison is simply false - not a ValidationException.
+    test_table_s.update_item(Key={'p': p}, AttributeUpdates={'x': {'Value': [1,2,3], 'Action': 'PUT'}})
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression='x > :oldval',
+            ExpressionAttributeValues={':newval': 2, ':oldval': 1})
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression=':oldval > x',
+            ExpressionAttributeValues={':newval': 2, ':oldval': 1})
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 4

 # Test for ConditionExpression with operator ">="
@@ -467,6 +579,44 @@ def test_update_condition_ge(test_table_s):
            UpdateExpression='SET z = :newval',
            ConditionExpression='a >= :oldval',
            ExpressionAttributeValues={':newval': 2, ':oldval': '0'})
+    # If the attribute being compared doesn't even exist, this is also
+    # considered as a false condition - not an error.
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression='q >= :oldval',
+            ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression=':oldval >= q',
+            ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
+    # If a comparison parameter comes from a constant specified in the query,
+    # and it has a type not supported by the comparison (e.g., a list), it's
+    # not just a failed comparison - it is considered a ValidationException
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression='a >= :oldval',
+            ExpressionAttributeValues={':newval': 2, ':oldval': [1,2]})
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression=':oldval >= a',
+            ExpressionAttributeValues={':newval': 2, ':oldval': [1,2]})
+    # However, when the wrong type comes from an item attribute, not the
+    # query, the comparison is simply false - not a ValidationException.
+    test_table_s.update_item(Key={'p': p}, AttributeUpdates={'x': {'Value': [1,2,3], 'Action': 'PUT'}})
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression='x >= :oldval',
+            ExpressionAttributeValues={':newval': 2, ':oldval': 1})
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression=':oldval >= x',
+            ExpressionAttributeValues={':newval': 2, ':oldval': 1})
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 7

 # Test for ConditionExpression with ternary operator "BETWEEN" (checking
@@ -548,6 +698,60 @@ def test_update_condition_between(test_table_s):
            UpdateExpression='SET z = :newval',
            ConditionExpression='a BETWEEN :oldval1 AND :oldval2',
            ExpressionAttributeValues={':newval': 2, ':oldval1': '0', ':oldval2': '2'})
+    # If the attribute being compared doesn't even exist, this is also
+    # considered as a false condition - not an error.
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression='q BETWEEN :oldval1 AND :oldval2',
+            ExpressionAttributeValues={':newval': 2, ':oldval1': b'dog', ':oldval2': b'zebra'})
+    # If an operand comes from the query and has a type not supported by the
+    # comparison (e.g., a list), it's not just a failed condition - it is
+    # considered a ValidationException
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression='a BETWEEN :oldval1 AND :oldval2',
+            ExpressionAttributeValues={':newval': 2, ':oldval1': [1,2], ':oldval2': [2,3]})
+    # However, when the wrong type comes from an item attribute, not the
+    # query, the comparison is simply false - not a ValidationException.
+    test_table_s.update_item(Key={'p': p}, AttributeUpdates={'x': {'Value': [1,2,3], 'Action': 'PUT'},
+                                                             'y': {'Value': [2,3,4], 'Action': 'PUT'}})
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression='a BETWEEN x and y',
+            ExpressionAttributeValues={':newval': 2})
+    # If the two operands come from the query (":val" references) then if they
+    # have different types or the wrong order, this is a ValidationException.
+    # But if one or more of the operands come from the item, this only causes
+    # a false condition - not a ValidationException.
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression='a BETWEEN :oldval1 AND :oldval2',
+            ExpressionAttributeValues={':newval': 2, ':oldval1': 2, ':oldval2': 1})
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression='a BETWEEN :oldval1 AND :oldval2',
+            ExpressionAttributeValues={':newval': 2, ':oldval1': 2, ':oldval2': 'dog'})
+    test_table_s.update_item(Key={'p': p}, AttributeUpdates={'two': {'Value': 2, 'Action': 'PUT'}})
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression='a BETWEEN two AND :oldval',
+            ExpressionAttributeValues={':newval': 2, ':oldval': 1})
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression='a BETWEEN :oldval AND two',
+            ExpressionAttributeValues={':newval': 2, ':oldval': 3})
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression='a BETWEEN two AND :oldval',
+            ExpressionAttributeValues={':newval': 2, ':oldval': 'dog'})
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 9

 # Test for ConditionExpression with multi-operand operator "IN", checking
@@ -605,6 +809,13 @@ def test_update_condition_in(test_table_s):
            UpdateExpression='SET c = :val37',
            ConditionExpression='a IN ()',
            ExpressionAttributeValues=values)
+    # If the attribute being compared doesn't even exist, this is also
+    # considered as a false condition - not an error.
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET c = :val37',
+            ConditionExpression='q IN ({})'.format(','.join(values.keys())),
+            ExpressionAttributeValues=values)

 # Beyond the above operators, there are also test functions supported -
 # attribute_exists, attribute_not_exists, attribute_type, begins_with,
--- a/test/alternator/test_expected.py
+++ b/test/alternator/test_expected.py
@@ -237,6 +237,30 @@ def test_update_expected_1_le(test_table_s):
                            'AttributeValueList': [2, 3]}}
        )

+# Comparison operators like le work only on numbers, strings or bytes.
+# As noted in issue #8043, if any other type is included in *the query*,
+# the result should be a ValidationException, but if the wrong type appears
+# in the item, not the query, the result is a failed condition.
+def test_update_expected_1_le_validation(test_table_s):
+    p = random_string()
+    test_table_s.update_item(Key={'p': p},
+        AttributeUpdates={'a': {'Value': 1, 'Action': 'PUT'},
+                          'b': {'Value': [1,2], 'Action': 'PUT'}})
+    # Bad type (a list) in the query. Result is ValidationException.
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.update_item(Key={'p': p},
+            AttributeUpdates={'z': {'Value': 17, 'Action': 'PUT'}},
+            Expected={'a': {'ComparisonOperator': 'LE',
+                            'AttributeValueList': [[1,2,3]]}}
+        )
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):  # bad type (list 'b') is in the item
+        test_table_s.update_item(Key={'p': p},
+            AttributeUpdates={'z': {'Value': 17, 'Action': 'PUT'}},
+            Expected={'b': {'ComparisonOperator': 'LE',
+                            'AttributeValueList': [3]}}
+        )
+    assert 'z' not in test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']  # neither update was applied
+
 # Tests for Expected with ComparisonOperator = "LT":
 def test_update_expected_1_lt(test_table_s):
    p = random_string()
@@ -894,6 +918,34 @@ def test_update_expected_1_between(test_table_s):
            AttributeUpdates={'z': {'Value': 2, 'Action': 'PUT'}},
            Expected={'d': {'ComparisonOperator': 'BETWEEN', 'AttributeValueList': [set([1]), set([2])]}})

+# BETWEEN works only on numbers, strings or bytes. As noted in issue #8043,
+# if any other type is included in *the query*, the result should be a
+# ValidationException, but if the wrong type appears in the item, not the
+# query, the result is a failed condition.
+# BETWEEN should also generate ValidationException if the two ends of the
+# range are not of the same type or not in the correct order, but this
+# already is tested in the test above (test_update_expected_1_between).
+def test_update_expected_1_between_validation(test_table_s):
+    p = random_string()
+    test_table_s.update_item(Key={'p': p},
+        AttributeUpdates={'a': {'Value': 1, 'Action': 'PUT'},
+                          'b': {'Value': [1,2], 'Action': 'PUT'}})
+    # Bad type (a list) in the query. Result is ValidationException.
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.update_item(Key={'p': p},
+            AttributeUpdates={'z': {'Value': 17, 'Action': 'PUT'}},
+            Expected={'a': {'ComparisonOperator': 'BETWEEN',
+                            'AttributeValueList': [[1,2,3], [2,3,4]]}}
+        )
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):  # bad type (list 'b') is in the item
+        test_table_s.update_item(Key={'p': p},
+            AttributeUpdates={'z': {'Value': 17, 'Action': 'PUT'}},
+            Expected={'b': {'ComparisonOperator': 'BETWEEN',
+                            'AttributeValueList': [1,2]}}
+        )
+    assert 'z' not in test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']  # neither update was applied
+
+
 ##############################################################################
 # Instead of ComparisonOperator and AttributeValueList, one can specify either
 # Value or Exists:
--- a/test/alternator/test_filter_expression.py
+++ b/test/alternator/test_filter_expression.py
@@ -235,6 +235,30 @@ def test_filter_expression_ge(test_table_sn_with_data):
        expected_items = [item for item in items if item[xn] >= xv]
        assert(got_items == expected_items)

+# Comparison operators such as >= or BETWEEN only work on numbers, strings or
+# bytes. When an operand of an expression comes from the item and has a wrong
+# type (e.g., a list), the result is that the item is skipped - aborting the
+# scan with a ValidationException is a bug (this was issue #8043).
+def test_filter_expression_le_bad_type(test_table_sn_with_data):
+    table, p, items = test_table_sn_with_data
+    got_items = full_query(table, KeyConditionExpression='p=:p', FilterExpression='l <= :xv',
+        ExpressionAttributeValues={':p': p, ':xv': 3})
+    assert got_items == []
+    got_items = full_query(table, KeyConditionExpression='p=:p', FilterExpression=':xv <= l',
+        ExpressionAttributeValues={':p': p, ':xv': 3})
+    assert got_items == []
+def test_filter_expression_between_bad_type(test_table_sn_with_data):
+    table, p, items = test_table_sn_with_data
+    got_items = full_query(table, KeyConditionExpression='p=:p', FilterExpression='s between :xv and l',
+        ExpressionAttributeValues={':p': p, ':xv': 'cat'})
+    assert got_items == []
+    got_items = full_query(table, KeyConditionExpression='p=:p', FilterExpression='s between l and :xv',
+        ExpressionAttributeValues={':p': p, ':xv': 'cat'})
+    assert got_items == []
+    got_items = full_query(table, KeyConditionExpression='p=:p', FilterExpression='s between i and :xv',
+        ExpressionAttributeValues={':p': p, ':xv': 'cat'})
+    assert got_items == []
+
 # Test the "BETWEEN/AND" ternary operator on a numeric, string and bytes
 # attribute. These keywords are case-insensitive.
 def test_filter_expression_between(test_table_sn_with_data):
@@ -658,7 +682,6 @@ def test_filter_expression_and_sort_key_condition(test_table_sn_with_data):
 # In particular, test that FilterExpression may inspect attributes which will
 # not be returned by the query, because of the ProjectionExpression.
 # This test reproduces issue #6951.
-@pytest.mark.xfail(reason="issue #6951: cannot filter on non-returned attributes")
 def test_filter_expression_and_projection_expression(test_table):
    p = random_string()
    test_table.put_item(Item={'p': p, 'c': 'hi', 'x': 'dog', 'y': 'cat'})
--- a/test/alternator/test_query.py
+++ b/test/alternator/test_query.py
@@ -386,3 +386,38 @@ def test_query_missing_key(test_table):
        full_query(test_table, KeyConditions={})
    with pytest.raises(ClientError, match='ValidationException'):
        full_query(test_table)
+
+# The paging tests above used a numeric sort key. Let's now also test paging
+# with a bytes sort key. We already have above a test that bytes sort keys
+# work and are sorted correctly (test_query_sort_order_bytes), but the
+# following test adds a check that *paging* works correctly for such keys.
+# We used to have a bug in this (issue #7768) - the returned LastEvaluatedKey
+# was incorrectly formatted, breaking boto3's parsing of the response.
+# Note we only check the case of bytes *sort* keys in this test. For bytes
+# *partition* keys, see test_scan_paging_bytes().
+def test_query_paging_bytes(test_table_sb):
+    p = random_string()
+    items = [{'p': p, 'c': random_bytes()} for i in range(10)]
+    with test_table_sb.batch_writer() as batch:
+        for item in items:
+            batch.put_item(item)
+    # Deliberately pass Limit=1 to enforce paging even though we have
+    # just 10 items in the partition.
+    got_items = full_query(test_table_sb, Limit=1,
+        KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}})
+    got_sort_keys = [x['c'] for x in got_items]
+    expected_sort_keys = sorted(x['c'] for x in items)
+    assert got_sort_keys == expected_sort_keys
+
+# A similar test for string clustering keys
+def test_query_paging_string(test_table_ss):
+    p = random_string()
+    items = [{'p': p, 'c': random_string()} for i in range(10)]
+    with test_table_ss.batch_writer() as batch:
+        for item in items:
+            batch.put_item(item)
+    got_items = full_query(test_table_ss, Limit=1,
+        KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}})
+    got_sort_keys = [x['c'] for x in got_items]
+    expected_sort_keys = sorted(x['c'] for x in items)
+    assert got_sort_keys == expected_sort_keys
--- a/test/alternator/test_query_filter.py
+++ b/test/alternator/test_query_filter.py
@@ -539,7 +539,6 @@ def test_query_filter_paging(test_table_sn_with_data):
 # In particular, test that QueryFilter may inspect attributes which will
 # not be returned by the query, because the AttributesToGet.
 # This test reproduces issue #6951.
-@pytest.mark.xfail(reason="issue #6951: cannot filter on non-returned attributes")
 def test_query_filter_and_attributes_to_get(test_table):
    p = random_string()
    test_table.put_item(Item={'p': p, 'c': 'hi', 'x': 'dog', 'y': 'cat'})
--- a/test/alternator/test_scan.py
+++ b/test/alternator/test_scan.py
@@ -19,7 +19,7 @@

 import pytest
 from botocore.exceptions import ClientError
-from util import random_string, full_scan, full_scan_and_count, multiset
+from util import random_string, random_bytes, full_scan, full_scan_and_count, multiset
 from boto3.dynamodb.conditions import Attr

 # Test that scanning works fine with/without pagination
@@ -264,3 +264,20 @@ def test_scan_parallel_incorrect(filled_test_table):
    for segment in [7, 9]:
        with pytest.raises(ClientError, match='ValidationException.*Segment'):
            full_scan(test_table, TotalSegments=5, Segment=segment)
+
+# We used to have a bug with formatting of LastEvaluatedKey in the response
+# of Query and Scan with bytes keys (issue #7768). In test_query_paging_bytes()
+# (test_query.py) we tested the case of bytes *sort* keys. In the following
+# test we check bytes *partition* keys.
+def test_scan_paging_bytes(test_table_b):
+    # We will not Scan the entire table - we have no idea what it contains.
+    # But we don't need to scan the entire table - we just need the table
+    # to contain at least two items, and then Scan it with Limit=1 and stop
+    # after one page. Before #7768 was fixed, the test failed when the
+    # LastEvaluatedKey in the response could not be parsed.
+    items = [{'p': random_bytes()}, {'p': random_bytes()}]
+    with test_table_b.batch_writer() as batch:
+        for item in items:
+            batch.put_item(item)
+    response = test_table_b.scan(ConsistentRead=True, Limit=1)
+    assert 'LastEvaluatedKey' in response
--- a/test/alternator/test_system_tables.py
+++ b/test/alternator/test_system_tables.py
@@ -41,8 +41,10 @@ def test_fetch_from_system_tables(scylla_only, dynamodb):

        key_columns = [item['column_name'] for item in col_response['Items'] if item['kind'] == 'clustering' or item['kind'] == 'partition_key']
        qualified_name = "{}{}.{}".format(internal_prefix, ks_name, table_name)
-        response = client.scan(TableName=qualified_name, AttributesToGet=key_columns)
-        print(ks_name, table_name, response)
+        import time
+        start = time.time()
+        response = client.scan(TableName=qualified_name, AttributesToGet=key_columns, Limit=50)
+        print(ks_name, table_name, len(str(response)), time.time()-start)

 def test_block_access_to_non_system_tables_with_virtual_interface(scylla_only, test_table_s, dynamodb):
    client = dynamodb.meta.client
--- a/test/alternator/test_update_expression.py
+++ b/test/alternator/test_update_expression.py
@@ -659,6 +659,24 @@ def test_update_expression_add_numbers(test_table_s):
            UpdateExpression='ADD b :val1',
            ExpressionAttributeValues={':val1': 1})

+# In test_update_expression_add_numbers() above we tested ADDing a number to
+# an existing number. The following test checks that ADD can be used to
+# create a *new* number, as if it was added to zero.
+def test_update_expression_add_numbers_new(test_table_s):
+    # Test that "ADD" can create a new number attribute:
+    p = random_string()
+    test_table_s.put_item(Item={'p': p, 'a': 'hello'})
+    test_table_s.update_item(Key={'p': p},
+        UpdateExpression='ADD b :val1',
+        ExpressionAttributeValues={':val1': 7})
+    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['b'] == 7
+    # Test that "ADD" can create an entirely new item:
+    p = random_string()
+    test_table_s.update_item(Key={'p': p},
+        UpdateExpression='ADD b :val1',
+        ExpressionAttributeValues={':val1': 8})
+    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['b'] == 8
+
 # Test "ADD" operation for sets
 def test_update_expression_add_sets(test_table_s):
    p = random_string()
@@ -687,6 +705,24 @@ def test_update_expression_add_sets(test_table_s):
            UpdateExpression='ADD a :val1',
            ExpressionAttributeValues={':val1': 'hello'})

+# In test_update_expression_add_sets() above we tested ADDing elements to an
+# existing set. The following test checks that ADD can be used to create a
+# *new* set, by adding its first item.
+def test_update_expression_add_sets_new(test_table_s):
+    # Test that "ADD" can create a new set attribute:
+    p = random_string()
+    test_table_s.put_item(Item={'p': p, 'a': 'hello'})
+    test_table_s.update_item(Key={'p': p},
+        UpdateExpression='ADD b :val1',
+        ExpressionAttributeValues={':val1': set(['dog'])})
+    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['b'] == set(['dog'])
+    # Test that "ADD" can create an entirely new item:
+    p = random_string()
+    test_table_s.update_item(Key={'p': p},
+        UpdateExpression='ADD b :val1',
+        ExpressionAttributeValues={':val1': set(['cat'])})
+    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['b'] == set(['cat'])
+
 # Test "DELETE" operation for sets
 def test_update_expression_delete_sets(test_table_s):
    p = random_string()
--- a/test/boost/cdc_generation_test.cc
+++ b/test/boost/cdc_generation_test.cc
@@ -0,0 +1,165 @@
+/*
+ * Copyright (C) 2021 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define BOOST_TEST_MODULE core
+
+#include <boost/test/unit_test.hpp>
+#include <vector>
+
+#include "cdc/generation.hh"
+#include "test/lib/random_utils.hh"
+
+namespace cdc {
+
+size_t limit_of_streams_in_topology_description();
+topology_description limit_number_of_streams_if_needed(topology_description&& desc);
+
+} // namespace cdc
+
+static cdc::topology_description create_description(const std::vector<size_t>& streams_count_per_vnode) {
+    std::vector<cdc::token_range_description> result;
+    result.reserve(streams_count_per_vnode.size());
+    size_t vnode_index = 0;
+    int64_t token = std::numeric_limits<int64_t>::min() + 100;
+    for (size_t streams_count : streams_count_per_vnode) {
+        std::vector<cdc::stream_id> streams(streams_count);
+        token += 500;
+        for (size_t idx = 0; idx < streams_count; ++idx) {
+            streams[idx] = cdc::stream_id{dht::token::from_int64(token), vnode_index};
+            token += 100;
+        }
+        token += 10000;
+        // sharding_ignore_msb should not matter for limit_number_of_streams_if_needed
+        // so we're using sharding_ignore_msb equal to 12.
+        result.push_back(
+                cdc::token_range_description{dht::token::from_int64(token), std::move(streams), uint8_t{12}});
+        ++vnode_index;
+    }
+    return cdc::topology_description(std::move(result));
+}
+
+static void assert_streams_count(const cdc::topology_description& desc, const std::vector<size_t>& expected_count) {
+    BOOST_REQUIRE_EQUAL(expected_count.size(), desc.entries().size());
+
+    for (size_t idx = 0; idx < expected_count.size(); ++idx) {
+        BOOST_REQUIRE_EQUAL(expected_count[idx], desc.entries()[idx].streams.size());
+    }
+}
+
+static void assert_stream_ids_in_right_token_ranges(const cdc::topology_description& desc) {
+    dht::token start = desc.entries().back().token_range_end;
+    dht::token end = desc.entries().front().token_range_end;
+    for (auto& stream : desc.entries().front().streams) {
+        dht::token t = stream.token();
+        if (t > end) {
+            BOOST_REQUIRE(start < t);
+        } else {
+            BOOST_REQUIRE(t <= end);
+        }
+    }
+    for (size_t idx = 1; idx < desc.entries().size(); ++idx) {
+        for (auto& stream : desc.entries()[idx].streams) {
+            BOOST_REQUIRE(desc.entries()[idx - 1].token_range_end < stream.token());
+            BOOST_REQUIRE(stream.token() <= desc.entries()[idx].token_range_end);
+        }
+    }
+
+}
+
+cdc::stream_id get_stream(const std::vector<cdc::token_range_description>& entries, dht::token tok);
+
+static void assert_random_tokens_mapped_to_streams_with_tokens_in_the_same_token_range(const cdc::topology_description& desc) {
+    for (size_t count = 0; count < 100; ++count) {
+        int64_t token_value = tests::random::get_int(std::numeric_limits<int64_t>::min(), std::numeric_limits<int64_t>::max());
+        dht::token t = dht::token::from_int64(token_value);
+        auto stream = get_stream(desc.entries(), t);
+        auto& e = desc.entries().at(stream.index());
+        BOOST_REQUIRE(std::find(e.streams.begin(), e.streams.end(), stream) != e.streams.end());
+        if (stream.index() != 0) {
+            BOOST_REQUIRE(t <= e.token_range_end);
+            BOOST_REQUIRE(t > desc.entries().at(stream.index() - 1).token_range_end);
+        }
+    }
+}
+
+BOOST_AUTO_TEST_CASE(test_cdc_generation_limitting_single_vnode_should_not_limit) {
+    cdc::topology_description given = create_description({cdc::limit_of_streams_in_topology_description()});
+
+    cdc::topology_description result = cdc::limit_number_of_streams_if_needed(std::move(given));
+
+    assert_streams_count(result, {cdc::limit_of_streams_in_topology_description()});
+    assert_stream_ids_in_right_token_ranges(result);
+    assert_random_tokens_mapped_to_streams_with_tokens_in_the_same_token_range(result);
+}
+
+BOOST_AUTO_TEST_CASE(test_cdc_generation_limitting_single_vnode_should_limit) {
+    cdc::topology_description given = create_description({cdc::limit_of_streams_in_topology_description() + 1});
+
+    cdc::topology_description result = cdc::limit_number_of_streams_if_needed(std::move(given));
+
+    assert_streams_count(result, {cdc::limit_of_streams_in_topology_description()});
+    assert_stream_ids_in_right_token_ranges(result);
+    assert_random_tokens_mapped_to_streams_with_tokens_in_the_same_token_range(result);
+}
+
+BOOST_AUTO_TEST_CASE(test_cdc_generation_limitting_multiple_vnodes_should_not_limit) {
+    size_t total = 0;
+    std::vector<size_t> streams_count_per_vnode;
+    size_t count_for_next_vnode = 1;
+    while (total + count_for_next_vnode <= cdc::limit_of_streams_in_topology_description()) {
+        streams_count_per_vnode.push_back(count_for_next_vnode);
+        total += count_for_next_vnode;
+        ++count_for_next_vnode;
+    }
+    cdc::topology_description given = create_description(streams_count_per_vnode);
+
+    cdc::topology_description result = cdc::limit_number_of_streams_if_needed(std::move(given));
+
+    assert_streams_count(result, streams_count_per_vnode);
+    assert_stream_ids_in_right_token_ranges(result);
+    assert_random_tokens_mapped_to_streams_with_tokens_in_the_same_token_range(result);
+}
+
+BOOST_AUTO_TEST_CASE(test_cdc_generation_limitting_multiple_vnodes_should_limit) {
+    size_t total = 0;
+    std::vector<size_t> streams_count_per_vnode;
+    size_t count_for_next_vnode = 1;
+    while (total + count_for_next_vnode <= cdc::limit_of_streams_in_topology_description()) {
+        streams_count_per_vnode.push_back(count_for_next_vnode);
+        total += count_for_next_vnode;
+        ++count_for_next_vnode;
+    }
+    streams_count_per_vnode.push_back(cdc::limit_of_streams_in_topology_description() - total + 1); // total is now limit + 1
+    cdc::topology_description given = create_description(streams_count_per_vnode);
+
+    cdc::topology_description result = cdc::limit_number_of_streams_if_needed(std::move(given));
+
+    BOOST_REQUIRE(streams_count_per_vnode.size() <= cdc::limit_of_streams_in_topology_description()); // guards the division below
+    size_t per_vnode_limit = cdc::limit_of_streams_in_topology_description() / streams_count_per_vnode.size();
+    for (auto& count : streams_count_per_vnode) {
+        count = std::min(count, per_vnode_limit); // expected count: each vnode capped at the per-vnode share
+    }
+
+    assert_streams_count(result, streams_count_per_vnode);
+    assert_stream_ids_in_right_token_ranges(result);
+    assert_random_tokens_mapped_to_streams_with_tokens_in_the_same_token_range(result);
+}
+
--- a/test/boost/cdc_test.cc
+++ b/test/boost/cdc_test.cc
@@ -42,16 +42,6 @@

 using namespace std::string_literals;

-static cql_test_config mk_cdc_test_config() {
-    auto ext = std::make_shared<db::extensions>();
-    ext->add_schema_extension<cdc::cdc_extension>(cdc::cdc_extension::NAME);
-    auto cfg = ::make_shared<db::config>(std::move(ext));
-    auto features = cfg->experimental_features();
-    features.emplace_back(db::experimental_features_t::CDC);
-    cfg->experimental_features(features);
-    return cql_test_config(std::move(cfg));
-};
-
 namespace cdc {
 api::timestamp_type find_timestamp(const mutation&);
 utils::UUID generate_timeuuid(api::timestamp_type);
@@ -131,7 +121,7 @@ SEASTAR_THREAD_TEST_CASE(test_find_mutation_timestamp) {
        check_stmt("DELETE vut.b FROM t WHERE pk = 0 AND ck = 0");
        check_stmt("DELETE vfut FROM t WHERE pk = 0 AND ck = 0");
        check_stmt("DELETE vstatic FROM t WHERE pk = 0");
-    }, mk_cdc_test_config()).get();
+    }).get();
 }

 SEASTAR_THREAD_TEST_CASE(test_generate_timeuuid) {
@@ -199,7 +189,7 @@ SEASTAR_THREAD_TEST_CASE(test_with_cdc_parameter) {
        test("WITH cdc = {'enabled':'false'}", "{'enabled':'true'}", "{'enabled':'false'}", {false}, {true}, {false});
        test("", "{'enabled':'true','preimage':'true','postimage':'true','ttl':'1'}", "{'enabled':'false'}", {false}, {true, true, true, 1}, {false});
        test("WITH cdc = {'enabled':'true','preimage':'true','postimage':'true','ttl':'1'}", "{'enabled':'false'}", "{'enabled':'true','preimage':'false','postimage':'true','ttl':'2'}", {true, true, true, 1}, {false}, {true, false, true, 2});
-    }, mk_cdc_test_config()).get();
+    }).get();
 }

 SEASTAR_THREAD_TEST_CASE(test_detecting_conflict_of_cdc_log_table_with_existing_table) {
@@ -213,7 +203,7 @@ SEASTAR_THREAD_TEST_CASE(test_detecting_conflict_of_cdc_log_table_with_existing_
        e.execute_cql("CREATE TABLE ks.tbl (a int PRIMARY KEY)").get();
        e.require_table_exists("ks", "tbl").get();
        BOOST_REQUIRE_THROW(e.execute_cql("ALTER TABLE ks.tbl WITH cdc = {'enabled': true}").get(), exceptions::invalid_request_exception);
-    }, mk_cdc_test_config()).get();
+    }).get();
 }

 SEASTAR_THREAD_TEST_CASE(test_permissions_of_cdc_log_table) {
@@ -247,7 +237,7 @@ SEASTAR_THREAD_TEST_CASE(test_permissions_of_cdc_log_table) {

        // Disallow DROP
        assert_unauthorized("DROP TABLE " + log_table);
-    }, mk_cdc_test_config()).get();
+    }).get();
 }

 SEASTAR_THREAD_TEST_CASE(test_disallow_cdc_on_materialized_view) {
@@ -257,7 +247,7 @@ SEASTAR_THREAD_TEST_CASE(test_disallow_cdc_on_materialized_view) {

        BOOST_REQUIRE_THROW(e.execute_cql("CREATE MATERIALIZED VIEW ks.mv AS SELECT a FROM ks.tbl PRIMARY KEY (a) WITH cdc = {'enabled': true}").get(), exceptions::invalid_request_exception);
        e.require_table_does_not_exist("ks", "mv").get();
-    }, mk_cdc_test_config()).get();
+    }).get();
 }

 SEASTAR_THREAD_TEST_CASE(test_permissions_of_cdc_description) {
@@ -285,7 +275,7 @@ SEASTAR_THREAD_TEST_CASE(test_permissions_of_cdc_description) {

        test_table("cdc_streams_descriptions");
        test_table("cdc_generation_descriptions");
-    }, mk_cdc_test_config()).get();
+    }).get();
 }

 SEASTAR_THREAD_TEST_CASE(test_cdc_log_schema) {
@@ -326,6 +316,7 @@ SEASTAR_THREAD_TEST_CASE(test_cdc_log_schema) {
        // cdc log clustering key
        assert_has_column(cdc::log_meta_column_name("operation"), byte_type);
        assert_has_column(cdc::log_meta_column_name("ttl"), long_type);
+        assert_has_column(cdc::log_meta_column_name("end_of_batch"), boolean_type);

        // pk
        assert_has_column(cdc::log_data_column_name("pk"), int32_type);
@@ -370,7 +361,7 @@ SEASTAR_THREAD_TEST_CASE(test_cdc_log_schema) {

        // Check if we missed something
        BOOST_REQUIRE_EQUAL(required_column_count, log_schema->all_columns_count());
-    }, mk_cdc_test_config()).get();
+    }).get();
 }

 static std::vector<std::vector<bytes_opt>> to_bytes(const cql_transport::messages::result_message::rows& rows) {
@@ -512,7 +503,7 @@ SEASTAR_THREAD_TEST_CASE(test_primary_key_logging) {
        // DELETE FROM ks.tbl WHERE pk = 1 AND pk2 = 11
        assert_row(1, 11);
        BOOST_REQUIRE(actual_i == actual_end);
-    }, mk_cdc_test_config()).get();
+    }).get();
 }

 SEASTAR_THREAD_TEST_CASE(test_pre_post_image_logging) {
@@ -534,6 +525,7 @@ SEASTAR_THREAD_TEST_CASE(test_pre_post_image_logging) {
            auto val_index = column_index(*rows, cdc::log_data_column_name("val"));
            auto val2_index = column_index(*rows, cdc::log_data_column_name("val2"));
            auto ttl_index = column_index(*rows, cdc::log_meta_column_name("ttl"));
+            auto eor_index = column_index(*rows, cdc::log_meta_column_name("end_of_batch"));

            auto val_type = int32_type;
            auto val = *first[0][val_index];
@@ -567,7 +559,7 @@ SEASTAR_THREAD_TEST_CASE(test_pre_post_image_logging) {
                    BOOST_REQUIRE_EQUAL(pre_image.size(), i + 1);

                    val = *pre_image.back()[val_index];
-                    // note: no val2 in pre-image, because we are not modifying it. 
+                    // note: no val2 in pre-image, because we are not modifying it.
                    BOOST_REQUIRE_EQUAL(int32_type->decompose(1111), *pre_image.back()[ck2_index]);
                    BOOST_REQUIRE_EQUAL(data_value(last), val_type->deserialize(bytes_view(val)));
                    BOOST_REQUIRE_EQUAL(bytes_opt(), pre_image.back()[ttl_index]);
@@ -583,10 +575,12 @@ SEASTAR_THREAD_TEST_CASE(test_pre_post_image_logging) {
                if (post_enabled) {
                    val = *post_image.back()[val_index];
                    val2 = *post_image.back()[val2_index];
+                    auto eor = *post_image.back()[eor_index];

                    BOOST_REQUIRE_EQUAL(int32_type->decompose(1111), *post_image.back()[ck2_index]);
                    BOOST_REQUIRE_EQUAL(data_value(nv), val_type->deserialize(bytes_view(val)));
                    BOOST_REQUIRE_EQUAL(data_value(22222), val_type->deserialize(bytes_view(val2)));
+                    BOOST_REQUIRE_EQUAL(data_value(true), boolean_type->deserialize(bytes_view(eor)));
                }

                const auto& ttl_cell = second[second.size() - 2][ttl_index];
@@ -608,7 +602,7 @@ SEASTAR_THREAD_TEST_CASE(test_pre_post_image_logging) {
                }
            }
        }
-    }, mk_cdc_test_config()).get();
+    }).get();
 }

 SEASTAR_THREAD_TEST_CASE(test_pre_post_image_logging_static_row) {
@@ -682,7 +676,7 @@ SEASTAR_THREAD_TEST_CASE(test_pre_post_image_logging_static_row) {
        test(true, false);
        test(false, true);
        test(false, false);
-    }, mk_cdc_test_config()).get();
+    }).get();
 }

 SEASTAR_THREAD_TEST_CASE(test_range_deletion) {
@@ -691,7 +685,7 @@ SEASTAR_THREAD_TEST_CASE(test_range_deletion) {
        cquery_nofail(e, "DELETE FROM ks.tbl WHERE pk = 123 AND ck > 1 AND ck < 23");
        cquery_nofail(e, "DELETE FROM ks.tbl WHERE pk = 123 AND ck >= 4 AND ck <= 56");

-        auto msg = e.execute_cql(format("SELECT \"{}\", \"{}\", \"{}\", \"{}\" FROM ks.{}", 
+        auto msg = e.execute_cql(format("SELECT \"{}\", \"{}\", \"{}\", \"{}\" FROM ks.{}",
            cdc::log_meta_column_name("time"),
            cdc::log_data_column_name("pk"),
            cdc::log_data_column_name("ck"),
@@ -726,7 +720,7 @@ SEASTAR_THREAD_TEST_CASE(test_range_deletion) {
        // ck >= 4 AND ck <= 56
        check_row(4, cdc::operation::range_delete_start_inclusive);
        check_row(56, cdc::operation::range_delete_end_inclusive);
-    }, mk_cdc_test_config()).get();
+    }).get();
 }

 SEASTAR_THREAD_TEST_CASE(test_add_columns) {
@@ -750,11 +744,11 @@ SEASTAR_THREAD_TEST_CASE(test_add_columns) {
        auto kokos = *inserts.back()[kokos_index];

        BOOST_REQUIRE_EQUAL(data_value("kaka"), kokos_type->deserialize(bytes_view(kokos)));
-    }, mk_cdc_test_config()).get();
+    }).get();
 }

-// #5582 - just quickly test that we can create the cdc enabled table on a different shard 
-// and still get the logs proper. 
+// #5582 - just quickly test that we can create the cdc enabled table on a different shard
+// and still get the logs proper.
 SEASTAR_THREAD_TEST_CASE(test_cdc_across_shards) {
    do_with_cql_env_thread([](cql_test_env& e) {
        if (smp::count < 2) {
@@ -772,7 +766,7 @@ SEASTAR_THREAD_TEST_CASE(test_cdc_across_shards) {
        auto rows = select_log(e, "tbl");

        BOOST_REQUIRE(!to_bytes_filtered(*rows, cdc::operation::insert).empty());
-    }, mk_cdc_test_config()).get();
+    }).get();
 }

 SEASTAR_THREAD_TEST_CASE(test_negative_ttl_fail) {
@@ -780,7 +774,7 @@ SEASTAR_THREAD_TEST_CASE(test_negative_ttl_fail) {
        BOOST_REQUIRE_EXCEPTION(e.execute_cql("CREATE TABLE ks.fail (a int PRIMARY KEY, b int) WITH cdc = {'enabled':true,'ttl':'-1'}").get0(),
                exceptions::configuration_exception,
                exception_predicate::message_contains("ttl"));
-    }, mk_cdc_test_config()).get();
+    }).get();
 }

 SEASTAR_THREAD_TEST_CASE(test_ttls) {
@@ -830,11 +824,11 @@ SEASTAR_THREAD_TEST_CASE(test_ttls) {
                auto cell_ttl_seconds = value_cast<int32_t>(cell_ttl);
                // 30% tolerance in case of slow execution (a little flaky...)
                BOOST_REQUIRE_CLOSE((float)cell_ttl_seconds, (float)ttl_seconds, 30.f);
-            }            
+            }
        };
        test_ttl(0);
        test_ttl(10);
-    }, mk_cdc_test_config()).get();
+    }).get();
 }

 // helper funcs + structs for collection testing
@@ -851,13 +845,13 @@ struct col_test {
    data_value post = data_value::make_null(int32_type); // whatever
 };

-// iterate a set of updates and verify pre and delta values. 
+// iterate a set of updates and verify pre and delta values.
 static void test_collection(cql_test_env& e, data_type val_type, data_type del_type, std::vector<col_test> tests, translate_func f = [](data_value v) { return v; }) {
    auto col_type = val_type;

    for (auto& t : tests) {
        cquery_nofail(e, t.update);
-        
+
        auto rows = select_log(e, "tbl");
        auto pre_image = to_bytes_filtered(*rows, cdc::operation::pre_image);
        auto updates = to_bytes_filtered(*rows, cdc::operation::update);
@@ -918,7 +912,7 @@ SEASTAR_THREAD_TEST_CASE(test_map_logging) {
        auto map_keys_type = set_type_impl::get_instance(utf8_type, false);

        test_collection(e, map_type, map_keys_type, {
-            { 
+            {
                "UPDATE ks.tbl set val = { 'apa':'ko' } where pk=1 and pk2=11 and ck=111",
                data_value::make_null(map_type), // no previous value
                {
@@ -930,7 +924,7 @@ SEASTAR_THREAD_TEST_CASE(test_map_logging) {
                },
                ::make_map_value(map_type, { { "apa", "ko" } })
            },
-            { 
+            {
                "UPDATE ks.tbl set val = val + { 'ninja':'mission' } where pk=1 and pk2=11 and ck=111",
                ::make_map_value(map_type, { { "apa", "ko" } }),
                {
@@ -941,9 +935,9 @@ SEASTAR_THREAD_TEST_CASE(test_map_logging) {
                },
                ::make_map_value(map_type, { { "apa", "ko" }, { "ninja", "mission" } })
            },
-            { 
+            {
                "UPDATE ks.tbl set val['ninja'] = 'shuriken' where pk=1 and pk2=11 and ck=111",
-                ::make_map_value(map_type, { { "apa", "ko" }, { "ninja", "mission" } }), 
+                ::make_map_value(map_type, { { "apa", "ko" }, { "ninja", "mission" } }),
                {
                    {
                        ::make_map_value(map_type, { { "ninja", "shuriken" } }),
@@ -952,9 +946,9 @@ SEASTAR_THREAD_TEST_CASE(test_map_logging) {
                },
                ::make_map_value(map_type, { { "apa", "ko" }, { "ninja", "shuriken" } })
            },
-            { 
+            {
                "UPDATE ks.tbl set val['apa'] = null where pk=1 and pk2=11 and ck=111",
-                ::make_map_value(map_type, { { "apa", "ko" }, { "ninja", "shuriken" } }), 
+                ::make_map_value(map_type, { { "apa", "ko" }, { "ninja", "shuriken" } }),
                {
                    {
                        data_value::make_null(map_type),
@@ -963,9 +957,9 @@ SEASTAR_THREAD_TEST_CASE(test_map_logging) {
                },
                ::make_map_value(map_type, { { "ninja", "shuriken" } })
            },
-            { 
+            {
                "UPDATE ks.tbl set val['ninja'] = null, val['ola'] = 'kokos' where pk=1 and pk2=11 and ck=111",
-                ::make_map_value(map_type, { { "ninja", "shuriken" } }), 
+                ::make_map_value(map_type, { { "ninja", "shuriken" } }),
                {
                    {
                        ::make_map_value(map_type, { { "ola", "kokos" } }),
@@ -974,9 +968,9 @@ SEASTAR_THREAD_TEST_CASE(test_map_logging) {
                },
                ::make_map_value(map_type, { { "ola", "kokos" } })
            },
-            { 
+            {
                "UPDATE ks.tbl set val = { 'bolla':'trolla', 'kork':'skruv' } where pk=1 and pk2=11 and ck=111",
-                ::make_map_value(map_type, { { "ola", "kokos" } }), 
+                ::make_map_value(map_type, { { "ola", "kokos" } }),
                {
                    {
                        ::make_map_value(map_type, { { "bolla", "trolla" }, { "kork", "skruv" } }),
@@ -988,7 +982,7 @@ SEASTAR_THREAD_TEST_CASE(test_map_logging) {
            }

        });
-    }, mk_cdc_test_config()).get();
+    }).get();
 }

 SEASTAR_THREAD_TEST_CASE(test_set_logging) {
@@ -999,7 +993,7 @@ SEASTAR_THREAD_TEST_CASE(test_set_logging) {
        });

        auto set_type = set_type_impl::get_instance(utf8_type, false);
-        
+
        test_collection(e, set_type, set_type, {
            {
                "UPDATE ks.tbl set val = { 'apa', 'ko' } where pk=1 and pk2=11 and ck=111",
@@ -1026,7 +1020,7 @@ SEASTAR_THREAD_TEST_CASE(test_set_logging) {
            },
            {
                "UPDATE ks.tbl set val = val - { 'apa' } where pk=1 and pk2=11 and ck=111",
-                ::make_set_value(set_type, { "apa", "ko", "mission", "ninja" }), 
+                ::make_set_value(set_type, { "apa", "ko", "mission", "ninja" }),
                {
                    {
                        data_value::make_null(set_type),
@@ -1037,7 +1031,7 @@ SEASTAR_THREAD_TEST_CASE(test_set_logging) {
            },
            {
                "UPDATE ks.tbl set val = val - { 'mission' }, val = val + { 'nils' } where pk=1 and pk2=11 and ck=111",
-                ::make_set_value(set_type, { "ko", "mission", "ninja" }), 
+                ::make_set_value(set_type, { "ko", "mission", "ninja" }),
                {
                    {
                        ::make_set_value(set_type, { "nils" }),
@@ -1059,7 +1053,7 @@ SEASTAR_THREAD_TEST_CASE(test_set_logging) {
                ::make_set_value(set_type, { "bolla", "trolla" })
            }
        });
-    }, mk_cdc_test_config()).get();
+    }).get();
 }

 SEASTAR_THREAD_TEST_CASE(test_list_logging) {
@@ -1072,11 +1066,11 @@ SEASTAR_THREAD_TEST_CASE(test_list_logging) {
        auto list_type = list_type_impl::get_instance(utf8_type, false);
        auto uuids_type = set_type_impl::get_instance(timeuuid_type, false);
        auto val_type = map_type_impl::get_instance(list_type->name_comparator(), list_type->value_comparator(), false);
-        
+
        test_collection(e, val_type, uuids_type, {
            {
                "UPDATE ks.tbl set val = [ 'apa', 'ko' ] where pk=1 and pk2=11 and ck=111",
-                data_value::make_null(list_type), 
+                data_value::make_null(list_type),
                {
                    {
                        ::make_list_value(list_type, { "apa", "ko" }),
@@ -1121,7 +1115,7 @@ SEASTAR_THREAD_TEST_CASE(test_list_logging) {
            },
            {
                "UPDATE ks.tbl set val[0] = 'babar' where pk=1 and pk2=11 and ck=111",
-                ::make_list_value(list_type, { "apa", "ko", "ninja", "mission" }), 
+                ::make_list_value(list_type, { "apa", "ko", "ninja", "mission" }),
                {
                    {
                        ::make_list_value(list_type, { "babar" }),
@@ -1151,7 +1145,7 @@ SEASTAR_THREAD_TEST_CASE(test_list_logging) {
            }
            return ::make_list_value(list_type, std::move(cpy));
        });
-    }, mk_cdc_test_config()).get();
+    }).get();
 }

 SEASTAR_THREAD_TEST_CASE(test_udt_logging) {
@@ -1163,7 +1157,7 @@ SEASTAR_THREAD_TEST_CASE(test_udt_logging) {
            e.execute_cql("DROP TYPE ks.mytype").get();
        });

-        auto udt_type = user_type_impl::get_instance("ks", to_bytes("mytype"), 
+        auto udt_type = user_type_impl::get_instance("ks", to_bytes("mytype"),
            { to_bytes("field0"), to_bytes("field1") },
            { int32_type, utf8_type },
            false
@@ -1171,18 +1165,18 @@ SEASTAR_THREAD_TEST_CASE(test_udt_logging) {
        auto index_set_type = set_type_impl::get_instance(short_type, false);
        auto f0_type = int32_type;
        auto f1_type = utf8_type;
-        
+
        auto make_tuple = [&](std::optional<std::optional<int32_t>> i, std::optional<std::optional<sstring>> s) {
            return ::make_user_value(udt_type, {
                i ? ::data_value(*i) : data_value::make_null(f0_type),
                s ? ::data_value(*s) : data_value::make_null(f1_type),
            });
        };
-        
+
        test_collection(e, udt_type, index_set_type, {
            {
                "UPDATE ks.tbl set val = { field0: 12, field1: 'ko' } where pk=1 and pk2=11 and ck=111",
-                data_value::make_null(udt_type), 
+                data_value::make_null(udt_type),
                {
                    {
                        make_tuple(12, "ko"),
@@ -1238,7 +1232,7 @@ SEASTAR_THREAD_TEST_CASE(test_udt_logging) {
                make_tuple(1, "bolla")
            },
        });
-    }, mk_cdc_test_config()).get();
+    }).get();
 }

 SEASTAR_THREAD_TEST_CASE(test_frozen_logging) {
@@ -1289,7 +1283,7 @@ SEASTAR_THREAD_TEST_CASE(test_frozen_logging) {
        test_frozen("frozen<set<text>>", "{'a', 'bb', 'ccc'}");
        test_frozen("frozen<map<text, text>>", "{'a': 'bb', 'ccc': 'dddd'}");
        test_frozen("frozen<udt>", "{a: 'bb', ccc: 'dddd'}");
-    }, mk_cdc_test_config()).get();
+    }).get();
 }

 SEASTAR_THREAD_TEST_CASE(test_update_insert_delete_distinction) {
@@ -1321,7 +1315,32 @@ SEASTAR_THREAD_TEST_CASE(test_update_insert_delete_distinction) {

        BOOST_REQUIRE_EQUAL(results[3].size(), 1);
        BOOST_REQUIRE_EQUAL(*results[3].front(), data_value(static_cast<int8_t>(cdc::operation::row_delete)).serialize_nonnull()); // log entry from (3)
-    }, mk_cdc_test_config()).get();
+    }).get();
+}
+
+static std::vector<std::vector<data_value>> get_result(cql_test_env& e,
+        const std::vector<data_type>& col_types, const sstring& query) {
+    auto deser = [] (const data_type& t, const bytes_opt& b) -> data_value {
+        if (!b) {
+            return data_value::make_null(t);
+        }
+        return t->deserialize(*b);
+    };
+
+    auto msg = e.execute_cql(query).get0();
+    auto rows = dynamic_pointer_cast<cql_transport::messages::result_message::rows>(msg);
+    BOOST_REQUIRE(rows);
+
+    std::vector<std::vector<data_value>> res;
+    for (auto&& r: to_bytes(*rows)) {
+        BOOST_REQUIRE_LE(col_types.size(), r.size());
+        std::vector<data_value> res_r;
+        for (size_t i = 0; i < col_types.size(); ++i) {
+            res_r.push_back(deser(col_types[i], r[i]));
+        }
+        res.push_back(std::move(res_r));
+    }
+    return res;
 }

 SEASTAR_THREAD_TEST_CASE(test_change_splitting) {
@@ -1346,28 +1365,8 @@ SEASTAR_THREAD_TEST_CASE(test_change_splitting) {
            return make_set_value(keys_type, std::move(s));
        };

-        auto deser = [] (const data_type& t, const bytes_opt& b) -> data_value {
-            if (!b) {
-                return data_value::make_null(t);
-            }
-            return t->deserialize(*b);
-        };
-
        auto get_result = [&] (const std::vector<data_type>& col_types, const sstring& s) -> std::vector<std::vector<data_value>> {
-            auto msg = e.execute_cql(s).get0();
-            auto rows = dynamic_pointer_cast<cql_transport::messages::result_message::rows>(msg);
-            BOOST_REQUIRE(rows);
-
-            std::vector<std::vector<data_value>> res;
-            for (auto&& r: to_bytes(*rows)) {
-                BOOST_REQUIRE_LE(col_types.size(), r.size());
-                std::vector<data_value> res_r;
-                for (size_t i = 0; i < col_types.size(); ++i) {
-                    res_r.push_back(deser(col_types[i], r[i]));
-                }
-                res.push_back(std::move(res_r));
-            }
-            return res;
+            return ::get_result(e, col_types, s);
        };

        cquery_nofail(e, "create table ks.t (pk int, ck int, s int static, v1 int, v2 int, m map<int, int>, primary key (pk, ck)) with cdc = {'enabled':true}");
@@ -1566,7 +1565,7 @@ SEASTAR_THREAD_TEST_CASE(test_change_splitting) {
            };
            BOOST_REQUIRE_EQUAL(expected, result);
        }
-    }, mk_cdc_test_config()).get();
+    }).get();
 }

 SEASTAR_THREAD_TEST_CASE(test_batch_with_row_delete) {
@@ -1630,7 +1629,7 @@ SEASTAR_THREAD_TEST_CASE(test_batch_with_row_delete) {
            BOOST_REQUIRE_EQUAL(deser(s_type, r[3]), er[3]);
            BOOST_REQUIRE_EQUAL(deser(oper_type, r[4]), er[4]);
        }
-    }, mk_cdc_test_config()).get();
+    }).get();
 }

 struct image_set {
@@ -1939,7 +1938,7 @@ void test_batch_images(bool preimage, bool postimage) {
                }
            }
        }, preimage, postimage);
-    }, mk_cdc_test_config()).get();
+    }).get();
 }

 SEASTAR_THREAD_TEST_CASE(test_batch_pre_image) {
@@ -1953,3 +1952,24 @@ SEASTAR_THREAD_TEST_CASE(test_batch_post_image) {
 SEASTAR_THREAD_TEST_CASE(test_batch_pre_post_image) {
    test_batch_images(true, true);
 }
+
+// Regression test for #7716
+SEASTAR_THREAD_TEST_CASE(test_postimage_with_no_regular_columns) {
+    do_with_cql_env_thread([] (cql_test_env& e) {
+        using oper_ut = std::underlying_type_t<cdc::operation>;
+
+        cquery_nofail(e, "create table ks.t (pk int, ck int, primary key (pk, ck)) with cdc = {'enabled': true, 'postimage': true}");
+        cquery_nofail(e, "insert into ks.t (pk, ck) values (1, 2)");
+
+        auto result = get_result(e,
+            {data_type_for<oper_ut>(), int32_type, int32_type},
+            "select \"cdc$operation\", pk, ck from ks.t_scylla_cdc_log");
+
+        std::vector<std::vector<data_value>> expected = {
+            { oper_ut(cdc::operation::insert), int32_t(1), int32_t(2) },
+            { oper_ut(cdc::operation::post_image), int32_t(1), int32_t(2) },
+        };
+
+        BOOST_REQUIRE_EQUAL(expected, result);
+    }).get();
+}
--- a/test/boost/config_test.cc
+++ b/test/boost/config_test.cc
@@ -931,10 +931,11 @@ SEASTAR_TEST_CASE(test_parse_experimental_features_cdc) {
    auto cfg_ptr = std::make_unique<config>();
    config& cfg = *cfg_ptr;
    cfg.read_from_yaml("experimental_features:\n    - cdc\n", throw_on_error);
-    BOOST_CHECK_EQUAL(cfg.experimental_features(), features{ef::CDC});
-    BOOST_CHECK(cfg.check_experimental(ef::CDC));
+    BOOST_CHECK_EQUAL(cfg.experimental_features(), features{ef::UNUSED_CDC});
+    BOOST_CHECK(cfg.check_experimental(ef::UNUSED_CDC));
    BOOST_CHECK(!cfg.check_experimental(ef::UNUSED));
    BOOST_CHECK(!cfg.check_experimental(ef::UDF));
+    BOOST_CHECK(!cfg.check_experimental(ef::ALTERNATOR_STREAMS));
    return make_ready_future();
 }

@@ -943,9 +944,10 @@ SEASTAR_TEST_CASE(test_parse_experimental_features_unused) {
    config& cfg = *cfg_ptr;
    cfg.read_from_yaml("experimental_features:\n    - lwt\n", throw_on_error);
    BOOST_CHECK_EQUAL(cfg.experimental_features(), features{ef::UNUSED});
-    BOOST_CHECK(!cfg.check_experimental(ef::CDC));
+    BOOST_CHECK(!cfg.check_experimental(ef::UNUSED_CDC));
    BOOST_CHECK(cfg.check_experimental(ef::UNUSED));
    BOOST_CHECK(!cfg.check_experimental(ef::UDF));
+    BOOST_CHECK(!cfg.check_experimental(ef::ALTERNATOR_STREAMS));
    return make_ready_future();
 }

@@ -954,9 +956,22 @@ SEASTAR_TEST_CASE(test_parse_experimental_features_udf) {
    config& cfg = *cfg_ptr;
    cfg.read_from_yaml("experimental_features:\n    - udf\n", throw_on_error);
    BOOST_CHECK_EQUAL(cfg.experimental_features(), features{ef::UDF});
-    BOOST_CHECK(!cfg.check_experimental(ef::CDC));
+    BOOST_CHECK(!cfg.check_experimental(ef::UNUSED_CDC));
    BOOST_CHECK(!cfg.check_experimental(ef::UNUSED));
    BOOST_CHECK(cfg.check_experimental(ef::UDF));
+    BOOST_CHECK(!cfg.check_experimental(ef::ALTERNATOR_STREAMS));
+    return make_ready_future();
+}
+
+SEASTAR_TEST_CASE(test_parse_experimental_features_alternator_streams) {
+    auto cfg_ptr = std::make_unique<config>();
+    config& cfg = *cfg_ptr;
+    cfg.read_from_yaml("experimental_features:\n    - alternator-streams\n", throw_on_error);
+    BOOST_CHECK_EQUAL(cfg.experimental_features(), features{ef::ALTERNATOR_STREAMS});
+    BOOST_CHECK(!cfg.check_experimental(ef::UNUSED_CDC));
+    BOOST_CHECK(!cfg.check_experimental(ef::UNUSED));
+    BOOST_CHECK(!cfg.check_experimental(ef::UDF));
+    BOOST_CHECK(cfg.check_experimental(ef::ALTERNATOR_STREAMS));
    return make_ready_future();
 }

@@ -964,10 +979,11 @@ SEASTAR_TEST_CASE(test_parse_experimental_features_multiple) {
    auto cfg_ptr = std::make_unique<config>();
    config& cfg = *cfg_ptr;
    cfg.read_from_yaml("experimental_features:\n    - cdc\n    - lwt\n    - cdc\n", throw_on_error);
-    BOOST_CHECK_EQUAL(cfg.experimental_features(), (features{ef::CDC, ef::UNUSED, ef::CDC}));
-    BOOST_CHECK(cfg.check_experimental(ef::CDC));
+    BOOST_CHECK_EQUAL(cfg.experimental_features(), (features{ef::UNUSED_CDC, ef::UNUSED, ef::UNUSED_CDC}));
+    BOOST_CHECK(cfg.check_experimental(ef::UNUSED_CDC));
    BOOST_CHECK(cfg.check_experimental(ef::UNUSED));
    BOOST_CHECK(!cfg.check_experimental(ef::UDF));
+    BOOST_CHECK(!cfg.check_experimental(ef::ALTERNATOR_STREAMS));
    return make_ready_future();
 }

@@ -979,9 +995,10 @@ SEASTAR_TEST_CASE(test_parse_experimental_features_invalid) {
                       [&cfg] (const sstring& opt, const sstring& msg, std::optional<value_status> status) {
                           BOOST_REQUIRE_EQUAL(opt, "experimental_features");
                           BOOST_REQUIRE_NE(msg.find("line 2, column 7"), msg.npos);
-                           BOOST_CHECK(!cfg.check_experimental(ef::CDC));
+                           BOOST_CHECK(!cfg.check_experimental(ef::UNUSED_CDC));
                           BOOST_CHECK(!cfg.check_experimental(ef::UNUSED));
                           BOOST_CHECK(!cfg.check_experimental(ef::UDF));
+                           BOOST_CHECK(!cfg.check_experimental(ef::ALTERNATOR_STREAMS));
                       });
    return make_ready_future();
 }
@@ -990,9 +1007,10 @@ SEASTAR_TEST_CASE(test_parse_experimental_true) {
    auto cfg_ptr = std::make_unique<config>();
    config& cfg = *cfg_ptr;
    cfg.read_from_yaml("experimental: true", throw_on_error);
-    BOOST_CHECK(cfg.check_experimental(ef::CDC));
+    BOOST_CHECK(!cfg.check_experimental(ef::UNUSED_CDC));
    BOOST_CHECK(!cfg.check_experimental(ef::UNUSED));
    BOOST_CHECK(cfg.check_experimental(ef::UDF));
+    BOOST_CHECK(cfg.check_experimental(ef::ALTERNATOR_STREAMS));
    return make_ready_future();
 }

@@ -1000,8 +1018,9 @@ SEASTAR_TEST_CASE(test_parse_experimental_false) {
    auto cfg_ptr = std::make_unique<config>();
    config& cfg = *cfg_ptr;
    cfg.read_from_yaml("experimental: false", throw_on_error);
-    BOOST_CHECK(!cfg.check_experimental(ef::CDC));
+    BOOST_CHECK(!cfg.check_experimental(ef::UNUSED_CDC));
    BOOST_CHECK(!cfg.check_experimental(ef::UNUSED));
    BOOST_CHECK(!cfg.check_experimental(ef::UDF));
+    BOOST_CHECK(!cfg.check_experimental(ef::ALTERNATOR_STREAMS));
    return make_ready_future();
 }
--- a/test/boost/cql_query_large_test.cc
+++ b/test/boost/cql_query_large_test.cc
@@ -128,12 +128,14 @@ SEASTAR_THREAD_TEST_CASE(test_large_data) {
            });
        }).get();

+        // Since the deletion of large data entries has been removed,
+        // expect the record to still be present.
        assert_that(e.execute_cql("select partition_key from system.large_rows where table_name = 'tbl' allow filtering;").get0())
            .is_rows()
-            .is_empty();
+            .with_size(1);
        assert_that(e.execute_cql("select partition_key from system.large_cells where table_name = 'tbl' allow filtering;").get0())
            .is_rows()
-            .is_empty();
+            .with_size(1);

        return make_ready_future<>();
    }, cfg).get();
--- a/test/boost/extensions_test.cc
+++ b/test/boost/extensions_test.cc
@@ -118,7 +118,6 @@ SEASTAR_TEST_CASE(cdc_schema_extension) {
    // Extensions have to be registered here - config needs to have them before construction of test env.
    ext->add_schema_extension<cdc::cdc_extension>(cdc::cdc_extension::NAME);
    auto cfg = ::make_shared<db::config>(ext);
-    cfg->experimental_features({db::experimental_features_t::feature::CDC});

    return do_with_cql_env([] (cql_test_env& e) {
        auto assert_ext_correctness = [] (cql_test_env& e, cdc::cdc_extension expected_ext) {
--- a/test/boost/mutation_reader_test.cc
+++ b/test/boost/mutation_reader_test.cc
@@ -2715,7 +2715,7 @@ SEASTAR_THREAD_TEST_CASE(test_queue_reader) {
        }
    }

-    // abort()
+    // abort() -- check that consumer is aborted
    {
        auto [reader, handle] = make_queue_reader(gen.schema(), tests::make_permit());
        auto fill_buffer_fut = reader.fill_buffer(db::no_timeout);
@@ -2730,6 +2730,28 @@ SEASTAR_THREAD_TEST_CASE(test_queue_reader) {

        BOOST_REQUIRE_THROW(fill_buffer_fut.get(), std::runtime_error);
        BOOST_REQUIRE_THROW(handle.push(mutation_fragment(*gen.schema(), tests::make_permit(), partition_end{})).get(), std::runtime_error);
+        BOOST_REQUIRE(!reader.is_end_of_stream());
+    }
+
+    // abort() -- check that producer is aborted
+    {
+        auto [reader, handle] = make_queue_reader(gen.schema(), tests::make_permit());
+        reader.set_max_buffer_size(1);
+
+        auto expected_reader = flat_mutation_reader_from_mutations(tests::make_permit(), expected_muts);
+
+        auto push_fut = make_ready_future<>();
+        while (push_fut.available()) {
+            push_fut = handle.push(std::move(*expected_reader(db::no_timeout).get0()));
+        }
+
+        BOOST_REQUIRE(!push_fut.available());
+
+        handle.abort(std::make_exception_ptr<std::runtime_error>(std::runtime_error("error")));
+
+        BOOST_REQUIRE_THROW(reader.fill_buffer(db::no_timeout).get(), std::runtime_error);
+        BOOST_REQUIRE_THROW(push_fut.get(), std::runtime_error);
+        BOOST_REQUIRE(!reader.is_end_of_stream());
    }

    // Detached handle
--- a/test/boost/mvcc_test.cc
+++ b/test/boost/mvcc_test.cc
@@ -49,7 +49,7 @@ static thread_local mutation_application_stats app_stats_for_tests;
 // Verifies that tombstones in "list" are monotonic, overlap with the requested range,
 // and have information equivalent with "expected" in that range.
 static
-void check_tombstone_slice(const schema& s, std::vector<range_tombstone> list,
+void check_tombstone_slice(const schema& s, const utils::chunked_vector<range_tombstone>& list,
    const query::clustering_range& range,
    std::initializer_list<range_tombstone> expected)
 {
--- a/test/boost/schema_change_test.cc
+++ b/test/boost/schema_change_test.cc
@@ -607,7 +607,7 @@ future<> test_schema_digest_does_not_change_with_disabled_features(sstring data_
    auto db_cfg_ptr = ::make_shared<db::config>(std::move(extensions));
    auto& db_cfg = *db_cfg_ptr;
    db_cfg.enable_user_defined_functions({true}, db::config::config_source::CommandLine);
-    db_cfg.experimental_features({experimental_features_t::UDF, experimental_features_t::CDC}, db::config::config_source::CommandLine);
+    db_cfg.experimental_features({experimental_features_t::UDF}, db::config::config_source::CommandLine);
    if (regenerate) {
        db_cfg.data_file_directories({data_dir}, db::config::config_source::CommandLine);
    } else {
--- a/test/boost/secondary_index_test.cc
+++ b/test/boost/secondary_index_test.cc
@@ -29,6 +29,7 @@
 #include "types/set.hh"
 #include "test/lib/exception_utils.hh"
 #include "cql3/statements/select_statement.hh"
+#include "test/lib/select_statement_utils.hh"


 SEASTAR_TEST_CASE(test_secondary_index_regular_column_query) {
@@ -1208,6 +1209,293 @@ SEASTAR_TEST_CASE(test_indexing_paging_and_aggregation) {
    });
 }

+// Verifies that both "SELECT * [rest_of_query]" and "SELECT count(*) [rest_of_query]"
+// return the expected number of rows.
+void assert_select_count_and_select_rows_has_size(
+        cql_test_env& e, 
+        const sstring& rest_of_query, int64_t expected_count, 
+        const std::experimental::source_location& loc = std::experimental::source_location::current()) {
+    eventually([&] { 
+        require_rows(e, "SELECT count(*) " + rest_of_query, {
+            { long_type->decompose(expected_count) }
+        }, loc);
+        auto res = cquery_nofail(e, "SELECT * " + rest_of_query, nullptr, loc);
+        try {
+            assert_that(res).is_rows().with_size(expected_count);
+        } catch (const std::exception& e) {
+            BOOST_FAIL(format("is_rows/with_size failed: {}\n{}:{}: originally from here",
+                              e.what(), loc.file_name(), loc.line()));
+        }
+    });
+}
+
+static constexpr int page_scenarios_page_size = 20;
+static constexpr int page_scenarios_row_count = 2 * page_scenarios_page_size + 5;
+static constexpr int page_scenarios_initial_count = 3;
+static constexpr int page_scenarios_window_size = 4;
+static constexpr int page_scenarios_just_before_first_page = page_scenarios_page_size - page_scenarios_window_size;
+static constexpr int page_scenarios_just_after_first_page = page_scenarios_page_size + page_scenarios_window_size;    
+static constexpr int page_scenarios_just_before_second_page = 2 * page_scenarios_page_size - page_scenarios_window_size;
+static constexpr int page_scenarios_just_after_second_page = 2 * page_scenarios_page_size + page_scenarios_window_size;    
+
+static_assert(page_scenarios_initial_count < page_scenarios_row_count);
+static_assert(page_scenarios_window_size < page_scenarios_page_size);
+static_assert(page_scenarios_just_after_second_page < page_scenarios_row_count);
+
+// Executes `insert` lambda page_scenarios_row_count times. 
+// Runs `validate` lambda in a few scenarios:
+//
+// 1. After a small number of `insert`s
+// 2. In a window from just before to just after `insert`s were executed
+//    page_scenarios_page_size times
+// 3. In a window from just before to just after `insert`s were executed
+//    2 * page_scenarios_page_size times
+// 4. After all `insert`s
+void test_with_different_page_scenarios(
+    noncopyable_function<void (int)> insert, noncopyable_function<void (int)> validate) {
+
+    int current_row = 0;
+    for (; current_row < page_scenarios_initial_count; current_row++) {
+        insert(current_row);
+        validate(current_row + 1);
+    }
+
+    for (; current_row < page_scenarios_just_before_first_page; current_row++) {
+        insert(current_row);
+    }
+
+    for (; current_row < page_scenarios_just_after_first_page; current_row++) {
+        insert(current_row);
+        validate(current_row + 1);
+    }
+
+    for (; current_row < page_scenarios_just_before_second_page; current_row++) {
+        insert(current_row);
+    }
+
+    for (; current_row < page_scenarios_just_after_second_page; current_row++) {
+        insert(current_row);
+        validate(current_row + 1);
+    }   
+
+    for (; current_row < page_scenarios_row_count; current_row++) {
+        insert(current_row);
+    }
+
+    // No +1, because we have just left the for loop and current_row was already incremented.
+    validate(current_row);
+}
+
+SEASTAR_TEST_CASE(test_secondary_index_on_ck_first_column_and_aggregation) {
+    // Tests aggregation on table with secondary index on first column
+    // of clustering key. This is the "partition_slices" case of 
+    // indexed_table_select_statement::do_execute.
+
+    return do_with_cql_env_thread([] (cql_test_env& e) {
+        cql3::statements::set_internal_paging_size(page_scenarios_page_size).get();
+
+        // Explicitly reproduce the first failing example in issue #7355.
+        cquery_nofail(e, "CREATE TABLE t1 (pk1 int, pk2 int, ck int, primary key((pk1, pk2), ck))");
+        cquery_nofail(e, "CREATE INDEX ON t1(ck)");
+
+        cquery_nofail(e, "INSERT INTO t1(pk1, pk2, ck) VALUES (1, 2, 3)");
+        assert_select_count_and_select_rows_has_size(e, "FROM t1 WHERE ck = 3", 1);
+
+        cquery_nofail(e, "INSERT INTO t1(pk1, pk2, ck) VALUES (1, 2, 4)");
+        cquery_nofail(e, "INSERT INTO t1(pk1, pk2, ck) VALUES (1, 2, 5)");
+        assert_select_count_and_select_rows_has_size(e, "FROM t1 WHERE ck = 3", 1);
+
+        cquery_nofail(e, "INSERT INTO t1(pk1, pk2, ck) VALUES (2, 2, 3)");
+        assert_select_count_and_select_rows_has_size(e, "FROM t1 WHERE ck = 3", 2);
+
+        cquery_nofail(e, "INSERT INTO t1(pk1, pk2, ck) VALUES (2, 1, 3)");
+        assert_select_count_and_select_rows_has_size(e, "FROM t1 WHERE ck = 3", 3);
+
+        // Test a case when there are a lot of small partitions (more than a page size).
+        cquery_nofail(e, "CREATE TABLE t2 (pk int, ck int, primary key(pk, ck))");
+        cquery_nofail(e, "CREATE INDEX ON t2(ck)");
+
+        // "Decoy" rows - they should not be counted (previously they were incorrectly counted in,
+        // see issue #7355).
+        cquery_nofail(e, "INSERT INTO t2(pk, ck) VALUES (0, -2)");
+        cquery_nofail(e, "INSERT INTO t2(pk, ck) VALUES (0, 3)");
+        cquery_nofail(e, format("INSERT INTO t2(pk, ck) VALUES ({}, 3)", page_scenarios_just_after_first_page).c_str());
+
+        test_with_different_page_scenarios([&](int current_row) {
+            cquery_nofail(e, format("INSERT INTO t2(pk, ck) VALUES ({}, 1)", current_row).c_str());
+        }, [&](int rows_inserted) {
+            assert_select_count_and_select_rows_has_size(e, "FROM t2 WHERE ck = 1", rows_inserted);
+          eventually([&] { 
+            auto res = cquery_nofail(e, "SELECT pk FROM t2 WHERE ck = 1 GROUP BY pk");
+            assert_that(res).is_rows().with_size(rows_inserted);
+            res = cquery_nofail(e, "SELECT pk, ck FROM t2 WHERE ck = 1 GROUP BY pk, ck");
+            assert_that(res).is_rows().with_size(rows_inserted);
+            require_rows(e, "SELECT sum(pk) FROM t2 WHERE ck = 1", {
+               { int32_type->decompose(int32_t(rows_inserted * (rows_inserted - 1) / 2)) }
+            });
+          });
+        });
+
+        // Test a case when there is a single large partition (larger than a page size).
+        cquery_nofail(e, "CREATE TABLE t3 (pk int, ck1 int, ck2 int, primary key(pk, ck1, ck2))");
+        cquery_nofail(e, "CREATE INDEX ON t3(ck1)");
+
+        // "Decoy" rows
+        cquery_nofail(e, "INSERT INTO t3(pk, ck1, ck2) VALUES (1, 0, 0)");
+        cquery_nofail(e, "INSERT INTO t3(pk, ck1, ck2) VALUES (1, 2, 0)");
+
+        test_with_different_page_scenarios([&](int current_row) {
+            cquery_nofail(e, format("INSERT INTO t3(pk, ck1, ck2) VALUES (1, 1, {})", current_row).c_str());
+        }, [&](int rows_inserted) {
+            assert_select_count_and_select_rows_has_size(e, "FROM t3 WHERE ck1 = 1", rows_inserted);
+          eventually([&] { 
+            auto res = cquery_nofail(e, "SELECT pk FROM t3 WHERE ck1 = 1 GROUP BY pk");
+            assert_that(res).is_rows().with_size(1);
+            res = cquery_nofail(e, "SELECT pk, ck1 FROM t3 WHERE ck1 = 1 GROUP BY pk, ck1");
+            assert_that(res).is_rows().with_size(1);
+            res = cquery_nofail(e, "SELECT pk, ck1, ck2 FROM t3 WHERE ck1 = 1 GROUP BY pk, ck1, ck2");
+            assert_that(res).is_rows().with_size(rows_inserted);
+            require_rows(e, "SELECT avg(ck2) FROM t3 WHERE ck1 = 1", {
+                { int32_type->decompose(int32_t((rows_inserted * (rows_inserted - 1) / 2) / rows_inserted)) }
+            }); 
+          });
+        });
+
+        cql3::statements::reset_internal_paging_size().get();
+    });
+}
+
+SEASTAR_TEST_CASE(test_secondary_index_on_pk_column_and_aggregation) {
+    // Tests aggregation on table with secondary index on a column
+    // of partition key. This is the "whole_partitions" case of 
+    // indexed_table_select_statement::do_execute.
+
+    return do_with_cql_env_thread([] (cql_test_env& e) {
+        cql3::statements::set_internal_paging_size(page_scenarios_page_size).get();
+
+        // Explicitly reproduce the second failing example in issue #7355.
+        // This is a case with a single large partition.
+        cquery_nofail(e, "CREATE TABLE t1 (pk1 int, pk2 int, ck int, primary key((pk1, pk2), ck))");
+        cquery_nofail(e, "CREATE INDEX ON t1(pk2)");
+
+        test_with_different_page_scenarios([&](int current_row) {
+            cquery_nofail(e, format("INSERT INTO t1(pk1, pk2, ck) VALUES (1, 1, {})", current_row).c_str());
+        }, [&](int rows_inserted) {
+            assert_select_count_and_select_rows_has_size(e, "FROM t1 WHERE pk2 = 1", rows_inserted);
+          eventually([&] { 
+            auto res = cquery_nofail(e, "SELECT pk1, pk2 FROM t1 WHERE pk2 = 1 GROUP BY pk1, pk2");
+            assert_that(res).is_rows().with_size(1);
+            res = cquery_nofail(e, "SELECT pk1, pk2, ck FROM t1 WHERE pk2 = 1 GROUP BY pk1, pk2, ck");
+            assert_that(res).is_rows().with_size(rows_inserted);
+            require_rows(e, "SELECT min(pk1) FROM t1 WHERE pk2 = 1", {
+                { int32_type->decompose(1) }
+            });
+          });
+        });
+
+        // Test a case when there are a lot of small partitions (more than a page size)
+        // and there is a clustering key in base table.
+        cquery_nofail(e, "CREATE TABLE t2 (pk1 int, pk2 int, ck int, primary key((pk1, pk2), ck))");
+        cquery_nofail(e, "CREATE INDEX ON t2(pk2)");
+
+        test_with_different_page_scenarios([&](int current_row) {
+            cquery_nofail(e, format("INSERT INTO t2(pk1, pk2, ck) VALUES ({}, 1, {})", 
+                current_row, current_row % 20).c_str());
+        }, [&](int rows_inserted) {
+            assert_select_count_and_select_rows_has_size(e, "FROM t2 WHERE pk2 = 1", rows_inserted);
+          eventually([&] { 
+            auto res = cquery_nofail(e, "SELECT pk1, pk2 FROM t2 WHERE pk2 = 1 GROUP BY pk1, pk2");
+            assert_that(res).is_rows().with_size(rows_inserted);
+            require_rows(e, "SELECT max(pk1) FROM t2 WHERE pk2 = 1", {
+                { int32_type->decompose(int32_t(rows_inserted - 1)) }
+            });
+          });
+        });
+
+        // Test a case when there are a lot of small partitions (more than a page size)
+        // and there is NO clustering key in base table.
+        cquery_nofail(e, "CREATE TABLE t3 (pk1 int, pk2 int, primary key((pk1, pk2)))");
+        cquery_nofail(e, "CREATE INDEX ON t3(pk2)");
+
+        test_with_different_page_scenarios([&](int current_row) {
+            cquery_nofail(e, format("INSERT INTO t3(pk1, pk2) VALUES ({}, 1)", current_row).c_str());
+        }, [&](int rows_inserted) {
+            assert_select_count_and_select_rows_has_size(e, "FROM t3 WHERE pk2 = 1", rows_inserted);
+        });
+
+        cql3::statements::reset_internal_paging_size().get();
+    });
+}
+
+SEASTAR_TEST_CASE(test_secondary_index_on_non_pk_ck_column_and_aggregation) {
+    // Tests aggregation on table with secondary index on a column
+    // that is not a part of partition key and clustering key.
+    // This is the non-"whole_partitions" and non-"partition_slices"
+    // case of indexed_table_select_statement::do_execute.
+    //
+    // Three tables (t, t2, t3) are exercised. Each one is filled and checked
+    // through test_with_different_page_scenarios, which is given one callback
+    // that inserts a single row and one callback that verifies query results.
+
+    return do_with_cql_env_thread([] (cql_test_env& e) {
+        // Use page_scenarios_page_size as the internal paging size for this
+        // test body; it is reset again by the last statement of the body.
+        cql3::statements::set_internal_paging_size(page_scenarios_page_size).get();
+
+        // Test a case when there are a lot of small partitions (more than a page size)
+        // and there is a clustering key in base table.
+        cquery_nofail(e, "CREATE TABLE t (pk int, ck int, v int, primary key(pk, ck))");
+        cquery_nofail(e, "CREATE INDEX ON t(v)");
+
+        test_with_different_page_scenarios([&](int current_row) {
+            // pk follows current_row, so each row lands in its own partition.
+            cquery_nofail(e, format("INSERT INTO t(pk, ck, v) VALUES ({}, {}, 1)", 
+                current_row, current_row % 20).c_str());
+        }, [&](int rows_inserted) {
+            assert_select_count_and_select_rows_has_size(e, "FROM t WHERE v = 1", rows_inserted);
+          eventually([&] { 
+            auto res = cquery_nofail(e, "SELECT pk FROM t WHERE v = 1 GROUP BY pk");
+            assert_that(res).is_rows().with_size(rows_inserted);
+            // Every row stores v = 1, so sum(v) equals the number of rows.
+            require_rows(e, "SELECT sum(v) FROM t WHERE v = 1", {
+                { int32_type->decompose(int32_t(rows_inserted)) }
+            });
+          });
+        });
+
+        // Test a case when there are a lot of small partitions (more than a page size)
+        // and there is NO clustering key in base table.
+        cquery_nofail(e, "CREATE TABLE t2 (pk int, v int, primary key(pk))");
+        cquery_nofail(e, "CREATE INDEX ON t2(v)");
+
+        test_with_different_page_scenarios([&](int current_row) {
+            cquery_nofail(e, format("INSERT INTO t2(pk, v) VALUES ({}, 1)", current_row).c_str());
+        }, [&](int rows_inserted) {
+            assert_select_count_and_select_rows_has_size(e, "FROM t2 WHERE v = 1", rows_inserted);
+          eventually([&] { 
+            auto res = cquery_nofail(e, "SELECT pk FROM t2 WHERE v = 1 GROUP BY pk");
+            assert_that(res).is_rows().with_size(rows_inserted);
+            // Expected sum is 0 + 1 + ... + (rows_inserted - 1).
+            // NOTE(review): assumes current_row takes the values
+            // 0..rows_inserted-1 - confirm against test_with_different_page_scenarios.
+            require_rows(e, "SELECT sum(pk) FROM t2 WHERE v = 1", {
+                { int32_type->decompose(int32_t(rows_inserted * (rows_inserted - 1) / 2)) }
+            });
+          });
+        });
+
+        // Test a case when there is a single large partition (larger than a page size).
+        cquery_nofail(e, "CREATE TABLE t3 (pk int, ck int, v int, primary key(pk, ck))");
+        cquery_nofail(e, "CREATE INDEX ON t3(v)");
+
+        test_with_different_page_scenarios([&](int current_row) {
+            // Every row goes to partition pk = 1; only ck varies.
+            cquery_nofail(e, format("INSERT INTO t3(pk, ck, v) VALUES (1, {}, 1)", current_row).c_str());
+        }, [&](int rows_inserted) {
+            assert_select_count_and_select_rows_has_size(e, "FROM t3 WHERE v = 1", rows_inserted);
+          eventually([&] { 
+            // A single partition exists, so grouping by pk gives one group.
+            auto res = cquery_nofail(e, "SELECT pk FROM t3 WHERE v = 1 GROUP BY pk");
+            assert_that(res).is_rows().with_size(1);
+            res = cquery_nofail(e, "SELECT pk, ck FROM t3 WHERE v = 1 GROUP BY pk, ck");
+            assert_that(res).is_rows().with_size(rows_inserted);
+            // NOTE(review): the expected maximum assumes current_row takes the
+            // values 0..rows_inserted-1 - confirm against the helper.
+            require_rows(e, "SELECT max(ck) FROM t3 WHERE v = 1", {
+                { int32_type->decompose(int32_t(rows_inserted - 1)) }
+            });
+          });
+        });
+
+        cql3::statements::reset_internal_paging_size().get();
+    });
+}
+
 SEASTAR_TEST_CASE(test_computed_columns) {
    return do_with_cql_env_thread([] (auto& e) {
        e.execute_cql("CREATE TABLE t (p1 int, p2 int, c1 int, c2 int, v int, PRIMARY KEY ((p1,p2),c1,c2))").get();
--- a/test/boost/types_test.cc
+++ b/test/boost/types_test.cc
@@ -100,6 +100,13 @@ BOOST_AUTO_TEST_CASE(test_byte_type_string_conversions) {
    BOOST_REQUIRE_EQUAL(byte_type->to_string(bytes()), "");
 }

+BOOST_AUTO_TEST_CASE(test_ascii_type_string_conversions) {
+    // Round trip: parsing the text must agree with serializing the value,
+    // and the serialized form must print back as the same text.
+    const auto serialized = ascii_type->decompose("ascii");
+    BOOST_REQUIRE(ascii_type->equal(ascii_type->from_string("ascii"), serialized));
+    BOOST_REQUIRE_EQUAL(ascii_type->to_string(serialized), "ascii");
+
+    // Text containing a non-ASCII character must be rejected by the parser.
+    test_parsing_fails(ascii_type, "¡Hola!");
+}
+
 BOOST_AUTO_TEST_CASE(test_short_type_string_conversions) {
    BOOST_REQUIRE(short_type->equal(short_type->from_string("12345"), short_type->decompose(int16_t(12345))));
    BOOST_REQUIRE_EQUAL(short_type->to_string(short_type->decompose(int16_t(12345))), "12345");
--- a/test/cql-pytest/test_large_cells_rows.py
+++ b/test/cql-pytest/test_large_cells_rows.py
@@ -0,0 +1,43 @@
+# Copyright 2020 ScyllaDB
+#
+# This file is part of Scylla.
+#
+# Scylla is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Scylla is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+
+from util import new_test_table
+
+import requests
+
+def test_create_large_static_cells_and_rows(cql, test_keyspace):
+    '''Test that `large_data_handler` successfully reports large static cells
+    and static rows and this doesn't cause a crash of Scylla server.
+
+    This is a regression test for https://github.com/scylladb/scylla/issues/6780'''
+    schema = "pk int, ck int, user_ids set<text> static, PRIMARY KEY (pk, ck)"
+    with new_test_table(cql, test_keyspace, schema) as table:
+        insert_stmt = cql.prepare(f"INSERT INTO {table} (pk, ck, user_ids) VALUES (?, ?, ?) USING TIMEOUT 5m")
+        # Default large data threshold for cells is 1 mb, for rows it is 10 mb.
+        # Take 10 mb cell to trigger large data reporting code both for
+        # static cells and static rows simultaneously.
+        large_set = {'x' * 1024 * 1024 * 10}
+        cql.execute(insert_stmt, [1, 1, large_set])
+
+        # REST API endpoint address for test scylla node
+        node_address = f'http://{cql.cluster.contact_points[0]}:10000'
+        # Execute force flush of test table to persistent storage, which is necessary to trigger
+        # `large_data_handler` execution.
+        table_without_ks = table[table.find('.') + 1:] # strip keyspace part from the table name
+        requests.post(f'{node_address}/storage_service/keyspace_flush/{test_keyspace}', params={'cf' : table_without_ks})
+        # No need to check that the Scylla server is running here, since the test will
+        # fail automatically in case Scylla crashes.
--- a/test/lib/cql_test_env.cc
+++ b/test/lib/cql_test_env.cc
@@ -75,6 +75,8 @@ cql_test_config::cql_test_config(shared_ptr<db::config> cfg)
    // which all get thrown away when the test is done. This can cause timeouts
    // if /tmp is not tmpfs.
    db_config->commitlog_use_o_dsync.set(false);
+
+    db_config->add_cdc_extension();
 }

 cql_test_config::cql_test_config(const cql_test_config&) = default;
--- a/test/lib/select_statement_utils.hh
+++ b/test/lib/select_statement_utils.hh
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2020 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <seastar/core/future.hh>
+
+namespace cql3 {
+
+namespace statements {
+
+    // Test-support hooks for the internal paging size of select statements.
+    // NOTE(review): the definitions are not part of this header; the
+    // descriptions below are inferred from the names and from how the tests
+    // call them - confirm against the implementation.
+    //
+    // The return type is spelled seastar::future<> so that this header does
+    // not depend on the includer having `using namespace seastar` in effect.
+
+    // Presumably makes `internal_paging_size` the page size used for internal
+    // paging; the returned future is awaited by callers before they proceed.
+    seastar::future<> set_internal_paging_size(int internal_paging_size);
+
+    // Presumably restores the default internal paging size; callers pair it
+    // with a preceding set_internal_paging_size() call.
+    seastar::future<> reset_internal_paging_size();
+
+}
+
+}
--- a/tools/jmx
+++ b/tools/jmx
--- a/tools/scyllatop/livedata.py
+++ b/tools/scyllatop/livedata.py
@@ -39,7 +39,7 @@ class LiveData(object):
    def _discoverMetrics(self):
        results = metric.Metric.discover(self._metric_source)
        logging.debug('_discoverMetrics: {} results discovered'.format(len(results)))
-        for symbol in results:
+        for symbol in list(results):
            if not self._matches(symbol, self._metricPatterns):
                results.pop(symbol)
        logging.debug('_initializeMetrics: {} results matched'.format(len(results)))
--- a/types.cc
+++ b/types.cc
@@ -2360,6 +2360,14 @@ struct from_string_visitor {
    sstring_view s;
    bytes operator()(const reversed_type_impl& r) { return r.underlying_type()->from_string(s); }
    template <typename T> bytes operator()(const integer_type_impl<T>& t) { return decompose_value(parse_int(t, s)); }
+    // Parses `s` as a value of the ascii type: the characters are accepted
+    // only if utils::ascii::validate() approves them, otherwise a
+    // marshal_exception naming the type and the offending text is thrown.
+    bytes operator()(const ascii_type_impl&) {
+        // View the input characters as signed bytes for validation.
+        const bytes_view raw(reinterpret_cast<const int8_t*>(s.begin()), s.size());
+        if (!utils::ascii::validate(raw)) {
+            throw marshal_exception(format("Value not compatible with type {}: '{}'", ascii_type_name, s));
+        }
+        return to_bytes(raw);
+    }
    bytes operator()(const string_type_impl&) {
        return to_bytes(bytes_view(reinterpret_cast<const int8_t*>(s.begin()), s.size()));
    }
--- a/types.hh
+++ b/types.hh
@@ -380,6 +380,14 @@ public:
    data_value(const std::string&);
    data_value(const sstring&);

+    // Construction of a data_value from nullptr is deliberately disabled
+    // because it is error prone: a nullptr argument conflicts with the
+    // `const char*` overload, which tries to allocate a value from it and
+    // causes UB.
+    //
+    // Callers that want null value semantics must call `make_null()` explicitly.
+    data_value(std::nullptr_t) = delete;
+
    data_value(ascii_native_type);
    data_value(bool);
    data_value(int8_t);