Compare commits

...

55 Commits

Author SHA1 Message Date
Amos Kong
44ec73cfc4 schema.cc/describe: fix invalid compaction options in schema
There is a typo in the snapshot's schema.cql: a comma is missing after
the compaction strategy class, so restoring the schema from that file fails.

    AND compaction = {'class': 'SizeTieredCompactionStrategy''max_compaction_threshold': '32'}

The map_as_cql_param() function has a `first` parameter that decides
whether to prepend a comma; compaction_strategy_options is never the
first entry, so it must always be preceded by a comma.
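The comma handling can be sketched like this (a hypothetical simplification of map_as_cql_param(); the name and signature are illustrative, not the actual Scylla code):

```cpp
#include <cassert>
#include <map>
#include <sstream>
#include <string>

// Hypothetical simplification: render a CQL map literal, emitting ", "
// before every entry except the first. If a caller mishandles `first`
// for entries that are never first (like compaction_strategy_options),
// the comma is dropped and the resulting CQL is invalid, as above.
inline std::string map_as_cql_literal(const std::map<std::string, std::string>& m,
                                      bool first = true) {
    std::ostringstream os;
    os << '{';
    for (const auto& [k, v] : m) {
        if (!first) {
            os << ", ";
        }
        first = false;
        os << '\'' << k << "': '" << v << '\'';
    }
    os << '}';
    return os.str();
}
```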

Fixes #7741

Signed-off-by: Amos Kong <amos@scylladb.com>

Closes #7734

(cherry picked from commit 6b1659ee80)
2021-03-24 12:58:11 +02:00
Tomasz Grabiec
df6f9a200f sstable: writer: ka/la: Write row marker cell after row tombstone
The row marker has a cell name which sorts after the row tombstone's
start bound. The old code was writing the marker first, then the row
tombstone, which is incorrect.

This was harmless to our sstable reader, which recognizes both atoms as
belonging to the current clustering row fragment and collects them both
correctly.

However, if both atoms trigger creation of promoted index blocks, the
writer will create a promoted index with entries which violate the cell
name ordering. It's very unlikely to hit this in practice: to trigger
promoted index entries for both atoms, the clustering key would have to
be so large that the size of the marker cell exceeds the desired
promoted index block size, which is 64KB by default (but
user-controlled via the column_index_size_in_kb option). 64KB is also
the limit on clustering key size accepted by the system.

This was caught by one of our unit tests:

  sstable_conforms_to_mutation_source_test

...which runs a battery of mutation reader tests with various
desired promoted index block sizes, including the target size of 1
byte, which triggers an entry for every atom.

The test started to fail for some random seeds after commit ecb6abe
inside the
test_streamed_mutation_forwarding_is_consistent_with_slicing test
case, reporting a mutation mismatch in the following line:

    assert_that(*sliced_m).is_equal_to(*fwd_m, slice_with_ranges.row_ranges(*m.schema(), m.key()));

It compares mutations read from the same sstable using two different
methods: slicing with clustering key restrictions, and fast forwarding.
The reported mismatch was that fwd_m contained the row
marker, but sliced_m did not. The sstable does contain the marker, so
both reads should return it.

After reverting the commit which introduced dynamic adjustments, the
test passes, but both mutations are missing the marker, so both are
wrong!

They are wrong because the promoted index contains entries whose
starting positions violate the ordering, so the binary search gets
confused and selects the row tombstone's position, which is emitted
after the marker, thus skipping over the row marker.

The explanation for why the test started to fail after dynamic
adjustments is the following. The promoted index cursor works by
incrementally parsing buffers fed by the file input stream. It first
parses the whole block and then does a binary search within the parsed
array. The entries which the cursor touches during binary search depend
on the size of the block read from the file. The commit which enabled
dynamic adjustments causes the block size to differ between subsequent
reads, which allows one of the reads to walk over the corrupted entries
and read the correct data by selecting the entry corresponding to the
row marker.
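As a toy illustration (not the actual promoted-index code): a textbook lower-bound binary search over positions silently skips an element when the array violates the ordering the search assumes:

```cpp
#include <cassert>
#include <vector>

// Classic lower_bound over a supposedly sorted vector. If the ordering
// invariant is violated (here 30 appears before 20), the search can
// land on the later, larger element and skip the one we wanted --
// analogous to the cursor selecting the row tombstone's position and
// skipping the row marker.
inline int lower_bound_index(const std::vector<int>& v, int key) {
    int lo = 0, hi = static_cast<int>(v.size());
    while (lo < hi) {
        int mid = (lo + hi) / 2;
        if (v[mid] < key) {
            lo = mid + 1;
        } else {
            hi = mid;
        }
    }
    return lo;
}
```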

Fixes #8324
Message-Id: <20210322235812.1042137-1-tgrabiec@scylladb.com>

(cherry picked from commit 9272e74e8c)
2021-03-24 10:42:11 +02:00
Nadav Har'El
2f4a3c271c storage_service: correct missing exception in logging rebuild failure
When failing to rebuild a node, we would print the error with the
useless explanation "<no exception>". The problem was a typo in the
logging call, which used std::current_exception() - which was not
relevant at that point - instead of "ep".

Refs #8089

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20210314113118.1690132-1-nyh@scylladb.com>
(cherry picked from commit d73934372d)
2021-03-21 10:51:36 +02:00
Raphael S. Carvalho
6a11c20b4a LCS: reshape: tolerate more sstables in level 0 with relaxed mode
Reshape's relaxed mode, used during initialization, only tolerates
min_threshold (default: 4) sstables in level 0. However, relaxed mode
should tolerate more L0 sstables, otherwise boot will have to reshape
level 0 every time it crosses the min threshold. So let's make LCS
reshape tolerate up to the maximum of max_threshold and 32. This change
is beneficial because once the table is populated, LCS regular
compaction can decide to merge those level-0 sstables into level 1
instead, thereby reducing write amplification (WA).
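The new tolerance can be sketched as (illustrative, not the exact Scylla code):

```cpp
#include <algorithm>
#include <cassert>
#include <cstddef>

// Relaxed-mode L0 tolerance for LCS reshape: allow up to
// max(max_threshold, 32) sstables in level 0 before reshaping.
inline std::size_t relaxed_l0_limit(std::size_t max_threshold) {
    return std::max<std::size_t>(max_threshold, 32);
}
```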

Refs #8297.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20210318131442.17935-1-raphaelsc@scylladb.com>
(cherry picked from commit e53cedabb1)
2021-03-18 19:20:10 +02:00
Raphael S. Carvalho
cccdd6aaae compaction_manager: Fix performance of cleanup compaction due to unlimited parallelism
Prior to 463d0ab, only one table could be cleaned up at a time on a
given shard. Since then, all tables belonging to a given keyspace are
cleaned up in parallel. Cleanup serialization on each shard was
enforced with a semaphore, which was incorrectly removed by the
aforementioned patch.

As a result, the disk space required for cleanup to succeed can be up
to the size of the whole keyspace, increasing the chances of the node
running out of space.

The node could also run out of memory if there are many tables in the
keyspace. The memory requirement is at least #_of_tables * 128k (not
taking write-behind etc. into account). With 5k tables, it's ~0.64G per
shard.
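The back-of-envelope arithmetic (using the decimal units of the commit message, 128k per table):

```cpp
#include <cassert>

// 5000 tables x ~128k of rewrite buffers each ~= 0.64G per shard.
constexpr unsigned long long tables = 5000;
constexpr unsigned long long per_table_bytes = 128'000;
constexpr unsigned long long total_bytes = tables * per_table_bytes;
static_assert(total_bytes == 640'000'000ULL, "~0.64G per shard");
```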

Also, all tables being cleaned up in parallel compete for the same
disk and CPU bandwidth, making them all much slower and consequently
increasing the operation time significantly.

This problem was detected with cleanup, but scrub and upgrade go
through the same rewrite procedure, so they are affected by exactly the
same problem.
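The per-shard serialization can be sketched with a one-unit lock (a minimal stand-in for the Seastar semaphore the patch restores; names are illustrative):

```cpp
#include <cassert>
#include <mutex>

// Minimal stand-in for the per-shard semaphore: only one
// cleanup/scrub/upgrade rewrite runs at a time on a shard, bounding
// the extra disk space and memory to roughly one table's worth.
class rewrite_serializer {
    std::mutex _m; // stand-in for a one-unit seastar::semaphore
public:
    template <typename Func>
    auto with_lock(Func&& f) {
        std::lock_guard<std::mutex> g(_m); // serializes rewrites on this shard
        return f();
    }
};
```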

Fixes #8247.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20210312162223.149993-1-raphaelsc@scylladb.com>
(cherry picked from commit 7171244844)
2021-03-18 14:29:38 +02:00
Raphael S. Carvalho
92871a88c3 compaction: Prevent cleanup and regular from compacting the same sstable
Due to a regression introduced by 463d0ab, regular compaction can
compact, in parallel, an sstable that is already being compacted by
cleanup, scrub or upgrade.

This redundancy wastes resources and increases both write
amplification and the operation time.

That's a potential source of data resurrection: data no longer owned
by the node, from an sstable being compacted by both cleanup and
regular compaction, will still exist in the node afterwards, so
resurrection can happen if the node regains ownership.

Fixes #8155.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20210225172641.787022-1-raphaelsc@scylladb.com>
(cherry picked from commit 2cf0c4bbf1)

Includes fixup patch:

compaction_manager: Fix use-after-free in rewrite_sstables()

Use-after-free introduced by 2cf0c4bbf1: `compacting` is moved into
the then_wrapped() lambda, so it's potentially freed on the next
iteration of repeat().

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20210309232940.433490-1-raphaelsc@scylladb.com>
(cherry picked from commit f7cc431477)
2021-03-11 08:24:56 +02:00
Benny Halevy
85bbf6751d repair: repair_writer: do not capture lw_shared_ptr cross-shard
The shared_from_this lw_shared_ptr must not be accessed
across shards.  Capturing it in the lambda passed to
mutation_writer::distribute_reader_and_consume_on_shards
causes exactly that, since the captured lw_shared_ptr
is copied on other shards, and this ends up in memory corruption
as seen in #7535 (probably due to lw_shared_ptr._count
going out of sync when incremented/decremented in parallel
on other shards with no synchronization).

This was introduced in 289a08072a.

The writer is not needed in the body of this lambda anyway, so the
lambda doesn't need to capture it. The writer is already held by the
continuations until the end of the chain.
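A minimal single-threaded sketch of the fixed pattern, using std::shared_ptr as a stand-in for lw_shared_ptr (names and the scenario are illustrative): the caller's continuation chain keeps the only owning reference alive, and the lambda captures only a raw pointer, so no reference-count traffic happens on other shards:

```cpp
#include <cassert>
#include <functional>
#include <memory>

struct repair_writer { int rows = 0; };

// Capturing the shared pointer would copy it wherever the lambda is
// copied -- in Seastar, potentially another shard, corrupting the
// non-atomic reference count. Instead, capture a raw pointer; this is
// safe because the owner (held by the caller's continuation chain)
// outlives the lambda.
inline int run_with_writer(std::shared_ptr<repair_writer> w,
                           const std::function<void(repair_writer*)>& work) {
    auto task = [raw = w.get()](const std::function<void(repair_writer*)>& f) {
        f(raw); // no refcount traffic here
    };
    task(work);
    return w->rows; // w still owns the writer
}
```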

Fixes #7535

Test: repair_additional_test:RepairAdditionalTest.repair_disjoint_row_3nodes_diff_shard_count_test (dev)

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20201104142216.125249-1-bhalevy@scylladb.com>
(cherry picked from commit f93fb55726)
2021-03-03 21:27:44 +02:00
Hagit Segev
0ac069fdcc release: prepare for 4.2.4 2021-03-02 14:52:31 +02:00
Avi Kivity
738f8eaccd Update seastar submodule
* seastar 1266e42c82...0fba7da929 (1):
  > io_queue: Fix "delay" metrics

Fixes #8166.
2021-03-01 13:59:02 +02:00
Avi Kivity
5d32e91e16 Update seastar submodule
* seastar f760efe0a0...1266e42c82 (1):
  > rpc: streaming sink: order outgoing messages

Fixes #7552.
2021-03-01 12:22:17 +02:00
Benny Halevy
6c5f6b3f69 large_data_handler: disable deletion of large data entries
Currently we decide whether to delete large data entries
based on the overall sstable data_size, since the entries
themselves are typically much smaller than the whole sstable
(especially cells and rows), this causes overzealous
deletions (#7668) and inefficiency in the rows cache
due to the large number of range tombstones created.

Refs #7575

Test: sstable_3_x_test(dev)
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>

This patch is targeted at branch-4.3 or earlier.
In 4.4, the problem was fixed in #7669, but that fix
is out of scope for backporting.

Branch: 4.3
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20201203130018.1920271-1-bhalevy@scylladb.com>
(cherry picked from commit bb99d7ced6)
2021-03-01 10:54:33 +02:00
Raphael S. Carvalho
fba26b78d2 sstables: Fix TWCS reshape for windows with at least min_threshold sstables
TWCS reshape was silently ignoring windows which contain at least
min_threshold sstables (this can happen with data segregation).
When resizing candidates, the size of multi_window was incorrectly
used; it was always empty in this path, which means candidates was
always cleared.

Fixes #8147.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20210224125322.637128-1-raphaelsc@scylladb.com>
(cherry picked from commit 21608bd677)
2021-02-28 16:43:02 +02:00
Pavel Solodovnikov
06e785994f large_data_handler: fix segmentation fault when constructing data_value from a nullptr
It turns out that `cql_table_large_data_handler::record_large_rows`
and `cql_table_large_data_handler::record_large_cells` were broken
for reporting static cells and static rows from the very beginning:

In case a large static cell or a large static row is encountered,
it tries to execute `db::try_record` with `nullptr` additional values,
denoting that there is no clustering key to be recorded.

These values are next passed to `qctx.execute_cql()`, which
creates `data_value` instances for each statement parameter,
hence invoking `data_value(nullptr)`.

This picks the `const char*` overload, which delegates to the
`std::string_view` ctor. It is UB to pass a `nullptr` pointer to the
`std::string_view` ctor, hence the segmentation faults in the
aforementioned large data reporting code.

What we want here is to make a null `data_value` instead, so
just add an overload specifically for `std::nullptr_t`, which
will create a null `data_value` with `text` type.
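A minimal sketch of the fix, using a simplified stand-in for data_value (not the actual Scylla class):

```cpp
#include <cassert>
#include <cstddef>
#include <optional>
#include <string>
#include <string_view>

struct data_value {
    std::optional<std::string> value; // nullopt models a CQL null

    // const char* overload: passing a null pointer here would construct
    // a std::string_view from nullptr, which is undefined behavior.
    data_value(const char* s) : value(std::string(std::string_view(s))) {}

    // Dedicated std::nullptr_t overload: wins overload resolution for a
    // literal nullptr and produces a null value instead of UB.
    data_value(std::nullptr_t) : value(std::nullopt) {}

    bool is_null() const { return !value.has_value(); }
};
```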

A regression test is provided for the issue (written in
`cql-pytest` framework).

Tests: test/cql-pytest/test_large_cells_rows.py

Fixes: #6780

Signed-off-by: Pavel Solodovnikov <pa.solodovnikov@scylladb.com>
Message-Id: <20201223204552.61081-1-pa.solodovnikov@scylladb.com>
(cherry picked from commit 219ac2bab5)
2021-02-23 12:14:12 +02:00
Takuya ASADA
5bc48673aa scylla_util.py: resolve /dev/root to get actual device on aws
When psutil.disk_partitions() reports that / is /dev/root,
aws_instance mistakenly reports the root partition as part of the
ephemeral disks, and RAID construction fails.
This change prevents the error and reports the correct free disks.

Fixes #8055

Closes #8040

(cherry picked from commit 32d4ec6b8a)
2021-02-21 16:23:45 +02:00
Nadav Har'El
59a01b2981 alternator: fix ValidationException in FilterExpression - and more
The first condition expressions we implemented in Alternator were the old
"Expected" syntax of conditional updates. That implementation had some
specific assumptions on how it handles errors: For example, in the "LT"
operator in "Expected", the second operand is always part of the query, so
an error in it (e.g., an unsupported type) resulted in a ValidationException
error.

When we implemented ConditionExpression and FilterExpression, we wrongly
used the same functions check_compare(), check_BETWEEN(), etc., to implement
them. This results in some inaccurate error handling. The worst example is
what happens when you use a FilterExpression with an expression such as
"x < y" - this filter is supposed to silently skip items whose "x" and "y"
attributes have unsupported or different types, but in our implementation
a bad type (e.g., a list) for y resulted in a ValidationException which
aborted the entire scan! Interestingly, in one case (that of BEGINS_WITH)
we actually noticed the slightly different behavior needed and implemented
the same operator twice - with ugly code duplication. But in other operators
we missed this problem completely.

This patch first adds extensive tests of how the different expressions
(Expected, QueryFilter, FilterExpression, ConditionExpression) and the
different operators handle various input errors - unsupported types,
missing items, incompatible types, etc. Importantly, the tests demonstrate
that there is often different behavior depending on whether the bad
input comes from the query, or from the item. Some of the new tests
fail before this patch, but others pass and were useful to verify that
the patch doesn't break anything that already worked correctly previously.
As usual, all the tests pass on Cassandra.

Finally, this patch *fixes* all these problems. The comparison functions
like check_compare() and check_BETWEEN() now not only take the operands,
they also take booleans saying if each of the operands came from the
query or from an item. The old-syntax caller (Expected or QueryFilter)
always say that the first operand is from the item and the second is
from the query - but in the new-syntax caller (ConditionExpression or
FilterExpression) any or all of the operands can come from the query
and need verification.
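A sketch of the operand-origin idea (illustrative C++, not the actual Alternator code): comparison helpers receive flags saying where each operand came from, and only query-side type errors become hard errors:

```cpp
#include <cassert>
#include <stdexcept>
#include <string>

struct validation_exception : std::runtime_error {
    using std::runtime_error::runtime_error;
};

enum class dyn_type { number, string, list }; // only number comparable here

// Hypothetical check_compare()-style helper: a non-comparable type is a
// hard ValidationException only when it came from the query; when it
// came from the stored item, the comparison just doesn't match.
inline bool check_lt(dyn_type t1, double v1, bool from_query1,
                     dyn_type t2, double v2, bool from_query2) {
    auto comparable = [](dyn_type t) { return t == dyn_type::number; };
    if (!comparable(t1)) {
        if (from_query1) throw validation_exception("bad operand type in query");
        return false; // item-side bad type: silently no match
    }
    if (!comparable(t2)) {
        if (from_query2) throw validation_exception("bad operand type in query");
        return false;
    }
    return v1 < v2;
}
```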

The old duplicated code for check_BEGINS_WITH() - which a TODO to remove
it - is finally removed. Instead we use the same idea of passing booleans
saying if each of its operands came from an item or from the query.

Fixes #8043

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
(cherry picked from commit 653610f4bc)
2021-02-21 10:06:50 +02:00
Nadav Har'El
5dd49788c1 alternator: fix UpdateItem ADD for non-existent attribute
UpdateItem's "ADD" operation usually adds elements to an existing set
or adds a number to an existing counter. But it can *also* be used
to create a new set or counter (as if adding to an empty set or zero).

We unfortunately did not have a test for this case (creating a new set
or counter), and when I wrote such a test now, I discovered the
implementation was missing. So this patch adds both the test and the
implementation. The new test used to fail before this patch, and passes
with it - and passes on DynamoDB.

Note that we only had this bug for the newer UpdateItem syntax.
For the old AttributeUpdates syntax, we already support ADD actions
on missing attributes, and already tested it in test_update_item_add().
I just forgot to test the same thing for the newer syntax, so I missed
this bug :-(

Fixes #7763.

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20201207085135.2551845-1-nyh@scylladb.com>
(cherry picked from commit a8fdbf31cd)
2021-02-21 08:58:49 +02:00
Benny Halevy
56cbc9f3ed stream_session: prepare: fix missing string format argument
As seen in
mv_populating_from_existing_data_during_node_decommission_test dtest:
```
ERROR 2021-02-11 06:01:32,804 [shard 0] stream_session - failed to log message: fmt::v7::format_error (argument not found)
```

Fixes #8067

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20210211100158.543952-1-bhalevy@scylladb.com>
(cherry picked from commit d01e7e7b58)
2021-02-14 13:11:43 +02:00
Avi Kivity
7469896017 table: fix on_compaction_completion corrupting _sstables_compacted_but_not_deleted during self-race
on_compaction_completion() updates _sstables_compacted_but_not_deleted
through a temporary to avoid an exception causing a partial update:

  1. copy _sstables_compacted_but_not_deleted to a temporary
  2. update temporary
  3. do dangerous stuff
  4. move temporary to _sstables_compacted_but_not_deleted

This is racy when we have parallel compactions, since step 3 yields.
We can have two invocations running in parallel, taking snapshots
of the same _sstables_compacted_but_not_deleted in step 1, each
modifying it in different ways, and only one of them winning the
race and assigning in step 4. With the right timing we can end
with extra sstables in _sstables_compacted_but_not_deleted.

Before a5369881b3, this was a benign race (only resulting in
deleted file space not being reclaimed until the service is shut
down), but afterwards, extra sstable references result in the service
refusing to shut down. This was observed in database_test in debug
mode, where the race more or less reliably happens for system.truncated.

Fix by using a different method to protect
_sstables_compacted_but_not_deleted. We unconditionally update it,
and also unconditionally fix it up (on success or failure) using
seastar::defer(). The fixup includes a call to rebuild_statistics()
which must happen every time we touch the sstable list.
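The defer-based pattern can be sketched with a plain scope guard (a stand-in for seastar::defer; names and the fixup are illustrative):

```cpp
#include <cassert>
#include <functional>
#include <stdexcept>
#include <string>
#include <vector>

// Stand-in for seastar::defer: runs the fixup on scope exit, whether we
// leave by return or by exception.
struct defer_guard {
    std::function<void()> fixup;
    ~defer_guard() { fixup(); }
};

// Mutate the shared list in place and unconditionally repair it (in the
// real code, also rebuild_statistics()) afterwards -- instead of
// copy/modify/move-back, which races when the "dangerous" step yields
// to another invocation.
inline bool compact_once(std::vector<std::string>& compacted_but_not_deleted,
                         const std::string& sst,
                         const std::function<void()>& dangerous_step) {
    compacted_but_not_deleted.push_back(sst);
    defer_guard g{[&] {
        // fixup runs on success *and* failure
        compacted_but_not_deleted.pop_back();
    }};
    dangerous_step(); // may throw or yield; the guard still runs
    return true;
}
```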

Ref #7331.
Fixes #8038.

BACKPORT NOTES:
- Turns out this race prevented deletion of expired sstables because the leaked
deleted sstables would be accounted when checking if an expired sstable can
be purged.
- Switch to unordered_set<>::count() as it's not supported by older compilers.

(cherry picked from commit a43d5079f3)
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20210212203832.45846-1-raphaelsc@scylladb.com>
2021-02-14 11:35:57 +02:00
Piotr Wojtczak
c7e2711dd4 Validate ascii values when creating from CQL
Although the code for it already existed, the validation function was
not being invoked. This change fixes that, adding a validating check
when converting from text to the specific value type and throwing a
marshal exception if some characters are not ASCII.
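The check itself is simple; a sketch (illustrative, not the actual Scylla function):

```cpp
#include <algorithm>
#include <cassert>
#include <stdexcept>
#include <string_view>

struct marshal_exception : std::runtime_error {
    using std::runtime_error::runtime_error;
};

// Reject any byte outside the 7-bit ASCII range when converting text to
// an ascii-typed value.
inline void validate_ascii(std::string_view v) {
    if (!std::all_of(v.begin(), v.end(),
                     [](unsigned char c) { return c < 0x80; })) {
        throw marshal_exception("Invalid ASCII character in string literal");
    }
}
```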

Fixes #5421

Closes #7532

(cherry picked from commit caa3c471c0)
2021-02-10 19:37:56 +02:00
Piotr Dulikowski
a2355a35db hinted handoff: use default timeout for sending orphaned hints
This patch causes orphaned hints (hints that were written towards a node
that is no longer their replica) to be sent with a default write
timeout. This is what is currently done for non-orphaned hints.

Previously, the timeout was hardcoded to one hour. This could cause a
long delay while shutting down, as the hints manager waits until all
ongoing hint-sending operations finish before stopping itself.

Fixes: #7051
(cherry picked from commit b111fa98ca)
2021-02-10 10:15:01 +02:00
Piotr Sarna
9e225ab447 Merge 'select_statement: Fix aggregate results on indexed selects (timeouts fixed) ' from Piotr Grabowski
Overview
Fixes #7355.

Before these changes, there were a few invalid results of aggregates/GROUP BY on tables with secondary indexes (see below).

Unfortunately, it still does NOT fix the problem in issue #7043. Although this PR moves the fixing of that issue forward, there is still a bug with `TOKEN(...)` in `WHERE` clauses of indexed selects that is not addressed in this PR. It will be fixed in my next PR.

It does NOT fix the problems in issues #7432, #7431 as those are out-of-scope of this PR and do not affect the correctness of results (only return a too large page).

GROUP BY (first commit)
Before the change, `GROUP BY` `SELECT`s with some `WHERE` restrictions on an indexed column would return invalid results (same grouped column values appearing multiple times):
```
CREATE TABLE ks.t(pk int, ck int, v int, PRIMARY KEY(pk, ck));
CREATE INDEX ks_t on ks.t(v);
INSERT INTO ks.t(pk, ck, v) VALUES (1, 2, 3);
INSERT INTO ks.t(pk, ck, v) VALUES (1, 4, 3);
SELECT pk FROM ks.t WHERE v=3 GROUP BY pk;
 pk
----
  1
  1
```
This is fixed by correctly passing `_group_by_cell_indices` to `result_set_builder`. Fixes the third failing example from issue #7355.

Paging (second commit)
Fixes two issues related to improper paging on indexed `SELECT`s. As those two issues are closely related (fixing one without fixing the other causes invalid results of queries), they are in a single commit (second commit).

The first issue is that when using `slice.set_range`, the existing `_row_ranges` (which specify clustering key prefixes) are not taken into account. This caused the wrong rows to be included in the result, as the clustering key bound was set to a half-open range:
```
CREATE TABLE ks.t(a int, b int, c int, PRIMARY KEY ((a, b), c));
CREATE INDEX kst_index ON ks.t(c);
INSERT INTO ks.t(a, b, c) VALUES (1, 2, 3);
INSERT INTO ks.t(a, b, c) VALUES (1, 2, 4);
INSERT INTO ks.t(a, b, c) VALUES (1, 2, 5);
SELECT COUNT(*) FROM ks.t WHERE c = 3;
 count
-------
     2
```
The second commit fixes this issue by properly trimming `row_ranges`.

The second fixed problem is related to setting the `paging_state` to `internal_options`. It was improperly set to the value just after reading from index, making the base query start from invalid `paging_state`.

The second commit fixes this issue by setting the `paging_state` after both index and base table queries are done. Moreover, the `paging_state` is now set based on `paging_state` of index query and the results of base table query (as base query can return more rows than index query).

The second commit fixes the first two failing examples from issue #7355.

Tests (fourth commit)
Extensively tests queries on tables with secondary indices with aggregates and `GROUP BY`s.

Tests three cases that are implemented in `indexed_table_select_statement::do_execute` - `partition_slices`,
`whole_partitions` and (non-`partition_slices` and non-`whole_partitions`). As some of the issues found were related to paging, the tests check scenarios where the inserted data is smaller than a page, larger than a page and larger than two pages (and some in-between page boundaries scenarios).

I found all those parameters (case of `do_execute`, number of inserted rows) to have an impact of those fixed bugs, therefore the tests validate a large number of those scenarios.

Configurable internal_paging_size (third commit)
Before this change, the internal `page_size` used for aggregate, `GROUP BY` or non-paged filtering queries was hard-coded to `DEFAULT_COUNT_PAGE_SIZE` (10,000). This change adds a new internal_paging_size variable, configurable via the `set_internal_paging_size` and `reset_internal_paging_size` free functions. This functionality is only meant for testing purposes.

Closes #7497

* github.com:scylladb/scylla:
  tests: Add secondary index aggregates tests
  select_statement: Introduce internal_paging_size
  select_statement: Fix paging on indexed selects
  select_statement: Fix GROUP BY on indexed select

(cherry picked from commit 8c645f74ce)
2021-02-08 20:32:36 +02:00
Amnon Heiman
e1205d1d5b API: Fix aggregation in column_family
A few methods in the column_family API were doing the aggregation
wrong, specifically the bloom filter disk size.

The issue is not always visible; it happens when there are multiple
filter files per shard.

Fixes #4513

Signed-off-by: Amnon Heiman <amnon@scylladb.com>

Closes #8007

(cherry picked from commit 4498bb0a48)
2021-02-08 17:04:27 +02:00
Avi Kivity
a78402efae Merge 'Add waiting for flushes on table drops' from Piotr Sarna
This series makes sure that before the table is dropped, all pending memtable flushes related to its memtables would finish.
Normally, flushes are not problematic in Scylla, because all tables default to `auto_snapshot=true`, which also implies that a table is flushed before being dropped. However, with `auto_snapshot=false` the flush is not attempted at all. It leads to the following race:
1. Run a node with `auto_snapshot=false`
2. Schedule a memtable flush  (e.g. via nodetool)
3. Get preempted in the middle of the flush
4. Drop the table
5. The flush that already started wakes up and starts operating on freed memory, which causes a segfault
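The phaser the series adds can be sketched as a tiny gate (a minimal stand-in for the Seastar gate/phaser; names are illustrative): every flush enters before touching memtables and leaves when done, and the drop path waits for the gate to empty:

```cpp
#include <cassert>
#include <condition_variable>
#include <mutex>

// Minimal stand-in for the flush phaser: each flush enters the gate
// before touching memtables and leaves when done; dropping a table
// blocks until every in-flight flush has left, so a flush can never
// wake up on freed memtables.
class flush_gate {
    std::mutex _m;
    std::condition_variable _cv;
    int _in_flight = 0;
public:
    void enter() {
        std::lock_guard<std::mutex> g(_m);
        ++_in_flight;
    }
    void leave() {
        std::lock_guard<std::mutex> g(_m);
        --_in_flight;
        _cv.notify_all();
    }
    void wait_for_drop() { // called by the drop path
        std::unique_lock<std::mutex> l(_m);
        _cv.wait(l, [this] { return _in_flight == 0; });
    }
    int in_flight() {
        std::lock_guard<std::mutex> g(_m);
        return _in_flight;
    }
};
```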

Tests: manual (artificially preempting for a long time at step 2 to ensure that the race occurs; segfaults were 100% reproducible before the series and do not happen anymore after the series is applied)

Fixes #7792

Closes #7798

* github.com:scylladb/scylla:
  database: add flushes to waiting for pending operations
  table: unify waiting for pending operations
  database: add a phaser for flush operations
  database: add waiting for pending streams on table drop

(cherry picked from commit 7636799b18)
2021-02-02 17:23:34 +02:00
Avi Kivity
9fcf790234 row_cache: linearize key in cache_entry::do_read()
do_read() does not linearize cache_entry::_key; this can cause a crash
with keys larger than 13k.
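A sketch of what linearization means here (illustrative only): large keys are stored as multiple fragments, and code needing a contiguous view must copy the fragments into one buffer rather than assume the first fragment is the whole key:

```cpp
#include <cassert>
#include <string>
#include <vector>

// Large keys are stored fragmented; linearize() produces the contiguous
// bytes that comparison/reading code must operate on.
inline std::string linearize(const std::vector<std::string>& fragments) {
    std::string out;
    for (const auto& f : fragments) {
        out.append(f);
    }
    return out;
}
```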

Fixes #7897.

Closes #7898

(cherry picked from commit d508a63d4b)
2021-01-17 09:30:44 +02:00
Hagit Segev
24346215c2 release: prepare for 4.2.3 2021-01-04 19:51:12 +02:00
Benny Halevy
918ec5ecb3 compaction: compaction_writer: destroy shared_sstable after the sstable_writer
sstable_writer may depend on the sstable throughout its whole lifecycle.
If the sstable is freed before the sstable_writer, we might hit a
use-after-free as in the following case:
```
std::_Deque_iterator<sstables::compression::segmented_offsets::bucket, sstables::compression::segmented_offsets::bucket&, sstables::compression::segmented_offsets::bucket*>::operator+=(long) at /usr/include/c++/10/bits/stl_deque.h:240
 (inlined by) std::operator+(std::_Deque_iterator<sstables::compression::segmented_offsets::bucket, sstables::compression::segmented_offsets::bucket&, sstables::compression::segmented_offsets::bucket*> const&, long) at /usr/include/c++/10/bits/stl_deque.h:378
 (inlined by) std::_Deque_iterator<sstables::compression::segmented_offsets::bucket, sstables::compression::segmented_offsets::bucket&, sstables::compression::segmented_offsets::bucket*>::operator[](long) const at /usr/include/c++/10/bits/stl_deque.h:252
 (inlined by) std::deque<sstables::compression::segmented_offsets::bucket, std::allocator<sstables::compression::segmented_offsets::bucket> >::operator[](unsigned long) at /usr/include/c++/10/bits/stl_deque.h:1327
 (inlined by) sstables::compression::segmented_offsets::push_back(unsigned long, sstables::compression::segmented_offsets::state&) at ./sstables/compress.cc:214
sstables::compression::segmented_offsets::writer::push_back(unsigned long) at ./sstables/compress.hh:123
 (inlined by) compressed_file_data_sink_impl<crc32_utils, (compressed_checksum_mode)1>::put(seastar::temporary_buffer<char>) at ./sstables/compress.cc:519
seastar::output_stream<char>::put(seastar::temporary_buffer<char>) at table.cc:?
 (inlined by) seastar::output_stream<char>::put(seastar::temporary_buffer<char>) at ././seastar/include/seastar/core/iostream-impl.hh:432
seastar::output_stream<char>::flush() at table.cc:?
seastar::output_stream<char>::close() at table.cc:?
sstables::file_writer::close() at sstables.cc:?
sstables::mc::writer::~writer() at writer.cc:?
 (inlined by) sstables::mc::writer::~writer() at ./sstables/mx/writer.cc:790
sstables::mc::writer::~writer() at writer.cc:?
flat_mutation_reader::impl::consumer_adapter<stable_flattened_mutations_consumer<compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer> > >::~consumer_adapter() at compaction.cc:?
 (inlined by) std::_Optional_payload_base<sstables::compaction_writer>::_M_destroy() at /usr/include/c++/10/optional:260
 (inlined by) std::_Optional_payload_base<sstables::compaction_writer>::_M_reset() at /usr/include/c++/10/optional:280
 (inlined by) std::_Optional_payload<sstables::compaction_writer, false, false, false>::~_Optional_payload() at /usr/include/c++/10/optional:401
 (inlined by) std::_Optional_base<sstables::compaction_writer, false, false>::~_Optional_base() at /usr/include/c++/10/optional:474
 (inlined by) std::optional<sstables::compaction_writer>::~optional() at /usr/include/c++/10/optional:659
 (inlined by) sstables::compacting_sstable_writer::~compacting_sstable_writer() at ./sstables/compaction.cc:229
 (inlined by) compact_mutation<(emit_only_live_rows)0, (compact_for_sstables)1, sstables::compacting_sstable_writer, noop_compacted_fragments_consumer>::~compact_mutation() at ././mutation_compactor.hh:468
 (inlined by) compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer>::~compact_for_compaction() at ././mutation_compactor.hh:538
 (inlined by) std::default_delete<compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer> >::operator()(compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer>*) const at /usr/include/c++/10/bits/unique_ptr.h:85
 (inlined by) std::unique_ptr<compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer>, std::default_delete<compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer> > >::~unique_ptr() at /usr/include/c++/10/bits/unique_ptr.h:361
 (inlined by) stable_flattened_mutations_consumer<compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer> >::~stable_flattened_mutations_consumer() at ././mutation_reader.hh:342
 (inlined by) flat_mutation_reader::impl::consumer_adapter<stable_flattened_mutations_consumer<compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer> > >::~consumer_adapter() at ././flat_mutation_reader.hh:201
auto flat_mutation_reader::impl::consume_in_thread<stable_flattened_mutations_consumer<compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer> >, flat_mutation_reader::no_filter>(stable_flattened_mutations_consumer<compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer> >, flat_mutation_reader::no_filter, std::chrono::time_point<seastar::lowres_clock, std::chrono::duration<long, std::ratio<1l, 1000l> > >) at ././flat_mutation_reader.hh:272
 (inlined by) auto flat_mutation_reader::consume_in_thread<stable_flattened_mutations_consumer<compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer> >, flat_mutation_reader::no_filter>(stable_flattened_mutations_consumer<compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer> >, flat_mutation_reader::no_filter, std::chrono::time_point<seastar::lowres_clock, std::chrono::duration<long, std::ratio<1l, 1000l> > >) at ././flat_mutation_reader.hh:383
 (inlined by) auto flat_mutation_reader::consume_in_thread<stable_flattened_mutations_consumer<compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer> > >(stable_flattened_mutations_consumer<compact_for_compaction<sstables::compacting_sstable_writer, noop_compacted_fragments_consumer> >, std::chrono::time_point<seastar::lowres_clock, std::chrono::duration<long, std::ratio<1l, 1000l> > >) at ././flat_mutation_reader.hh:389
 (inlined by) seastar::future<void> sstables::compaction::setup<noop_compacted_fragments_consumer>(noop_compacted_fragments_consumer)::{lambda(flat_mutation_reader)#1}::operator()(flat_mutation_reader)::{lambda()#1}::operator()() at ./sstables/compaction.cc:612
```

What happens here is that:

    compressed_file_data_sink_impl(output_stream<char> out, sstables::compression* cm, sstables::local_compression lc)
            : _out(std::move(out))
            , _compression_metadata(cm)
            , _offsets(_compression_metadata->offsets.get_writer())
            , _compression(lc)
            , _full_checksum(ChecksumType::init_checksum())

_compression_metadata points to a buffer held by the sstable object,
and _compression_metadata->offsets.get_writer() returns a writer that keeps
a reference to the segmented_offsets in the sstables::compression
that is used in the ~writer -> close path.

Fixes #7821

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20201227145726.33319-1-bhalevy@scylladb.com>
(cherry picked from commit 8a745a0ee0)
2021-01-04 15:04:34 +02:00
Avi Kivity
7457683328 Revert "Merge 'Move temporaries to value view' from Piotr S"
This reverts commit d1fa0adcbe. It causes
a regression when processing some bind variables.

Fixes #7761.
2020-12-24 12:40:46 +02:00
Gleb Natapov
567889d283 mutation_writer: pass exceptions through feed_writer
feed_writer() eats the exception and transforms it into an end of stream
instead. Downstream validators hate when this happens.
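The failure mode can be sketched in a few lines (a hypothetical illustration, not the actual mutation_writer code):

```python
# Minimal sketch: a wrapper that swallows exceptions turns an error
# into a silent end-of-stream, so consumers see a truncated stream
# that looks complete.
def feed_writer_buggy(source):
    try:
        for fragment in source:
            yield fragment
    except Exception:
        return  # bug: the error becomes an ordinary EOS

def feed_writer_fixed(source):
    # Let the exception propagate so downstream consumers see the failure.
    for fragment in source:
        yield fragment

def broken_source():
    yield 1
    yield 2
    raise IOError("disk error")

assert list(feed_writer_buggy(broken_source())) == [1, 2]  # error hidden
try:
    list(feed_writer_fixed(broken_source()))
    raised = False
except IOError:
    raised = True
assert raised
```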

Fixes #7482
Message-Id: <20201216090038.GB3244976@scylladb.com>

(cherry picked from commit 61520a33d6)
2020-12-16 17:20:11 +02:00
Aleksandr Bykov
c605ed73bf dist: scylla_util: fix aws_instance.ebs_disks method
The aws_instance.ebs_disks() method should return EBS disks
instead of ephemeral ones.

Signed-off-by: Aleksandr Bykov <alex.bykov@scylladb.com>

Closes #7780

(cherry picked from commit e74dc311e7)
2020-12-16 11:58:47 +02:00
Takuya ASADA
d0530d8ac2 node_exporter_install: stop service before force installing
Stop node-exporter.service before re-installing it, to avoid a 'Text file busy' error.

Fixes #6782

(cherry picked from commit ef05ea8e91)
2020-12-15 16:28:25 +02:00
Hagit Segev
696ef24226 release: prepare for 4.2.2 2020-12-13 20:34:03 +02:00
Avi Kivity
b8fe144301 dist: rpm: uninstall tuned when installing scylla-kernel-conf
tuned 2.11.0-9 and later writes to kernel.sched_wakeup_granularity_ns
and other sysctl tunables that we so laboriously tuned, dropping
performance by a factor of 5 (due to increased latency). Fix by
obsoleting tuned during install (in effect, we are a better tuned,
at least for us).

Not needed for .deb, since Debian/Ubuntu do not install tuned by
default.

Fixes #7696

Closes #7776

(cherry picked from commit 615b8e8184)
2020-12-12 14:30:38 +02:00
Nadav Har'El
62f783be87 alternator: fix broken Scan/Query paging with bytes keys
When an Alternator table has partition keys or sort keys of type "bytes"
(blobs), a Scan or Query which required paging used to fail - we used
an incorrect function to output LastEvaluatedKey (which tells the user
where to continue at the next page), and this incorrect function was
correct for strings and numbers - but NOT for bytes (for bytes, we
need to encode them as base-64).
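The bytes case can be illustrated with a small sketch (hypothetical helper names, not Alternator's actual serialization code):

```python
import base64
import json

def encode_key_attr(attr_type, value):
    # Strings and numbers go into the JSON as-is, but bytes must be
    # base64-encoded ("B" type) to survive JSON transport.
    if attr_type in ("S", "N"):
        return {attr_type: value}
    if attr_type == "B":
        return {"B": base64.b64encode(value).decode("ascii")}
    raise ValueError(f"unsupported type {attr_type}")

# A LastEvaluatedKey with a bytes partition key round-trips through JSON:
last_key = {"p": encode_key_attr("B", b"\x00\xffkey")}
json.dumps(last_key)  # raw bytes here would raise a TypeError
assert base64.b64decode(last_key["p"]["B"]) == b"\x00\xffkey"
```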

This patch also includes two tests - for bytes partition key and
for bytes sort key - that failed before this patch and now pass.
The test test_fetch_from_system_tables also used to fail after a
Limit was added to it, because one of the tables it scans had a bytes
key. That test is also fixed by this patch.

Fixes #7768

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20201207175957.2585456-1-nyh@scylladb.com>
(cherry picked from commit 86779664f4)
2020-12-09 15:16:41 +02:00
Piotr Sarna
863e784951 db: fix getting local ranges for size estimates table
When getting local ranges, an assumption is made that
if a range does not contain an end or when its end is a maximum token,
then it must contain a start. This assumption was proven untrue
during manual tests, so it's now fortified with an additional check.

Here's a gdb output for a set of local ranges which causes an assertion
failure when calling `get_local_ranges` on it:

(gdb) p ranges
$1 = std::vector of length 2, capacity 2 = {{_interval = {_start = std::optional<interval_bound<dht::token>> = {[contained value] = {_value = {_kind = dht::token_kind::before_all_keys,
            _data = 0}, _inclusive = false}}, _end = std::optional<interval_bound<dht::token>> [no contained value], _singular = false}}, {_interval = {
      _start = std::optional<interval_bound<dht::token>> [no contained value], _end = std::optional<interval_bound<dht::token>> = {[contained value] = {_value = {
            _kind = dht::token_kind::before_all_keys, _data = 0}, _inclusive = true}}, _singular = false}}}
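The fortified check can be sketched as follows (hypothetical shape, simplified from the optional-bound token ranges in the gdb dump above): a wrapping range may lack a start, an end, or both, so neither bound may be dereferenced unconditionally.

```python
def split_point(rng):
    # rng is (start, end); each bound is None when absent, mirroring
    # the std::optional<interval_bound> fields shown in the gdb output.
    start, end = rng
    if end is not None:
        return end
    if start is not None:   # the old code assumed this unconditionally
        return start
    return None             # full ring: neither bound present

assert split_point((None, 7)) == 7
assert split_point((3, None)) == 3
assert split_point((None, None)) is None  # no assertion failure
```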

Closes #7764

(cherry picked from commit 1cc4ed50c1)
2020-12-09 15:16:14 +02:00
Nadav Har'El
e5a6199b4d alternator, test: make test_fetch_from_system_tables faster
The test test_fetch_from_system_tables tests Alternator's system-table
feature by reading from all system tables. The intention was to confirm
we don't crash reading any of them - as they have different schemas and
can run into different problems (we had such problems in the initial
implementation). The intention was not to read *a lot* from each table -
we only make a single "Scan" call on each, to read one page of data.
However, the Scan call did not set a Limit, so the single page can get
pretty big.

This is not normally a problem, but in extremely slow runs - such as when
running the debug build on an extremely overcommitted test machine (e.g.,
issue #7706) reading this large page may take longer than our default
timeout. I'll send a separate patch for the timeout issue, but for now,
there is really no reason why we need to read a big page. It is good
enough to just read 50 rows (with Limit=50). This will still read all
the different types and make the test faster.

As an example, in the debug run on my laptop, this test spent 2.4
seconds to read the "compaction_history" table before this patch,
and only 0.1 seconds after this patch. 2.4 seconds is close to our
default timeout (10 seconds), 0.1 is very far.

Fixes #7706

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20201207075112.2548178-1-nyh@scylladb.com>
(cherry picked from commit 220d6dde17)
2020-12-09 15:15:15 +02:00
Nadav Har'El
abaf6c192a alternator: fix query with both projection and filtering
We had a bug when a Query/Scan had both projection (ProjectionExpression
or AttributesToGet) and filtering (FilterExpression or Query/ScanFilter).
The problem was that projection left only the requested attributes, and
the filter might have needed - and not got - additional attributes.

The solution in this patch is to add the generated JSON item also
the extra attributes needed by filtering (if any), run the filter on
that, and only at the end remove the extra filtering attributes from
the item to be returned.

The two tests

 test_query_filter.py::test_query_filter_and_attributes_to_get
 test_filter_expression.py::test_filter_expression_and_projection_expression

which failed before this patch now pass, so we drop their "xfail" tag.

Fixes #6951.

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
(cherry picked from commit 282742a469)
2020-12-09 14:39:17 +02:00
Eliran Sinvani
ef2f5ed434 consistency level: fix wrong quorum calculation when RF = 0
We used to calculate the number of endpoints for quorum and local_quorum
unconditionally as ((rf / 2) + 1). This formula doesn't take into
account the corner case where RF = 0; in this situation the quorum should
also be 0.
This commit adds the missing corner case.
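The corrected formula, as a one-line sketch:

```python
def quorum_for(rf):
    # floor(rf/2) + 1, except that an empty replica set needs zero acks.
    return 0 if rf == 0 else rf // 2 + 1

assert quorum_for(0) == 0  # the corner case this commit fixes
assert quorum_for(1) == 1
assert quorum_for(3) == 2
assert quorum_for(5) == 3
```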

Tests: Unit Tests (dev)
Fixes #6905

Closes #7296

(cherry picked from commit 925cdc9ae1)
2020-11-29 16:45:14 +02:00
Raphael S. Carvalho
bac40e2512 sstable_directory: Fix 50% space requirement for resharding
This is a regression caused by aebd965f0.

After the sstable_directory changes, resharding now waits for all sstables
to be exhausted before releasing reference to them, which prevents their
resources like disk space and fd from being released. Let's restore the
old behavior of incrementally releasing resources, reducing the space
requirement significantly.

Fixes #7463.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20201020140939.118787-1-raphaelsc@scylladb.com>
(cherry picked from commit 6f805bd123)
2020-11-29 15:26:14 +02:00
Asias He
681c4d77bb repair: Make repair_writer a shared pointer
The future of the fiber that writes data into sstables inside
the repair_writer is stored in _writer_done like below:

class repair_writer {
   _writer_done[node_idx] =
      mutation_writer::distribute_reader_and_consume_on_shards().then([this] {
         ...
      }).handle_exception([this] {
         ...
      });
}

The fiber accesses the repair_writer object in the error handling path. We
wait for the _writer_done to finish before we destroy repair_meta
object which contains the repair_writer object to avoid the fiber
accessing already freed repair_writer object.

To be safer, we can make repair_writer a shared pointer and take a
reference in the distribute_reader_and_consume_on_shards code path.

Fixes #7406

Closes #7430

(cherry picked from commit 289a08072a)
2020-11-29 13:30:49 +02:00
Pavel Emelyanov
8572ee9da2 query_pager: Fix continuation handling for noop visitor
Before updating the _last_[cp]key (for subsequent .fetch_page())
the pager checks 'if the pager is not exhausted OR the result
has data'.

The check seems broken: if the pager is not exhausted, but the
result is empty the call for keys will unconditionally try to
reference the last element from empty vector. The not exhausted
condition for empty result can happen if the short_read is set,
which, in turn, unconditionally happens upon meeting partition
end when visiting the partition with result builder.

The correct check should be 'if the pager is not exhausted AND
the result has data': the _last_[pc]key-s should be taken for
continuation (not exhausted), but can only be taken if the result is
not empty (has data).
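The corrected condition can be sketched as (hypothetical names):

```python
def should_take_last_key(exhausted, page_rows):
    # AND, not OR: only dereference page_rows[-1] when the pager can
    # continue AND the page actually produced rows.
    return (not exhausted) and len(page_rows) > 0

# Short read at a partition end: not exhausted, but the page is empty;
# the old OR check would have indexed into an empty vector here.
assert not should_take_last_key(False, [])
assert should_take_last_key(False, ["row1"])
assert not should_take_last_key(True, ["row1"])
```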

fixes: #7263
tests: unit(dev), but tests don't trigger this corner case

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Message-Id: <20200921124329.21209-1-xemul@scylladb.com>
(cherry picked from commit 550fc734d9)
2020-11-29 12:01:37 +02:00
Takuya ASADA
95dbac56e5 install.sh: set PATH for relocatable CLI tools in python thunk
We currently set PATH for relocatable CLI tools in scylla_util.run() and
scylla_util.out(), but that doesn't work for perftune.py, since it's not part of
Scylla and does not use the scylla_util module.
We can set PATH in the python thunk instead, which sets it for all python scripts.

Fixes #7350

(cherry picked from commit 5867af4edd)
2020-11-29 11:54:42 +02:00
Bentsi Magidovich
eeadeff0dc scylla_util.py: fix exception handling in curl
The retry mechanism didn't work when a URLError happened. For example:

  urllib.error.URLError: <urlopen error [Errno 101] Network is unreachable>

Let's catch URLError instead of HTTPError, since URLError is the base exception
for all exceptions in the urllib module.

Fixes: #7569

Closes #7567

(cherry picked from commit 956b97b2a8)
2020-11-29 11:48:30 +02:00
Takuya ASADA
62f3caab18 dist/redhat: packaging dependencies.conf as normal file, not ghost
When we introduced dependencies.conf, we mistakenly added it to the rpm as %ghost,
but it should be a normal file, installed normally on package installation.

Fixes #7703

Closes #7704

(cherry picked from commit ba4d54efa3)
2020-11-29 11:40:22 +02:00
Takuya ASADA
1a4869231a install.sh: apply sysctl.d files on non-packaging installation
We don't apply sysctl.d files on non-packaging installations; apply them
just like the rpm/deb packaging does.

Fixes #7702

Closes #7705

(cherry picked from commit 5f81f97773)
2020-11-29 11:35:37 +02:00
Avi Kivity
3568d0cbb6 dist: sysctl: configure more inotify instances
Since f3bcd4d205 ("Merge 'Support SSL Certificate Hot
Reloading' from Calle"), we reload certificates as they are
modified on disk. This uses inotify, which is limited by a
sysctl fs.inotify.max_user_instances, with a default of 128.

This is enough for 64 shards only, if both rpc and cql are
encrypted; above that startup fails.

Increase to 1200, which is enough for 6 instances * 200 shards.
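The arithmetic behind both limits:

```python
default_limit = 128
instances_per_shard = 2          # one inotify instance each for rpc and cql
assert default_limit // instances_per_shard == 64   # shards before startup fails

new_limit = 6 * 200              # 6 inotify instances * 200 shards
assert new_limit == 1200
```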

Fixes #7700.

Closes #7701

(cherry picked from commit 390e07d591)
2020-11-29 11:04:45 +02:00
Raphael S. Carvalho
030c2e3270 compaction: Make sure a partition is filtered out only by producer
If an interposer consumer is enabled, partition filtering will be done by the
consumer instead. But that's not possible, because only the producer is able
to skip to the next partition when the current one is filtered out, so Scylla
crashes with a bad function call in queue_reader when that happens.
This is a regression which started here: 55a8b6e3c9

To fix this problem, let's make sure that partition filtering will only
happen on the producer side.

Fixes #7590.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20201111221513.312283-1-raphaelsc@scylladb.com>
(cherry picked from commit 13fa2bec4c)
2020-11-19 14:08:25 +02:00
Piotr Dulikowski
37a5e9ab15 hints: don't read hint files when it's not allowed to send
When there are hint files to be sent and the target endpoint is DOWN,
end_point_hints_manager works in the following loop:

- It reads the first hint file in the queue,
- For each hint in the file it decides that it won't be sent because the
  target endpoint is DOWN,
- After realizing that there are some unsent hints, it decides to retry
  this operation after sleeping 1 second.

This causes the first segment to be wholly read over and over again,
with 1 second pauses, until the target endpoint becomes UP or leaves the
cluster. This causes unnecessary I/O load in the streaming scheduling
group.

This patch adds a check which prevents end_point_hints_manager from
reading the first hint file at all when it is not allowed to send hints.

First observed in #6964

Tests:
- unit(dev)
- hinted handoff dtests

Closes #7407

(cherry picked from commit 77a0f1a153)
2020-11-16 14:30:07 +02:00
Botond Dénes
a15b5d514d mutation_reader: queue_reader: don't set EOS flag on abort
If the consumer happens to check the EOS flag before it hits the
exception injected by the abort (by calling fill_buffer()), they can
think the stream ended normally and expect it to be valid. However this
is not guaranteed when the reader is aborted. To avoid consumers falsely
thinking the stream ended normally, don't set the EOS flag on abort at
all.

Additionally make sure the producer is aborted too on abort. In theory
this is not needed as they are the one initiating the abort, but better
to be safe than sorry.

Fixes: #7411
Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <20201102100732.35132-1-bdenes@scylladb.com>
(cherry picked from commit f5323b29d9)
2020-11-15 11:07:38 +02:00
Botond Dénes
064f8f8bcf types: validate(): linearize values lazily
Instead of eagerly linearizing all values as they are passed to
validate(), defer linearization to those validators that actually need
linearized values. Linearizing large values puts pressure on the memory
allocator with large contiguous allocation requests. This is something
we are trying to actively avoid, especially if it is not really needed.
It turns out that the types whose validators really need linearized values are
a minority, as most validators just look at the size of the value, and
some like bytes don't need validation at all, while usually having large
values.

This is achieved by templating the validator struct on the view and
using the FragmentedRange concept to treat all passed in views
(`bytes_view` and `fragmented_temporary_buffer_view`) uniformly.
This patch makes no attempt at converting existing validators to work
with fragmented buffers, only trivial cases are converted. The major
offenders still left are ascii/utf8 and collections.
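The distinction can be sketched with fragmented values (a list of byte chunks): size-only validators never linearize, while content validators join the fragments lazily, only when actually required.

```python
def validate_int32(fragments):
    # Size-only check: no need to copy fragments into one buffer.
    return sum(len(f) for f in fragments) == 4

def validate_utf8(fragments):
    # Content check: linearize lazily, only here where it's needed.
    try:
        b"".join(fragments).decode("utf-8")
        return True
    except UnicodeDecodeError:
        return False

assert validate_int32([b"\x00\x00", b"\x00\x01"])
assert not validate_int32([b"\x00"])
assert validate_utf8([b"\xc3", b"\xa9"])   # one code point split in two
assert not validate_utf8([b"\xff"])
```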

Fixes: #7318

Tests: unit(dev)
Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <20201007054524.909420-1-bdenes@scylladb.com>
(cherry picked from commit db56ae695c)
2020-11-11 10:55:54 +02:00
Amnon Heiman
04fe0a7395 scyllatop/livedata.py: Safe iteration over metrics
This patch changes the code that iterates over the metrics to use a copy
of the metrics names to make it safe to remove the metrics from the
metrics object.

Fixes #7488

Signed-off-by: Amnon Heiman <amnon@scylladb.com>
(cherry picked from commit 52db99f25f)
2020-11-08 19:16:13 +02:00
Calle Wilund
ef26d90868 partition_version: Change range_tombstones() to return chunked_vector
Refs #7364

The number of tombstones can be large. As a stopgap measure to
just returning a source range (with keepalive), we can at least
alleviate the problem by using a chunked vector.

Closes #7433

(cherry picked from commit 4b65d67a1a)
2020-11-08 14:38:32 +02:00
Tomasz Grabiec
790f51c210 sstables: ka/la: Fix abort when next_partition() is called with certain reader state
Cleanup compaction is using consume_pausable_in_thread() to skip over
disowned partitions, which uses flat_mutation_reader::next_partition().

The implementation of next_partition() for the sstable reader has a
bug which may cause the following assertion failure:

  scylla: sstables/mp_row_consumer.hh:422: row_consumer::proceed sstables::mp_row_consumer_k_l::flush(): Assertion `!_ready' failed.

This happens when the sstable reader's buffer gets full when we reach
the partition end. The last fragment of the partition won't be pushed
into the buffer but will stay in the _ready variable. When
next_partition() is called in this state, _ready will not be cleared
and the fragment will be carried over to the next partition. This will
cause assertion failure when the reader attempts to emit the first
fragment of the next partition.

The fix is to clear _ready when entering a partition, just like we
clear _range_tombstones there.
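A minimal sketch of the fix (a hypothetical Python mirror of the C++ consumer state, not the actual mp_row_consumer):

```python
class RowConsumer:
    def __init__(self):
        self._ready = None           # fragment not yet pushed to the buffer
        self._range_tombstones = []

    def on_next_partition(self):
        self._range_tombstones = []  # was already cleared here
        self._ready = None           # the fix: drop the held-back fragment

consumer = RowConsumer()
consumer._ready = "last fragment of previous partition"
consumer.on_next_partition()
assert consumer._ready is None       # nothing carried into the new partition
```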

Fixes #7553.
Message-Id: <1604534702-12777-1-git-send-email-tgrabiec@scylladb.com>

(cherry picked from commit fb9b5cae05)
2020-11-08 14:25:47 +02:00
Yaron Kaikov
4fb8ebccff release: prepare for 4.2.1 2020-11-08 12:41:06 +02:00
Avi Kivity
d1fa0adcbe Merge 'Move temporaries to value view' from Piotr S
"
Issue https://github.com/scylladb/scylla/issues/7019 describes a problem of an ever-growing map of temporary values stored in query_options. In order to mitigate this kind of problems, the storage for temporary values is moved from an external data structure to the value views itself. This way, the temporary lives only as long as it's accessible and is automatically destroyed once a request finishes. The downside is that each temporary is now allocated separately, while previously they were bundled in a single byte stream.

Tests: unit(dev)
Fixes https://github.com/scylladb/scylla/issues/7019
"

7055297649 ("cql3: remove query_options::linearize and _temporaries")
is reverted from this backport since linearize() is still used in
this branch.

* psarna-move_temporaries_to_value_view:
  cql3: remove query_options::linearize and _temporaries
  cql3: remove make_temporary helper function
  cql3: store temporaries in-place instead of in query_options
  cql3: add temporary_value to value view
  cql3: allow moving data out of raw_value
  cql3: split values.hh into a .cc file

(cherry picked from commit 2b308a973f)
2020-11-05 19:24:23 +02:00
Piotr Sarna
46b56d885e schema_tables: fix fixing old secondary index schemas
Old secondary index schemas did not have their idx_token column
marked as computed, and there already exists code which updates
them. Unfortunately, the fix itself contains an error and doesn't
fire if computed columns are not yet supported by the whole cluster,
which is a very common situation during upgrades.

Fixes #7515

Closes #7516

(cherry picked from commit b66c285f94)
2020-11-05 17:53:08 +02:00
64 changed files with 1392 additions and 318 deletions


@@ -1,7 +1,7 @@
#!/bin/sh
PRODUCT=scylla
VERSION=4.2.0
VERSION=4.2.4
if test -f version
then


@@ -159,23 +159,40 @@ static bool check_NE(const rjson::value* v1, const rjson::value& v2) {
}
// Check if two JSON-encoded values match with the BEGINS_WITH relation
static bool check_BEGINS_WITH(const rjson::value* v1, const rjson::value& v2) {
// BEGINS_WITH requires that its single operand (v2) be a string or
// binary - otherwise it's a validation error. However, problems with
// the stored attribute (v1) will just return false (no match).
if (!v2.IsObject() || v2.MemberCount() != 1) {
throw api_error("ValidationException", format("BEGINS_WITH operator encountered malformed AttributeValue: {}", v2));
}
auto it2 = v2.MemberBegin();
if (it2->name != "S" && it2->name != "B") {
throw api_error("ValidationException", format("BEGINS_WITH operator requires String or Binary type in AttributeValue, got {}", it2->name));
}
bool check_BEGINS_WITH(const rjson::value* v1, const rjson::value& v2,
bool v1_from_query, bool v2_from_query) {
bool bad = false;
if (!v1 || !v1->IsObject() || v1->MemberCount() != 1) {
if (v1_from_query) {
throw api_error("ValidationException", "begins_with() encountered malformed argument");
} else {
bad = true;
}
} else if (v1->MemberBegin()->name != "S" && v1->MemberBegin()->name != "B") {
if (v1_from_query) {
throw api_error("ValidationException", format("begins_with supports only string or binary type, got: {}", *v1));
} else {
bad = true;
}
}
if (!v2.IsObject() || v2.MemberCount() != 1) {
if (v2_from_query) {
throw api_error("ValidationException", "begins_with() encountered malformed argument");
} else {
bad = true;
}
} else if (v2.MemberBegin()->name != "S" && v2.MemberBegin()->name != "B") {
if (v2_from_query) {
throw api_error("ValidationException", format("begins_with() supports only string or binary type, got: {}", v2));
} else {
bad = true;
}
}
if (bad) {
return false;
}
auto it1 = v1->MemberBegin();
auto it2 = v2.MemberBegin();
if (it1->name != it2->name) {
return false;
}
@@ -279,24 +296,38 @@ static bool check_NOT_NULL(const rjson::value* val) {
return val != nullptr;
}
// Only types S, N or B (string, number or bytes) may be compared by the
// various comparison operators - lt, le, gt, ge, and between.
static bool check_comparable_type(const rjson::value& v) {
if (!v.IsObject() || v.MemberCount() != 1) {
return false;
}
const rjson::value& type = v.MemberBegin()->name;
return type == "S" || type == "N" || type == "B";
}
// Check if two JSON-encoded values match with cmp.
template <typename Comparator>
bool check_compare(const rjson::value* v1, const rjson::value& v2, const Comparator& cmp) {
if (!v2.IsObject() || v2.MemberCount() != 1) {
throw api_error("ValidationException",
format("{} requires a single AttributeValue of type String, Number, or Binary",
cmp.diagnostic));
bool check_compare(const rjson::value* v1, const rjson::value& v2, const Comparator& cmp,
bool v1_from_query, bool v2_from_query) {
bool bad = false;
if (!v1 || !check_comparable_type(*v1)) {
if (v1_from_query) {
throw api_error("ValidationException", format("{} allow only the types String, Number, or Binary", cmp.diagnostic));
}
bad = true;
}
const auto& kv2 = *v2.MemberBegin();
if (kv2.name != "S" && kv2.name != "N" && kv2.name != "B") {
throw api_error("ValidationException",
format("{} requires a single AttributeValue of type String, Number, or Binary",
cmp.diagnostic));
if (!check_comparable_type(v2)) {
if (v2_from_query) {
throw api_error("ValidationException", format("{} allow only the types String, Number, or Binary", cmp.diagnostic));
}
bad = true;
}
if (!v1 || !v1->IsObject() || v1->MemberCount() != 1) {
if (bad) {
return false;
}
const auto& kv1 = *v1->MemberBegin();
const auto& kv2 = *v2.MemberBegin();
if (kv1.name != kv2.name) {
return false;
}
@@ -310,7 +341,8 @@ bool check_compare(const rjson::value* v1, const rjson::value& v2, const Compara
if (kv1.name == "B") {
return cmp(base64_decode(kv1.value), base64_decode(kv2.value));
}
clogger.error("check_compare panic: LHS type equals RHS type, but one is in {N,S,B} while the other isn't");
// cannot reach here, as check_comparable_type() verifies the type is one
// of the above options.
return false;
}
@@ -341,57 +373,71 @@ struct cmp_gt {
static constexpr const char* diagnostic = "GT operator";
};
// True if v is between lb and ub, inclusive. Throws if lb > ub.
// True if v is between lb and ub, inclusive. Throws or returns false
// (depending on bounds_from_query parameter) if lb > ub.
template <typename T>
static bool check_BETWEEN(const T& v, const T& lb, const T& ub) {
static bool check_BETWEEN(const T& v, const T& lb, const T& ub, bool bounds_from_query) {
if (cmp_lt()(ub, lb)) {
throw api_error("ValidationException",
format("BETWEEN operator requires lower_bound <= upper_bound, but {} > {}", lb, ub));
if (bounds_from_query) {
throw api_error("ValidationException",
format("BETWEEN operator requires lower_bound <= upper_bound, but {} > {}", lb, ub));
} else {
return false;
}
}
return cmp_ge()(v, lb) && cmp_le()(v, ub);
}
static bool check_BETWEEN(const rjson::value* v, const rjson::value& lb, const rjson::value& ub) {
if (!v) {
static bool check_BETWEEN(const rjson::value* v, const rjson::value& lb, const rjson::value& ub,
bool v_from_query, bool lb_from_query, bool ub_from_query) {
if ((v && v_from_query && !check_comparable_type(*v)) ||
(lb_from_query && !check_comparable_type(lb)) ||
(ub_from_query && !check_comparable_type(ub))) {
throw api_error("ValidationException", "between allow only the types String, Number, or Binary");
}
if (!v || !v->IsObject() || v->MemberCount() != 1 ||
!lb.IsObject() || lb.MemberCount() != 1 ||
!ub.IsObject() || ub.MemberCount() != 1) {
return false;
}
if (!v->IsObject() || v->MemberCount() != 1) {
throw api_error("ValidationException", format("BETWEEN operator encountered malformed AttributeValue: {}", *v));
}
if (!lb.IsObject() || lb.MemberCount() != 1) {
throw api_error("ValidationException", format("BETWEEN operator encountered malformed AttributeValue: {}", lb));
}
if (!ub.IsObject() || ub.MemberCount() != 1) {
throw api_error("ValidationException", format("BETWEEN operator encountered malformed AttributeValue: {}", ub));
}
const auto& kv_v = *v->MemberBegin();
const auto& kv_lb = *lb.MemberBegin();
const auto& kv_ub = *ub.MemberBegin();
bool bounds_from_query = lb_from_query && ub_from_query;
if (kv_lb.name != kv_ub.name) {
throw api_error(
"ValidationException",
if (bounds_from_query) {
throw api_error("ValidationException",
format("BETWEEN operator requires the same type for lower and upper bound; instead got {} and {}",
kv_lb.name, kv_ub.name));
} else {
return false;
}
}
if (kv_v.name != kv_lb.name) { // Cannot compare different types, so v is NOT between lb and ub.
return false;
}
if (kv_v.name == "N") {
const char* diag = "BETWEEN operator";
return check_BETWEEN(unwrap_number(*v, diag), unwrap_number(lb, diag), unwrap_number(ub, diag));
return check_BETWEEN(unwrap_number(*v, diag), unwrap_number(lb, diag), unwrap_number(ub, diag), bounds_from_query);
}
if (kv_v.name == "S") {
return check_BETWEEN(std::string_view(kv_v.value.GetString(), kv_v.value.GetStringLength()),
std::string_view(kv_lb.value.GetString(), kv_lb.value.GetStringLength()),
std::string_view(kv_ub.value.GetString(), kv_ub.value.GetStringLength()));
std::string_view(kv_ub.value.GetString(), kv_ub.value.GetStringLength()),
bounds_from_query);
}
if (kv_v.name == "B") {
return check_BETWEEN(base64_decode(kv_v.value), base64_decode(kv_lb.value), base64_decode(kv_ub.value));
return check_BETWEEN(base64_decode(kv_v.value), base64_decode(kv_lb.value), base64_decode(kv_ub.value), bounds_from_query);
}
throw api_error("ValidationException",
format("BETWEEN operator requires AttributeValueList elements to be of type String, Number, or Binary; instead got {}",
if (v_from_query) {
throw api_error("ValidationException",
format("BETWEEN operator requires AttributeValueList elements to be of type String, Number, or Binary; instead got {}",
kv_lb.name));
} else {
return false;
}
}
// Verify one Expect condition on one attribute (whose content is "got")
@@ -438,19 +484,19 @@ static bool verify_expected_one(const rjson::value& condition, const rjson::valu
return check_NE(got, (*attribute_value_list)[0]);
case comparison_operator_type::LT:
verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
return check_compare(got, (*attribute_value_list)[0], cmp_lt{});
return check_compare(got, (*attribute_value_list)[0], cmp_lt{}, false, true);
case comparison_operator_type::LE:
verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
return check_compare(got, (*attribute_value_list)[0], cmp_le{});
return check_compare(got, (*attribute_value_list)[0], cmp_le{}, false, true);
case comparison_operator_type::GT:
verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
return check_compare(got, (*attribute_value_list)[0], cmp_gt{});
return check_compare(got, (*attribute_value_list)[0], cmp_gt{}, false, true);
case comparison_operator_type::GE:
verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
return check_compare(got, (*attribute_value_list)[0], cmp_ge{});
return check_compare(got, (*attribute_value_list)[0], cmp_ge{}, false, true);
case comparison_operator_type::BEGINS_WITH:
verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
return check_BEGINS_WITH(got, (*attribute_value_list)[0]);
return check_BEGINS_WITH(got, (*attribute_value_list)[0], false, true);
case comparison_operator_type::IN:
verify_operand_count(attribute_value_list, nonempty(), *comparison_operator);
return check_IN(got, *attribute_value_list);
@@ -462,7 +508,8 @@ static bool verify_expected_one(const rjson::value& condition, const rjson::valu
return check_NOT_NULL(got);
case comparison_operator_type::BETWEEN:
verify_operand_count(attribute_value_list, exact_size(2), *comparison_operator);
return check_BETWEEN(got, (*attribute_value_list)[0], (*attribute_value_list)[1]);
return check_BETWEEN(got, (*attribute_value_list)[0], (*attribute_value_list)[1],
false, true, true);
case comparison_operator_type::CONTAINS:
{
verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
@@ -574,7 +621,8 @@ static bool calculate_primitive_condition(const parsed::primitive_condition& con
// Shouldn't happen unless we have a bug in the parser
throw std::logic_error(format("Wrong number of values {} in BETWEEN primitive_condition", cond._values.size()));
}
return check_BETWEEN(&calculated_values[0], calculated_values[1], calculated_values[2]);
return check_BETWEEN(&calculated_values[0], calculated_values[1], calculated_values[2],
cond._values[0].is_constant(), cond._values[1].is_constant(), cond._values[2].is_constant());
case parsed::primitive_condition::type::IN:
return check_IN(calculated_values);
case parsed::primitive_condition::type::VALUE:
@@ -605,13 +653,17 @@ static bool calculate_primitive_condition(const parsed::primitive_condition& con
case parsed::primitive_condition::type::NE:
return check_NE(&calculated_values[0], calculated_values[1]);
case parsed::primitive_condition::type::GT:
return check_compare(&calculated_values[0], calculated_values[1], cmp_gt{});
return check_compare(&calculated_values[0], calculated_values[1], cmp_gt{},
cond._values[0].is_constant(), cond._values[1].is_constant());
case parsed::primitive_condition::type::GE:
return check_compare(&calculated_values[0], calculated_values[1], cmp_ge{});
return check_compare(&calculated_values[0], calculated_values[1], cmp_ge{},
cond._values[0].is_constant(), cond._values[1].is_constant());
case parsed::primitive_condition::type::LT:
return check_compare(&calculated_values[0], calculated_values[1], cmp_lt{});
return check_compare(&calculated_values[0], calculated_values[1], cmp_lt{},
cond._values[0].is_constant(), cond._values[1].is_constant());
case parsed::primitive_condition::type::LE:
return check_compare(&calculated_values[0], calculated_values[1], cmp_le{});
return check_compare(&calculated_values[0], calculated_values[1], cmp_le{},
cond._values[0].is_constant(), cond._values[1].is_constant());
default:
// Shouldn't happen unless we have a bug in the parser
throw std::logic_error(format("Unknown type {} in primitive_condition object", (int)(cond._op)));


@@ -52,6 +52,7 @@ bool verify_expected(const rjson::value& req, const rjson::value* previous_item)
bool verify_condition(const rjson::value& condition, bool require_all, const rjson::value* previous_item);
bool check_CONTAINS(const rjson::value* v1, const rjson::value& v2);
bool check_BEGINS_WITH(const rjson::value* v1, const rjson::value& v2, bool v1_from_query, bool v2_from_query);
bool verify_condition_expression(
const parsed::condition_expression& condition_expression,


@@ -1795,7 +1795,8 @@ static std::string get_item_type_string(const rjson::value& v) {
// calculate_attrs_to_get() takes either AttributesToGet or
// ProjectionExpression parameters (having both is *not* allowed),
// and returns the list of cells we need to read.
// and returns the list of cells we need to read, or an empty set when
// *all* attributes are to be returned.
// In our current implementation, only top-level attributes are stored
// as cells, and nested documents are stored serialized as JSON.
// So this function currently returns only the top-level attributes
@@ -2133,19 +2134,30 @@ update_item_operation::apply(std::unique_ptr<rjson::value> previous_item, api::t
rjson::value v1 = calculate_value(base, calculate_value_caller::UpdateExpression, previous_item.get());
rjson::value v2 = calculate_value(addition, calculate_value_caller::UpdateExpression, previous_item.get());
rjson::value result;
std::string v1_type = get_item_type_string(v1);
if (v1_type == "N") {
if (get_item_type_string(v2) != "N") {
throw api_error("ValidationException", format("Incorrect operand type for operator or function. Expected {}: {}", v1_type, rjson::print(v2)));
// An ADD can be used to create a new attribute (when
// v1.IsNull()) or to add to a pre-existing attribute:
if (v1.IsNull()) {
std::string v2_type = get_item_type_string(v2);
if (v2_type == "N" || v2_type == "SS" || v2_type == "NS" || v2_type == "BS") {
result = v2;
} else {
throw api_error("ValidationException", format("An operand in the update expression has an incorrect data type: {}", v2));
}
result = number_add(v1, v2);
} else if (v1_type == "SS" || v1_type == "NS" || v1_type == "BS") {
if (get_item_type_string(v2) != v1_type) {
throw api_error("ValidationException", format("Incorrect operand type for operator or function. Expected {}: {}", v1_type, rjson::print(v2)));
}
result = set_sum(v1, v2);
} else {
throw api_error("ValidationException", format("An operand in the update expression has an incorrect data type: {}", v1));
std::string v1_type = get_item_type_string(v1);
if (v1_type == "N") {
if (get_item_type_string(v2) != "N") {
throw api_error("ValidationException", format("Incorrect operand type for operator or function. Expected {}: {}", v1_type, rjson::print(v2)));
}
result = number_add(v1, v2);
} else if (v1_type == "SS" || v1_type == "NS" || v1_type == "BS") {
if (get_item_type_string(v2) != v1_type) {
throw api_error("ValidationException", format("Incorrect operand type for operator or function. Expected {}: {}", v1_type, rjson::print(v2)));
}
result = set_sum(v1, v2);
} else {
throw api_error("ValidationException", format("An operand in the update expression has an incorrect data type: {}", v1));
}
}
do_update(to_bytes(column_name), result);
},
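The rewritten ADD branch above distinguishes two cases: creating a new attribute (only numbers and sets are accepted) and adding to an existing one (numeric addition for N, set union for SS/NS/BS). A minimal Python sketch of these type rules, under the assumption of DynamoDB-style single-key value dicts; `add_update` is an illustrative name, not Alternator's actual API:

```python
def add_update(v1, v2):
    """Sketch of DynamoDB-style ADD semantics. v1 is the existing
    attribute value (None if absent), v2 the operand; both are
    single-key dicts like {"N": "1"} or {"SS": ["a"]}."""
    if v1 is None:
        # Creating a new attribute: only numbers and sets are allowed.
        t2 = next(iter(v2))
        if t2 not in ("N", "SS", "NS", "BS"):
            raise ValueError("incorrect data type for ADD: %s" % t2)
        return v2
    t1 = next(iter(v1))
    if t1 == "N":
        if next(iter(v2)) != "N":
            raise ValueError("expected operand of type N")
        return {"N": str(float(v1["N"]) + float(v2["N"]))}
    if t1 in ("SS", "NS", "BS"):
        if next(iter(v2)) != t1:
            raise ValueError("expected operand of type %s" % t1)
        return {t1: sorted(set(v1[t1]) | set(v2[t1]))}
    raise ValueError("incorrect data type for ADD: %s" % t1)
```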
@@ -2459,6 +2471,10 @@ public:
std::unordered_set<std::string>& used_attribute_values);
bool check(const rjson::value& item) const;
bool filters_on(std::string_view attribute) const;
// for_filters_on() runs the given function on the attributes that the
// filter works on. It may run for the same attribute more than once if
// the attribute is used more than once in the filter.
void for_filters_on(const noncopyable_function<void(std::string_view)>& func) const;
operator bool() const { return bool(_imp); }
};
@@ -2539,10 +2555,26 @@ bool filter::filters_on(std::string_view attribute) const {
}, *_imp);
}
void filter::for_filters_on(const noncopyable_function<void(std::string_view)>& func) const {
if (_imp) {
std::visit(overloaded_functor {
[&] (const conditions_filter& f) -> void {
for (auto it = f.conditions.MemberBegin(); it != f.conditions.MemberEnd(); ++it) {
func(rjson::to_string_view(it->name));
}
},
[&] (const expression_filter& f) -> void {
return for_condition_expression_on(f.expression, func);
}
}, *_imp);
}
}
class describe_items_visitor {
typedef std::vector<const column_definition*> columns_t;
const columns_t& _columns;
const std::unordered_set<std::string>& _attrs_to_get;
std::unordered_set<std::string> _extra_filter_attrs;
const filter& _filter;
typename columns_t::const_iterator _column_it;
rjson::value _item;
@@ -2558,7 +2590,20 @@ public:
, _item(rjson::empty_object())
, _items(rjson::empty_array())
, _scanned_count(0)
{ }
{
// _filter.check() may need additional attributes not listed in
// _attrs_to_get (i.e., not requested as part of the output).
// We list those in _extra_filter_attrs. We will include them in
// the JSON but take them out before finally returning the JSON.
if (!_attrs_to_get.empty()) {
_filter.for_filters_on([&] (std::string_view attr) {
std::string a(attr); // no heterogeneous map searches :-(
if (!_attrs_to_get.contains(a)) {
_extra_filter_attrs.emplace(std::move(a));
}
});
}
}
void start_row() {
_column_it = _columns.begin();
@@ -2572,7 +2617,7 @@ public:
result_bytes_view->with_linearized([this] (bytes_view bv) {
std::string column_name = (*_column_it)->name_as_text();
if (column_name != executor::ATTRS_COLUMN_NAME) {
if (_attrs_to_get.empty() || _attrs_to_get.count(column_name) > 0) {
if (_attrs_to_get.empty() || _attrs_to_get.contains(column_name) || _extra_filter_attrs.contains(column_name)) {
if (!_item.HasMember(column_name.c_str())) {
rjson::set_with_string_name(_item, column_name, rjson::empty_object());
}
@@ -2584,7 +2629,7 @@ public:
auto keys_and_values = value_cast<map_type_impl::native_type>(deserialized);
for (auto entry : keys_and_values) {
std::string attr_name = value_cast<sstring>(entry.first);
if (_attrs_to_get.empty() || _attrs_to_get.count(attr_name) > 0) {
if (_attrs_to_get.empty() || _attrs_to_get.contains(attr_name) || _extra_filter_attrs.contains(attr_name)) {
bytes value = value_cast<bytes>(entry.second);
rjson::set_with_string_name(_item, attr_name, deserialize_item(value));
}
@@ -2596,6 +2641,11 @@ public:
void end_row() {
if (_filter.check(_item)) {
// Remove the extra attributes _extra_filter_attrs which we had
// to add just for the filter, and not requested to be returned:
for (const auto& attr : _extra_filter_attrs) {
rjson::remove_member(_item, attr);
}
rjson::push_back(_items, std::move(_item));
}
_item = rjson::empty_object();
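The mechanism added in the two hunks above — read extra attributes for the filter's benefit, then strip them before returning the row — can be sketched in Python; the helper names are hypothetical, mirroring the constructor and end_row() logic:

```python
def compute_extra_filter_attrs(attrs_to_get, filter_attrs):
    """Attributes the filter needs but the caller did not request.
    An empty attrs_to_get means "return everything", so nothing
    extra is needed in that case."""
    if not attrs_to_get:
        return set()
    return {a for a in filter_attrs if a not in attrs_to_get}

def finish_row(item, passes_filter, extra_filter_attrs):
    """Mirror of end_row(): drop the extra attributes from a row
    that passed the filter, before it is returned to the caller."""
    if not passes_filter(item):
        return None
    for a in extra_filter_attrs:
        item.pop(a, None)
    return item
```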
@@ -2630,7 +2680,7 @@ static rjson::value encode_paging_state(const schema& schema, const service::pag
for (const column_definition& cdef : schema.partition_key_columns()) {
rjson::set_with_string_name(last_evaluated_key, std::string_view(cdef.name_as_text()), rjson::empty_object());
rjson::value& key_entry = last_evaluated_key[cdef.name_as_text()];
rjson::set_with_string_name(key_entry, type_to_string(cdef.type), rjson::parse(to_json_string(*cdef.type, *exploded_pk_it)));
rjson::set_with_string_name(key_entry, type_to_string(cdef.type), json_key_column_value(*exploded_pk_it, cdef));
++exploded_pk_it;
}
auto ck = paging_state.get_clustering_key();
@@ -2640,7 +2690,7 @@ static rjson::value encode_paging_state(const schema& schema, const service::pag
for (const column_definition& cdef : schema.clustering_key_columns()) {
rjson::set_with_string_name(last_evaluated_key, std::string_view(cdef.name_as_text()), rjson::empty_object());
rjson::value& key_entry = last_evaluated_key[cdef.name_as_text()];
rjson::set_with_string_name(key_entry, type_to_string(cdef.type), rjson::parse(to_json_string(*cdef.type, *exploded_ck_it)));
rjson::set_with_string_name(key_entry, type_to_string(cdef.type), json_key_column_value(*exploded_ck_it, cdef));
++exploded_ck_it;
}
}


@@ -348,6 +348,39 @@ bool condition_expression_on(const parsed::condition_expression& ce, std::string
}, ce._expression);
}
// for_condition_expression_on() runs a given function over all the attributes
// mentioned in the expression. If the same attribute is mentioned more than
// once, the function will be called more than once for the same attribute.
static void for_value_on(const parsed::value& v, const noncopyable_function<void(std::string_view)>& func) {
std::visit(overloaded_functor {
[&] (const parsed::constant& c) { },
[&] (const parsed::value::function_call& f) {
for (const parsed::value& value : f._parameters) {
for_value_on(value, func);
}
},
[&] (const parsed::path& p) {
func(p.root());
}
}, v._value);
}
void for_condition_expression_on(const parsed::condition_expression& ce, const noncopyable_function<void(std::string_view)>& func) {
std::visit(overloaded_functor {
[&] (const parsed::primitive_condition& cond) {
for (const parsed::value& value : cond._values) {
for_value_on(value, func);
}
},
[&] (const parsed::condition_expression::condition_list& list) {
for (const parsed::condition_expression& cond : list.conditions) {
for_condition_expression_on(cond, func);
}
}
}, ce._expression);
}
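The two visitors above recurse mutually: values may contain function calls whose parameters are values, and condition expressions may contain lists of sub-expressions. A Python sketch of the same traversal, modeling parsed nodes (hypothetically) as tagged tuples:

```python
def for_value_on(value, func):
    """Visit attribute names used in a parsed value. Values are
    modeled as ("const", x), ("path", name) or ("call", [params])."""
    kind = value[0]
    if kind == "call":
        for p in value[1]:
            for_value_on(p, func)
    elif kind == "path":
        func(value[1])
    # "const" mentions no attributes.

def for_condition_expression_on(expr, func):
    """Visit every attribute an expression mentions; duplicates are
    reported once per mention, as in the C++ version."""
    kind = expr[0]
    if kind == "cond":      # primitive condition with operand values
        for v in expr[1]:
            for_value_on(v, func)
    else:                   # "list": a list of sub-expressions
        for sub in expr[1]:
            for_condition_expression_on(sub, func)
```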
// The following calculate_value() functions calculate, or evaluate, a parsed
// expression. The parsed expression is assumed to have been "resolved", with
// the matching resolve_* function.
@@ -570,52 +603,8 @@ std::unordered_map<std::string_view, function_handler_type*> function_handlers {
}
rjson::value v1 = calculate_value(f._parameters[0], caller, previous_item);
rjson::value v2 = calculate_value(f._parameters[1], caller, previous_item);
// TODO: There's duplication here with check_BEGINS_WITH().
// But unfortunately, the two functions differ a bit.
// If one of v1 or v2 is malformed or has an unsupported type
// (not B or S), what we do depends on whether it came from
// the user's query (is_constant()), or the item. Unsupported
// values in the query result in an error, but if they are in
// the item, we silently return false (no match).
bool bad = false;
if (!v1.IsObject() || v1.MemberCount() != 1) {
bad = true;
if (f._parameters[0].is_constant()) {
throw api_error("ValidationException", format("{}: begins_with() encountered malformed AttributeValue: {}", caller, v1));
}
} else if (v1.MemberBegin()->name != "S" && v1.MemberBegin()->name != "B") {
bad = true;
if (f._parameters[0].is_constant()) {
throw api_error("ValidationException", format("{}: begins_with() supports only string or binary in AttributeValue: {}", caller, v1));
}
}
if (!v2.IsObject() || v2.MemberCount() != 1) {
bad = true;
if (f._parameters[1].is_constant()) {
throw api_error("ValidationException", format("{}: begins_with() encountered malformed AttributeValue: {}", caller, v2));
}
} else if (v2.MemberBegin()->name != "S" && v2.MemberBegin()->name != "B") {
bad = true;
if (f._parameters[1].is_constant()) {
throw api_error("ValidationException", format("{}: begins_with() supports only string or binary in AttributeValue: {}", caller, v2));
}
}
bool ret = false;
if (!bad) {
auto it1 = v1.MemberBegin();
auto it2 = v2.MemberBegin();
if (it1->name == it2->name) {
if (it2->name == "S") {
std::string_view val1 = rjson::to_string_view(it1->value);
std::string_view val2 = rjson::to_string_view(it2->value);
ret = val1.starts_with(val2);
} else /* it2->name == "B" */ {
ret = base64_begins_with(rjson::to_string_view(it1->value), rjson::to_string_view(it2->value));
}
}
}
return to_bool_json(ret);
return to_bool_json(check_BEGINS_WITH(v1.IsNull() ? nullptr : &v1, v2,
f._parameters[0].is_constant(), f._parameters[1].is_constant()));
}
},
{"contains", [] (calculate_value_caller caller, const rjson::value* previous_item, const parsed::value::function_call& f) {

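The duplicated logic removed above is folded into the shared check_BEGINS_WITH(), whose key subtlety is the comment's "bad" handling: a malformed or non-S/B operand raises an error only when it came from the user's query, but silently yields no match when it came from the stored item. A hedged Python sketch of that behavior (representation and names are illustrative):

```python
import base64

def check_begins_with(v1, v2, v1_from_query, v2_from_query):
    """Sketch of check_BEGINS_WITH() semantics: operands are
    single-key dicts like {"S": "abc"} or {"B": <base64>}."""
    def validate(v, from_query):
        # Return the type tag, or None if the value is unusable.
        if not (isinstance(v, dict) and len(v) == 1):
            if from_query:
                raise ValueError("malformed AttributeValue: %r" % (v,))
            return None
        t = next(iter(v))
        if t not in ("S", "B"):
            if from_query:
                raise ValueError("only string or binary supported: %r" % (v,))
            return None
        return t
    t1 = validate(v1, v1_from_query)
    t2 = validate(v2, v2_from_query)
    if t1 is None or t1 != t2:
        return False
    if t1 == "S":
        return v1["S"].startswith(v2["S"])
    # B values are base64-encoded; compare the decoded bytes.
    return base64.b64decode(v1["B"]).startswith(base64.b64decode(v2["B"]))
```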

@@ -27,6 +27,8 @@
#include <unordered_set>
#include <string_view>
#include <seastar/util/noncopyable_function.hh>
#include "expressions_types.hh"
#include "rjson.hh"
@@ -59,6 +61,11 @@ void validate_value(const rjson::value& v, const char* caller);
bool condition_expression_on(const parsed::condition_expression& ce, std::string_view attribute);
// for_condition_expression_on() runs the given function on the attributes
// that the expression uses. It may run for the same attribute more than once
// if the same attribute is used more than once in the expression.
void for_condition_expression_on(const parsed::condition_expression& ce, const noncopyable_function<void(std::string_view)>& func);
// calculate_value() behaves slightly different (especially, different
// functions supported) when used in different types of expressions, as
// enumerated in this enum:


@@ -650,7 +650,7 @@ void set_column_family(http_context& ctx, routes& r) {
cf::get_bloom_filter_disk_space_used.set(r, [&ctx] (std::unique_ptr<request> req) {
return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (column_family& cf) {
return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
return sst->filter_size();
return s + sst->filter_size();
});
}, std::plus<uint64_t>());
});
@@ -658,7 +658,7 @@ void set_column_family(http_context& ctx, routes& r) {
cf::get_all_bloom_filter_disk_space_used.set(r, [&ctx] (std::unique_ptr<request> req) {
return map_reduce_cf(ctx, uint64_t(0), [] (column_family& cf) {
return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
return sst->filter_size();
return s + sst->filter_size();
});
}, std::plus<uint64_t>());
});
@@ -666,7 +666,7 @@ void set_column_family(http_context& ctx, routes& r) {
cf::get_bloom_filter_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) {
return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (column_family& cf) {
return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
return sst->filter_memory_size();
return s + sst->filter_memory_size();
});
}, std::plus<uint64_t>());
});
@@ -674,7 +674,7 @@ void set_column_family(http_context& ctx, routes& r) {
cf::get_all_bloom_filter_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) {
return map_reduce_cf(ctx, uint64_t(0), [] (column_family& cf) {
return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
return sst->filter_memory_size();
return s + sst->filter_memory_size();
});
}, std::plus<uint64_t>());
});
@@ -682,7 +682,7 @@ void set_column_family(http_context& ctx, routes& r) {
cf::get_index_summary_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) {
return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (column_family& cf) {
return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
return sst->get_summary().memory_footprint();
return s + sst->get_summary().memory_footprint();
});
}, std::plus<uint64_t>());
});
@@ -690,7 +690,7 @@ void set_column_family(http_context& ctx, routes& r) {
cf::get_all_index_summary_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) {
return map_reduce_cf(ctx, uint64_t(0), [] (column_family& cf) {
return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
return sst->get_summary().memory_footprint();
return s + sst->get_summary().memory_footprint();
});
}, std::plus<uint64_t>());
});

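Each fix above is the same classic fold bug: the combining lambda passed to std::accumulate ignored its accumulator `s`, so the "sum" collapsed to the last sstable's value. A Python analogue using functools.reduce shows the difference:

```python
from functools import reduce

sizes = [10, 20, 30]

# Buggy: the accumulator s is ignored, so the fold
# returns only the last element's value.
buggy = reduce(lambda s, x: x, sizes, 0)

# Fixed: the accumulator is carried through, yielding the sum.
fixed = reduce(lambda s, x: s + x, sizes, 0)
```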

@@ -59,6 +59,7 @@
#include "db/timeout_clock.hh"
#include "db/consistency_level_validations.hh"
#include "database.hh"
#include "test/lib/select_statement_utils.hh"
#include <boost/algorithm/cxx11/any_of.hpp>
bool is_system_keyspace(const sstring& name);
@@ -67,6 +68,8 @@ namespace cql3 {
namespace statements {
static constexpr int DEFAULT_INTERNAL_PAGING_SIZE = select_statement::DEFAULT_COUNT_PAGE_SIZE;
thread_local int internal_paging_size = DEFAULT_INTERNAL_PAGING_SIZE;
thread_local const lw_shared_ptr<const select_statement::parameters> select_statement::_default_parameters = make_lw_shared<select_statement::parameters>();
select_statement::parameters::parameters()
@@ -333,7 +336,7 @@ select_statement::do_execute(service::storage_proxy& proxy,
const bool aggregate = _selection->is_aggregate() || has_group_by();
const bool nonpaged_filtering = restrictions_need_filtering && page_size <= 0;
if (aggregate || nonpaged_filtering) {
page_size = DEFAULT_COUNT_PAGE_SIZE;
page_size = internal_paging_size;
}
auto key_ranges = _restrictions->get_partition_key_ranges(options);
@@ -530,13 +533,29 @@ indexed_table_select_statement::do_execute_base_query(
if (old_paging_state && concurrency == 1) {
auto base_pk = generate_base_key_from_index_pk<partition_key>(old_paging_state->get_partition_key(),
old_paging_state->get_clustering_key(), *_schema, *_view_schema);
auto row_ranges = command->slice.default_row_ranges();
if (old_paging_state->get_clustering_key() && _schema->clustering_key_size() > 0) {
auto base_ck = generate_base_key_from_index_pk<clustering_key>(old_paging_state->get_partition_key(),
old_paging_state->get_clustering_key(), *_schema, *_view_schema);
command->slice.set_range(*_schema, base_pk,
std::vector<query::clustering_range>{query::clustering_range::make_starting_with(range_bound<clustering_key>(base_ck, false))});
query::trim_clustering_row_ranges_to(*_schema, row_ranges, base_ck, false);
command->slice.set_range(*_schema, base_pk, row_ranges);
} else {
command->slice.set_range(*_schema, base_pk, std::vector<query::clustering_range>{query::clustering_range::make_open_ended_both_sides()});
// There is no clustering key in old_paging_state and/or no clustering key in
// _schema, therefore read an entire partition (whole clustering range).
//
// The only exception to applying no restrictions on clustering key
// is a case when we have a secondary index on the first column
// of clustering key. In such a case we should not read the
// entire clustering range - only a range in which first column
// of clustering key has the correct value.
//
// This means that we should not set an open_ended_both_sides
// clustering range on base_pk, but instead intersect it with
// _row_ranges (which contains the restrictions necessary for the
// case described above). The result of such an intersection is just
// _row_ranges, which we explicitly set on base_pk.
command->slice.set_range(*_schema, base_pk, row_ranges);
}
}
concurrency *= 2;
@@ -974,12 +993,16 @@ indexed_table_select_statement::do_execute(service::storage_proxy& proxy,
const bool aggregate = _selection->is_aggregate() || has_group_by();
if (aggregate) {
const bool restrictions_need_filtering = _restrictions->need_filtering();
return do_with(cql3::selection::result_set_builder(*_selection, now, options.get_cql_serialization_format()), std::make_unique<cql3::query_options>(cql3::query_options(options)),
return do_with(cql3::selection::result_set_builder(*_selection, now, options.get_cql_serialization_format(), *_group_by_cell_indices), std::make_unique<cql3::query_options>(cql3::query_options(options)),
[this, &options, &proxy, &state, now, whole_partitions, partition_slices, restrictions_need_filtering] (cql3::selection::result_set_builder& builder, std::unique_ptr<cql3::query_options>& internal_options) {
// page size is set to the internal count page size, regardless of the user-provided value
internal_options.reset(new cql3::query_options(std::move(internal_options), options.get_paging_state(), DEFAULT_COUNT_PAGE_SIZE));
internal_options.reset(new cql3::query_options(std::move(internal_options), options.get_paging_state(), internal_paging_size));
return repeat([this, &builder, &options, &internal_options, &proxy, &state, now, whole_partitions, partition_slices, restrictions_need_filtering] () {
auto consume_results = [this, &builder, &options, &internal_options, restrictions_need_filtering] (foreign_ptr<lw_shared_ptr<query::result>> results, lw_shared_ptr<query::read_command> cmd) {
auto consume_results = [this, &builder, &options, &internal_options, &proxy, &state, restrictions_need_filtering] (foreign_ptr<lw_shared_ptr<query::result>> results, lw_shared_ptr<query::read_command> cmd, lw_shared_ptr<const service::pager::paging_state> paging_state) {
if (paging_state) {
paging_state = generate_view_paging_state_from_base_query_results(paging_state, results, proxy, state, options);
}
internal_options.reset(new cql3::query_options(std::move(internal_options), paging_state ? make_lw_shared<service::pager::paging_state>(*paging_state) : nullptr));
if (restrictions_need_filtering) {
_stats.filtered_rows_read_total += *results->row_count();
query::result_view::consume(*results, cmd->slice, cql3::selection::result_set_builder::visitor(builder, *_schema, *_selection,
@@ -987,24 +1010,24 @@ indexed_table_select_statement::do_execute(service::storage_proxy& proxy,
} else {
query::result_view::consume(*results, cmd->slice, cql3::selection::result_set_builder::visitor(builder, *_schema, *_selection));
}
bool has_more_pages = paging_state && paging_state->get_remaining() > 0;
return stop_iteration(!has_more_pages);
};
if (whole_partitions || partition_slices) {
return find_index_partition_ranges(proxy, state, *internal_options).then(
[this, now, &state, &internal_options, &proxy, consume_results = std::move(consume_results)] (dht::partition_range_vector partition_ranges, lw_shared_ptr<const service::pager::paging_state> paging_state) {
bool has_more_pages = paging_state && paging_state->get_remaining() > 0;
internal_options.reset(new cql3::query_options(std::move(internal_options), paging_state ? make_lw_shared<service::pager::paging_state>(*paging_state) : nullptr));
return do_execute_base_query(proxy, std::move(partition_ranges), state, *internal_options, now, std::move(paging_state)).then(consume_results).then([has_more_pages] {
return stop_iteration(!has_more_pages);
return do_execute_base_query(proxy, std::move(partition_ranges), state, *internal_options, now, paging_state)
.then([paging_state, consume_results = std::move(consume_results)](foreign_ptr<lw_shared_ptr<query::result>> results, lw_shared_ptr<query::read_command> cmd) {
return consume_results(std::move(results), std::move(cmd), std::move(paging_state));
});
});
} else {
return find_index_clustering_rows(proxy, state, *internal_options).then(
[this, now, &state, &internal_options, &proxy, consume_results = std::move(consume_results)] (std::vector<primary_key> primary_keys, lw_shared_ptr<const service::pager::paging_state> paging_state) {
bool has_more_pages = paging_state && paging_state->get_remaining() > 0;
internal_options.reset(new cql3::query_options(std::move(internal_options), paging_state ? make_lw_shared<service::pager::paging_state>(*paging_state) : nullptr));
return this->do_execute_base_query(proxy, std::move(primary_keys), state, *internal_options, now, std::move(paging_state)).then(consume_results).then([has_more_pages] {
return stop_iteration(!has_more_pages);
return this->do_execute_base_query(proxy, std::move(primary_keys), state, *internal_options, now, paging_state)
.then([paging_state, consume_results = std::move(consume_results)](foreign_ptr<lw_shared_ptr<query::result>> results, lw_shared_ptr<query::read_command> cmd) {
return consume_results(std::move(results), std::move(cmd), std::move(paging_state));
});
});
}
@@ -1661,6 +1684,16 @@ std::vector<size_t> select_statement::prepare_group_by(const schema& schema, sel
}
future<> set_internal_paging_size(int paging_size) {
return seastar::smp::invoke_on_all([paging_size] {
internal_paging_size = paging_size;
});
}
future<> reset_internal_paging_size() {
return set_internal_paging_size(DEFAULT_INTERNAL_PAGING_SIZE);
}
}
namespace util {


@@ -767,7 +767,7 @@ future<> database::drop_column_family(const sstring& ks_name, const sstring& cf_
remove(*cf);
cf->clear_views();
auto& ks = find_keyspace(ks_name);
return when_all_succeed(cf->await_pending_writes(), cf->await_pending_reads()).then_unpack([this, &ks, cf, tsf = std::move(tsf), snapshot] {
return cf->await_pending_ops().then([this, &ks, cf, tsf = std::move(tsf), snapshot] {
return truncate(ks, *cf, std::move(tsf), snapshot).finally([this, cf] {
return cf->stop();
});


@@ -541,6 +541,8 @@ private:
utils::phased_barrier _pending_reads_phaser;
// Corresponding phaser for in-progress streams
utils::phased_barrier _pending_streams_phaser;
// Corresponding phaser for in-progress flushes
utils::phased_barrier _pending_flushes_phaser;
// This field caches the last truncation time for the table.
// The master copy resides in the system.truncated table
@@ -986,6 +988,14 @@ public:
return _pending_streams_phaser.advance_and_await();
}
future<> await_pending_flushes() {
return _pending_flushes_phaser.advance_and_await();
}
future<> await_pending_ops() {
return when_all(await_pending_reads(), await_pending_writes(), await_pending_streams(), await_pending_flushes()).discard_result();
}
void add_or_update_view(view_ptr v);
void remove_view(view_ptr v);
void clear_views();

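The new await_pending_ops() above combines the four phased barriers with when_all, completing only when reads, writes, streams, and flushes have all drained. A rough asyncio analogue of that pattern, with a toy Barrier stand-in (not Seastar's API):

```python
import asyncio

class Barrier:
    """Toy stand-in for a phased barrier."""
    def __init__(self, name, log):
        self.name, self.log = name, log
    async def wait(self):
        self.log.append(self.name)

async def await_pending_ops(*barriers):
    """Wait for all in-progress operation classes concurrently;
    completes when every barrier does."""
    await asyncio.gather(*(b.wait() for b in barriers))

log = []
asyncio.run(await_pending_ops(Barrier("reads", log), Barrier("writes", log),
                              Barrier("streams", log), Barrier("flushes", log)))
```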

@@ -61,7 +61,8 @@ namespace db {
logging::logger cl_logger("consistency");
size_t quorum_for(const keyspace& ks) {
return (ks.get_replication_strategy().get_replication_factor() / 2) + 1;
size_t replication_factor = ks.get_replication_strategy().get_replication_factor();
return replication_factor ? (replication_factor / 2) + 1 : 0;
}
size_t local_quorum_for(const keyspace& ks, const sstring& dc) {
@@ -72,8 +73,8 @@ size_t local_quorum_for(const keyspace& ks, const sstring& dc) {
if (rs.get_type() == replication_strategy_type::network_topology) {
const network_topology_strategy* nrs =
static_cast<const network_topology_strategy*>(&rs);
return (nrs->get_replication_factor(dc) / 2) + 1;
size_t replication_factor = nrs->get_replication_factor(dc);
return replication_factor ? (replication_factor / 2) + 1 : 0;
}
return quorum_for(ks);

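The guard added above keeps a zero replication factor from producing a nonsensical quorum of one. A one-line Python sketch of the guarded computation:

```python
def quorum_for(replication_factor):
    """Quorum size with the zero-RF guard: a keyspace with no
    replicas needs no acknowledgements at all."""
    return replication_factor // 2 + 1 if replication_factor else 0
```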

@@ -817,7 +817,7 @@ void manager::end_point_hints_manager::sender::send_hints_maybe() noexcept {
int replayed_segments_count = 0;
try {
while (replay_allowed() && have_segments()) {
while (replay_allowed() && have_segments() && can_send()) {
if (!send_one_file(*_segments_to_replay.begin())) {
break;
}


@@ -113,7 +113,7 @@ future<> cql_table_large_data_handler::record_large_cells(const sstables::sstabl
auto ck_str = key_to_str(*clustering_key, s);
return try_record("cell", sst, partition_key, int64_t(cell_size), cell_type, format("{} {}", ck_str, column_name), extra_fields, ck_str, column_name);
} else {
return try_record("cell", sst, partition_key, int64_t(cell_size), cell_type, column_name, extra_fields, nullptr, column_name);
return try_record("cell", sst, partition_key, int64_t(cell_size), cell_type, column_name, extra_fields, data_value::make_null(utf8_type), column_name);
}
}
@@ -125,7 +125,7 @@ future<> cql_table_large_data_handler::record_large_rows(const sstables::sstable
std::string ck_str = key_to_str(*clustering_key, s);
return try_record("row", sst, partition_key, int64_t(row_size), "row", ck_str, extra_fields, ck_str);
} else {
return try_record("row", sst, partition_key, int64_t(row_size), "static row", "", extra_fields, nullptr);
return try_record("row", sst, partition_key, int64_t(row_size), "static row", "", extra_fields, data_value::make_null(utf8_type));
}
}


@@ -111,27 +111,12 @@ public:
return make_ready_future<>();
}
future<> maybe_delete_large_data_entries(const schema& s, sstring filename, uint64_t data_size) {
future<> maybe_delete_large_data_entries(const schema& /*s*/, sstring /*filename*/, uint64_t /*data_size*/) {
assert(running());
future<> large_partitions = make_ready_future<>();
if (__builtin_expect(data_size > _partition_threshold_bytes, false)) {
large_partitions = with_sem([&s, filename, this] () mutable {
return delete_large_data_entries(s, std::move(filename), db::system_keyspace::LARGE_PARTITIONS);
});
}
future<> large_rows = make_ready_future<>();
if (__builtin_expect(data_size > _row_threshold_bytes, false)) {
large_rows = with_sem([&s, filename, this] () mutable {
return delete_large_data_entries(s, std::move(filename), db::system_keyspace::LARGE_ROWS);
});
}
future<> large_cells = make_ready_future<>();
if (__builtin_expect(data_size > _cell_threshold_bytes, false)) {
large_cells = with_sem([&s, filename, this] () mutable {
return delete_large_data_entries(s, std::move(filename), db::system_keyspace::LARGE_CELLS);
});
}
return when_all(std::move(large_partitions), std::move(large_rows), std::move(large_cells)).discard_result();
// Deletion of large data entries is disabled due to #7668
// They will eventually expire based on the 30-day TTL.
return make_ready_future<>();
}
const large_data_handler::stats& stats() const { return _stats; }


@@ -2925,10 +2925,6 @@ future<> maybe_update_legacy_secondary_index_mv_schema(service::migration_manage
// format, where "token" is not marked as computed. Once we're sure that all indexes have their
// columns marked as computed (because they were either created on a node that supports computed
// columns or were fixed by this utility function), it's safe to remove this function altogether.
if (!db.features().cluster_supports_computed_columns()) {
return make_ready_future<>();
}
if (v->clustering_key_size() == 0) {
return make_ready_future<>();
}


@@ -201,10 +201,10 @@ static future<std::vector<token_range>> get_local_ranges(database& db) {
// All queries will be on that table, where all entries are text and there's no notion of
token ranges from the CQL point of view.
auto left_inf = boost::find_if(ranges, [] (auto&& r) {
return !r.start() || r.start()->value() == dht::minimum_token();
return r.end() && (!r.start() || r.start()->value() == dht::minimum_token());
});
auto right_inf = boost::find_if(ranges, [] (auto&& r) {
return !r.end() || r.start()->value() == dht::maximum_token();
return r.start() && (!r.end() || r.end()->value() == dht::maximum_token());
});
if (left_inf != right_inf && left_inf != ranges.end() && right_inf != ranges.end()) {
local_ranges.push_back(token_range{to_bytes(right_inf->start()), to_bytes(left_inf->end())});

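The corrected predicates above find the range that is unbounded on the left (but bounded on the right) and the one unbounded on the right (but bounded on the left), then stitch them into a single wraparound range. A Python sketch of that logic, assuming ranges as (start, end) tuples with None for an unbounded side; names and sentinels are illustrative:

```python
def merge_wraparound(ranges, min_token="MIN", max_token="MAX"):
    """Find the left-infinite and right-infinite ranges and merge
    them into one wraparound range, as get_local_ranges() does.
    Returns None if there is nothing to merge."""
    left_inf = next((r for r in ranges
                     if r[1] is not None
                     and (r[0] is None or r[0] == min_token)), None)
    right_inf = next((r for r in ranges
                      if r[0] is not None
                      and (r[1] is None or r[1] == max_token)), None)
    if left_inf and right_inf and left_inf != right_inf:
        return (right_inf[0], left_inf[1])
    return None
```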

@@ -42,6 +42,11 @@ if __name__ == '__main__':
if node_exporter_p.exists() or (bindir_p() / 'prometheus-node_exporter').exists():
if force:
print('node_exporter already installed, reinstalling')
try:
node_exporter = systemd_unit('node-exporter.service')
node_exporter.stop()
except:
pass
else:
print('node_exporter already installed, you can use `--force` to force reinstallation')
sys.exit(1)


@@ -98,7 +98,7 @@ def curl(url, byte=False):
return res.read()
else:
return res.read().decode('utf-8')
except urllib.error.HTTPError:
except urllib.error.URLError:
logging.warn("Failed to grab %s..." % url)
time.sleep(5)
retries += 1
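The fix above widens the handler from HTTPError to its base class URLError, so connection-level failures (DNS errors, refused connections) are also retried instead of escaping the loop. A hedged sketch of the pattern; `fetch_with_retries` and its injectable `opener` are illustrative, not the script's actual interface:

```python
import urllib.error
import urllib.request

def fetch_with_retries(url, retries=3, opener=urllib.request.urlopen):
    """Retry on any URLError; HTTPError is a subclass of URLError,
    so both HTTP status errors and connection failures are caught."""
    for _ in range(retries):
        try:
            with opener(url) as res:
                return res.read()
        except urllib.error.URLError:
            continue
    return None
```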
@@ -133,6 +133,8 @@ class aws_instance:
raise Exception("found more than one disk mounted at root: {}".format(root_dev_candidates))
root_dev = root_dev_candidates[0].device
if root_dev == '/dev/root':
root_dev = run('findmnt -n -o SOURCE /', shell=True, check=True, capture_output=True, encoding='utf-8').stdout.strip()
nvmes_present = list(filter(nvme_re.match, os.listdir("/dev")))
return {"root": [ root_dev ], "ephemeral": [ x for x in nvmes_present if not root_dev.startswith(os.path.join("/dev/", x)) ] }
@@ -221,7 +223,7 @@ class aws_instance:
def ebs_disks(self):
"""Returns all EBS disks"""
return set(self._disks["ephemeral"])
return set(self._disks["ebs"])
def public_ipv4(self):
"""Returns the public IPv4 address of this instance"""
@@ -329,9 +331,7 @@ class scylla_cpuinfo:
return len(self._cpu_data["system"])
# When a CLI tool is not installed, use relocatable CLI tool provided by Scylla
scylla_env = os.environ.copy()
scylla_env['PATH'] = '{}:{}'.format(scyllabindir(), scylla_env['PATH'])
def run(cmd, shell=False, silent=False, exception=True):
stdout = subprocess.DEVNULL if silent else None


@@ -0,0 +1,4 @@
# allocate enough inotify instances for large machines
# each tls instance needs 1 inotify instance, and there can be
# multiple tls instances per shard.
fs.inotify.max_user_instances = 1200


@@ -11,6 +11,7 @@ else
sysctl -p/usr/lib/sysctl.d/99-scylla-sched.conf || :
sysctl -p/usr/lib/sysctl.d/99-scylla-aio.conf || :
sysctl -p/usr/lib/sysctl.d/99-scylla-vm.conf || :
sysctl -p/usr/lib/sysctl.d/99-scylla-inotify.conf || :
fi
#DEBHELPER#


@@ -129,10 +129,9 @@ rm -rf $RPM_BUILD_ROOT
%attr(0755,scylla,scylla) %dir %{_sharedstatedir}/scylla-housekeeping
%ghost /etc/systemd/system/scylla-helper.slice.d/
%ghost /etc/systemd/system/scylla-helper.slice.d/memory.conf
%ghost /etc/systemd/system/scylla-server.service.d/
%ghost /etc/systemd/system/scylla-server.service.d/capabilities.conf
%ghost /etc/systemd/system/scylla-server.service.d/mounts.conf
%ghost /etc/systemd/system/scylla-server.service.d/dependencies.conf
/etc/systemd/system/scylla-server.service.d/dependencies.conf
%ghost /etc/systemd/system/var-lib-systemd-coredump.mount
%ghost /etc/systemd/system/scylla-cpupower.service
@@ -189,6 +188,8 @@ Summary: Scylla configuration package for the Linux kernel
License: AGPLv3
URL: http://www.scylladb.com/
Requires: kmod
# tuned overwrites our sysctl settings
Obsoletes: tuned
%description kernel-conf
This package contains Linux kernel configuration changes for the Scylla database. Install this package
@@ -200,6 +201,7 @@ if Scylla is the main application on your server and you wish to optimize its la
/usr/lib/systemd/systemd-sysctl 99-scylla-sched.conf >/dev/null 2>&1 || :
/usr/lib/systemd/systemd-sysctl 99-scylla-aio.conf >/dev/null 2>&1 || :
/usr/lib/systemd/systemd-sysctl 99-scylla-vm.conf >/dev/null 2>&1 || :
/usr/lib/systemd/systemd-sysctl 99-scylla-inotify.conf >/dev/null 2>&1 || :
%files kernel-conf
%defattr(-,root,root)


@@ -144,7 +144,7 @@ DEBIAN_SSL_CERT_FILE="/etc/ssl/certs/ca-certificates.crt"
if [ -f "\${DEBIAN_SSL_CERT_FILE}" ]; then
c=\${DEBIAN_SSL_CERT_FILE}
fi
PYTHONPATH="\${d}:\${d}/libexec:\$PYTHONPATH" PATH="\${d}/$pythonpath:\${PATH}" SSL_CERT_FILE="\${c}" exec -a "\$0" "\${d}/libexec/\${b}" "\$@"
PYTHONPATH="\${d}:\${d}/libexec:\$PYTHONPATH" PATH="\${d}/../bin:\${d}/$pythonpath:\${PATH}" SSL_CERT_FILE="\${c}" exec -a "\$0" "\${d}/libexec/\${b}" "\$@"
EOF
chmod +x "$install"
}
@@ -379,5 +379,9 @@ elif ! $packaging; then
chown -R scylla:scylla $rdata
chown -R scylla:scylla $rhkdata
for file in dist/common/sysctl.d/*.conf; do
bn=$(basename "$file")
sysctl -p "$rusr"/lib/sysctl.d/"$bn"
done
$rprefix/scripts/scylla_post_install.sh
fi


@@ -2048,11 +2048,13 @@ public:
}
}
void abort(std::exception_ptr ep) {
_end_of_stream = true;
_ex = std::move(ep);
if (_full) {
_full->set_exception(_ex);
_full.reset();
} else if (_not_full) {
_not_full->set_exception(_ex);
_not_full.reset();
}
}
};
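The `abort()` added above follows a common pattern for bounded queues: record the error, mark end-of-stream, and fail whichever side is currently waiting. A minimal synchronous Python sketch of the same idea (hypothetical class, not the Scylla queue-reader API):

```python
class BoundedQueue:
    """Producer/consumer queue whose abort() makes both sides observe the error."""
    def __init__(self, capacity=1):
        self._items = []
        self._capacity = capacity
        self._end_of_stream = False
        self._ex = None

    def push(self, item):
        if self._ex:
            raise self._ex                  # producer observes the abort
        if len(self._items) >= self._capacity:
            raise RuntimeError("queue full")
        self._items.append(item)

    def pop(self):
        if self._items:
            return self._items.pop(0)       # drain buffered items first
        if self._ex:
            raise self._ex                  # consumer observes the abort
        if self._end_of_stream:
            raise StopIteration
        raise RuntimeError("queue empty")

    def abort(self, ex):
        # Mirror the C++ hunk: set end-of-stream and stash the exception
        # so both the "full" and "not full" waiters fail with it.
        self._end_of_stream = True
        self._ex = ex
```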


@@ -36,8 +36,14 @@ future<> feed_writer(flat_mutation_reader&& rd, Writer&& wr) {
auto f2 = rd.is_buffer_empty() ? rd.fill_buffer(db::no_timeout) : make_ready_future<>();
return when_all_succeed(std::move(f1), std::move(f2)).discard_result();
});
}).finally([&wr] {
return wr.consume_end_of_stream();
}).then_wrapped([&wr] (future<> f) {
if (f.failed()) {
auto ex = f.get_exception();
wr.abort(ex);
return make_exception_future<>(ex);
} else {
return wr.consume_end_of_stream();
}
});
});
}
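The `then_wrapped` change above replaces a blanket `finally` with explicit failure handling: on error the writer is aborted and the exception rethrown; only on success does `consume_end_of_stream` run. A rough synchronous Python equivalent, with a hypothetical writer interface:

```python
def feed_writer(read_all, writer):
    """Drive `writer` from the fragments yielded by read_all(); on failure
    abort the writer instead of calling consume_end_of_stream as cleanup."""
    try:
        for fragment in read_all():
            writer.consume(fragment)
    except Exception as ex:
        writer.abort(ex)    # propagate the error into the writer...
        raise               # ...and to the caller, as in the C++ hunk
    return writer.consume_end_of_stream()
```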


@@ -57,6 +57,9 @@ class shard_based_splitting_mutation_writer {
}
return std::move(_consume_fut);
}
void abort(std::exception_ptr ep) {
_handle.abort(ep);
}
};
private:
@@ -108,6 +111,13 @@ public:
return shard->consume_end_of_stream();
});
}
void abort(std::exception_ptr ep) {
for (auto&& shard : _shards) {
if (shard) {
shard->abort(ep);
}
}
}
};
future<> segregate_by_shard(flat_mutation_reader producer, reader_consumer consumer) {


@@ -144,6 +144,9 @@ class timestamp_based_splitting_mutation_writer {
}
return std::move(_consume_fut);
}
void abort(std::exception_ptr ep) {
_handle.abort(ep);
}
};
private:
@@ -186,6 +189,11 @@ public:
return bucket.second.consume_end_of_stream();
});
}
void abort(std::exception_ptr ep) {
for (auto&& b : _buckets) {
b.second.abort(ep);
}
}
};
future<> timestamp_based_splitting_mutation_writer::write_to_bucket(bucket_id bucket, mutation_fragment&& mf) {


@@ -640,12 +640,12 @@ partition_snapshot_ptr partition_entry::read(logalloc::region& r,
return partition_snapshot_ptr(std::move(snp));
}
std::vector<range_tombstone>
partition_snapshot::range_tombstone_result
partition_snapshot::range_tombstones(position_in_partition_view start, position_in_partition_view end)
{
partition_version* v = &*version();
if (!v->next()) {
return boost::copy_range<std::vector<range_tombstone>>(
return boost::copy_range<range_tombstone_result>(
v->partition().row_tombstones().slice(*_schema, start, end));
}
range_tombstone_list list(*_schema);
@@ -655,10 +655,10 @@ partition_snapshot::range_tombstones(position_in_partition_view start, position_
}
v = v->next();
}
return boost::copy_range<std::vector<range_tombstone>>(list.slice(*_schema, start, end));
return boost::copy_range<range_tombstone_result>(list.slice(*_schema, start, end));
}
std::vector<range_tombstone>
partition_snapshot::range_tombstone_result
partition_snapshot::range_tombstones()
{
return range_tombstones(


@@ -26,6 +26,7 @@
#include "utils/anchorless_list.hh"
#include "utils/logalloc.hh"
#include "utils/coroutine.hh"
#include "utils/chunked_vector.hh"
#include <boost/intrusive/parent_from_member.hpp>
#include <boost/intrusive/slist.hpp>
@@ -400,10 +401,13 @@ public:
::static_row static_row(bool digest_requested) const;
bool static_row_continuous() const;
mutation_partition squashed() const;
using range_tombstone_result = utils::chunked_vector<range_tombstone>;
// Returns range tombstones overlapping with [start, end)
std::vector<range_tombstone> range_tombstones(position_in_partition_view start, position_in_partition_view end);
range_tombstone_result range_tombstones(position_in_partition_view start, position_in_partition_view end);
// Returns all range tombstones
std::vector<range_tombstone> range_tombstones();
range_tombstone_result range_tombstones();
};
class partition_snapshot_ptr {


@@ -460,7 +460,7 @@ public:
}
};
class repair_writer {
class repair_writer : public enable_lw_shared_from_this<repair_writer> {
schema_ptr _schema;
uint64_t _estimated_partitions;
size_t _nr_peer_nodes;
@@ -517,6 +517,7 @@ public:
auto [queue_reader, queue_handle] = make_queue_reader(_schema);
_mq[node_idx] = std::move(queue_handle);
table& t = db.local().find_column_family(_schema->id());
auto writer = shared_from_this();
_writer_done[node_idx] = mutation_writer::distribute_reader_and_consume_on_shards(_schema, std::move(queue_reader),
[&db, reason = this->_reason, estimated_partitions = this->_estimated_partitions] (flat_mutation_reader reader) {
auto& t = db.local().find_column_family(reader.schema());
@@ -546,13 +547,13 @@ public:
return consumer(std::move(reader));
});
},
t.stream_in_progress()).then([this, node_idx] (uint64_t partitions) {
t.stream_in_progress()).then([node_idx, writer] (uint64_t partitions) {
rlogger.debug("repair_writer: keyspace={}, table={}, managed to write partitions={} to sstable",
_schema->ks_name(), _schema->cf_name(), partitions);
}).handle_exception([this, node_idx] (std::exception_ptr ep) {
writer->_schema->ks_name(), writer->_schema->cf_name(), partitions);
}).handle_exception([node_idx, writer] (std::exception_ptr ep) {
rlogger.warn("repair_writer: keyspace={}, table={}, multishard_writer failed: {}",
_schema->ks_name(), _schema->cf_name(), ep);
_mq[node_idx]->abort(ep);
writer->_schema->ks_name(), writer->_schema->cf_name(), ep);
writer->_mq[node_idx]->abort(ep);
return make_exception_future<>(std::move(ep));
});
}
@@ -655,7 +656,7 @@ private:
size_t _nr_peer_nodes= 1;
repair_stats _stats;
repair_reader _repair_reader;
repair_writer _repair_writer;
lw_shared_ptr<repair_writer> _repair_writer;
// Contains rows read from disk
std::list<repair_row> _row_buf;
// Contains rows we are working on to sync between peers
@@ -736,7 +737,7 @@ public:
_seed,
repair_reader::is_local_reader(_repair_master || _same_sharding_config)
)
, _repair_writer(_schema, _estimated_partitions, _nr_peer_nodes, _reason)
, _repair_writer(make_lw_shared<repair_writer>(_schema, _estimated_partitions, _nr_peer_nodes, _reason))
, _sink_source_for_get_full_row_hashes(_repair_meta_id, _nr_peer_nodes,
[] (uint32_t repair_meta_id, netw::messaging_service::msg_addr addr) {
return netw::get_local_messaging_service().make_sink_and_source_for_repair_get_full_row_hashes_with_rpc_stream(repair_meta_id, addr);
@@ -759,7 +760,7 @@ public:
auto f2 = _sink_source_for_get_row_diff.close();
auto f3 = _sink_source_for_put_row_diff.close();
return when_all_succeed(std::move(gate_future), std::move(f1), std::move(f2), std::move(f3)).discard_result().finally([this] {
return _repair_writer.wait_for_writer_done();
return _repair_writer->wait_for_writer_done();
});
}
@@ -1241,8 +1242,8 @@ private:
future<> do_apply_rows(std::list<repair_row>&& row_diff, unsigned node_idx, update_working_row_buf update_buf) {
return do_with(std::move(row_diff), [this, node_idx, update_buf] (std::list<repair_row>& row_diff) {
return with_semaphore(_repair_writer.sem(), 1, [this, node_idx, update_buf, &row_diff] {
_repair_writer.create_writer(_db, node_idx);
return with_semaphore(_repair_writer->sem(), 1, [this, node_idx, update_buf, &row_diff] {
_repair_writer->create_writer(_db, node_idx);
return repeat([this, node_idx, update_buf, &row_diff] () mutable {
if (row_diff.empty()) {
return make_ready_future<stop_iteration>(stop_iteration::yes);
@@ -1256,7 +1257,7 @@ private:
// to_repair_rows_list above where the repair_row is created.
mutation_fragment mf = std::move(r.get_mutation_fragment());
auto dk_with_hash = r.get_dk_with_hash();
return _repair_writer.do_write(node_idx, std::move(dk_with_hash), std::move(mf)).then([&row_diff] {
return _repair_writer->do_write(node_idx, std::move(dk_with_hash), std::move(mf)).then([&row_diff] {
row_diff.pop_front();
return make_ready_future<stop_iteration>(stop_iteration::no);
});


@@ -1272,7 +1272,9 @@ flat_mutation_reader cache_entry::read(row_cache& rc, read_context& reader, row_
// Assumes reader is in the corresponding partition
flat_mutation_reader cache_entry::do_read(row_cache& rc, read_context& reader) {
auto snp = _pe.read(rc._tracker.region(), rc._tracker.cleaner(), _schema, &rc._tracker, reader.phase());
auto ckr = query::clustering_key_filter_ranges::get_ranges(*_schema, reader.slice(), _key.key());
auto ckr = with_linearized_managed_bytes([&] {
return query::clustering_key_filter_ranges::get_ranges(*_schema, reader.slice(), _key.key());
});
auto r = make_cache_flat_mutation_reader(_schema, _key, std::move(ckr), rc, reader.shared_from_this(), std::move(snp));
r.upgrade_schema(rc.schema());
r.upgrade_schema(reader.schema());


@@ -827,7 +827,7 @@ std::ostream& schema::describe(std::ostream& os) const {
os << "}";
os << "\n AND comment = '" << comment()<< "'";
os << "\n AND compaction = {'class': '" << sstables::compaction_strategy::name(compaction_strategy()) << "'";
map_as_cql_param(os, compaction_strategy_options()) << "}";
map_as_cql_param(os, compaction_strategy_options(), false) << "}";
os << "\n AND compression = {";
map_as_cql_param(os, get_compressor_params().get_options());
os << "}";
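The fix above passes `first=false` so `map_as_cql_param` emits a comma before `compaction_strategy_options`, since the `'class'` entry has already opened the map literal. The comma logic, sketched in Python (a hypothetical helper mirroring only the `first` parameter's role):

```python
def map_as_cql_param(options, first=True):
    """Render dict entries as CQL map items, prefixing a comma unless
    these are the first entries in the map literal."""
    body = ", ".join("'%s': '%s'" % (k, v) for k, v in options.items())
    if not first and body:
        body = ", " + body      # the comma that was missing in issue #7741
    return body
```

With `first=False`, appending the result after `'class': 'SizeTieredCompactionStrategy'` yields a map literal that CQL can parse back.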

Submodule seastar updated: f760efe0a0...0fba7da929


@@ -347,7 +347,7 @@ public:
_max = _max - row_count;
_exhausted = (row_count < page_size && !results->is_short_read()) || _max == 0;
if (!_exhausted || row_count > 0) {
if (!_exhausted && row_count > 0) {
if (_last_pkey) {
update_slice(*_last_pkey);
}


@@ -2585,14 +2585,13 @@ future<> storage_proxy::send_hint_to_endpoint(frozen_mutation_and_schema fm_a_s,
}
future<> storage_proxy::send_hint_to_all_replicas(frozen_mutation_and_schema fm_a_s) {
const auto timeout = db::timeout_clock::now() + 1h;
if (!_features.cluster_supports_hinted_handoff_separate_connection()) {
std::array<mutation, 1> ms{fm_a_s.fm.unfreeze(fm_a_s.s)};
return mutate_internal(std::move(ms), db::consistency_level::ALL, false, nullptr, empty_service_permit(), timeout);
return mutate_internal(std::move(ms), db::consistency_level::ALL, false, nullptr, empty_service_permit());
}
std::array<hint_wrapper, 1> ms{hint_wrapper { std::move(fm_a_s.fm.unfreeze(fm_a_s.s)) }};
return mutate_internal(std::move(ms), db::consistency_level::ALL, false, nullptr, empty_service_permit(), timeout);
return mutate_internal(std::move(ms), db::consistency_level::ALL, false, nullptr, empty_service_permit());
}
/**


@@ -2546,7 +2546,7 @@ future<> storage_service::rebuild(sstring source_dc) {
slogger.info("Streaming for rebuild successful");
}).handle_exception([] (auto ep) {
// This is used exclusively through JMX, so log the full trace but only throw a simple RTE
slogger.warn("Error while rebuilding node: {}", std::current_exception());
slogger.warn("Error while rebuilding node: {}", ep);
return make_exception_future<>(std::move(ep));
});
});


@@ -145,8 +145,8 @@ static std::vector<shared_sstable> get_uncompacting_sstables(column_family& cf,
class compaction;
struct compaction_writer {
sstable_writer writer;
shared_sstable sst;
sstable_writer writer;
};
class compacting_sstable_writer {
@@ -570,10 +570,12 @@ private:
std::move(gc_consumer));
return seastar::async([cfc = std::move(cfc), reader = std::move(reader), this] () mutable {
reader.consume_in_thread(std::move(cfc), make_partition_filter(), db::no_timeout);
reader.consume_in_thread(std::move(cfc), db::no_timeout);
});
});
return consumer(make_sstable_reader());
// producer will filter out a partition before it reaches the consumer(s)
auto producer = make_filtering_reader(make_sstable_reader(), make_partition_filter());
return consumer(std::move(producer));
}
virtual reader_consumer make_interposer_consumer(reader_consumer end_consumer) {
@@ -775,7 +777,8 @@ public:
cfg.max_sstable_size = _max_sstable_size;
cfg.monitor = &default_write_monitor();
cfg.run_identifier = _run_identifier;
return compaction_writer{sst->get_writer(*_schema, partitions_per_sstable(), cfg, get_encoding_stats(), _io_priority), sst};
auto writer = sst->get_writer(*_schema, partitions_per_sstable(), cfg, get_encoding_stats(), _io_priority);
return compaction_writer{std::move(sst), std::move(writer)};
}
virtual void stop_sstable_writer(compaction_writer* writer) override {
@@ -839,7 +842,8 @@ public:
cfg.max_sstable_size = _max_sstable_size;
cfg.monitor = &_active_write_monitors.back();
cfg.run_identifier = _run_identifier;
return compaction_writer{sst->get_writer(*_schema, partitions_per_sstable(), cfg, get_encoding_stats(), _io_priority), sst};
auto writer = sst->get_writer(*_schema, partitions_per_sstable(), cfg, get_encoding_stats(), _io_priority);
return compaction_writer{std::move(sst), std::move(writer)};
}
virtual void stop_sstable_writer(compaction_writer* writer) override {
@@ -1305,7 +1309,8 @@ public:
cfg.monitor = &default_write_monitor();
// sstables generated for a given shard will share the same run identifier.
cfg.run_identifier = _run_identifiers.at(shard);
return compaction_writer{sst->get_writer(*_schema, partitions_per_sstable(shard), cfg, get_encoding_stats(), _io_priority, shard), sst};
auto writer = sst->get_writer(*_schema, partitions_per_sstable(shard), cfg, get_encoding_stats(), _io_priority, shard);
return compaction_writer{std::move(sst), std::move(writer)};
}
void on_new_partition() override {}


@@ -646,10 +646,11 @@ future<> compaction_manager::rewrite_sstables(column_family* cf, sstables::compa
_tasks.push_back(task);
auto sstables = std::make_unique<std::vector<sstables::shared_sstable>>(get_func(*cf));
auto compacting = make_lw_shared<compacting_sstable_registration>(this, *sstables);
auto sstables_ptr = sstables.get();
_stats.pending_tasks += sstables->size();
task->compaction_done = do_until([sstables_ptr] { return sstables_ptr->empty(); }, [this, task, options, sstables_ptr] () mutable {
task->compaction_done = do_until([sstables_ptr] { return sstables_ptr->empty(); }, [this, task, options, sstables_ptr, compacting] () mutable {
// FIXME: lock cf here
if (!can_proceed(task)) {
@@ -659,7 +660,7 @@ future<> compaction_manager::rewrite_sstables(column_family* cf, sstables::compa
auto sst = sstables_ptr->back();
sstables_ptr->pop_back();
return repeat([this, task, options, sst = std::move(sst)] () mutable {
return repeat([this, task, options, sst = std::move(sst), compacting] () mutable {
column_family& cf = *task->compacting_cf;
auto sstable_level = sst->get_sstable_level();
auto run_identifier = sst->run_identifier();
@@ -667,21 +668,22 @@ future<> compaction_manager::rewrite_sstables(column_family* cf, sstables::compa
auto descriptor = sstables::compaction_descriptor({ sst }, cf.get_sstable_set(), service::get_local_compaction_priority(),
sstable_level, sstables::compaction_descriptor::default_max_sstable_bytes, run_identifier, options);
auto compacting = make_lw_shared<compacting_sstable_registration>(this, descriptor.sstables);
// Releases reference to cleaned sstable such that respective used disk space can be freed.
descriptor.release_exhausted = [compacting] (const std::vector<sstables::shared_sstable>& exhausted_sstables) {
compacting->release_compacting(exhausted_sstables);
};
_stats.pending_tasks--;
_stats.active_tasks++;
task->compaction_running = true;
compaction_backlog_tracker user_initiated(std::make_unique<user_initiated_backlog_tracker>(_compaction_controller.backlog_of_shares(200), _available_memory));
return do_with(std::move(user_initiated), [this, &cf, descriptor = std::move(descriptor)] (compaction_backlog_tracker& bt) mutable {
return with_scheduling_group(_scheduling_group, [this, &cf, descriptor = std::move(descriptor)] () mutable {
return cf.run_compaction(std::move(descriptor));
return with_semaphore(_rewrite_sstables_sem, 1, [this, task, &cf, descriptor = std::move(descriptor)] () mutable {
_stats.pending_tasks--;
_stats.active_tasks++;
task->compaction_running = true;
compaction_backlog_tracker user_initiated(std::make_unique<user_initiated_backlog_tracker>(_compaction_controller.backlog_of_shares(200), _available_memory));
return do_with(std::move(user_initiated), [this, &cf, descriptor = std::move(descriptor)] (compaction_backlog_tracker& bt) mutable {
return with_scheduling_group(_scheduling_group, [this, &cf, descriptor = std::move(descriptor)]() mutable {
return cf.run_compaction(std::move(descriptor));
});
});
}).then_wrapped([this, task, compacting = std::move(compacting)] (future<> f) mutable {
}).then_wrapped([this, task, compacting] (future<> f) mutable {
task->compaction_running = false;
_stats.active_tasks--;
if (!can_proceed(task)) {


@@ -110,6 +110,7 @@ private:
std::unordered_map<column_family*, rwlock> _compaction_locks;
semaphore _custom_job_sem{1};
seastar::named_semaphore _rewrite_sstables_sem = {1, named_semaphore_exception_factory{"rewrite sstables"}};
std::function<void()> compaction_submission_callback();
// all registered column families are submitted for compaction at a constant interval.


@@ -176,7 +176,7 @@ leveled_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input
unsigned max_filled_level = 0;
size_t offstrategy_threshold = std::max(schema->min_compaction_threshold(), 4);
size_t offstrategy_threshold = (mode == reshape_mode::strict) ? std::max(schema->min_compaction_threshold(), 4) : std::max(schema->max_compaction_threshold(), 32);
size_t max_sstables = std::max(schema->max_compaction_threshold(), int(offstrategy_threshold));
auto tolerance = [mode] (unsigned level) -> unsigned {
if (mode == reshape_mode::strict) {


@@ -374,6 +374,7 @@ private:
_fwd_end = _fwd ? position_in_partition::before_all_clustered_rows() : position_in_partition::after_all_clustered_rows();
_out_of_range = false;
_range_tombstones.reset();
_ready = {};
_first_row_encountered = false;
}
public:


@@ -250,11 +250,11 @@ sstable_directory::load_foreign_sstables(sstable_info_vector info_vec) {
}
future<>
sstable_directory::remove_input_sstables_from_resharding(const std::vector<sstables::shared_sstable>& sstlist) {
sstable_directory::remove_input_sstables_from_resharding(std::vector<sstables::shared_sstable> sstlist) {
dirlog.debug("Removing {} resharded SSTables", sstlist.size());
return parallel_for_each(sstlist, [] (sstables::shared_sstable sst) {
return parallel_for_each(std::move(sstlist), [] (const sstables::shared_sstable& sst) {
dirlog.trace("Removing resharded SSTable {}", sst->get_filename());
return sst->unlink();
return sst->unlink().then([sst] {});
});
}
@@ -393,7 +393,9 @@ sstable_directory::reshard(sstable_info_vector shared_info, compaction_manager&
desc.creator = std::move(creator);
return sstables::compact_sstables(std::move(desc), table).then([this, &sstlist] (sstables::compaction_info result) {
return when_all_succeed(collect_output_sstables_from_resharding(std::move(result.new_sstables)), remove_input_sstables_from_resharding(sstlist)).discard_result();
// input sstables are moved, to guarantee their resources are released once we're done
// resharding them.
return when_all_succeed(collect_output_sstables_from_resharding(std::move(result.new_sstables)), remove_input_sstables_from_resharding(std::move(sstlist))).discard_result();
});
});
});


@@ -119,7 +119,7 @@ private:
future<> process_descriptor(sstables::entry_descriptor desc, const ::io_priority_class& iop);
void validate(sstables::shared_sstable sst) const;
void handle_component(scan_state& state, sstables::entry_descriptor desc, std::filesystem::path filename);
future<> remove_input_sstables_from_resharding(const std::vector<sstables::shared_sstable>& sstlist);
future<> remove_input_sstables_from_resharding(std::vector<sstables::shared_sstable> sstlist);
future<> collect_output_sstables_from_resharding(std::vector<sstables::shared_sstable> resharded_sstables);
future<> remove_input_sstables_from_reshaping(std::vector<sstables::shared_sstable> sstlist);


@@ -1734,8 +1734,8 @@ void sstable::write_collection(file_writer& out, const composite& clustering_key
void sstable::write_clustered_row(file_writer& out, const schema& schema, const clustering_row& clustered_row) {
auto clustering_key = composite::from_clustering_element(schema, clustered_row.key());
maybe_write_row_marker(out, schema, clustered_row.marker(), clustering_key);
maybe_write_row_tombstone(out, clustering_key, clustered_row);
maybe_write_row_marker(out, schema, clustered_row.marker(), clustering_key);
if (schema.clustering_key_size()) {
column_name_helper::min_max_components(schema, _collector.min_column_names(), _collector.max_column_names(),


@@ -115,7 +115,7 @@ time_window_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> i
for (auto& pair : all_buckets.first) {
auto ssts = std::move(pair.second);
if (ssts.size() > offstrategy_threshold) {
ssts.resize(std::min(multi_window.size(), max_sstables));
ssts.resize(std::min(ssts.size(), max_sstables));
compaction_descriptor desc(std::move(ssts), std::optional<sstables::sstable_set>(), iop);
desc.options = compaction_options::make_reshape();
return desc;
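The one-character-class fix above clamps each bucket to its own size rather than to the size of an unrelated container (`multi_window`); clamping with the bucket's own length also guarantees `resize` never grows the vector with default-constructed entries. The same clamp as a standalone sketch:

```python
def clamp_bucket(ssts, max_sstables):
    """Truncate a bucket to at most max_sstables entries. Using the
    bucket's own length (not another container's) means the truncation
    can only shrink it, never grow it."""
    del ssts[min(len(ssts), max_sstables):]
    return ssts
```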


@@ -432,7 +432,7 @@ future<prepare_message> stream_session::prepare(std::vector<stream_request> requ
try {
db.find_column_family(ks, cf);
} catch (no_such_column_family&) {
auto err = format("[Stream #{{}}] prepare requested ks={{}} cf={{}} does not exist", ks, cf);
auto err = format("[Stream #{{}}] prepare requested ks={{}} cf={{}} does not exist", plan_id, ks, cf);
sslog.warn(err.c_str());
throw std::runtime_error(err);
}


@@ -1048,7 +1048,7 @@ table::stop() {
return make_ready_future<>();
}
return _async_gate.close().then([this] {
return when_all(await_pending_writes(), await_pending_reads(), await_pending_streams()).discard_result().finally([this] {
return await_pending_ops().finally([this] {
return when_all(_memtables->request_flush(), _streaming_memtables->request_flush()).discard_result().finally([this] {
return _compaction_manager.remove(this).then([this] {
// Nest, instead of using when_all, so we don't lose any exceptions.
@@ -1226,9 +1226,20 @@ table::on_compaction_completion(sstables::compaction_completion_desc& desc) {
}
}
auto new_compacted_but_not_deleted = _sstables_compacted_but_not_deleted;
// rebuilding _sstables_compacted_but_not_deleted first to make the entire rebuild operation exception safe.
new_compacted_but_not_deleted.insert(new_compacted_but_not_deleted.end(), desc.old_sstables.begin(), desc.old_sstables.end());
// Precompute before so undo_compacted_but_not_deleted can be sure not to throw
std::unordered_set<sstables::shared_sstable> s(
desc.old_sstables.begin(), desc.old_sstables.end());
_sstables_compacted_but_not_deleted.insert(_sstables_compacted_but_not_deleted.end(), desc.old_sstables.begin(), desc.old_sstables.end());
// After we are done, unconditionally remove compacted sstables from _sstables_compacted_but_not_deleted,
// or they could stay forever in the set, resulting in deleted files remaining
// opened and disk space not being released until shutdown.
auto undo_compacted_but_not_deleted = defer([&] {
auto e = boost::range::remove_if(_sstables_compacted_but_not_deleted, [&] (sstables::shared_sstable sst) {
return s.count(sst);
});
_sstables_compacted_but_not_deleted.erase(e, _sstables_compacted_but_not_deleted.end());
rebuild_statistics();
});
_cache.invalidate([this, &desc] () noexcept {
// FIXME: this is not really noexcept, but we need to provide strong exception guarantees.
@@ -1239,8 +1250,6 @@ table::on_compaction_completion(sstables::compaction_completion_desc& desc) {
// to sstables files that are about to be deleted.
_cache.refresh_snapshot();
_sstables_compacted_but_not_deleted = std::move(new_compacted_but_not_deleted);
rebuild_statistics();
auto f = seastar::with_gate(_sstable_deletion_gate, [this, sstables_to_remove = desc.old_sstables] {
@@ -1256,17 +1265,6 @@ table::on_compaction_completion(sstables::compaction_completion_desc& desc) {
// Any remaining SSTables will eventually be re-compacted and re-deleted.
tlogger.error("Compacted SSTables deletion failed: {}. Ignored.", std::current_exception());
}
// unconditionally remove compacted sstables from _sstables_compacted_but_not_deleted,
// or they could stay forever in the set, resulting in deleted files remaining
// opened and disk space not being released until shutdown.
std::unordered_set<sstables::shared_sstable> s(
desc.old_sstables.begin(), desc.old_sstables.end());
auto e = boost::range::remove_if(_sstables_compacted_but_not_deleted, [&] (sstables::shared_sstable sst) -> bool {
return s.count(sst);
});
_sstables_compacted_but_not_deleted.erase(e, _sstables_compacted_but_not_deleted.end());
rebuild_statistics();
}
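The reordering above registers the cleanup of `_sstables_compacted_but_not_deleted` in a `defer` guard *before* the fallible work, so the removal runs even if a later step (such as cache invalidation) throws, instead of only on the success path. In Python the same shape falls out of `try/finally`; the names below are hypothetical stand-ins, not the table API:

```python
def on_compaction_completion(tracked, old_sstables, invalidate_cache):
    """Add old sstables to the tracked list, then guarantee their removal
    runs whether or not the fallible step throws."""
    old = set(old_sstables)        # precompute so the cleanup itself cannot throw
    tracked.extend(old_sstables)
    try:
        invalidate_cache()         # may raise
    finally:
        # Unconditional removal: without it, entries would stay forever,
        # keeping deleted files open and disk space unreleased.
        tracked[:] = [s for s in tracked if s not in old]
```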
// For replace/remove_ancestors_needed_write, note that we need to update the compaction backlog
@@ -1825,7 +1823,8 @@ future<std::unordered_map<sstring, table::snapshot_details>> table::get_snapshot
}
future<> table::flush() {
return _memtables->request_flush();
auto op = _pending_flushes_phaser.start();
return _memtables->request_flush().then([op = std::move(op)] {});
}
// FIXME: We can do much better than this in terms of cache management. Right


@@ -136,7 +136,7 @@ def test_update_condition_eq_different(test_table_s):
ConditionExpression='a = :val2',
ExpressionAttributeValues={':val1': val1, ':val2': val2})
# Also check an actual case of same time, but inequality.
# Also check an actual case of same type, but inequality.
def test_update_condition_eq_unequal(test_table_s):
p = random_string()
test_table_s.update_item(Key={'p': p},
@@ -146,6 +146,13 @@ def test_update_condition_eq_unequal(test_table_s):
UpdateExpression='SET a = :val1',
ConditionExpression='a = :oldval',
ExpressionAttributeValues={':val1': 3, ':oldval': 2})
# If the attribute being compared doesn't exist, it's considered a failed
# condition, not an error:
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
test_table_s.update_item(Key={'p': p},
UpdateExpression='SET a = :val1',
ConditionExpression='q = :oldval',
ExpressionAttributeValues={':val1': 3, ':oldval': 2})
# Check that set equality is checked correctly. Unlike string equality (for
# example), it cannot be done with just naive string comparison of the JSON
@@ -269,15 +276,44 @@ def test_update_condition_lt(test_table_s):
UpdateExpression='SET z = :newval',
ConditionExpression='a < :oldval',
ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
# Trying to compare an unsupported type - e.g., in the following test
# a boolean, is unfortunately caught by boto3 and cannot be tested here...
#test_table_s.update_item(Key={'p': p},
# AttributeUpdates={'d': {'Value': False, 'Action': 'PUT'}})
#with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
# test_table_s.update_item(Key={'p': p},
# UpdateExpression='SET z = :newval',
# ConditionExpression='d < :oldval',
# ExpressionAttributeValues={':newval': 2, ':oldval': True})
# If the attribute being compared doesn't even exist, this is also
# considered as a false condition - not an error.
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
test_table_s.update_item(Key={'p': p},
UpdateExpression='SET z = :newval',
ConditionExpression='q < :oldval',
ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
test_table_s.update_item(Key={'p': p},
UpdateExpression='SET z = :newval',
ConditionExpression=':oldval < q',
ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
# If a comparison parameter comes from a constant specified in the query,
# and it has a type not supported by the comparison (e.g., a list), it's
# not just a failed comparison - it is considered a ValidationException
with pytest.raises(ClientError, match='ValidationException'):
test_table_s.update_item(Key={'p': p},
UpdateExpression='SET z = :newval',
ConditionExpression='a < :oldval',
ExpressionAttributeValues={':newval': 2, ':oldval': [1,2]})
with pytest.raises(ClientError, match='ValidationException'):
test_table_s.update_item(Key={'p': p},
UpdateExpression='SET z = :newval',
ConditionExpression=':oldval < a',
ExpressionAttributeValues={':newval': 2, ':oldval': [1,2]})
# However, if when the wrong type comes from an item attribute, not the
# query, the comparison is simply false - not a ValidationException.
test_table_s.update_item(Key={'p': p}, AttributeUpdates={'x': {'Value': [1,2,3], 'Action': 'PUT'}})
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
test_table_s.update_item(Key={'p': p},
UpdateExpression='SET z = :newval',
ConditionExpression='x < :oldval',
ExpressionAttributeValues={':newval': 2, ':oldval': 1})
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
test_table_s.update_item(Key={'p': p},
UpdateExpression='SET z = :newval',
ConditionExpression=':oldval < x',
ExpressionAttributeValues={':newval': 2, ':oldval': 1})
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 4
# Test for ConditionExpression with operator "<="
@@ -341,6 +377,44 @@ def test_update_condition_le(test_table_s):
UpdateExpression='SET z = :newval',
ConditionExpression='a <= :oldval',
ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
# If the attribute being compared doesn't even exist, this is also
# considered as a false condition - not an error.
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
test_table_s.update_item(Key={'p': p},
UpdateExpression='SET z = :newval',
ConditionExpression='q <= :oldval',
ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
test_table_s.update_item(Key={'p': p},
UpdateExpression='SET z = :newval',
ConditionExpression=':oldval <= q',
ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
# If a comparison parameter comes from a constant specified in the query,
# and it has a type not supported by the comparison (e.g., a list), it's
# not just a failed comparison - it is considered a ValidationException
with pytest.raises(ClientError, match='ValidationException'):
test_table_s.update_item(Key={'p': p},
UpdateExpression='SET z = :newval',
ConditionExpression='a <= :oldval',
ExpressionAttributeValues={':newval': 2, ':oldval': [1,2]})
with pytest.raises(ClientError, match='ValidationException'):
test_table_s.update_item(Key={'p': p},
UpdateExpression='SET z = :newval',
ConditionExpression=':oldval <= a',
ExpressionAttributeValues={':newval': 2, ':oldval': [1,2]})
# However, when the wrong type comes from an item attribute, not the
# query, the comparison is simply false - not a ValidationException.
test_table_s.update_item(Key={'p': p}, AttributeUpdates={'x': {'Value': [1,2,3], 'Action': 'PUT'}})
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
test_table_s.update_item(Key={'p': p},
UpdateExpression='SET z = :newval',
ConditionExpression='x <= :oldval',
ExpressionAttributeValues={':newval': 2, ':oldval': 1})
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
test_table_s.update_item(Key={'p': p},
UpdateExpression='SET z = :newval',
ConditionExpression=':oldval <= x',
ExpressionAttributeValues={':newval': 2, ':oldval': 1})
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 7
# Test for ConditionExpression with operator ">"
@@ -404,6 +478,44 @@ def test_update_condition_gt(test_table_s):
UpdateExpression='SET z = :newval',
ConditionExpression='a > :oldval',
ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
# If the attribute being compared doesn't even exist, this is also
# considered as a false condition - not an error.
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
test_table_s.update_item(Key={'p': p},
UpdateExpression='SET z = :newval',
ConditionExpression='q > :oldval',
ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
test_table_s.update_item(Key={'p': p},
UpdateExpression='SET z = :newval',
ConditionExpression=':oldval > q',
ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
# If a comparison parameter comes from a constant specified in the query,
# and it has a type not supported by the comparison (e.g., a list), it's
# not just a failed comparison - it is considered a ValidationException
with pytest.raises(ClientError, match='ValidationException'):
test_table_s.update_item(Key={'p': p},
UpdateExpression='SET z = :newval',
ConditionExpression='a > :oldval',
ExpressionAttributeValues={':newval': 2, ':oldval': [1,2]})
with pytest.raises(ClientError, match='ValidationException'):
test_table_s.update_item(Key={'p': p},
UpdateExpression='SET z = :newval',
ConditionExpression=':oldval > a',
ExpressionAttributeValues={':newval': 2, ':oldval': [1,2]})
# However, when the wrong type comes from an item attribute, not the
# query, the comparison is simply false - not a ValidationException.
test_table_s.update_item(Key={'p': p}, AttributeUpdates={'x': {'Value': [1,2,3], 'Action': 'PUT'}})
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
test_table_s.update_item(Key={'p': p},
UpdateExpression='SET z = :newval',
ConditionExpression='x > :oldval',
ExpressionAttributeValues={':newval': 2, ':oldval': 1})
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
test_table_s.update_item(Key={'p': p},
UpdateExpression='SET z = :newval',
ConditionExpression=':oldval > x',
ExpressionAttributeValues={':newval': 2, ':oldval': 1})
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 4
# Test for ConditionExpression with operator ">="
@@ -467,6 +579,44 @@ def test_update_condition_ge(test_table_s):
UpdateExpression='SET z = :newval',
ConditionExpression='a >= :oldval',
ExpressionAttributeValues={':newval': 2, ':oldval': '0'})
# If the attribute being compared doesn't even exist, this is also
# considered as a false condition - not an error.
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
test_table_s.update_item(Key={'p': p},
UpdateExpression='SET z = :newval',
ConditionExpression='q >= :oldval',
ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
test_table_s.update_item(Key={'p': p},
UpdateExpression='SET z = :newval',
ConditionExpression=':oldval >= q',
ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
# If a comparison parameter comes from a constant specified in the query,
# and it has a type not supported by the comparison (e.g., a list), it's
# not just a failed comparison - it is considered a ValidationException
with pytest.raises(ClientError, match='ValidationException'):
test_table_s.update_item(Key={'p': p},
UpdateExpression='SET z = :newval',
ConditionExpression='a >= :oldval',
ExpressionAttributeValues={':newval': 2, ':oldval': [1,2]})
with pytest.raises(ClientError, match='ValidationException'):
test_table_s.update_item(Key={'p': p},
UpdateExpression='SET z = :newval',
ConditionExpression=':oldval >= a',
ExpressionAttributeValues={':newval': 2, ':oldval': [1,2]})
# However, when the wrong type comes from an item attribute, not the
# query, the comparison is simply false - not a ValidationException.
test_table_s.update_item(Key={'p': p}, AttributeUpdates={'x': {'Value': [1,2,3], 'Action': 'PUT'}})
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
test_table_s.update_item(Key={'p': p},
UpdateExpression='SET z = :newval',
ConditionExpression='x >= :oldval',
ExpressionAttributeValues={':newval': 2, ':oldval': 1})
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
test_table_s.update_item(Key={'p': p},
UpdateExpression='SET z = :newval',
ConditionExpression=':oldval >= x',
ExpressionAttributeValues={':newval': 2, ':oldval': 1})
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 7
# Test for ConditionExpression with ternary operator "BETWEEN" (checking
@@ -548,6 +698,60 @@ def test_update_condition_between(test_table_s):
UpdateExpression='SET z = :newval',
ConditionExpression='a BETWEEN :oldval1 AND :oldval2',
ExpressionAttributeValues={':newval': 2, ':oldval1': '0', ':oldval2': '2'})
# If the attribute being compared doesn't even exist, this is also
# considered as a false condition - not an error.
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
test_table_s.update_item(Key={'p': p},
UpdateExpression='SET z = :newval',
ConditionExpression='q BETWEEN :oldval1 AND :oldval2',
ExpressionAttributeValues={':newval': 2, ':oldval1': b'dog', ':oldval2': b'zebra'})
# If an operand comes from the query and has a type not supported by the
# comparison (e.g., a list), it's not just a failed condition - it is
# considered a ValidationException
with pytest.raises(ClientError, match='ValidationException'):
test_table_s.update_item(Key={'p': p},
UpdateExpression='SET z = :newval',
ConditionExpression='a BETWEEN :oldval1 AND :oldval2',
ExpressionAttributeValues={':newval': 2, ':oldval1': [1,2], ':oldval2': [2,3]})
# However, when the wrong type comes from an item attribute, not the
# query, the comparison is simply false - not a ValidationException.
test_table_s.update_item(Key={'p': p}, AttributeUpdates={'x': {'Value': [1,2,3], 'Action': 'PUT'},
'y': {'Value': [2,3,4], 'Action': 'PUT'}})
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
test_table_s.update_item(Key={'p': p},
UpdateExpression='SET z = :newval',
ConditionExpression='a BETWEEN x and y',
ExpressionAttributeValues={':newval': 2})
# If the two operands come from the query (":val" references) then if they
# have different types or the wrong order, this is a ValidationException.
# But if one or more of the operands come from the item, this only causes
# a false condition - not a ValidationException.
with pytest.raises(ClientError, match='ValidationException'):
test_table_s.update_item(Key={'p': p},
UpdateExpression='SET z = :newval',
ConditionExpression='a BETWEEN :oldval1 AND :oldval2',
ExpressionAttributeValues={':newval': 2, ':oldval1': 2, ':oldval2': 1})
with pytest.raises(ClientError, match='ValidationException'):
test_table_s.update_item(Key={'p': p},
UpdateExpression='SET z = :newval',
ConditionExpression='a BETWEEN :oldval1 AND :oldval2',
ExpressionAttributeValues={':newval': 2, ':oldval1': 2, ':oldval2': 'dog'})
test_table_s.update_item(Key={'p': p}, AttributeUpdates={'two': {'Value': 2, 'Action': 'PUT'}})
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
test_table_s.update_item(Key={'p': p},
UpdateExpression='SET z = :newval',
ConditionExpression='a BETWEEN two AND :oldval',
ExpressionAttributeValues={':newval': 2, ':oldval': 1})
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
test_table_s.update_item(Key={'p': p},
UpdateExpression='SET z = :newval',
ConditionExpression='a BETWEEN :oldval AND two',
ExpressionAttributeValues={':newval': 2, ':oldval': 3})
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
test_table_s.update_item(Key={'p': p},
UpdateExpression='SET z = :newval',
ConditionExpression='a BETWEEN two AND :oldval',
ExpressionAttributeValues={':newval': 2, ':oldval': 'dog'})
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 9
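The BETWEEN rules these tests pin down can be summarized with a small pure-Python model (a hedged sketch of the observed semantics, not server code): when both bounds come from the query they must be comparable, of the same type, and correctly ordered, or the request is a ValidationException; when a bound comes from the item, any such problem merely makes the condition false.

```python
# Only numbers, strings and bytes are comparable in DynamoDB conditions.
COMPARABLE = (int, float, str, bytes)

class ValidationException(Exception):
    pass

def between_condition(value, low, high, bounds_from_query=True):
    """Model of 'a BETWEEN low AND high'. With both bounds from the
    query, mismatched types or a reversed range are a hard error; with
    a bound from the item, any problem just fails the condition."""
    bounds_ok = (isinstance(low, COMPARABLE)
                 and type(low) is type(high)
                 and low <= high)
    if not bounds_ok:
        if bounds_from_query:
            raise ValidationException('invalid BETWEEN bounds')
        return False
    if not isinstance(value, type(low)):
        # Missing/mismatched item attribute: condition is simply false.
        return False
    return low <= value <= high
```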
# Test for ConditionExpression with multi-operand operator "IN", checking
@@ -605,6 +809,13 @@ def test_update_condition_in(test_table_s):
UpdateExpression='SET c = :val37',
ConditionExpression='a IN ()',
ExpressionAttributeValues=values)
# If the attribute being compared doesn't even exist, this is also
# considered as a false condition - not an error.
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
test_table_s.update_item(Key={'p': p},
UpdateExpression='SET c = :val37',
ConditionExpression='q IN ({})'.format(','.join(values.keys())),
ExpressionAttributeValues=values)
# Beyond the above operators, there are also test functions supported -
# attribute_exists, attribute_not_exists, attribute_type, begins_with,


@@ -237,6 +237,30 @@ def test_update_expected_1_le(test_table_s):
'AttributeValueList': [2, 3]}}
)
# Comparison operators like le work only on numbers, strings or bytes.
# As noted in issue #8043, if any other type is included in *the query*,
# the result should be a ValidationException, but if the wrong type appears
# in the item, not the query, the result is a failed condition.
def test_update_expected_1_le_validation(test_table_s):
p = random_string()
test_table_s.update_item(Key={'p': p},
AttributeUpdates={'a': {'Value': 1, 'Action': 'PUT'},
'b': {'Value': [1,2], 'Action': 'PUT'}})
# Bad type (a list) in the query. Result is ValidationException.
with pytest.raises(ClientError, match='ValidationException'):
test_table_s.update_item(Key={'p': p},
AttributeUpdates={'z': {'Value': 17, 'Action': 'PUT'}},
Expected={'a': {'ComparisonOperator': 'LE',
'AttributeValueList': [[1,2,3]]}}
)
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
test_table_s.update_item(Key={'p': p},
AttributeUpdates={'z': {'Value': 17, 'Action': 'PUT'}},
Expected={'b': {'ComparisonOperator': 'LE',
'AttributeValueList': [3]}}
)
assert not 'z' in test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']
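The asymmetry exercised above (and in the `<`, `<=`, `>`, `>=` ConditionExpression tests) can be modeled in a few lines of plain Python. This is a hypothetical sketch of the semantics, not Scylla or DynamoDB code: a non-comparable type on the query side is a hard error, while on the item side it merely makes the condition false.

```python
# Only numbers, strings and bytes are comparable in DynamoDB conditions
# (issue #8043 was about getting this asymmetry right).
COMPARABLE = (int, float, str, bytes)

class ValidationException(Exception):
    pass

def le_condition(item_value, query_value):
    """Model of the condition 'a <= :v', with a taken from the item and
    :v taken from the query's ExpressionAttributeValues."""
    if not isinstance(query_value, COMPARABLE):
        # Bad type supplied in the query itself: hard error.
        raise ValidationException('unsupported type in comparison')
    if not isinstance(item_value, COMPARABLE):
        # Missing attribute or bad type in the item: condition is false.
        return False
    try:
        return item_value <= query_value
    except TypeError:
        # Mismatched types (e.g. str vs int): also just a false condition.
        return False
```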
# Tests for Expected with ComparisonOperator = "LT":
def test_update_expected_1_lt(test_table_s):
p = random_string()
@@ -894,6 +918,34 @@ def test_update_expected_1_between(test_table_s):
AttributeUpdates={'z': {'Value': 2, 'Action': 'PUT'}},
Expected={'d': {'ComparisonOperator': 'BETWEEN', 'AttributeValueList': [set([1]), set([2])]}})
# BETWEEN works only on numbers, strings or bytes. As noted in issue #8043,
# if any other type is included in *the query*, the result should be a
# ValidationException, but if the wrong type appears in the item, not the
# query, the result is a failed condition.
# BETWEEN should also generate ValidationException if the two ends of the
# range are not of the same type or not in the correct order, but this
# already is tested in the test above (test_update_expected_1_between).
def test_update_expected_1_between_validation(test_table_s):
p = random_string()
test_table_s.update_item(Key={'p': p},
AttributeUpdates={'a': {'Value': 1, 'Action': 'PUT'},
'b': {'Value': [1,2], 'Action': 'PUT'}})
# Bad type (a list) in the query. Result is ValidationException.
with pytest.raises(ClientError, match='ValidationException'):
test_table_s.update_item(Key={'p': p},
AttributeUpdates={'z': {'Value': 17, 'Action': 'PUT'}},
Expected={'a': {'ComparisonOperator': 'BETWEEN',
'AttributeValueList': [[1,2,3], [2,3,4]]}}
)
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
test_table_s.update_item(Key={'p': p},
AttributeUpdates={'z': {'Value': 17, 'Action': 'PUT'}},
Expected={'b': {'ComparisonOperator': 'BETWEEN',
'AttributeValueList': [1,2]}}
)
assert not 'z' in test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']
##############################################################################
# Instead of ComparisonOperator and AttributeValueList, one can specify either
# Value or Exists:


@@ -236,6 +236,30 @@ def test_filter_expression_ge(test_table_sn_with_data):
expected_items = [item for item in items if item[xn] >= xv]
assert(got_items == expected_items)
# Comparison operators such as >= or BETWEEN only work on numbers, strings or
# bytes. When an expression's operand comes from the item and has a wrong type
# (e.g., a list), the result is that the item is skipped - aborting the scan
# with a ValidationException is a bug (this was issue #8043).
def test_filter_expression_le_bad_type(test_table_sn_with_data):
table, p, items = test_table_sn_with_data
got_items = full_query(table, KeyConditionExpression='p=:p', FilterExpression='l <= :xv',
ExpressionAttributeValues={':p': p, ':xv': 3})
assert got_items == []
got_items = full_query(table, KeyConditionExpression='p=:p', FilterExpression=':xv <= l',
ExpressionAttributeValues={':p': p, ':xv': 3})
assert got_items == []
def test_filter_expression_between_bad_type(test_table_sn_with_data):
table, p, items = test_table_sn_with_data
got_items = full_query(table, KeyConditionExpression='p=:p', FilterExpression='s between :xv and l',
ExpressionAttributeValues={':p': p, ':xv': 'cat'})
assert got_items == []
got_items = full_query(table, KeyConditionExpression='p=:p', FilterExpression='s between l and :xv',
ExpressionAttributeValues={':p': p, ':xv': 'cat'})
assert got_items == []
got_items = full_query(table, KeyConditionExpression='p=:p', FilterExpression='s between i and :xv',
ExpressionAttributeValues={':p': p, ':xv': 'cat'})
assert got_items == []
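A filter expression never aborts the scan over item-side type problems; items that cannot be compared are simply dropped. A minimal Python sketch of that behavior (hypothetical model, mirroring the tests above):

```python
COMPARABLE = (int, float, str, bytes)

def filter_le(items, attr, bound):
    """Model of FilterExpression 'attr <= :bound' applied to query/scan
    results: a missing attribute, a non-comparable type, or a type that
    differs from the bound silently skips the item (issue #8043), rather
    than raising an error, because the bound itself has a valid type."""
    kept = []
    for item in items:
        value = item.get(attr)
        if isinstance(value, COMPARABLE):
            try:
                if value <= bound:
                    kept.append(item)
            except TypeError:
                pass  # type mismatch: item is filtered out, not an error
    return kept
```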
# Test the "BETWEEN/AND" ternary operator on a numeric, string and bytes
# attribute. These keywords are case-insensitive.
def test_filter_expression_between(test_table_sn_with_data):
@@ -655,6 +679,46 @@ def test_filter_expression_and_sort_key_condition(test_table_sn_with_data):
expected_items = [item for item in items if item['i'] > i and item['c'] <= 7]
assert(got_items == expected_items)
# Test that a FilterExpression and ProjectionExpression may be given together.
# In particular, test that FilterExpression may inspect attributes which will
# not be returned by the query, because of the ProjectionExpression.
# This test reproduces issue #6951.
def test_filter_expression_and_projection_expression(test_table):
p = random_string()
test_table.put_item(Item={'p': p, 'c': 'hi', 'x': 'dog', 'y': 'cat'})
test_table.put_item(Item={'p': p, 'c': 'yo', 'x': 'mouse', 'y': 'horse'})
# Note that the filter is on the column x, but x is not included in the
# results because of ProjectionExpression. The filter should still work.
got_items = full_query(test_table,
KeyConditionExpression='p=:p',
FilterExpression='x=:x',
ProjectionExpression='y',
ExpressionAttributeValues={':p': p, ':x': 'mouse'})
# Note that:
# 1. Exactly one item matches the filter on x
# 2. The returned record for that item will include *only* the attribute y
# as requested by ProjectionExpression. It won't include x - it was just
# needed for the filter, but didn't appear in ProjectionExpression.
expected_items = [{'y': 'horse'}]
assert(got_items == expected_items)
# It is not allowed to combine the new-style FilterExpression with the
# old-style AttributesToGet. You must use ProjectionExpression instead
# (tested in test_filter_expression_and_projection_expression() above).
@pytest.mark.xfail(reason="missing check for expression and non-expression query parameters")
def test_filter_expression_and_attributes_to_get(test_table):
p = random_string()
# DynamoDB complains: "Can not use both expression and non-expression
# parameters in the same request: Non-expression parameters:
# {AttributesToGet} Expression parameters: {FilterExpression,
# KeyConditionExpression}.
with pytest.raises(ClientError, match='ValidationException.* both'):
full_query(test_table,
KeyConditionExpression='p=:p',
FilterExpression='x=:x',
AttributesToGet=['y'],
ExpressionAttributeValues={':p': p, ':x': 'mouse'})
# All the tests above were for the Query operation. Let's verify that
# FilterExpression also works for Scan. We will assume the same code is
# used internally, so don't need to check all the details of the


@@ -386,3 +386,38 @@ def test_query_missing_key(test_table):
full_query(test_table, KeyConditions={})
with pytest.raises(ClientError, match='ValidationException'):
full_query(test_table)
# The paging tests above used a numeric sort key. Let's now also test paging
# with a bytes sort key. We already have above a test that bytes sort keys
# work and are sorted correctly (test_query_sort_order_bytes), but the
# following test adds a check that *paging* works correctly for such keys.
# We used to have a bug in this (issue #7768) - the returned LastEvaluatedKey
# was incorrectly formatted, breaking the boto3's parsing of the response.
# Note we only check the case of bytes *sort* keys in this test. For bytes
# *partition* keys, see test_scan_paging_bytes().
def test_query_paging_bytes(test_table_sb):
p = random_string()
items = [{'p': p, 'c': random_bytes()} for i in range(10)]
with test_table_sb.batch_writer() as batch:
for item in items:
batch.put_item(item)
# Deliberately pass Limit=1 to enforce paging even though we have
# just 10 items in the partition.
got_items = full_query(test_table_sb, Limit=1,
KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}})
got_sort_keys = [x['c'] for x in got_items]
expected_sort_keys = sorted(x['c'] for x in items)
assert got_sort_keys == expected_sort_keys
# A similar test, for string sort keys
def test_query_paging_string(test_table_ss):
p = random_string()
items = [{'p': p, 'c': random_string()} for i in range(10)]
with test_table_ss.batch_writer() as batch:
for item in items:
batch.put_item(item)
got_items = full_query(test_table_ss, Limit=1,
KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}})
got_sort_keys = [x['c'] for x in got_items]
expected_sort_keys = sorted(x['c'] for x in items)
assert got_sort_keys == expected_sort_keys
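The `full_query` helper these tests rely on (from util.py) essentially follows `LastEvaluatedKey` until the server stops returning one. Below is a hedged sketch of that loop, paired with a tiny in-memory stand-in for a table so the pagination logic can be exercised without a live server; `FakeTable` and its one-item pages are illustrative assumptions, not part of the test suite.

```python
def full_query(table, **kwargs):
    """Collect all pages of a Query: re-issue it with ExclusiveStartKey
    set to the previous page's LastEvaluatedKey until that key is absent.
    A sketch of the util.full_query helper used by the tests above."""
    response = table.query(**kwargs)
    items = response['Items']
    while 'LastEvaluatedKey' in response:
        response = table.query(ExclusiveStartKey=response['LastEvaluatedKey'],
                               **kwargs)
        items.extend(response['Items'])
    return items

class FakeTable:
    """Minimal paginating stand-in: one item per page, as with Limit=1."""
    def __init__(self, items):
        self.items = items
    def query(self, ExclusiveStartKey=0, **kwargs):
        page = {'Items': self.items[ExclusiveStartKey:ExclusiveStartKey + 1]}
        if ExclusiveStartKey + 1 < len(self.items):
            page['LastEvaluatedKey'] = ExclusiveStartKey + 1
        return page
```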


@@ -534,3 +534,40 @@ def test_query_filter_paging(test_table_sn_with_data):
# passed the filter), but doesn't know it is really the last item - it
# takes one more query to discover there are no more.
assert(pages == len(items) + 1)
# Test that a QueryFilter and AttributesToGet may be given together.
# In particular, test that QueryFilter may inspect attributes which will
# not be returned by the query, because of AttributesToGet.
# This test reproduces issue #6951.
def test_query_filter_and_attributes_to_get(test_table):
p = random_string()
test_table.put_item(Item={'p': p, 'c': 'hi', 'x': 'dog', 'y': 'cat'})
test_table.put_item(Item={'p': p, 'c': 'yo', 'x': 'mouse', 'y': 'horse'})
# Note that the filter is on the column x, but x is not included in the
# results because of AttributesToGet. The filter should still work.
got_items = full_query(test_table,
KeyConditions={ 'p': { 'AttributeValueList': [p], 'ComparisonOperator': 'EQ' }},
QueryFilter={ 'x': { 'AttributeValueList': ['mouse'], 'ComparisonOperator': 'EQ' }},
AttributesToGet=['y'])
# Note that:
# 1. Exactly one item matches the filter on x
# 2. The returned record for that item will include *only* the attribute y
# as requested by AttributesToGet. It won't include x - it was just
# needed for the filter, but didn't appear in AttributesToGet.
expected_items = [{'y': 'horse'}]
assert(got_items == expected_items)
# It is not allowed to combine the old-style QueryFilter with the
# new-style ProjectionExpression. You must use AttributesToGet instead
# (tested in test_query_filter_and_attributes_to_get() above).
@pytest.mark.xfail(reason="missing check for expression and non-expression query parameters")
def test_query_filter_and_projection_expression(test_table):
p = random_string()
# DynamoDB complains: "Can not use both expression and non-expression
# parameters in the same request: Non-expression parameters:
# {QueryFilter} Expression parameters: {ProjectionExpression}.
with pytest.raises(ClientError, match='ValidationException.* both'):
full_query(test_table,
KeyConditions={ 'p': { 'AttributeValueList': [p], 'ComparisonOperator': 'EQ' }},
QueryFilter={ 'x': { 'AttributeValueList': ['mouse'], 'ComparisonOperator': 'EQ' }},
ProjectionExpression='y')


@@ -19,7 +19,7 @@
import pytest
from botocore.exceptions import ClientError
-from util import random_string, full_scan, full_scan_and_count, multiset
+from util import random_string, random_bytes, full_scan, full_scan_and_count, multiset
from boto3.dynamodb.conditions import Attr
# Test that scanning works fine with/without pagination
@@ -264,3 +264,20 @@ def test_scan_parallel_incorrect(filled_test_table):
for segment in [7, 9]:
with pytest.raises(ClientError, match='ValidationException.*Segment'):
full_scan(test_table, TotalSegments=5, Segment=segment)
# We used to have a bug with formatting of LastEvaluatedKey in the response
# of Query and Scan with bytes keys (issue #7768). In test_query_paging_bytes()
# (test_query.py) we tested the case of bytes *sort* keys. In the following
# test we check bytes *partition* keys.
def test_scan_paging_bytes(test_table_b):
# We will not Scan the entire table - we have no idea what it contains.
# But we don't need to scan the entire table - we just need the table
# to contain at least two items, and then Scan it with Limit=1 and stop
# after one page. Before #7768 was fixed, the test failed when the
# LastEvaluatedKey in the response could not be parsed.
items = [{'p': random_bytes()}, {'p': random_bytes()}]
with test_table_b.batch_writer() as batch:
for item in items:
batch.put_item(item)
response = test_table_b.scan(ConsistentRead=True, Limit=1)
assert 'LastEvaluatedKey' in response


@@ -41,8 +41,10 @@ def test_fetch_from_system_tables(scylla_only, dynamodb):
key_columns = [item['column_name'] for item in col_response['Items'] if item['kind'] == 'clustering' or item['kind'] == 'partition_key']
qualified_name = "{}{}.{}".format(internal_prefix, ks_name, table_name)
-response = client.scan(TableName=qualified_name, AttributesToGet=key_columns)
-print(ks_name, table_name, response)
+import time
+start = time.time()
+response = client.scan(TableName=qualified_name, AttributesToGet=key_columns, Limit=50)
+print(ks_name, table_name, len(str(response)), time.time()-start)
def test_block_access_to_non_system_tables_with_virtual_interface(scylla_only, test_table_s, dynamodb):
client = dynamodb.meta.client


@@ -675,6 +675,24 @@ def test_update_expression_add_numbers(test_table_s):
UpdateExpression='ADD b :val1',
ExpressionAttributeValues={':val1': 1})
# In test_update_expression_add_numbers() above we tested ADDing a number to
# an existing number. The following test checks that ADD can be used to
# create a *new* number, as if it was added to zero.
def test_update_expression_add_numbers_new(test_table_s):
# Test that "ADD" can create a new number attribute:
p = random_string()
test_table_s.put_item(Item={'p': p, 'a': 'hello'})
test_table_s.update_item(Key={'p': p},
UpdateExpression='ADD b :val1',
ExpressionAttributeValues={':val1': 7})
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['b'] == 7
# Test that "ADD" can create an entirely new item:
p = random_string()
test_table_s.update_item(Key={'p': p},
UpdateExpression='ADD b :val1',
ExpressionAttributeValues={':val1': 8})
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['b'] == 8
# Test "ADD" operation for sets
def test_update_expression_add_sets(test_table_s):
p = random_string()
@@ -703,6 +721,24 @@ def test_update_expression_add_sets(test_table_s):
UpdateExpression='ADD a :val1',
ExpressionAttributeValues={':val1': 'hello'})
# In test_update_expression_add_sets() above we tested ADDing elements to an
# existing set. The following test checks that ADD can be used to create a
# *new* set, by adding its first item.
def test_update_expression_add_sets_new(test_table_s):
# Test that "ADD" can create a new set attribute:
p = random_string()
test_table_s.put_item(Item={'p': p, 'a': 'hello'})
test_table_s.update_item(Key={'p': p},
UpdateExpression='ADD b :val1',
ExpressionAttributeValues={':val1': set(['dog'])})
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['b'] == set(['dog'])
# Test that "ADD" can create an entirely new item:
p = random_string()
test_table_s.update_item(Key={'p': p},
UpdateExpression='ADD b :val1',
ExpressionAttributeValues={':val1': set(['cat'])})
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['b'] == set(['cat'])
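The two "ADD creates a new attribute" tests above boil down to one rule: ADD treats a missing number as 0 and a missing set as the empty set. A pure-Python sketch of that semantics (a hypothetical model, not server code):

```python
def apply_add(item, attr, value):
    """Model of UpdateExpression 'ADD attr :value': numbers accumulate
    onto an implicit 0, sets union with an implicit empty set, so ADD
    can create the attribute (or the whole item) from scratch."""
    if isinstance(value, (int, float)) and not isinstance(value, bool):
        item[attr] = item.get(attr, 0) + value
    elif isinstance(value, set):
        item[attr] = item.get(attr, set()) | value
    else:
        raise TypeError('ADD supports only numbers and sets')
    return item
```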
# Test "DELETE" operation for sets
def test_update_expression_delete_sets(test_table_s):
p = random_string()


@@ -128,12 +128,14 @@ SEASTAR_THREAD_TEST_CASE(test_large_data) {
});
}).get();
+// Since deletion of large data entries has been removed,
+// expect the records to be present.
assert_that(e.execute_cql("select partition_key from system.large_rows where table_name = 'tbl' allow filtering;").get0())
.is_rows()
-.is_empty();
+.with_size(1);
assert_that(e.execute_cql("select partition_key from system.large_cells where table_name = 'tbl' allow filtering;").get0())
.is_rows()
-.is_empty();
+.with_size(1);
return make_ready_future<>();
}, cfg).get();


@@ -2558,7 +2558,7 @@ SEASTAR_THREAD_TEST_CASE(test_queue_reader) {
}
}
-// abort()
+// abort() -- check that consumer is aborted
{
auto [reader, handle] = make_queue_reader(gen.schema());
auto fill_buffer_fut = reader.fill_buffer(db::no_timeout);
@@ -2573,6 +2573,28 @@ SEASTAR_THREAD_TEST_CASE(test_queue_reader) {
BOOST_REQUIRE_THROW(fill_buffer_fut.get(), std::runtime_error);
BOOST_REQUIRE_THROW(handle.push(partition_end{}).get(), std::runtime_error);
BOOST_REQUIRE(!reader.is_end_of_stream());
}
// abort() -- check that producer is aborted
{
auto [reader, handle] = make_queue_reader(gen.schema());
reader.set_max_buffer_size(1);
auto expected_reader = flat_mutation_reader_from_mutations(expected_muts);
auto push_fut = make_ready_future<>();
while (push_fut.available()) {
push_fut = handle.push(std::move(*expected_reader(db::no_timeout).get0()));
}
BOOST_REQUIRE(!push_fut.available());
handle.abort(std::make_exception_ptr<std::runtime_error>(std::runtime_error("error")));
BOOST_REQUIRE_THROW(reader.fill_buffer(db::no_timeout).get(), std::runtime_error);
BOOST_REQUIRE_THROW(push_fut.get(), std::runtime_error);
BOOST_REQUIRE(!reader.is_end_of_stream());
}
// Detached handle


@@ -49,7 +49,7 @@ static thread_local mutation_application_stats app_stats_for_tests;
// Verifies that tombstones in "list" are monotonic, overlap with the requested range,
// and have information equivalent with "expected" in that range.
static
-void check_tombstone_slice(const schema& s, std::vector<range_tombstone> list,
+void check_tombstone_slice(const schema& s, const utils::chunked_vector<range_tombstone>& list,
const query::clustering_range& range,
std::initializer_list<range_tombstone> expected)
{


@@ -29,6 +29,7 @@
#include "types/set.hh"
#include "test/lib/exception_utils.hh"
#include "cql3/statements/select_statement.hh"
#include "test/lib/select_statement_utils.hh"
SEASTAR_TEST_CASE(test_secondary_index_regular_column_query) {
@@ -1208,6 +1209,293 @@ SEASTAR_TEST_CASE(test_indexing_paging_and_aggregation) {
});
}
// Verifies that both "SELECT * [rest_of_query]" and "SELECT count(*) [rest_of_query]"
// return the expected count of rows.
void assert_select_count_and_select_rows_has_size(
cql_test_env& e,
const sstring& rest_of_query, int64_t expected_count,
const std::experimental::source_location& loc = std::experimental::source_location::current()) {
eventually([&] {
require_rows(e, "SELECT count(*) " + rest_of_query, {
{ long_type->decompose(expected_count) }
}, loc);
auto res = cquery_nofail(e, "SELECT * " + rest_of_query, nullptr, loc);
try {
assert_that(res).is_rows().with_size(expected_count);
} catch (const std::exception& e) {
BOOST_FAIL(format("is_rows/with_size failed: {}\n{}:{}: originally from here",
e.what(), loc.file_name(), loc.line()));
}
});
}
static constexpr int page_scenarios_page_size = 20;
static constexpr int page_scenarios_row_count = 2 * page_scenarios_page_size + 5;
static constexpr int page_scenarios_initial_count = 3;
static constexpr int page_scenarios_window_size = 4;
static constexpr int page_scenarios_just_before_first_page = page_scenarios_page_size - page_scenarios_window_size;
static constexpr int page_scenarios_just_after_first_page = page_scenarios_page_size + page_scenarios_window_size;
static constexpr int page_scenarios_just_before_second_page = 2 * page_scenarios_page_size - page_scenarios_window_size;
static constexpr int page_scenarios_just_after_second_page = 2 * page_scenarios_page_size + page_scenarios_window_size;
static_assert(page_scenarios_initial_count < page_scenarios_row_count);
static_assert(page_scenarios_window_size < page_scenarios_page_size);
static_assert(page_scenarios_just_after_second_page < page_scenarios_row_count);
// Executes `insert` lambda page_scenarios_row_count times.
// Runs `validate` lambda in a few scenarios:
//
// 1. After a small number of `insert`s
// 2. In a window from just before to just after `insert`s were executed
//    page_scenarios_page_size times
// 3. In a window from just before to just after `insert`s were executed
//    2 * page_scenarios_page_size times
// 4. After all `insert`s
void test_with_different_page_scenarios(
noncopyable_function<void (int)> insert, noncopyable_function<void (int)> validate) {
int current_row = 0;
for (; current_row < page_scenarios_initial_count; current_row++) {
insert(current_row);
validate(current_row + 1);
}
for (; current_row < page_scenarios_just_before_first_page; current_row++) {
insert(current_row);
}
for (; current_row < page_scenarios_just_after_first_page; current_row++) {
insert(current_row);
validate(current_row + 1);
}
for (; current_row < page_scenarios_just_before_second_page; current_row++) {
insert(current_row);
}
for (; current_row < page_scenarios_just_after_second_page; current_row++) {
insert(current_row);
validate(current_row + 1);
}
for (; current_row < page_scenarios_row_count; current_row++) {
insert(current_row);
}
// No +1, because we just left the for loop and current_row was incremented.
validate(current_row);
}
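Under the constants defined above, the helper validates only a sparse set of row counts: the first few inserts, a window straddling each page boundary, and the final total. That schedule can be reproduced with a short Python sketch (same constants, renamed for brevity):

```python
PAGE_SIZE = 20                   # page_scenarios_page_size
ROW_COUNT = 2 * PAGE_SIZE + 5    # page_scenarios_row_count
INITIAL = 3                      # page_scenarios_initial_count
WINDOW = 4                       # page_scenarios_window_size

def validated_counts():
    """Row counts at which `validate` runs in the C++ helper above."""
    counts = set(range(1, INITIAL + 1))          # after the first inserts
    for boundary in (PAGE_SIZE, 2 * PAGE_SIZE):  # around each page boundary
        counts |= set(range(boundary - WINDOW + 1, boundary + WINDOW + 1))
    counts.add(ROW_COUNT)                        # after all inserts
    return sorted(counts)
```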
SEASTAR_TEST_CASE(test_secondary_index_on_ck_first_column_and_aggregation) {
// Tests aggregation on table with secondary index on first column
// of clustering key. This is the "partition_slices" case of
// indexed_table_select_statement::do_execute.
return do_with_cql_env_thread([] (cql_test_env& e) {
cql3::statements::set_internal_paging_size(page_scenarios_page_size).get();
// Explicitly reproduce the first failing example in issue #7355.
cquery_nofail(e, "CREATE TABLE t1 (pk1 int, pk2 int, ck int, primary key((pk1, pk2), ck))");
cquery_nofail(e, "CREATE INDEX ON t1(ck)");
cquery_nofail(e, "INSERT INTO t1(pk1, pk2, ck) VALUES (1, 2, 3)");
assert_select_count_and_select_rows_has_size(e, "FROM t1 WHERE ck = 3", 1);
cquery_nofail(e, "INSERT INTO t1(pk1, pk2, ck) VALUES (1, 2, 4)");
cquery_nofail(e, "INSERT INTO t1(pk1, pk2, ck) VALUES (1, 2, 5)");
assert_select_count_and_select_rows_has_size(e, "FROM t1 WHERE ck = 3", 1);
cquery_nofail(e, "INSERT INTO t1(pk1, pk2, ck) VALUES (2, 2, 3)");
assert_select_count_and_select_rows_has_size(e, "FROM t1 WHERE ck = 3", 2);
cquery_nofail(e, "INSERT INTO t1(pk1, pk2, ck) VALUES (2, 1, 3)");
assert_select_count_and_select_rows_has_size(e, "FROM t1 WHERE ck = 3", 3);
// Test a case where there are many small partitions (more than a page's worth).
cquery_nofail(e, "CREATE TABLE t2 (pk int, ck int, primary key(pk, ck))");
cquery_nofail(e, "CREATE INDEX ON t2(ck)");
// "Decoy" rows - they should be not counted (previously they were incorrectly counted in,
// see issue #7355).
cquery_nofail(e, "INSERT INTO t2(pk, ck) VALUES (0, -2)");
cquery_nofail(e, "INSERT INTO t2(pk, ck) VALUES (0, 3)");
cquery_nofail(e, format("INSERT INTO t2(pk, ck) VALUES ({}, 3)", page_scenarios_just_after_first_page).c_str());
test_with_different_page_scenarios([&](int current_row) {
cquery_nofail(e, format("INSERT INTO t2(pk, ck) VALUES ({}, 1)", current_row).c_str());
}, [&](int rows_inserted) {
assert_select_count_and_select_rows_has_size(e, "FROM t2 WHERE ck = 1", rows_inserted);
eventually([&] {
auto res = cquery_nofail(e, "SELECT pk FROM t2 WHERE ck = 1 GROUP BY pk");
assert_that(res).is_rows().with_size(rows_inserted);
res = cquery_nofail(e, "SELECT pk, ck FROM t2 WHERE ck = 1 GROUP BY pk, ck");
assert_that(res).is_rows().with_size(rows_inserted);
require_rows(e, "SELECT sum(pk) FROM t2 WHERE ck = 1", {
{ int32_type->decompose(int32_t(rows_inserted * (rows_inserted - 1) / 2)) }
});
});
});
// Test a case where there is a single large partition (larger than a page).
cquery_nofail(e, "CREATE TABLE t3 (pk int, ck1 int, ck2 int, primary key(pk, ck1, ck2))");
cquery_nofail(e, "CREATE INDEX ON t3(ck1)");
// "Decoy" rows
cquery_nofail(e, "INSERT INTO t3(pk, ck1, ck2) VALUES (1, 0, 0)");
cquery_nofail(e, "INSERT INTO t3(pk, ck1, ck2) VALUES (1, 2, 0)");
test_with_different_page_scenarios([&](int current_row) {
cquery_nofail(e, format("INSERT INTO t3(pk, ck1, ck2) VALUES (1, 1, {})", current_row).c_str());
}, [&](int rows_inserted) {
assert_select_count_and_select_rows_has_size(e, "FROM t3 WHERE ck1 = 1", rows_inserted);
eventually([&] {
auto res = cquery_nofail(e, "SELECT pk FROM t3 WHERE ck1 = 1 GROUP BY pk");
assert_that(res).is_rows().with_size(1);
res = cquery_nofail(e, "SELECT pk, ck1 FROM t3 WHERE ck1 = 1 GROUP BY pk, ck1");
assert_that(res).is_rows().with_size(1);
res = cquery_nofail(e, "SELECT pk, ck1, ck2 FROM t3 WHERE ck1 = 1 GROUP BY pk, ck1, ck2");
assert_that(res).is_rows().with_size(rows_inserted);
require_rows(e, "SELECT avg(ck2) FROM t3 WHERE ck1 = 1", {
{ int32_type->decompose(int32_t((rows_inserted * (rows_inserted - 1) / 2) / rows_inserted)) }
});
});
});
cql3::statements::reset_internal_paging_size().get();
});
}
SEASTAR_TEST_CASE(test_secondary_index_on_pk_column_and_aggregation) {
// Tests aggregation on a table with a secondary index on a column
// of the partition key. This is the "whole_partitions" case of
// indexed_table_select_statement::do_execute.
return do_with_cql_env_thread([] (cql_test_env& e) {
cql3::statements::set_internal_paging_size(page_scenarios_page_size).get();
// Explicitly reproduce the second failing example in issue #7355.
// This is a case with a single large partition.
cquery_nofail(e, "CREATE TABLE t1 (pk1 int, pk2 int, ck int, primary key((pk1, pk2), ck))");
cquery_nofail(e, "CREATE INDEX ON t1(pk2)");
test_with_different_page_scenarios([&](int current_row) {
cquery_nofail(e, format("INSERT INTO t1(pk1, pk2, ck) VALUES (1, 1, {})", current_row).c_str());
}, [&](int rows_inserted) {
assert_select_count_and_select_rows_has_size(e, "FROM t1 WHERE pk2 = 1", rows_inserted);
eventually([&] {
auto res = cquery_nofail(e, "SELECT pk1, pk2 FROM t1 WHERE pk2 = 1 GROUP BY pk1, pk2");
assert_that(res).is_rows().with_size(1);
res = cquery_nofail(e, "SELECT pk1, pk2, ck FROM t1 WHERE pk2 = 1 GROUP BY pk1, pk2, ck");
assert_that(res).is_rows().with_size(rows_inserted);
require_rows(e, "SELECT min(pk1) FROM t1 WHERE pk2 = 1", {
{ int32_type->decompose(1) }
});
});
});
// Test a case where there are many small partitions (more than a page's
// worth) and the base table has a clustering key.
cquery_nofail(e, "CREATE TABLE t2 (pk1 int, pk2 int, ck int, primary key((pk1, pk2), ck))");
cquery_nofail(e, "CREATE INDEX ON t2(pk2)");
test_with_different_page_scenarios([&](int current_row) {
cquery_nofail(e, format("INSERT INTO t2(pk1, pk2, ck) VALUES ({}, 1, {})",
current_row, current_row % 20).c_str());
}, [&](int rows_inserted) {
assert_select_count_and_select_rows_has_size(e, "FROM t2 WHERE pk2 = 1", rows_inserted);
eventually([&] {
auto res = cquery_nofail(e, "SELECT pk1, pk2 FROM t2 WHERE pk2 = 1 GROUP BY pk1, pk2");
assert_that(res).is_rows().with_size(rows_inserted);
require_rows(e, "SELECT max(pk1) FROM t2 WHERE pk2 = 1", {
{ int32_type->decompose(int32_t(rows_inserted - 1)) }
});
});
});
// Test a case where there are many small partitions (more than a page's
// worth) and the base table has NO clustering key.
cquery_nofail(e, "CREATE TABLE t3 (pk1 int, pk2 int, primary key((pk1, pk2)))");
cquery_nofail(e, "CREATE INDEX ON t3(pk2)");
test_with_different_page_scenarios([&](int current_row) {
cquery_nofail(e, format("INSERT INTO t3(pk1, pk2) VALUES ({}, 1)", current_row).c_str());
}, [&](int rows_inserted) {
assert_select_count_and_select_rows_has_size(e, "FROM t3 WHERE pk2 = 1", rows_inserted);
});
cql3::statements::reset_internal_paging_size().get();
});
}
SEASTAR_TEST_CASE(test_secondary_index_on_non_pk_ck_column_and_aggregation) {
// Tests aggregation on a table with a secondary index on a column
// that is part of neither the partition key nor the clustering key.
// This is the non-"whole_partitions" and non-"partition_slices"
// case of indexed_table_select_statement::do_execute.
return do_with_cql_env_thread([] (cql_test_env& e) {
cql3::statements::set_internal_paging_size(page_scenarios_page_size).get();
// Test a case where there are many small partitions (more than a page's
// worth) and the base table has a clustering key.
cquery_nofail(e, "CREATE TABLE t (pk int, ck int, v int, primary key(pk, ck))");
cquery_nofail(e, "CREATE INDEX ON t(v)");
test_with_different_page_scenarios([&](int current_row) {
cquery_nofail(e, format("INSERT INTO t(pk, ck, v) VALUES ({}, {}, 1)",
current_row, current_row % 20).c_str());
}, [&](int rows_inserted) {
assert_select_count_and_select_rows_has_size(e, "FROM t WHERE v = 1", rows_inserted);
eventually([&] {
auto res = cquery_nofail(e, "SELECT pk FROM t WHERE v = 1 GROUP BY pk");
assert_that(res).is_rows().with_size(rows_inserted);
require_rows(e, "SELECT sum(v) FROM t WHERE v = 1", {
{ int32_type->decompose(int32_t(rows_inserted)) }
});
});
});
// Test a case where there are many small partitions (more than a page's
// worth) and the base table has NO clustering key.
cquery_nofail(e, "CREATE TABLE t2 (pk int, v int, primary key(pk))");
cquery_nofail(e, "CREATE INDEX ON t2(v)");
test_with_different_page_scenarios([&](int current_row) {
cquery_nofail(e, format("INSERT INTO t2(pk, v) VALUES ({}, 1)", current_row).c_str());
}, [&](int rows_inserted) {
assert_select_count_and_select_rows_has_size(e, "FROM t2 WHERE v = 1", rows_inserted);
eventually([&] {
auto res = cquery_nofail(e, "SELECT pk FROM t2 WHERE v = 1 GROUP BY pk");
assert_that(res).is_rows().with_size(rows_inserted);
require_rows(e, "SELECT sum(pk) FROM t2 WHERE v = 1", {
{ int32_type->decompose(int32_t(rows_inserted * (rows_inserted - 1) / 2)) }
});
});
});
// Test a case where there is a single large partition (larger than a page).
cquery_nofail(e, "CREATE TABLE t3 (pk int, ck int, v int, primary key(pk, ck))");
cquery_nofail(e, "CREATE INDEX ON t3(v)");
test_with_different_page_scenarios([&](int current_row) {
cquery_nofail(e, format("INSERT INTO t3(pk, ck, v) VALUES (1, {}, 1)", current_row).c_str());
}, [&](int rows_inserted) {
assert_select_count_and_select_rows_has_size(e, "FROM t3 WHERE v = 1", rows_inserted);
eventually([&] {
auto res = cquery_nofail(e, "SELECT pk FROM t3 WHERE v = 1 GROUP BY pk");
assert_that(res).is_rows().with_size(1);
res = cquery_nofail(e, "SELECT pk, ck FROM t3 WHERE v = 1 GROUP BY pk, ck");
assert_that(res).is_rows().with_size(rows_inserted);
require_rows(e, "SELECT max(ck) FROM t3 WHERE v = 1", {
{ int32_type->decompose(int32_t(rows_inserted - 1)) }
});
});
});
cql3::statements::reset_internal_paging_size().get();
});
}
SEASTAR_TEST_CASE(test_computed_columns) {
return do_with_cql_env_thread([] (auto& e) {
e.execute_cql("CREATE TABLE t (p1 int, p2 int, c1 int, c2 int, v int, PRIMARY KEY ((p1,p2),c1,c2))").get();


@@ -100,6 +100,13 @@ BOOST_AUTO_TEST_CASE(test_byte_type_string_conversions) {
BOOST_REQUIRE_EQUAL(byte_type->to_string(bytes()), "");
}
BOOST_AUTO_TEST_CASE(test_ascii_type_string_conversions) {
BOOST_REQUIRE(ascii_type->equal(ascii_type->from_string("ascii"), ascii_type->decompose("ascii")));
BOOST_REQUIRE_EQUAL(ascii_type->to_string(ascii_type->decompose("ascii")), "ascii");
test_parsing_fails(ascii_type, "¡Hola!");
}
BOOST_AUTO_TEST_CASE(test_short_type_string_conversions) {
BOOST_REQUIRE(short_type->equal(short_type->from_string("12345"), short_type->decompose(int16_t(12345))));
BOOST_REQUIRE_EQUAL(short_type->to_string(short_type->decompose(int16_t(12345))), "12345");


@@ -0,0 +1,43 @@
# Copyright 2020 ScyllaDB
#
# This file is part of Scylla.
#
# Scylla is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Scylla is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with Scylla. If not, see <http://www.gnu.org/licenses/>.
from util import new_test_table
import requests
def test_create_large_static_cells_and_rows(cql, test_keyspace):
'''Test that `large_data_handler` successfully reports large static cells
and static rows, and that doing so does not crash the Scylla server.
This is a regression test for https://github.com/scylladb/scylla/issues/6780'''
schema = "pk int, ck int, user_ids set<text> static, PRIMARY KEY (pk, ck)"
with new_test_table(cql, test_keyspace, schema) as table:
insert_stmt = cql.prepare(f"INSERT INTO {table} (pk, ck, user_ids) VALUES (?, ?, ?) USING TIMEOUT 5m")
# The default large-data threshold is 1 MB for cells and 10 MB for rows.
# Use a 10 MB cell to trigger the large-data reporting code for both
# static cells and static rows simultaneously.
large_set = {'x' * 1024 * 1024 * 10}
cql.execute(insert_stmt, [1, 1, large_set])
# REST API endpoint address for test scylla node
node_address = f'http://{cql.cluster.contact_points[0]}:10000'
# Force a flush of the test table to persistent storage, which is necessary
# to trigger `large_data_handler` execution.
table_without_ks = table[table.find('.') + 1:] # strip keyspace part from the table name
requests.post(f'{node_address}/storage_service/keyspace_flush/{test_keyspace}', params={'cf' : table_without_ks})
# No need to check that the Scylla server is running here, since the test will
# fail automatically in case Scylla crashes.


@@ -0,0 +1,33 @@
/*
* Copyright (C) 2020 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#include <seastar/core/future.hh>
namespace cql3 {
namespace statements {
future<> set_internal_paging_size(int internal_paging_size);
future<> reset_internal_paging_size();
}
}


@@ -39,7 +39,7 @@ class LiveData(object):
def _discoverMetrics(self):
results = metric.Metric.discover(self._metric_source)
logging.debug('_discoverMetrics: {} results discovered'.format(len(results)))
for symbol in results:
for symbol in list(results):
if not self._matches(symbol, self._metricPatterns):
results.pop(symbol)
logging.debug('_discoverMetrics: {} results matched'.format(len(results)))
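The fix above iterates over `list(results)` rather than `results` itself: `Metric.discover` evidently returns a dict-like object, and popping entries from a dict while iterating it directly raises `RuntimeError` in Python 3. A minimal standalone illustration (the metric names here are made up):

```python
results = {'cpu_load': 1.0, 'mem_free': 2.0, 'disk_io': 3.0}

# Popping during direct iteration raises
#     RuntimeError: dictionary changed size during iteration
# in Python 3. Iterating over a snapshot of the keys is safe:
for symbol in list(results):
    if not symbol.startswith('cpu'):
        results.pop(symbol)

print(results)  # {'cpu_load': 1.0}
```

`list(results)` copies only the keys, so the extra cost is negligible even for large result sets.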

types.cc

@@ -1387,42 +1387,50 @@ static void validate_aux(const tuple_type_impl& t, bytes_view v, cql_serializati
}
namespace {
template <typename UnderlyingView, typename FragmentRangeView>
struct validate_visitor {
bytes_view v;
UnderlyingView underlying;
FragmentRangeView v;
cql_serialization_format sf;
void operator()(const reversed_type_impl& t) { return t.underlying_type()->validate(v, sf); }
void operator()(const reversed_type_impl& t) { return t.underlying_type()->validate(underlying, sf); }
void operator()(const abstract_type&) {}
template <typename T> void operator()(const integer_type_impl<T>& t) {
if (v.empty()) {
return;
}
if (v.size() != sizeof(T)) {
throw marshal_exception(format("Validation failed for type {}: got {:d} bytes", t.name(), v.size()));
if (v.size_bytes() != sizeof(T)) {
throw marshal_exception(format("Validation failed for type {}: got {:d} bytes", t.name(), v.size_bytes()));
}
}
void operator()(const byte_type_impl& t) {
if (v.empty()) {
return;
}
if (v.size() != 1) {
throw marshal_exception(format("Expected 1 byte for a tinyint ({:d})", v.size()));
if (v.size_bytes() != 1) {
throw marshal_exception(format("Expected 1 byte for a tinyint ({:d})", v.size_bytes()));
}
}
void operator()(const short_type_impl& t) {
if (v.empty()) {
return;
}
if (v.size() != 2) {
throw marshal_exception(format("Expected 2 bytes for a smallint ({:d})", v.size()));
if (v.size_bytes() != 2) {
throw marshal_exception(format("Expected 2 bytes for a smallint ({:d})", v.size_bytes()));
}
}
void operator()(const ascii_type_impl&) {
if (!utils::ascii::validate(v)) {
throw marshal_exception("Validation failed - non-ASCII character in an ASCII string");
}
with_linearized(v, [this] (bytes_view bv) {
if (!utils::ascii::validate(bv)) {
throw marshal_exception("Validation failed - non-ASCII character in an ASCII string");
}
});
}
void operator()(const utf8_type_impl&) {
if (!utils::utf8::validate(v)) {
auto error = with_linearized(v, [this] (bytes_view bv) {
return !utils::utf8::validate(bv);
});
if (error) {
throw marshal_exception("Validation failed - non-UTF8 character in a UTF8 string");
}
}
@@ -1431,19 +1439,20 @@ struct validate_visitor {
if (v.empty()) {
return;
}
if (v.size() != 1) {
throw marshal_exception(format("Validation failed for boolean, got {:d} bytes", v.size()));
if (v.size_bytes() != 1) {
throw marshal_exception(format("Validation failed for boolean, got {:d} bytes", v.size_bytes()));
}
}
void operator()(const timeuuid_type_impl& t) {
if (v.empty()) {
return;
}
if (v.size() != 16) {
throw marshal_exception(format("Validation failed for timeuuid - got {:d} bytes", v.size()));
if (v.size_bytes() != 16) {
throw marshal_exception(format("Validation failed for timeuuid - got {:d} bytes", v.size_bytes()));
}
auto msb = read_simple<uint64_t>(v);
auto lsb = read_simple<uint64_t>(v);
auto in = utils::linearizing_input_stream(v);
auto msb = in.template read_trivial<uint64_t>();
auto lsb = in.template read_trivial<uint64_t>();
utils::UUID uuid(msb, lsb);
if (uuid.version() != 1) {
throw marshal_exception(format("Unsupported UUID version ({:d})", uuid.version()));
@@ -1453,17 +1462,19 @@ struct validate_visitor {
if (v.empty()) {
return;
}
if (v.size() != sizeof(uint64_t)) {
throw marshal_exception(format("Validation failed for timestamp - got {:d} bytes", v.size()));
if (v.size_bytes() != sizeof(uint64_t)) {
throw marshal_exception(format("Validation failed for timestamp - got {:d} bytes", v.size_bytes()));
}
}
void operator()(const duration_type_impl& t) {
if (v.size() < 3) {
throw marshal_exception(format("Expected at least 3 bytes for a duration, got {:d}", v.size()));
if (v.size_bytes() < 3) {
throw marshal_exception(format("Expected at least 3 bytes for a duration, got {:d}", v.size_bytes()));
}
common_counter_type months, days, nanoseconds;
std::tie(months, days, nanoseconds) = deserialize_counters(v);
std::tie(months, days, nanoseconds) = with_linearized(v, [] (bytes_view bv) {
return deserialize_counters(bv);
});
auto check_counter_range = [] (common_counter_type value, auto counter_value_type_instance,
std::string_view counter_name) {
@@ -1490,51 +1501,71 @@ struct validate_visitor {
if (v.empty()) {
return;
}
if (v.size() != sizeof(T)) {
throw marshal_exception(format("Expected {:d} bytes for a floating type, got {:d}", sizeof(T), v.size()));
if (v.size_bytes() != sizeof(T)) {
throw marshal_exception(format("Expected {:d} bytes for a floating type, got {:d}", sizeof(T), v.size_bytes()));
}
}
void operator()(const simple_date_type_impl& t) {
if (v.empty()) {
return;
}
if (v.size() != 4) {
throw marshal_exception(format("Expected 4 byte long for date ({:d})", v.size()));
if (v.size_bytes() != 4) {
throw marshal_exception(format("Expected 4 byte long for date ({:d})", v.size_bytes()));
}
}
void operator()(const time_type_impl& t) {
if (v.empty()) {
return;
}
if (v.size() != 8) {
throw marshal_exception(format("Expected 8 byte long for time ({:d})", v.size()));
if (v.size_bytes() != 8) {
throw marshal_exception(format("Expected 8 byte long for time ({:d})", v.size_bytes()));
}
}
void operator()(const uuid_type_impl& t) {
if (v.empty()) {
return;
}
if (v.size() != 16) {
throw marshal_exception(format("Validation failed for uuid - got {:d} bytes", v.size()));
if (v.size_bytes() != 16) {
throw marshal_exception(format("Validation failed for uuid - got {:d} bytes", v.size_bytes()));
}
}
void operator()(const inet_addr_type_impl& t) {
if (v.empty()) {
return;
}
if (v.size() != sizeof(uint32_t) && v.size() != 16) {
throw marshal_exception(format("Validation failed for inet_addr - got {:d} bytes", v.size()));
if (v.size_bytes() != sizeof(uint32_t) && v.size_bytes() != 16) {
throw marshal_exception(format("Validation failed for inet_addr - got {:d} bytes", v.size_bytes()));
}
}
void operator()(const map_type_impl& t) { validate_aux(t, v, sf); }
void operator()(const set_type_impl& t) { validate_aux(t, v, sf); }
void operator()(const list_type_impl& t) { validate_aux(t, v, sf); }
void operator()(const tuple_type_impl& t) { validate_aux(t, v, sf); }
void operator()(const map_type_impl& t) {
with_linearized(v, [this, &t] (bytes_view bv) {
validate_aux(t, bv, sf);
});
}
void operator()(const set_type_impl& t) {
with_linearized(v, [this, &t] (bytes_view bv) {
validate_aux(t, bv, sf);
});
}
void operator()(const list_type_impl& t) {
with_linearized(v, [this, &t] (bytes_view bv) {
validate_aux(t, bv, sf);
});
}
void operator()(const tuple_type_impl& t) {
with_linearized(v, [this, &t] (bytes_view bv) {
validate_aux(t, bv, sf);
});
}
};
}
void abstract_type::validate(const fragmented_temporary_buffer::view& view, cql_serialization_format sf) const {
visit(*this, validate_visitor{view, view, sf});
}
void abstract_type::validate(bytes_view v, cql_serialization_format sf) const {
visit(*this, validate_visitor{v, sf});
visit(*this, validate_visitor{v, single_fragment_range(v), sf});
}
static void serialize_aux(const tuple_type_impl& type, const tuple_type_impl::native_type* val, bytes::iterator& out) {
@@ -2319,6 +2350,14 @@ struct from_string_visitor {
sstring_view s;
bytes operator()(const reversed_type_impl& r) { return r.underlying_type()->from_string(s); }
template <typename T> bytes operator()(const integer_type_impl<T>& t) { return decompose_value(parse_int(t, s)); }
bytes operator()(const ascii_type_impl&) {
auto bv = bytes_view(reinterpret_cast<const int8_t*>(s.begin()), s.size());
if (utils::ascii::validate(bv)) {
return to_bytes(bv);
} else {
throw marshal_exception(format("Value not compatible with type {}: '{}'", ascii_type_name, s));
}
}
bytes operator()(const string_type_impl&) {
return to_bytes(bytes_view(reinterpret_cast<const int8_t*>(s.begin()), s.size()));
}
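The pattern introduced by this diff — keep validation working over fragmented buffers, and linearize into a contiguous view only for validators that need one — can be sketched standalone. All names below are illustrative stand-ins, not Scylla's actual API:

```python
def with_linearized(fragments, func):
    """Call func with a contiguous bytes view of a fragmented value.

    A single-fragment value is passed through without copying; only a
    genuinely fragmented value pays for the join, mirroring the intent
    of with_linearized()/single_fragment_range() in the diff above.
    """
    if len(fragments) == 1:
        return func(fragments[0])
    return func(b"".join(fragments))

def validate_ascii(fragments):
    # Counterpart of the ascii_type_impl case: reject any byte outside
    # the 7-bit ASCII range. Needs a contiguous view, so linearize.
    ok = with_linearized(fragments, lambda bv: all(b < 0x80 for b in bv))
    if not ok:
        raise ValueError("non-ASCII character in an ASCII string")

def validate_fixed_size(fragments, expected, type_name):
    # Counterpart of the size checks: size_bytes() just sums fragment
    # lengths, so no linearization is needed to check the length.
    size = sum(len(f) for f in fragments)
    if size and size != expected:
        raise ValueError(f"Validation failed for {type_name}: got {size} bytes")

validate_ascii([b"hel", b"lo"])                  # multi-fragment, linearized
validate_fixed_size([b"\x00" * 8, b""], 8, "timestamp")
```

The payoff is the same as in the C++ code: cheap checks (lengths, fixed sizes) avoid copying entirely, while character-set validation and collection parsing fall back to a one-off linearized copy.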


@@ -380,6 +380,14 @@ public:
data_value(const std::string&);
data_value(const sstring&);
// Do not allow construction of a data_value from nullptr. This is
// error-prone: for example, nullptr conflicts with the `const char*`
// overload, which would try to allocate a value from it and cause UB.
//
// We want null-value semantics here instead, so the user is forced
// to call `make_null()` explicitly.
data_value(std::nullptr_t) = delete;
data_value(ascii_native_type);
data_value(bool);
data_value(int8_t);
@@ -508,12 +516,8 @@ public:
data_value deserialize_value(bytes_view v) const {
return deserialize(v);
};
void validate(bytes_view v, cql_serialization_format sf) const;
virtual void validate(const fragmented_temporary_buffer::view& view, cql_serialization_format sf) const {
with_linearized(view, [this, sf] (bytes_view bv) {
validate(bv, sf);
});
}
void validate(const fragmented_temporary_buffer::view& view, cql_serialization_format sf) const;
void validate(bytes_view view, cql_serialization_format sf) const;
bool is_compatible_with(const abstract_type& previous) const;
/*
* Types which are wrappers over other types return the inner type.