schema.cc/describe: fix invalid compaction options in schema

There is a typo in schema.cql of snapshot, lack of comma after compaction strategy. It will fail to restore schema by the file. AND compaction = {'class': 'SizeTieredCompactionStrategy''max_compaction_threshold': '32'} map_as_cql_param() function has a `first` parameter to smartly add comma, the compaction_strategy_options is always not the first. Fixes #7741 Signed-off-by: Amos Kong <amos@scylladb.com> Closes #7734 (cherry picked from commit 6b1659ee80)
sstable: writer: ka/la: Write row marker cell after row tombstone
2021-03-24 12:58:11 +02:00 · 2021-03-24 10:42:11 +02:00 · 2021-03-21 10:51:36 +02:00 · 2021-03-18 19:20:10 +02:00 · 2021-03-18 14:29:38 +02:00 · 2021-03-11 08:24:56 +02:00
36 changed files with 978 additions and 213 deletions
--- a/2
+++ b/2
@@ -1,7 +1,7 @@
 #!/bin/sh

 PRODUCT=scylla
-VERSION=4.2.3
+VERSION=4.2.4

 if test -f version
 then
--- a/alternator/conditions.cc
+++ b/alternator/conditions.cc
@@ -159,23 +159,40 @@ static bool check_NE(const rjson::value* v1, const rjson::value& v2) {
 }

 // Check if two JSON-encoded values match with the BEGINS_WITH relation
-static bool check_BEGINS_WITH(const rjson::value* v1, const rjson::value& v2) {
-    // BEGINS_WITH requires that its single operand (v2) be a string or
-    // binary - otherwise it's a validation error. However, problems with
-    // the stored attribute (v1) will just return false (no match).
-    if (!v2.IsObject() || v2.MemberCount() != 1) {
-        throw api_error("ValidationException", format("BEGINS_WITH operator encountered malformed AttributeValue: {}", v2));
-    }
-    auto it2 = v2.MemberBegin();
-    if (it2->name != "S" && it2->name != "B") {
-        throw api_error("ValidationException", format("BEGINS_WITH operator requires String or Binary type in AttributeValue, got {}", it2->name));
-    }
-
-
+bool check_BEGINS_WITH(const rjson::value* v1, const rjson::value& v2,
+                       bool v1_from_query, bool v2_from_query) {
+    bool bad = false;
    if (!v1 || !v1->IsObject() || v1->MemberCount() != 1) {
+        if (v1_from_query) {
+            throw api_error("ValidationException", "begins_with() encountered malformed argument");
+        } else {
+            bad = true;
+        }
+    } else if (v1->MemberBegin()->name != "S" && v1->MemberBegin()->name != "B") {
+        if (v1_from_query) {
+            throw api_error("ValidationException", format("begins_with supports only string or binary type, got: {}", *v1));
+        } else {
+            bad = true;
+        }
+    }
+    if (!v2.IsObject() || v2.MemberCount() != 1) {
+        if (v2_from_query) {
+            throw api_error("ValidationException", "begins_with() encountered malformed argument");
+        } else {
+            bad = true;
+        }
+    } else if (v2.MemberBegin()->name != "S" && v2.MemberBegin()->name != "B") {
+        if (v2_from_query) {
+            throw api_error("ValidationException", format("begins_with() supports only string or binary type, got: {}", v2));
+        } else {
+            bad = true;
+        }
+    }
+    if (bad) {
        return false;
    }
    auto it1 = v1->MemberBegin();
+    auto it2 = v2.MemberBegin();
    if (it1->name != it2->name) {
        return false;
    }
@@ -279,24 +296,38 @@ static bool check_NOT_NULL(const rjson::value* val) {
    return val != nullptr;
 }

+// Only types S, N or B (string, number or bytes) may be compared by the
+// various comparion operators - lt, le, gt, ge, and between.
+static bool check_comparable_type(const rjson::value& v) {
+    if (!v.IsObject() || v.MemberCount() != 1) {
+        return false;
+    }
+    const rjson::value& type = v.MemberBegin()->name;
+    return type == "S" || type == "N" || type == "B";
+}
+
 // Check if two JSON-encoded values match with cmp.
 template <typename Comparator>
-bool check_compare(const rjson::value* v1, const rjson::value& v2, const Comparator& cmp) {
-    if (!v2.IsObject() || v2.MemberCount() != 1) {
-        throw api_error("ValidationException",
-                        format("{} requires a single AttributeValue of type String, Number, or Binary",
-                               cmp.diagnostic));
+bool check_compare(const rjson::value* v1, const rjson::value& v2, const Comparator& cmp,
+                   bool v1_from_query, bool v2_from_query) {
+    bool bad = false;
+    if (!v1 || !check_comparable_type(*v1)) {
+        if (v1_from_query) {
+            throw api_error("ValidationException", format("{} allow only the types String, Number, or Binary", cmp.diagnostic));
+        }
+        bad = true;
    }
-    const auto& kv2 = *v2.MemberBegin();
-    if (kv2.name != "S" && kv2.name != "N" && kv2.name != "B") {
-        throw api_error("ValidationException",
-                        format("{} requires a single AttributeValue of type String, Number, or Binary",
-                               cmp.diagnostic));
+    if (!check_comparable_type(v2)) {
+        if (v2_from_query) {
+            throw api_error("ValidationException", format("{} allow only the types String, Number, or Binary", cmp.diagnostic));
+        }
+        bad = true;
    }
-    if (!v1 || !v1->IsObject() || v1->MemberCount() != 1) {
+    if (bad) {
        return false;
    }
    const auto& kv1 = *v1->MemberBegin();
+    const auto& kv2 = *v2.MemberBegin();
    if (kv1.name != kv2.name) {
        return false;
    }
@@ -310,7 +341,8 @@ bool check_compare(const rjson::value* v1, const rjson::value& v2, const Compara
    if (kv1.name == "B") {
        return cmp(base64_decode(kv1.value), base64_decode(kv2.value));
    }
-    clogger.error("check_compare panic: LHS type equals RHS type, but one is in {N,S,B} while the other isn't");
+    // cannot reach here, as check_comparable_type() verifies the type is one
+    // of the above options.
    return false;
 }

@@ -341,57 +373,71 @@ struct cmp_gt {
    static constexpr const char* diagnostic = "GT operator";
 };

-// True if v is between lb and ub, inclusive.  Throws if lb > ub.
+// True if v is between lb and ub, inclusive.  Throws or returns false
+// (depending on bounds_from_query parameter) if lb > ub.
 template <typename T>
-static bool check_BETWEEN(const T& v, const T& lb, const T& ub) {
+static bool check_BETWEEN(const T& v, const T& lb, const T& ub, bool bounds_from_query) {
    if (cmp_lt()(ub, lb)) {
-        throw api_error("ValidationException",
-                        format("BETWEEN operator requires lower_bound <= upper_bound, but {} > {}", lb, ub));
+        if (bounds_from_query) {
+            throw api_error("ValidationException",
+                format("BETWEEN operator requires lower_bound <= upper_bound, but {} > {}", lb, ub));
+        } else {
+            return false;
+        }
    }
    return cmp_ge()(v, lb) && cmp_le()(v, ub);
 }

-static bool check_BETWEEN(const rjson::value* v, const rjson::value& lb, const rjson::value& ub) {
-    if (!v) {
+static bool check_BETWEEN(const rjson::value* v, const rjson::value& lb, const rjson::value& ub,
+                          bool v_from_query, bool lb_from_query, bool ub_from_query) {
+    if ((v && v_from_query && !check_comparable_type(*v)) ||
+        (lb_from_query && !check_comparable_type(lb)) ||
+        (ub_from_query && !check_comparable_type(ub))) {
+        throw api_error("ValidationException", "between allow only the types String, Number, or Binary");
+
+    }
+    if (!v || !v->IsObject() || v->MemberCount() != 1 ||
+        !lb.IsObject() || lb.MemberCount() != 1 ||
+        !ub.IsObject() || ub.MemberCount() != 1) {
        return false;
    }
-    if (!v->IsObject() || v->MemberCount() != 1) {
-        throw api_error("ValidationException", format("BETWEEN operator encountered malformed AttributeValue: {}", *v));
-    }
-    if (!lb.IsObject() || lb.MemberCount() != 1) {
-        throw api_error("ValidationException", format("BETWEEN operator encountered malformed AttributeValue: {}", lb));
-    }
-    if (!ub.IsObject() || ub.MemberCount() != 1) {
-        throw api_error("ValidationException", format("BETWEEN operator encountered malformed AttributeValue: {}", ub));
-    }

    const auto& kv_v = *v->MemberBegin();
    const auto& kv_lb = *lb.MemberBegin();
    const auto& kv_ub = *ub.MemberBegin();
+    bool bounds_from_query = lb_from_query && ub_from_query;
    if (kv_lb.name != kv_ub.name) {
-        throw api_error(
-                "ValidationException",
+        if (bounds_from_query) {
+           throw api_error("ValidationException",
                format("BETWEEN operator requires the same type for lower and upper bound; instead got {} and {}",
                       kv_lb.name, kv_ub.name));
+        } else {
+            return false;
+        }
    }
    if (kv_v.name != kv_lb.name) { // Cannot compare different types, so v is NOT between lb and ub.
        return false;
    }
    if (kv_v.name == "N") {
        const char* diag = "BETWEEN operator";
-        return check_BETWEEN(unwrap_number(*v, diag), unwrap_number(lb, diag), unwrap_number(ub, diag));
+        return check_BETWEEN(unwrap_number(*v, diag), unwrap_number(lb, diag), unwrap_number(ub, diag), bounds_from_query);
    }
    if (kv_v.name == "S") {
        return check_BETWEEN(std::string_view(kv_v.value.GetString(), kv_v.value.GetStringLength()),
                             std::string_view(kv_lb.value.GetString(), kv_lb.value.GetStringLength()),
-                             std::string_view(kv_ub.value.GetString(), kv_ub.value.GetStringLength()));
+                             std::string_view(kv_ub.value.GetString(), kv_ub.value.GetStringLength()),
+                             bounds_from_query);
    }
    if (kv_v.name == "B") {
-        return check_BETWEEN(base64_decode(kv_v.value), base64_decode(kv_lb.value), base64_decode(kv_ub.value));
+        return check_BETWEEN(base64_decode(kv_v.value), base64_decode(kv_lb.value), base64_decode(kv_ub.value), bounds_from_query);
    }
-    throw api_error("ValidationException",
-        format("BETWEEN operator requires AttributeValueList elements to be of type String, Number, or Binary; instead got {}",
+    if (v_from_query) {
+        throw api_error("ValidationException",
+            format("BETWEEN operator requires AttributeValueList elements to be of type String, Number, or Binary; instead got {}",
               kv_lb.name));
+    } else {
+        return false;
+    }
 }

 // Verify one Expect condition on one attribute (whose content is "got")
@@ -438,19 +484,19 @@ static bool verify_expected_one(const rjson::value& condition, const rjson::valu
            return check_NE(got, (*attribute_value_list)[0]);
        case comparison_operator_type::LT:
            verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
-            return check_compare(got, (*attribute_value_list)[0], cmp_lt{});
+            return check_compare(got, (*attribute_value_list)[0], cmp_lt{}, false, true);
        case comparison_operator_type::LE:
            verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
-            return check_compare(got, (*attribute_value_list)[0], cmp_le{});
+            return check_compare(got, (*attribute_value_list)[0], cmp_le{}, false, true);
        case comparison_operator_type::GT:
            verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
-            return check_compare(got, (*attribute_value_list)[0], cmp_gt{});
+            return check_compare(got, (*attribute_value_list)[0], cmp_gt{}, false, true);
        case comparison_operator_type::GE:
            verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
-            return check_compare(got, (*attribute_value_list)[0], cmp_ge{});
+            return check_compare(got, (*attribute_value_list)[0], cmp_ge{}, false, true);
        case comparison_operator_type::BEGINS_WITH:
            verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
-            return check_BEGINS_WITH(got, (*attribute_value_list)[0]);
+            return check_BEGINS_WITH(got, (*attribute_value_list)[0], false, true);
        case comparison_operator_type::IN:
            verify_operand_count(attribute_value_list, nonempty(), *comparison_operator);
            return check_IN(got, *attribute_value_list);
@@ -462,7 +508,8 @@ static bool verify_expected_one(const rjson::value& condition, const rjson::valu
            return check_NOT_NULL(got);
        case comparison_operator_type::BETWEEN:
            verify_operand_count(attribute_value_list, exact_size(2), *comparison_operator);
-            return check_BETWEEN(got, (*attribute_value_list)[0], (*attribute_value_list)[1]);
+            return check_BETWEEN(got, (*attribute_value_list)[0], (*attribute_value_list)[1],
+                                 false, true, true);
        case comparison_operator_type::CONTAINS:
            {
                verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
@@ -574,7 +621,8 @@ static bool calculate_primitive_condition(const parsed::primitive_condition& con
            // Shouldn't happen unless we have a bug in the parser
            throw std::logic_error(format("Wrong number of values {} in BETWEEN primitive_condition", cond._values.size()));
        }
-        return check_BETWEEN(&calculated_values[0], calculated_values[1], calculated_values[2]);
+        return check_BETWEEN(&calculated_values[0], calculated_values[1], calculated_values[2],
+                             cond._values[0].is_constant(), cond._values[1].is_constant(), cond._values[2].is_constant());
    case parsed::primitive_condition::type::IN:
        return check_IN(calculated_values);
    case parsed::primitive_condition::type::VALUE:
@@ -605,13 +653,17 @@ static bool calculate_primitive_condition(const parsed::primitive_condition& con
    case parsed::primitive_condition::type::NE:
        return check_NE(&calculated_values[0], calculated_values[1]);
    case parsed::primitive_condition::type::GT:
-        return check_compare(&calculated_values[0], calculated_values[1], cmp_gt{});
+        return check_compare(&calculated_values[0], calculated_values[1], cmp_gt{},
+            cond._values[0].is_constant(), cond._values[1].is_constant());
    case parsed::primitive_condition::type::GE:
-        return check_compare(&calculated_values[0], calculated_values[1], cmp_ge{});
+        return check_compare(&calculated_values[0], calculated_values[1], cmp_ge{},
+            cond._values[0].is_constant(), cond._values[1].is_constant());
    case parsed::primitive_condition::type::LT:
-        return check_compare(&calculated_values[0], calculated_values[1], cmp_lt{});
+        return check_compare(&calculated_values[0], calculated_values[1], cmp_lt{},
+            cond._values[0].is_constant(), cond._values[1].is_constant());
    case parsed::primitive_condition::type::LE:
-        return check_compare(&calculated_values[0], calculated_values[1], cmp_le{});
+        return check_compare(&calculated_values[0], calculated_values[1], cmp_le{},
+            cond._values[0].is_constant(), cond._values[1].is_constant());
    default:
        // Shouldn't happen unless we have a bug in the parser
        throw std::logic_error(format("Unknown type {} in primitive_condition object", (int)(cond._op)));
--- a/alternator/conditions.hh
+++ b/alternator/conditions.hh
@@ -52,6 +52,7 @@ bool verify_expected(const rjson::value& req, const rjson::value* previous_item)
 bool verify_condition(const rjson::value& condition, bool require_all, const rjson::value* previous_item);

 bool check_CONTAINS(const rjson::value* v1, const rjson::value& v2);
+bool check_BEGINS_WITH(const rjson::value* v1, const rjson::value& v2, bool v1_from_query, bool v2_from_query);

 bool verify_condition_expression(
        const parsed::condition_expression& condition_expression,
--- a/alternator/executor.cc
+++ b/alternator/executor.cc
@@ -2134,19 +2134,30 @@ update_item_operation::apply(std::unique_ptr<rjson::value> previous_item, api::t
                    rjson::value v1 = calculate_value(base, calculate_value_caller::UpdateExpression, previous_item.get());
                    rjson::value v2 = calculate_value(addition, calculate_value_caller::UpdateExpression, previous_item.get());
                    rjson::value result;
-                    std::string v1_type = get_item_type_string(v1);
-                    if (v1_type == "N") {
-                        if (get_item_type_string(v2) != "N") {
-                            throw api_error("ValidationException", format("Incorrect operand type for operator or function. Expected {}: {}", v1_type, rjson::print(v2)));
+                    // An ADD can be used to create a new attribute (when
+                    // v1.IsNull()) or to add to a pre-existing attribute:
+                    if (v1.IsNull()) {
+                        std::string v2_type = get_item_type_string(v2);
+                        if (v2_type == "N" || v2_type == "SS" || v2_type == "NS" || v2_type == "BS") {
+                            result = v2;
+                        } else {
+                            throw api_error("ValidationException", format("An operand in the update expression has an incorrect data type: {}", v2));
                        }
-                        result = number_add(v1, v2);
-                    } else if (v1_type == "SS" || v1_type == "NS" || v1_type == "BS") {
-                        if (get_item_type_string(v2) != v1_type) {
-                            throw api_error("ValidationException", format("Incorrect operand type for operator or function. Expected {}: {}", v1_type, rjson::print(v2)));
-                        }
-                        result = set_sum(v1, v2);
                    } else {
-                        throw api_error("ValidationException", format("An operand in the update expression has an incorrect data type: {}", v1));
+                        std::string v1_type = get_item_type_string(v1);
+                        if (v1_type == "N") {
+                            if (get_item_type_string(v2) != "N") {
+                                throw api_error("ValidationException", format("Incorrect operand type for operator or function. Expected {}: {}", v1_type, rjson::print(v2)));
+                            }
+                            result = number_add(v1, v2);
+                        } else if (v1_type == "SS" || v1_type == "NS" || v1_type == "BS") {
+                            if (get_item_type_string(v2) != v1_type) {
+                                throw api_error("ValidationException", format("Incorrect operand type for operator or function. Expected {}: {}", v1_type, rjson::print(v2)));
+                            }
+                            result = set_sum(v1, v2);
+                        } else {
+                            throw api_error("ValidationException", format("An operand in the update expression has an incorrect data type: {}", v1));
+                        }
                    }
                    do_update(to_bytes(column_name), result);
                },
--- a/alternator/expressions.cc
+++ b/alternator/expressions.cc
@@ -603,52 +603,8 @@ std::unordered_map<std::string_view, function_handler_type*> function_handlers {
            }
            rjson::value v1 = calculate_value(f._parameters[0], caller, previous_item);
            rjson::value v2 = calculate_value(f._parameters[1], caller, previous_item);
-            // TODO: There's duplication here with check_BEGINS_WITH().
-            // But unfortunately, the two functions differ a bit.
-
-            // If one of v1 or v2 is malformed or has an unsupported type
-            // (not B or S), what we do depends on whether it came from
-            // the user's query (is_constant()), or the item. Unsupported
-            // values in the query result in an error, but if they are in
-            // the item, we silently return false (no match).
-            bool bad = false;
-            if (!v1.IsObject() || v1.MemberCount() != 1) {
-                bad = true;
-                if (f._parameters[0].is_constant()) {
-                    throw api_error("ValidationException", format("{}: begins_with() encountered malformed AttributeValue: {}", caller, v1));
-                }
-            } else if (v1.MemberBegin()->name != "S" && v1.MemberBegin()->name != "B") {
-                bad = true;
-                if (f._parameters[0].is_constant()) {
-                    throw api_error("ValidationException", format("{}: begins_with() supports only string or binary in AttributeValue: {}", caller, v1));
-                }
-            }
-            if (!v2.IsObject() || v2.MemberCount() != 1) {
-                bad = true;
-                if (f._parameters[1].is_constant()) {
-                    throw api_error("ValidationException", format("{}: begins_with() encountered malformed AttributeValue: {}", caller, v2));
-                }
-            } else if (v2.MemberBegin()->name != "S" && v2.MemberBegin()->name != "B") {
-                bad = true;
-                if (f._parameters[1].is_constant()) {
-                    throw api_error("ValidationException", format("{}: begins_with() supports only string or binary in AttributeValue: {}", caller, v2));
-                }
-            }
-            bool ret = false;
-            if (!bad) {
-                auto it1 = v1.MemberBegin();
-                auto it2 = v2.MemberBegin();
-                if (it1->name == it2->name) {
-                    if (it2->name == "S") {
-                        std::string_view val1 = rjson::to_string_view(it1->value);
-                        std::string_view val2 = rjson::to_string_view(it2->value);
-                        ret = val1.starts_with(val2);
-                    } else /* it2->name == "B" */ {
-                        ret = base64_begins_with(rjson::to_string_view(it1->value), rjson::to_string_view(it2->value));
-                    }
-                }
-            }
-            return to_bool_json(ret);
+            return to_bool_json(check_BEGINS_WITH(v1.IsNull() ? nullptr : &v1,  v2,
+                                    f._parameters[0].is_constant(), f._parameters[1].is_constant()));
        }
    },
    {"contains", [] (calculate_value_caller caller, const rjson::value* previous_item, const parsed::value::function_call& f) {
--- a/api/column_family.cc
+++ b/api/column_family.cc
@@ -650,7 +650,7 @@ void set_column_family(http_context& ctx, routes& r) {
    cf::get_bloom_filter_disk_space_used.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (column_family& cf) {
            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
-                return sst->filter_size();
+                return s + sst->filter_size();
            });
        }, std::plus<uint64_t>());
    });
@@ -658,7 +658,7 @@ void set_column_family(http_context& ctx, routes& r) {
    cf::get_all_bloom_filter_disk_space_used.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, uint64_t(0), [] (column_family& cf) {
            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
-                return sst->filter_size();
+                return s + sst->filter_size();
            });
        }, std::plus<uint64_t>());
    });
@@ -666,7 +666,7 @@ void set_column_family(http_context& ctx, routes& r) {
    cf::get_bloom_filter_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (column_family& cf) {
            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
-                return sst->filter_memory_size();
+                return s + sst->filter_memory_size();
            });
        }, std::plus<uint64_t>());
    });
@@ -674,7 +674,7 @@ void set_column_family(http_context& ctx, routes& r) {
    cf::get_all_bloom_filter_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, uint64_t(0), [] (column_family& cf) {
            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
-                return sst->filter_memory_size();
+                return s + sst->filter_memory_size();
            });
        }, std::plus<uint64_t>());
    });
@@ -682,7 +682,7 @@ void set_column_family(http_context& ctx, routes& r) {
    cf::get_index_summary_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (column_family& cf) {
            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
-                return sst->get_summary().memory_footprint();
+                return s + sst->get_summary().memory_footprint();
            });
        }, std::plus<uint64_t>());
    });
@@ -690,7 +690,7 @@ void set_column_family(http_context& ctx, routes& r) {
    cf::get_all_index_summary_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) {
        return map_reduce_cf(ctx, uint64_t(0), [] (column_family& cf) {
            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
-                return sst->get_summary().memory_footprint();
+                return s + sst->get_summary().memory_footprint();
            });
        }, std::plus<uint64_t>());
    });
--- a/cql3/statements/select_statement.cc
+++ b/cql3/statements/select_statement.cc
@@ -59,6 +59,7 @@
 #include "db/timeout_clock.hh"
 #include "db/consistency_level_validations.hh"
 #include "database.hh"
+#include "test/lib/select_statement_utils.hh"
 #include <boost/algorithm/cxx11/any_of.hpp>

 bool is_system_keyspace(const sstring& name);
@@ -67,6 +68,8 @@ namespace cql3 {

 namespace statements {

+static constexpr int DEFAULT_INTERNAL_PAGING_SIZE = select_statement::DEFAULT_COUNT_PAGE_SIZE;
+thread_local int internal_paging_size = DEFAULT_INTERNAL_PAGING_SIZE;
 thread_local const lw_shared_ptr<const select_statement::parameters> select_statement::_default_parameters = make_lw_shared<select_statement::parameters>();

 select_statement::parameters::parameters()
@@ -333,7 +336,7 @@ select_statement::do_execute(service::storage_proxy& proxy,
    const bool aggregate = _selection->is_aggregate() || has_group_by();
    const bool nonpaged_filtering = restrictions_need_filtering && page_size <= 0;
    if (aggregate || nonpaged_filtering) {
-        page_size = DEFAULT_COUNT_PAGE_SIZE;
+        page_size = internal_paging_size;
    }

    auto key_ranges = _restrictions->get_partition_key_ranges(options);
@@ -530,13 +533,29 @@ indexed_table_select_statement::do_execute_base_query(
            if (old_paging_state && concurrency == 1) {
                auto base_pk = generate_base_key_from_index_pk<partition_key>(old_paging_state->get_partition_key(),
                        old_paging_state->get_clustering_key(), *_schema, *_view_schema);
+                auto row_ranges = command->slice.default_row_ranges();
                if (old_paging_state->get_clustering_key() && _schema->clustering_key_size() > 0) {
                    auto base_ck = generate_base_key_from_index_pk<clustering_key>(old_paging_state->get_partition_key(),
                            old_paging_state->get_clustering_key(), *_schema, *_view_schema);
-                    command->slice.set_range(*_schema, base_pk,
-                            std::vector<query::clustering_range>{query::clustering_range::make_starting_with(range_bound<clustering_key>(base_ck, false))});
+
+                    query::trim_clustering_row_ranges_to(*_schema, row_ranges, base_ck, false);
+                    command->slice.set_range(*_schema, base_pk, row_ranges);
                } else {
-                    command->slice.set_range(*_schema, base_pk, std::vector<query::clustering_range>{query::clustering_range::make_open_ended_both_sides()});
+                    // There is no clustering key in old_paging_state and/or no clustering key in 
+                    // _schema, therefore read an entire partition (whole clustering range).
+                    //
+                    // The only exception to applying no restrictions on clustering key
+                    // is a case when we have a secondary index on the first column
+                    // of clustering key. In such a case we should not read the
+                    // entire clustering range - only a range in which first column
+                    // of clustering key has the correct value. 
+                    //
+                    // This means that we should not set a open_ended_both_sides
+                    // clustering range on base_pk, instead intersect it with
+                    // _row_ranges (which contains the restrictions neccessary for the
+                    // case described above). The result of such intersection is just
+                    // _row_ranges, which we explicity set on base_pk.
+                    command->slice.set_range(*_schema, base_pk, row_ranges);
                }
            }
            concurrency *= 2;
@@ -974,12 +993,16 @@ indexed_table_select_statement::do_execute(service::storage_proxy& proxy,
    const bool aggregate = _selection->is_aggregate() || has_group_by();
    if (aggregate) {
        const bool restrictions_need_filtering = _restrictions->need_filtering();
-        return do_with(cql3::selection::result_set_builder(*_selection, now, options.get_cql_serialization_format()), std::make_unique<cql3::query_options>(cql3::query_options(options)),
+        return do_with(cql3::selection::result_set_builder(*_selection, now, options.get_cql_serialization_format(), *_group_by_cell_indices), std::make_unique<cql3::query_options>(cql3::query_options(options)),
                [this, &options, &proxy, &state, now, whole_partitions, partition_slices, restrictions_need_filtering] (cql3::selection::result_set_builder& builder, std::unique_ptr<cql3::query_options>& internal_options) {
            // page size is set to the internal count page size, regardless of the user-provided value
-            internal_options.reset(new cql3::query_options(std::move(internal_options), options.get_paging_state(), DEFAULT_COUNT_PAGE_SIZE));
+            internal_options.reset(new cql3::query_options(std::move(internal_options), options.get_paging_state(), internal_paging_size));
            return repeat([this, &builder, &options, &internal_options, &proxy, &state, now, whole_partitions, partition_slices, restrictions_need_filtering] () {
-                auto consume_results = [this, &builder, &options, &internal_options, restrictions_need_filtering] (foreign_ptr<lw_shared_ptr<query::result>> results, lw_shared_ptr<query::read_command> cmd) {
+                auto consume_results = [this, &builder, &options, &internal_options, &proxy, &state, restrictions_need_filtering] (foreign_ptr<lw_shared_ptr<query::result>> results, lw_shared_ptr<query::read_command> cmd, lw_shared_ptr<const service::pager::paging_state> paging_state) {
+                    if (paging_state) {
+                        paging_state = generate_view_paging_state_from_base_query_results(paging_state, results, proxy, state, options);
+                    }
+                    internal_options.reset(new cql3::query_options(std::move(internal_options), paging_state ? make_lw_shared<service::pager::paging_state>(*paging_state) : nullptr));
                    if (restrictions_need_filtering) {
                        _stats.filtered_rows_read_total += *results->row_count();
                        query::result_view::consume(*results, cmd->slice, cql3::selection::result_set_builder::visitor(builder, *_schema, *_selection,
@@ -987,24 +1010,24 @@ indexed_table_select_statement::do_execute(service::storage_proxy& proxy,
                    } else {
                        query::result_view::consume(*results, cmd->slice, cql3::selection::result_set_builder::visitor(builder, *_schema, *_selection));
                    }
+                    bool has_more_pages = paging_state && paging_state->get_remaining() > 0;
+                    return stop_iteration(!has_more_pages);
                };

                if (whole_partitions || partition_slices) {
                    return find_index_partition_ranges(proxy, state, *internal_options).then(
                            [this, now, &state, &internal_options, &proxy, consume_results = std::move(consume_results)] (dht::partition_range_vector partition_ranges, lw_shared_ptr<const service::pager::paging_state> paging_state) {
-                        bool has_more_pages = paging_state && paging_state->get_remaining() > 0;
-                        internal_options.reset(new cql3::query_options(std::move(internal_options), paging_state ? make_lw_shared<service::pager::paging_state>(*paging_state) : nullptr));
-                        return do_execute_base_query(proxy, std::move(partition_ranges), state, *internal_options, now, std::move(paging_state)).then(consume_results).then([has_more_pages] {
-                            return stop_iteration(!has_more_pages);
+                        return do_execute_base_query(proxy, std::move(partition_ranges), state, *internal_options, now, paging_state)
+                        .then([paging_state, consume_results = std::move(consume_results)](foreign_ptr<lw_shared_ptr<query::result>> results, lw_shared_ptr<query::read_command> cmd) {
+                            return consume_results(std::move(results), std::move(cmd), std::move(paging_state));
                        });
                    });
                } else {
                    return find_index_clustering_rows(proxy, state, *internal_options).then(
                            [this, now, &state, &internal_options, &proxy, consume_results = std::move(consume_results)] (std::vector<primary_key> primary_keys, lw_shared_ptr<const service::pager::paging_state> paging_state) {
-                        bool has_more_pages = paging_state && paging_state->get_remaining() > 0;
-                        internal_options.reset(new cql3::query_options(std::move(internal_options), paging_state ? make_lw_shared<service::pager::paging_state>(*paging_state) : nullptr));
-                        return this->do_execute_base_query(proxy, std::move(primary_keys), state, *internal_options, now, std::move(paging_state)).then(consume_results).then([has_more_pages] {
-                            return stop_iteration(!has_more_pages);
+                        return this->do_execute_base_query(proxy, std::move(primary_keys), state, *internal_options, now, paging_state)
+                        .then([paging_state, consume_results = std::move(consume_results)](foreign_ptr<lw_shared_ptr<query::result>> results, lw_shared_ptr<query::read_command> cmd) {
+                            return consume_results(std::move(results), std::move(cmd), std::move(paging_state));
                        });
                    });
                }
@@ -1661,6 +1684,16 @@ std::vector<size_t> select_statement::prepare_group_by(const schema& schema, sel

 }

+future<> set_internal_paging_size(int paging_size) {
+    return seastar::smp::invoke_on_all([paging_size] {
+        internal_paging_size = paging_size;
+    });
+}
+
+future<> reset_internal_paging_size() {
+    return set_internal_paging_size(DEFAULT_INTERNAL_PAGING_SIZE);
+}
+
 }

 namespace util {
--- a/database.cc
+++ b/database.cc
@@ -767,7 +767,7 @@ future<> database::drop_column_family(const sstring& ks_name, const sstring& cf_
    remove(*cf);
    cf->clear_views();
    auto& ks = find_keyspace(ks_name);
-    return when_all_succeed(cf->await_pending_writes(), cf->await_pending_reads()).then_unpack([this, &ks, cf, tsf = std::move(tsf), snapshot] {
+    return cf->await_pending_ops().then([this, &ks, cf, tsf = std::move(tsf), snapshot] {
        return truncate(ks, *cf, std::move(tsf), snapshot).finally([this, cf] {
            return cf->stop();
        });
--- a/database.hh
+++ b/database.hh
@@ -541,6 +541,8 @@ private:
    utils::phased_barrier _pending_reads_phaser;
    // Corresponding phaser for in-progress streams
    utils::phased_barrier _pending_streams_phaser;
+    // Corresponding phaser for in-progress flushes
+    utils::phased_barrier _pending_flushes_phaser;

    // This field cashes the last truncation time for the table.
    // The master resides in system.truncated table
@@ -986,6 +988,14 @@ public:
        return _pending_streams_phaser.advance_and_await();
    }

+    future<> await_pending_flushes() {
+        return _pending_flushes_phaser.advance_and_await();
+    }
+
+    future<> await_pending_ops() {
+        return when_all(await_pending_reads(), await_pending_writes(), await_pending_streams(), await_pending_flushes()).discard_result();
+    }
+
    void add_or_update_view(view_ptr v);
    void remove_view(view_ptr v);
    void clear_views();
--- a/db/large_data_handler.cc
+++ b/db/large_data_handler.cc
@@ -113,7 +113,7 @@ future<> cql_table_large_data_handler::record_large_cells(const sstables::sstabl
        auto ck_str = key_to_str(*clustering_key, s);
        return try_record("cell", sst, partition_key, int64_t(cell_size), cell_type, format("{} {}", ck_str, column_name), extra_fields, ck_str, column_name);
    } else {
-        return try_record("cell", sst, partition_key, int64_t(cell_size), cell_type, column_name, extra_fields, nullptr, column_name);
+        return try_record("cell", sst, partition_key, int64_t(cell_size), cell_type, column_name, extra_fields, data_value::make_null(utf8_type), column_name);
    }
 }

@@ -125,7 +125,7 @@ future<> cql_table_large_data_handler::record_large_rows(const sstables::sstable
        std::string ck_str = key_to_str(*clustering_key, s);
        return try_record("row", sst, partition_key, int64_t(row_size), "row", ck_str, extra_fields,  ck_str);
    } else {
-        return try_record("row", sst, partition_key, int64_t(row_size), "static row", "", extra_fields, nullptr);
+        return try_record("row", sst, partition_key, int64_t(row_size), "static row", "", extra_fields, data_value::make_null(utf8_type));
    }
 }

--- a/db/large_data_handler.hh
+++ b/db/large_data_handler.hh
@@ -111,27 +111,12 @@ public:
        return make_ready_future<>();
    }

-    future<> maybe_delete_large_data_entries(const schema& s, sstring filename, uint64_t data_size) {
+    future<> maybe_delete_large_data_entries(const schema& /*s*/, sstring /*filename*/, uint64_t /*data_size*/) {
        assert(running());
-        future<> large_partitions = make_ready_future<>();
-        if (__builtin_expect(data_size > _partition_threshold_bytes, false)) {
-            large_partitions = with_sem([&s, filename, this] () mutable {
-                return delete_large_data_entries(s, std::move(filename), db::system_keyspace::LARGE_PARTITIONS);
-            });
-        }
-        future<> large_rows = make_ready_future<>();
-        if (__builtin_expect(data_size > _row_threshold_bytes, false)) {
-            large_rows = with_sem([&s, filename, this] () mutable {
-                return delete_large_data_entries(s, std::move(filename), db::system_keyspace::LARGE_ROWS);
-            });
-        }
-        future<> large_cells = make_ready_future<>();
-        if (__builtin_expect(data_size > _cell_threshold_bytes, false)) {
-            large_cells = with_sem([&s, filename, this] () mutable {
-                return delete_large_data_entries(s, std::move(filename), db::system_keyspace::LARGE_CELLS);
-            });
-        }
-        return when_all(std::move(large_partitions), std::move(large_rows), std::move(large_cells)).discard_result();
+
+        // Deletion of large data entries is disabled due to #7668
+        // They will evetually expire based on the 30 days TTL.
+        return make_ready_future<>();
    }

    const large_data_handler::stats& stats() const { return _stats; }
--- a/dist/common/scripts/scylla_util.py
+++ b/dist/common/scripts/scylla_util.py
@@ -133,6 +133,8 @@ class aws_instance:
            raise Exception("found more than one disk mounted at root'".format(root_dev_candidates))

        root_dev = root_dev_candidates[0].device
+        if root_dev == '/dev/root':
+            root_dev = run('findmnt -n -o SOURCE /', shell=True, check=True, capture_output=True, encoding='utf-8').stdout.strip()
        nvmes_present = list(filter(nvme_re.match, os.listdir("/dev")))
        return {"root": [ root_dev ], "ephemeral": [ x for x in nvmes_present if not root_dev.startswith(os.path.join("/dev/", x)) ] }

--- a/repair/row_level.cc
+++ b/repair/row_level.cc
@@ -519,7 +519,7 @@ public:
        table& t = db.local().find_column_family(_schema->id());
        auto writer = shared_from_this();
        _writer_done[node_idx] = mutation_writer::distribute_reader_and_consume_on_shards(_schema, std::move(queue_reader),
-                [&db, reason = this->_reason, estimated_partitions = this->_estimated_partitions, writer] (flat_mutation_reader reader) {
+                [&db, reason = this->_reason, estimated_partitions = this->_estimated_partitions] (flat_mutation_reader reader) {
            auto& t = db.local().find_column_family(reader.schema());
            return db::view::check_needs_view_update_path(_sys_dist_ks->local(), t, reason).then([t = t.shared_from_this(), estimated_partitions, reader = std::move(reader)] (bool use_view_update_path) mutable {
                //FIXME: for better estimations this should be transmitted from remote
--- a/row_cache.cc
+++ b/row_cache.cc
@@ -1272,7 +1272,9 @@ flat_mutation_reader cache_entry::read(row_cache& rc, read_context& reader, row_
 // Assumes reader is in the corresponding partition
 flat_mutation_reader cache_entry::do_read(row_cache& rc, read_context& reader) {
    auto snp = _pe.read(rc._tracker.region(), rc._tracker.cleaner(), _schema, &rc._tracker, reader.phase());
-    auto ckr = query::clustering_key_filter_ranges::get_ranges(*_schema, reader.slice(), _key.key());
+    auto ckr = with_linearized_managed_bytes([&] {
+        return query::clustering_key_filter_ranges::get_ranges(*_schema, reader.slice(), _key.key());
+    });
    auto r = make_cache_flat_mutation_reader(_schema, _key, std::move(ckr), rc, reader.shared_from_this(), std::move(snp));
    r.upgrade_schema(rc.schema());
    r.upgrade_schema(reader.schema());
--- a/schema.cc
+++ b/schema.cc
@@ -827,7 +827,7 @@ std::ostream& schema::describe(std::ostream& os) const {
    os << "}";
    os << "\n    AND comment = '" << comment()<< "'";
    os << "\n    AND compaction = {'class': '" <<  sstables::compaction_strategy::name(compaction_strategy()) << "'";
-    map_as_cql_param(os, compaction_strategy_options()) << "}";
+    map_as_cql_param(os, compaction_strategy_options(), false) << "}";
    os << "\n    AND compression = {";
    map_as_cql_param(os,  get_compressor_params().get_options());
    os << "}";
--- a/2
+++ b/2
--- a/service/storage_proxy.cc
+++ b/service/storage_proxy.cc
@@ -2585,14 +2585,13 @@ future<> storage_proxy::send_hint_to_endpoint(frozen_mutation_and_schema fm_a_s,
 }

 future<> storage_proxy::send_hint_to_all_replicas(frozen_mutation_and_schema fm_a_s) {
-    const auto timeout = db::timeout_clock::now() + 1h;
    if (!_features.cluster_supports_hinted_handoff_separate_connection()) {
        std::array<mutation, 1> ms{fm_a_s.fm.unfreeze(fm_a_s.s)};
-        return mutate_internal(std::move(ms), db::consistency_level::ALL, false, nullptr, empty_service_permit(), timeout);
+        return mutate_internal(std::move(ms), db::consistency_level::ALL, false, nullptr, empty_service_permit());
    }

    std::array<hint_wrapper, 1> ms{hint_wrapper { std::move(fm_a_s.fm.unfreeze(fm_a_s.s)) }};
-    return mutate_internal(std::move(ms), db::consistency_level::ALL, false, nullptr, empty_service_permit(), timeout);
+    return mutate_internal(std::move(ms), db::consistency_level::ALL, false, nullptr, empty_service_permit());
 }

 /**
--- a/service/storage_service.cc
+++ b/service/storage_service.cc
@@ -2546,7 +2546,7 @@ future<> storage_service::rebuild(sstring source_dc) {
                    slogger.info("Streaming for rebuild successful");
                }).handle_exception([] (auto ep) {
                    // This is used exclusively through JMX, so log the full trace but only throw a simple RTE
-                    slogger.warn("Error while rebuilding node: {}", std::current_exception());
+                    slogger.warn("Error while rebuilding node: {}", ep);
                    return make_exception_future<>(std::move(ep));
                });
            });
--- a/sstables/compaction_manager.cc
+++ b/sstables/compaction_manager.cc
@@ -646,10 +646,11 @@ future<> compaction_manager::rewrite_sstables(column_family* cf, sstables::compa
    _tasks.push_back(task);

    auto sstables = std::make_unique<std::vector<sstables::shared_sstable>>(get_func(*cf));
+    auto compacting = make_lw_shared<compacting_sstable_registration>(this, *sstables);
    auto sstables_ptr = sstables.get();
    _stats.pending_tasks += sstables->size();

-    task->compaction_done = do_until([sstables_ptr] { return sstables_ptr->empty(); }, [this, task, options, sstables_ptr] () mutable {
+    task->compaction_done = do_until([sstables_ptr] { return sstables_ptr->empty(); }, [this, task, options, sstables_ptr, compacting] () mutable {

        // FIXME: lock cf here
        if (!can_proceed(task)) {
@@ -659,7 +660,7 @@ future<> compaction_manager::rewrite_sstables(column_family* cf, sstables::compa
        auto sst = sstables_ptr->back();
        sstables_ptr->pop_back();

-        return repeat([this, task, options, sst = std::move(sst)] () mutable {
+        return repeat([this, task, options, sst = std::move(sst), compacting] () mutable {
            column_family& cf = *task->compacting_cf;
            auto sstable_level = sst->get_sstable_level();
            auto run_identifier = sst->run_identifier();
@@ -667,21 +668,22 @@ future<> compaction_manager::rewrite_sstables(column_family* cf, sstables::compa
            auto descriptor = sstables::compaction_descriptor({ sst }, cf.get_sstable_set(), service::get_local_compaction_priority(),
                sstable_level, sstables::compaction_descriptor::default_max_sstable_bytes, run_identifier, options);

-            auto compacting = make_lw_shared<compacting_sstable_registration>(this, descriptor.sstables);
            // Releases reference to cleaned sstable such that respective used disk space can be freed.
            descriptor.release_exhausted = [compacting] (const std::vector<sstables::shared_sstable>& exhausted_sstables) {
                compacting->release_compacting(exhausted_sstables);
            };

-            _stats.pending_tasks--;
-            _stats.active_tasks++;
-            task->compaction_running = true;
-            compaction_backlog_tracker user_initiated(std::make_unique<user_initiated_backlog_tracker>(_compaction_controller.backlog_of_shares(200), _available_memory));
-            return do_with(std::move(user_initiated), [this, &cf, descriptor = std::move(descriptor)] (compaction_backlog_tracker& bt) mutable {
-                return with_scheduling_group(_scheduling_group, [this, &cf, descriptor = std::move(descriptor)] () mutable {
-                    return cf.run_compaction(std::move(descriptor));
+            return with_semaphore(_rewrite_sstables_sem, 1, [this, task, &cf, descriptor = std::move(descriptor)] () mutable {
+                _stats.pending_tasks--;
+                _stats.active_tasks++;
+                task->compaction_running = true;
+                compaction_backlog_tracker user_initiated(std::make_unique<user_initiated_backlog_tracker>(_compaction_controller.backlog_of_shares(200), _available_memory));
+                return do_with(std::move(user_initiated), [this, &cf, descriptor = std::move(descriptor)] (compaction_backlog_tracker& bt) mutable {
+                    return with_scheduling_group(_scheduling_group, [this, &cf, descriptor = std::move(descriptor)]() mutable {
+                        return cf.run_compaction(std::move(descriptor));
+                    });
                });
-            }).then_wrapped([this, task, compacting = std::move(compacting)] (future<> f) mutable {
+            }).then_wrapped([this, task, compacting] (future<> f) mutable {
                task->compaction_running = false;
                _stats.active_tasks--;
                if (!can_proceed(task)) {
--- a/sstables/compaction_manager.hh
+++ b/sstables/compaction_manager.hh
@@ -110,6 +110,7 @@ private:
    std::unordered_map<column_family*, rwlock> _compaction_locks;

    semaphore _custom_job_sem{1};
+    seastar::named_semaphore _rewrite_sstables_sem = {1, named_semaphore_exception_factory{"rewrite sstables"}};

    std::function<void()> compaction_submission_callback();
    // all registered column families are submitted for compaction at a constant interval.
--- a/sstables/leveled_compaction_strategy.cc
+++ b/sstables/leveled_compaction_strategy.cc
@@ -176,7 +176,7 @@ leveled_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input

    unsigned max_filled_level = 0;

-    size_t offstrategy_threshold = std::max(schema->min_compaction_threshold(), 4);
+    size_t offstrategy_threshold = (mode == reshape_mode::strict) ? std::max(schema->min_compaction_threshold(), 4) : std::max(schema->max_compaction_threshold(), 32);
    size_t max_sstables = std::max(schema->max_compaction_threshold(), int(offstrategy_threshold));
    auto tolerance = [mode] (unsigned level) -> unsigned {
        if (mode == reshape_mode::strict) {
--- a/sstables/sstables.cc
+++ b/sstables/sstables.cc
@@ -1734,8 +1734,8 @@ void sstable::write_collection(file_writer& out, const composite& clustering_key
 void sstable::write_clustered_row(file_writer& out, const schema& schema, const clustering_row& clustered_row) {
    auto clustering_key = composite::from_clustering_element(schema, clustered_row.key());

-    maybe_write_row_marker(out, schema, clustered_row.marker(), clustering_key);
    maybe_write_row_tombstone(out, clustering_key, clustered_row);
+    maybe_write_row_marker(out, schema, clustered_row.marker(), clustering_key);

    if (schema.clustering_key_size()) {
        column_name_helper::min_max_components(schema, _collector.min_column_names(), _collector.max_column_names(),
--- a/sstables/time_window_compaction_strategy.cc
+++ b/sstables/time_window_compaction_strategy.cc
@@ -115,7 +115,7 @@ time_window_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> i
    for (auto& pair : all_buckets.first) {
        auto ssts = std::move(pair.second);
        if (ssts.size() > offstrategy_threshold) {
-            ssts.resize(std::min(multi_window.size(), max_sstables));
+            ssts.resize(std::min(ssts.size(), max_sstables));
            compaction_descriptor desc(std::move(ssts), std::optional<sstables::sstable_set>(), iop);
            desc.options = compaction_options::make_reshape();
            return desc;
--- a/streaming/stream_session.cc
+++ b/streaming/stream_session.cc
@@ -432,7 +432,7 @@ future<prepare_message> stream_session::prepare(std::vector<stream_request> requ
            try {
                db.find_column_family(ks, cf);
            } catch (no_such_column_family&) {
-                auto err = format("[Stream #{{}}] prepare requested ks={{}} cf={{}} does not exist", ks, cf);
+                auto err = format("[Stream #{{}}] prepare requested ks={{}} cf={{}} does not exist", plan_id, ks, cf);
                sslog.warn(err.c_str());
                throw std::runtime_error(err);
            }
--- a/table.cc
+++ b/table.cc
@@ -1048,7 +1048,7 @@ table::stop() {
        return make_ready_future<>();
    }
    return _async_gate.close().then([this] {
-        return when_all(await_pending_writes(), await_pending_reads(), await_pending_streams()).discard_result().finally([this] {
+        return await_pending_ops().finally([this] {
            return when_all(_memtables->request_flush(), _streaming_memtables->request_flush()).discard_result().finally([this] {
                return _compaction_manager.remove(this).then([this] {
                    // Nest, instead of using when_all, so we don't lose any exceptions.
@@ -1226,9 +1226,20 @@ table::on_compaction_completion(sstables::compaction_completion_desc& desc) {
        }
    }

-    auto new_compacted_but_not_deleted = _sstables_compacted_but_not_deleted;
-    // rebuilding _sstables_compacted_but_not_deleted first to make the entire rebuild operation exception safe.
-    new_compacted_but_not_deleted.insert(new_compacted_but_not_deleted.end(), desc.old_sstables.begin(), desc.old_sstables.end());
+    // Precompute before so undo_compacted_but_not_deleted can be sure not to throw
+    std::unordered_set<sstables::shared_sstable> s(
+           desc.old_sstables.begin(), desc.old_sstables.end());
+    _sstables_compacted_but_not_deleted.insert(_sstables_compacted_but_not_deleted.end(), desc.old_sstables.begin(), desc.old_sstables.end());
+    // After we are done, unconditionally remove compacted sstables from _sstables_compacted_but_not_deleted,
+    // or they could stay forever in the set, resulting in deleted files remaining
+    // opened and disk space not being released until shutdown.
+    auto undo_compacted_but_not_deleted = defer([&] {
+        auto e = boost::range::remove_if(_sstables_compacted_but_not_deleted, [&] (sstables::shared_sstable sst) {
+            return s.count(sst);
+        });
+        _sstables_compacted_but_not_deleted.erase(e, _sstables_compacted_but_not_deleted.end());
+        rebuild_statistics();
+    });

    _cache.invalidate([this, &desc] () noexcept {
        // FIXME: this is not really noexcept, but we need to provide strong exception guarantees.
@@ -1239,8 +1250,6 @@ table::on_compaction_completion(sstables::compaction_completion_desc& desc) {
    // to sstables files that are about to be deleted.
    _cache.refresh_snapshot();

-    _sstables_compacted_but_not_deleted = std::move(new_compacted_but_not_deleted);
-
    rebuild_statistics();

    auto f = seastar::with_gate(_sstable_deletion_gate, [this, sstables_to_remove = desc.old_sstables] {
@@ -1256,17 +1265,6 @@ table::on_compaction_completion(sstables::compaction_completion_desc& desc) {
        // Any remaining SSTables will eventually be re-compacted and re-deleted.
        tlogger.error("Compacted SSTables deletion failed: {}. Ignored.", std::current_exception());
    }
-
-    // unconditionally remove compacted sstables from _sstables_compacted_but_not_deleted,
-    // or they could stay forever in the set, resulting in deleted files remaining
-    // opened and disk space not being released until shutdown.
-    std::unordered_set<sstables::shared_sstable> s(
-           desc.old_sstables.begin(), desc.old_sstables.end());
-    auto e = boost::range::remove_if(_sstables_compacted_but_not_deleted, [&] (sstables::shared_sstable sst) -> bool {
-        return s.count(sst);
-    });
-    _sstables_compacted_but_not_deleted.erase(e, _sstables_compacted_but_not_deleted.end());
-    rebuild_statistics();
 }

 // For replace/remove_ancestors_needed_write, note that we need to update the compaction backlog
@@ -1825,7 +1823,8 @@ future<std::unordered_map<sstring, table::snapshot_details>> table::get_snapshot
 }

 future<> table::flush() {
-    return _memtables->request_flush();
+    auto op = _pending_flushes_phaser.start();
+    return _memtables->request_flush().then([op = std::move(op)] {});
 }

 // FIXME: We can do much better than this in terms of cache management. Right
--- a/test/alternator/test_condition_expression.py
+++ b/test/alternator/test_condition_expression.py
@@ -136,7 +136,7 @@ def test_update_condition_eq_different(test_table_s):
                        ConditionExpression='a = :val2',
                        ExpressionAttributeValues={':val1': val1, ':val2': val2})

-# Also check an actual case of same time, but inequality.
+# Also check an actual case of same type, but inequality.
 def test_update_condition_eq_unequal(test_table_s):
    p = random_string()
    test_table_s.update_item(Key={'p': p},
@@ -146,6 +146,13 @@ def test_update_condition_eq_unequal(test_table_s):
            UpdateExpression='SET a = :val1',
            ConditionExpression='a = :oldval',
            ExpressionAttributeValues={':val1': 3, ':oldval': 2})
+    # If the attribute being compared doesn't exist, it's considered a failed
+    # condition, not an error:
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET a = :val1',
+            ConditionExpression='q = :oldval',
+            ExpressionAttributeValues={':val1': 3, ':oldval': 2})

 # Check that set equality is checked correctly. Unlike string equality (for
 # example), it cannot be done with just naive string comparison of the JSON
@@ -269,15 +276,44 @@ def test_update_condition_lt(test_table_s):
            UpdateExpression='SET z = :newval',
            ConditionExpression='a < :oldval',
            ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
-    # Trying to compare an unsupported type - e.g., in the following test
-    # a boolean, is unfortunately caught by boto3 and cannot be tested here...
-    #test_table_s.update_item(Key={'p': p},
-    #    AttributeUpdates={'d': {'Value': False, 'Action': 'PUT'}})
-    #with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
-    #    test_table_s.update_item(Key={'p': p},
-    #        UpdateExpression='SET z = :newval',
-    #        ConditionExpression='d < :oldval',
-    #        ExpressionAttributeValues={':newval': 2, ':oldval': True})
+    # If the attribute being compared doesn't even exist, this is also
+    # considered as a false condition - not an error.
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression='q < :oldval',
+            ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression=':oldval < q',
+            ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
+    # If a comparison parameter comes from a constant specified in the query,
+    # and it has a type not supported by the comparison (e.g., a list), it's
+    # not just a failed comparison - it is considered a ValidationException
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression='a < :oldval',
+            ExpressionAttributeValues={':newval': 2, ':oldval': [1,2]})
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression=':oldval < a',
+            ExpressionAttributeValues={':newval': 2, ':oldval': [1,2]})
+    # However, if when the wrong type comes from an item attribute, not the
+    # query, the comparison is simply false - not a ValidationException.
+    test_table_s.update_item(Key={'p': p}, AttributeUpdates={'x': {'Value': [1,2,3], 'Action': 'PUT'}})
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression='x < :oldval',
+            ExpressionAttributeValues={':newval': 2, ':oldval': 1})
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression=':oldval < x',
+            ExpressionAttributeValues={':newval': 2, ':oldval': 1})
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 4

 # Test for ConditionExpression with operator "<="
@@ -341,6 +377,44 @@ def test_update_condition_le(test_table_s):
            UpdateExpression='SET z = :newval',
            ConditionExpression='a <= :oldval',
            ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
+    # If the attribute being compared doesn't even exist, this is also
+    # considered as a false condition - not an error.
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression='q <= :oldval',
+            ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression=':oldval <= q',
+            ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
+    # If a comparison parameter comes from a constant specified in the query,
+    # and it has a type not supported by the comparison (e.g., a list), it's
+    # not just a failed comparison - it is considered a ValidationException
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression='a <= :oldval',
+            ExpressionAttributeValues={':newval': 2, ':oldval': [1,2]})
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression=':oldval <= a',
+            ExpressionAttributeValues={':newval': 2, ':oldval': [1,2]})
+    # However, if when the wrong type comes from an item attribute, not the
+    # query, the comparison is simply false - not a ValidationException.
+    test_table_s.update_item(Key={'p': p}, AttributeUpdates={'x': {'Value': [1,2,3], 'Action': 'PUT'}})
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression='x <= :oldval',
+            ExpressionAttributeValues={':newval': 2, ':oldval': 1})
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression=':oldval <= x',
+            ExpressionAttributeValues={':newval': 2, ':oldval': 1})
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 7

 # Test for ConditionExpression with operator ">"
@@ -404,6 +478,44 @@ def test_update_condition_gt(test_table_s):
            UpdateExpression='SET z = :newval',
            ConditionExpression='a > :oldval',
            ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
+    # If the attribute being compared doesn't even exist, this is also
+    # considered as a false condition - not an error.
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression='q > :oldval',
+            ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression=':oldval > q',
+            ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
+    # If a comparison parameter comes from a constant specified in the query,
+    # and it has a type not supported by the comparison (e.g., a list), it's
+    # not just a failed comparison - it is considered a ValidationException
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression='a > :oldval',
+            ExpressionAttributeValues={':newval': 2, ':oldval': [1,2]})
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression=':oldval > a',
+            ExpressionAttributeValues={':newval': 2, ':oldval': [1,2]})
+    # However, if when the wrong type comes from an item attribute, not the
+    # query, the comparison is simply false - not a ValidationException.
+    test_table_s.update_item(Key={'p': p}, AttributeUpdates={'x': {'Value': [1,2,3], 'Action': 'PUT'}})
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression='x > :oldval',
+            ExpressionAttributeValues={':newval': 2, ':oldval': 1})
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression=':oldval > x',
+            ExpressionAttributeValues={':newval': 2, ':oldval': 1})
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 4

 # Test for ConditionExpression with operator ">="
@@ -467,6 +579,44 @@ def test_update_condition_ge(test_table_s):
            UpdateExpression='SET z = :newval',
            ConditionExpression='a >= :oldval',
            ExpressionAttributeValues={':newval': 2, ':oldval': '0'})
+    # If the attribute being compared doesn't even exist, this is also
+    # considered as a false condition - not an error.
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression='q >= :oldval',
+            ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression=':oldval >= q',
+            ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
+    # If a comparison parameter comes from a constant specified in the query,
+    # and it has a type not supported by the comparison (e.g., a list), it's
+    # not just a failed comparison - it is considered a ValidationException
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression='a >= :oldval',
+            ExpressionAttributeValues={':newval': 2, ':oldval': [1,2]})
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression=':oldval >= a',
+            ExpressionAttributeValues={':newval': 2, ':oldval': [1,2]})
+    # However, if when the wrong type comes from an item attribute, not the
+    # query, the comparison is simply false - not a ValidationException.
+    test_table_s.update_item(Key={'p': p}, AttributeUpdates={'x': {'Value': [1,2,3], 'Action': 'PUT'}})
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression='x >= :oldval',
+            ExpressionAttributeValues={':newval': 2, ':oldval': 1})
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression=':oldval >= x',
+            ExpressionAttributeValues={':newval': 2, ':oldval': 1})
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 7

 # Test for ConditionExpression with ternary operator "BETWEEN" (checking
@@ -548,6 +698,60 @@ def test_update_condition_between(test_table_s):
            UpdateExpression='SET z = :newval',
            ConditionExpression='a BETWEEN :oldval1 AND :oldval2',
            ExpressionAttributeValues={':newval': 2, ':oldval1': '0', ':oldval2': '2'})
+    # If the attribute being compared doesn't even exist, this is also
+    # considered as a false condition - not an error.
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression='q BETWEEN :oldval1 AND :oldval2',
+            ExpressionAttributeValues={':newval': 2, ':oldval1': b'dog', ':oldval2': b'zebra'})
+    # If and operand from the query, and it has a type not supported by the
+    # comparison (e.g., a list), it's not just a failed condition - it is
+    # considered a ValidationException
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression='a BETWEEN :oldval1 AND :oldval2',
+            ExpressionAttributeValues={':newval': 2, ':oldval1': [1,2], ':oldval2': [2,3]})
+    # However, if when the wrong type comes from an item attribute, not the
+    # query, the comparison is simply false - not a ValidationException.
+    test_table_s.update_item(Key={'p': p}, AttributeUpdates={'x': {'Value': [1,2,3], 'Action': 'PUT'},
+                                                             'y': {'Value': [2,3,4], 'Action': 'PUT'}})
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression='a BETWEEN x and y',
+            ExpressionAttributeValues={':newval': 2})
+    # If the two operands come from the query (":val" references) then if they
+    # have different types or the wrong order, this is a ValidationException.
+    # But if one or more of the operands come from the item, this only causes
+    # a false condition - not a ValidationException.
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression='a BETWEEN :oldval1 AND :oldval2',
+            ExpressionAttributeValues={':newval': 2, ':oldval1': 2, ':oldval2': 1})
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression='a BETWEEN :oldval1 AND :oldval2',
+            ExpressionAttributeValues={':newval': 2, ':oldval1': 2, ':oldval2': 'dog'})
+    test_table_s.update_item(Key={'p': p}, AttributeUpdates={'two': {'Value': 2, 'Action': 'PUT'}})
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression='a BETWEEN two AND :oldval',
+            ExpressionAttributeValues={':newval': 2, ':oldval': 1})
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression='a BETWEEN :oldval AND two',
+            ExpressionAttributeValues={':newval': 2, ':oldval': 3})
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET z = :newval',
+            ConditionExpression='a BETWEEN two AND :oldval',
+            ExpressionAttributeValues={':newval': 2, ':oldval': 'dog'})
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 9

 # Test for ConditionExpression with multi-operand operator "IN", checking
@@ -605,6 +809,13 @@ def test_update_condition_in(test_table_s):
            UpdateExpression='SET c = :val37',
            ConditionExpression='a IN ()',
            ExpressionAttributeValues=values)
+    # If the attribute being compared doesn't even exist, this is also
+    # considered as a false condition - not an error.
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            UpdateExpression='SET c = :val37',
+            ConditionExpression='q IN ({})'.format(','.join(values.keys())),
+            ExpressionAttributeValues=values)

 # Beyond the above operators, there are also test functions supported -
 # attribute_exists, attribute_not_exists, attribute_type, begins_with,
--- a/test/alternator/test_expected.py
+++ b/test/alternator/test_expected.py
@@ -237,6 +237,30 @@ def test_update_expected_1_le(test_table_s):
                            'AttributeValueList': [2, 3]}}
        )

+# Comparison operators like le work only on numbers, strings or bytes.
+# As noted in issue #8043, if any other type is included in *the query*,
+# the result should be a ValidationException, but if the wrong type appears
+# in the item, not the query, the result is a failed condition.
+def test_update_expected_1_le_validation(test_table_s):
+    p = random_string()
+    test_table_s.update_item(Key={'p': p},
+        AttributeUpdates={'a': {'Value': 1, 'Action': 'PUT'},
+                          'b': {'Value': [1,2], 'Action': 'PUT'}})
+    # Bad type (a list) in the query. Result is ValidationException.
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.update_item(Key={'p': p},
+            AttributeUpdates={'z': {'Value': 17, 'Action': 'PUT'}},
+            Expected={'a': {'ComparisonOperator': 'LE',
+                            'AttributeValueList': [[1,2,3]]}}
+        )
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            AttributeUpdates={'z': {'Value': 17, 'Action': 'PUT'}},
+            Expected={'b': {'ComparisonOperator': 'LE',
+                            'AttributeValueList': [3]}}
+        )
+    assert not 'z' in test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']
+
 # Tests for Expected with ComparisonOperator = "LT":
 def test_update_expected_1_lt(test_table_s):
    p = random_string()
@@ -894,6 +918,34 @@ def test_update_expected_1_between(test_table_s):
            AttributeUpdates={'z': {'Value': 2, 'Action': 'PUT'}},
            Expected={'d': {'ComparisonOperator': 'BETWEEN', 'AttributeValueList': [set([1]), set([2])]}})

+# BETWEEN work only on numbers, strings or bytes. As noted in issue #8043,
+# if any other type is included in *the query*, the result should be a
+# ValidationException, but if the wrong type appears in the item, not the
+# query, the result is a failed condition.
+# BETWEEN should also generate ValidationException if the two ends of the
+# range are not of the same type or not in the correct order, but this
+# already is tested in the test above (test_update_expected_1_between).
+def test_update_expected_1_between_validation(test_table_s):
+    p = random_string()
+    test_table_s.update_item(Key={'p': p},
+        AttributeUpdates={'a': {'Value': 1, 'Action': 'PUT'},
+                          'b': {'Value': [1,2], 'Action': 'PUT'}})
+    # Bad type (a list) in the query. Result is ValidationException.
+    with pytest.raises(ClientError, match='ValidationException'):
+        test_table_s.update_item(Key={'p': p},
+            AttributeUpdates={'z': {'Value': 17, 'Action': 'PUT'}},
+            Expected={'a': {'ComparisonOperator': 'BETWEEN',
+                            'AttributeValueList': [[1,2,3], [2,3,4]]}}
+        )
+    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
+        test_table_s.update_item(Key={'p': p},
+            AttributeUpdates={'z': {'Value': 17, 'Action': 'PUT'}},
+            Expected={'b': {'ComparisonOperator': 'BETWEEN',
+                            'AttributeValueList': [1,2]}}
+        )
+    assert not 'z' in test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']
+
+
 ##############################################################################
 # Instead of ComparisonOperator and AttributeValueList, one can specify either
 # Value or Exists:
--- a/test/alternator/test_filter_expression.py
+++ b/test/alternator/test_filter_expression.py
@@ -236,6 +236,30 @@ def test_filter_expression_ge(test_table_sn_with_data):
        expected_items = [item for item in items if item[xn] >= xv]
        assert(got_items == expected_items)

+# Comparison operators such as >= or BETWEEN only work on numbers, strings or
+# bytes. When an expression's operands come from the item and has a wrong type
+# (e.g., a list), the result is that the item is skipped - aborting the scan
+# with a ValidationException is a bug (this was issue #8043).
+def test_filter_expression_le_bad_type(test_table_sn_with_data):
+    table, p, items = test_table_sn_with_data
+    got_items = full_query(table, KeyConditionExpression='p=:p', FilterExpression='l <= :xv',
+        ExpressionAttributeValues={':p': p, ':xv': 3})
+    assert got_items == []
+    got_items = full_query(table, KeyConditionExpression='p=:p', FilterExpression=':xv <= l',
+        ExpressionAttributeValues={':p': p, ':xv': 3})
+    assert got_items == []
+def test_filter_expression_between_bad_type(test_table_sn_with_data):
+    table, p, items = test_table_sn_with_data
+    got_items = full_query(table, KeyConditionExpression='p=:p', FilterExpression='s between :xv and l',
+        ExpressionAttributeValues={':p': p, ':xv': 'cat'})
+    assert got_items == []
+    got_items = full_query(table, KeyConditionExpression='p=:p', FilterExpression='s between l and :xv',
+        ExpressionAttributeValues={':p': p, ':xv': 'cat'})
+    assert got_items == []
+    got_items = full_query(table, KeyConditionExpression='p=:p', FilterExpression='s between i and :xv',
+        ExpressionAttributeValues={':p': p, ':xv': 'cat'})
+    assert got_items == []
+
 # Test the "BETWEEN/AND" ternary operator on a numeric, string and bytes
 # attribute. These keywords are case-insensitive.
 def test_filter_expression_between(test_table_sn_with_data):
--- a/test/alternator/test_update_expression.py
+++ b/test/alternator/test_update_expression.py
@@ -675,6 +675,24 @@ def test_update_expression_add_numbers(test_table_s):
            UpdateExpression='ADD b :val1',
            ExpressionAttributeValues={':val1': 1})

+# In test_update_expression_add_numbers() above we tested ADDing a number to
+# an existing number. The following test check that ADD can be used to
+# create a *new* number, as if it was added to zero.
+def test_update_expression_add_numbers_new(test_table_s):
+    # Test that "ADD" can create a new number attribute:
+    p = random_string()
+    test_table_s.put_item(Item={'p': p, 'a': 'hello'})
+    test_table_s.update_item(Key={'p': p},
+        UpdateExpression='ADD b :val1',
+        ExpressionAttributeValues={':val1': 7})
+    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['b'] == 7
+    # Test that "ADD" can create an entirely new item:
+    p = random_string()
+    test_table_s.update_item(Key={'p': p},
+        UpdateExpression='ADD b :val1',
+        ExpressionAttributeValues={':val1': 8})
+    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['b'] == 8
+
 # Test "ADD" operation for sets
 def test_update_expression_add_sets(test_table_s):
    p = random_string()
@@ -703,6 +721,24 @@ def test_update_expression_add_sets(test_table_s):
            UpdateExpression='ADD a :val1',
            ExpressionAttributeValues={':val1': 'hello'})

+# In test_update_expression_add_sets() above we tested ADDing elements to an
+# existing set. The following test checks that ADD can be used to create a
+# *new* set, by adding its first item.
+def test_update_expression_add_sets_new(test_table_s):
+    # Test that "ADD" can create a new set attribute:
+    p = random_string()
+    test_table_s.put_item(Item={'p': p, 'a': 'hello'})
+    test_table_s.update_item(Key={'p': p},
+        UpdateExpression='ADD b :val1',
+        ExpressionAttributeValues={':val1': set(['dog'])})
+    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['b'] == set(['dog'])
+    # Test that "ADD" can create an entirely new item:
+    p = random_string()
+    test_table_s.update_item(Key={'p': p},
+        UpdateExpression='ADD b :val1',
+        ExpressionAttributeValues={':val1': set(['cat'])})
+    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['b'] == set(['cat'])
+
 # Test "DELETE" operation for sets
 def test_update_expression_delete_sets(test_table_s):
    p = random_string()
--- a/test/boost/cql_query_large_test.cc
+++ b/test/boost/cql_query_large_test.cc
@@ -128,12 +128,14 @@ SEASTAR_THREAD_TEST_CASE(test_large_data) {
            });
        }).get();

+        // Since deletion of large data entries has been deleted,
+        // expect the record to be present.
        assert_that(e.execute_cql("select partition_key from system.large_rows where table_name = 'tbl' allow filtering;").get0())
            .is_rows()
-            .is_empty();
+            .with_size(1);
        assert_that(e.execute_cql("select partition_key from system.large_cells where table_name = 'tbl' allow filtering;").get0())
            .is_rows()
-            .is_empty();
+            .with_size(1);

        return make_ready_future<>();
    }, cfg).get();
--- a/test/boost/secondary_index_test.cc
+++ b/test/boost/secondary_index_test.cc
@@ -29,6 +29,7 @@
 #include "types/set.hh"
 #include "test/lib/exception_utils.hh"
 #include "cql3/statements/select_statement.hh"
+#include "test/lib/select_statement_utils.hh"


 SEASTAR_TEST_CASE(test_secondary_index_regular_column_query) {
@@ -1208,6 +1209,293 @@ SEASTAR_TEST_CASE(test_indexing_paging_and_aggregation) {
    });
 }

+// Verifies that both "SELECT * [rest_of_query]" and "SELECT count(*) [rest_of_query]" 
+// return expected count of rows.
+void assert_select_count_and_select_rows_has_size(
+        cql_test_env& e, 
+        const sstring& rest_of_query, int64_t expected_count, 
+        const std::experimental::source_location& loc = std::experimental::source_location::current()) {
+    eventually([&] { 
+        require_rows(e, "SELECT count(*) " + rest_of_query, {
+            { long_type->decompose(expected_count) }
+        }, loc);
+        auto res = cquery_nofail(e, "SELECT * " + rest_of_query, nullptr, loc);
+        try {
+            assert_that(res).is_rows().with_size(expected_count);
+        } catch (const std::exception& e) {
+            BOOST_FAIL(format("is_rows/with_size failed: {}\n{}:{}: originally from here",
+                              e.what(), loc.file_name(), loc.line()));
+        }
+    });
+}
+
+static constexpr int page_scenarios_page_size = 20;
+static constexpr int page_scenarios_row_count = 2 * page_scenarios_page_size + 5;
+static constexpr int page_scenarios_initial_count = 3;
+static constexpr int page_scenarios_window_size = 4;
+static constexpr int page_scenarios_just_before_first_page = page_scenarios_page_size - page_scenarios_window_size;
+static constexpr int page_scenarios_just_after_first_page = page_scenarios_page_size + page_scenarios_window_size;    
+static constexpr int page_scenarios_just_before_second_page = 2 * page_scenarios_page_size - page_scenarios_window_size;
+static constexpr int page_scenarios_just_after_second_page = 2 * page_scenarios_page_size + page_scenarios_window_size;    
+
+static_assert(page_scenarios_initial_count < page_scenarios_row_count);
+static_assert(page_scenarios_window_size < page_scenarios_page_size);
+static_assert(page_scenarios_just_after_second_page < page_scenarios_row_count);
+
+// Executes `insert` lambda page_scenarios_row_count times. 
+// Runs `validate` lambda in a few scenarios:
+//
+// 1. After a small number of `insert`s
+// 2. In a window from just before and just after `insert`s were executed
+//    DEFAULT_COUNT_PAGE_SIZE times
+// 3. In a window from just before and just after `insert`s were executed
+//    2 * DEFAULT_COUNT_PAGE_SIZE times
+// 4. After all `insert`s
+void test_with_different_page_scenarios(
+    noncopyable_function<void (int)> insert, noncopyable_function<void (int)> validate) {
+
+    int current_row = 0;
+    for (; current_row < page_scenarios_initial_count; current_row++) {
+        insert(current_row);
+        validate(current_row + 1);
+    }
+
+    for (; current_row < page_scenarios_just_before_first_page; current_row++) {
+        insert(current_row);
+    }
+
+    for (; current_row < page_scenarios_just_after_first_page; current_row++) {
+        insert(current_row);
+        validate(current_row + 1);
+    }
+
+    for (; current_row < page_scenarios_just_before_second_page; current_row++) {
+        insert(current_row);
+    }
+
+    for (; current_row < page_scenarios_just_after_second_page; current_row++) {
+        insert(current_row);
+        validate(current_row + 1);
+    }   
+
+    for (; current_row < page_scenarios_row_count; current_row++) {
+        insert(current_row);
+    }
+
+    // No +1, because we just left for loop and current_row was incremented.
+    validate(current_row);
+}
+
+SEASTAR_TEST_CASE(test_secondary_index_on_ck_first_column_and_aggregation) {
+    // Tests aggregation on table with secondary index on first column
+    // of clustering key. This is the "partition_slices" case of 
+    // indexed_table_select_statement::do_execute.
+
+    return do_with_cql_env_thread([] (cql_test_env& e) {
+        cql3::statements::set_internal_paging_size(page_scenarios_page_size).get();
+
+        // Explicitly reproduce the first failing example in issue #7355.
+        cquery_nofail(e, "CREATE TABLE t1 (pk1 int, pk2 int, ck int, primary key((pk1, pk2), ck))");
+        cquery_nofail(e, "CREATE INDEX ON t1(ck)");
+
+        cquery_nofail(e, "INSERT INTO t1(pk1, pk2, ck) VALUES (1, 2, 3)");
+        assert_select_count_and_select_rows_has_size(e, "FROM t1 WHERE ck = 3", 1);
+
+        cquery_nofail(e, "INSERT INTO t1(pk1, pk2, ck) VALUES (1, 2, 4)");
+        cquery_nofail(e, "INSERT INTO t1(pk1, pk2, ck) VALUES (1, 2, 5)");
+        assert_select_count_and_select_rows_has_size(e, "FROM t1 WHERE ck = 3", 1);
+
+        cquery_nofail(e, "INSERT INTO t1(pk1, pk2, ck) VALUES (2, 2, 3)");
+        assert_select_count_and_select_rows_has_size(e, "FROM t1 WHERE ck = 3", 2);
+
+        cquery_nofail(e, "INSERT INTO t1(pk1, pk2, ck) VALUES (2, 1, 3)");
+        assert_select_count_and_select_rows_has_size(e, "FROM t1 WHERE ck = 3", 3);
+
+        // Test a case when there are a lot of small partitions (more than a page size).
+        cquery_nofail(e, "CREATE TABLE t2 (pk int, ck int, primary key(pk, ck))");
+        cquery_nofail(e, "CREATE INDEX ON t2(ck)");
+
+        // "Decoy" rows - they should be not counted (previously they were incorrectly counted in,
+        // see issue #7355).
+        cquery_nofail(e, "INSERT INTO t2(pk, ck) VALUES (0, -2)");
+        cquery_nofail(e, "INSERT INTO t2(pk, ck) VALUES (0, 3)");
+        cquery_nofail(e, format("INSERT INTO t2(pk, ck) VALUES ({}, 3)", page_scenarios_just_after_first_page).c_str());
+
+        test_with_different_page_scenarios([&](int current_row) {
+            cquery_nofail(e, format("INSERT INTO t2(pk, ck) VALUES ({}, 1)", current_row).c_str());
+        }, [&](int rows_inserted) {
+            assert_select_count_and_select_rows_has_size(e, "FROM t2 WHERE ck = 1", rows_inserted);
+          eventually([&] { 
+            auto res = cquery_nofail(e, "SELECT pk FROM t2 WHERE ck = 1 GROUP BY pk");
+            assert_that(res).is_rows().with_size(rows_inserted);
+            res = cquery_nofail(e, "SELECT pk, ck FROM t2 WHERE ck = 1 GROUP BY pk, ck");
+            assert_that(res).is_rows().with_size(rows_inserted);
+            require_rows(e, "SELECT sum(pk) FROM t2 WHERE ck = 1", {
+               { int32_type->decompose(int32_t(rows_inserted * (rows_inserted - 1) / 2)) }
+            });
+          });
+        });
+
+        // Test a case when there is a single large partition (larger than a page size).
+        cquery_nofail(e, "CREATE TABLE t3 (pk int, ck1 int, ck2 int, primary key(pk, ck1, ck2))");
+        cquery_nofail(e, "CREATE INDEX ON t3(ck1)");
+
+        // "Decoy" rows
+        cquery_nofail(e, "INSERT INTO t3(pk, ck1, ck2) VALUES (1, 0, 0)");
+        cquery_nofail(e, "INSERT INTO t3(pk, ck1, ck2) VALUES (1, 2, 0)");
+
+        test_with_different_page_scenarios([&](int current_row) {
+            cquery_nofail(e, format("INSERT INTO t3(pk, ck1, ck2) VALUES (1, 1, {})", current_row).c_str());
+        }, [&](int rows_inserted) {
+            assert_select_count_and_select_rows_has_size(e, "FROM t3 WHERE ck1 = 1", rows_inserted);
+          eventually([&] { 
+            auto res = cquery_nofail(e, "SELECT pk FROM t3 WHERE ck1 = 1 GROUP BY pk");
+            assert_that(res).is_rows().with_size(1);
+            res = cquery_nofail(e, "SELECT pk, ck1 FROM t3 WHERE ck1 = 1 GROUP BY pk, ck1");
+            assert_that(res).is_rows().with_size(1);
+            res = cquery_nofail(e, "SELECT pk, ck1, ck2 FROM t3 WHERE ck1 = 1 GROUP BY pk, ck1, ck2");
+            assert_that(res).is_rows().with_size(rows_inserted);
+            require_rows(e, "SELECT avg(ck2) FROM t3 WHERE ck1 = 1", {
+                { int32_type->decompose(int32_t((rows_inserted * (rows_inserted - 1) / 2) / rows_inserted)) }
+            }); 
+          });
+        });
+
+        cql3::statements::reset_internal_paging_size().get();
+    });
+}
+
+SEASTAR_TEST_CASE(test_secondary_index_on_pk_column_and_aggregation) {
+    // Tests aggregation on table with secondary index on a column
+    // of partition key. This is the "whole_partitions" case of 
+    // indexed_table_select_statement::do_execute.
+
+    return do_with_cql_env_thread([] (cql_test_env& e) {
+        cql3::statements::set_internal_paging_size(page_scenarios_page_size).get();
+
+        // Explicitly reproduce the second failing example in issue #7355.
+        // This a case with a single large partition.
+        cquery_nofail(e, "CREATE TABLE t1 (pk1 int, pk2 int, ck int, primary key((pk1, pk2), ck))");
+        cquery_nofail(e, "CREATE INDEX ON t1(pk2)");
+
+        test_with_different_page_scenarios([&](int current_row) {
+            cquery_nofail(e, format("INSERT INTO t1(pk1, pk2, ck) VALUES (1, 1, {})", current_row).c_str());
+        }, [&](int rows_inserted) {
+            assert_select_count_and_select_rows_has_size(e, "FROM t1 WHERE pk2 = 1", rows_inserted);
+          eventually([&] { 
+            auto res = cquery_nofail(e, "SELECT pk1, pk2 FROM t1 WHERE pk2 = 1 GROUP BY pk1, pk2");
+            assert_that(res).is_rows().with_size(1);
+            res = cquery_nofail(e, "SELECT pk1, pk2, ck FROM t1 WHERE pk2 = 1 GROUP BY pk1, pk2, ck");
+            assert_that(res).is_rows().with_size(rows_inserted);
+            require_rows(e, "SELECT min(pk1) FROM t1 WHERE pk2 = 1", {
+                { int32_type->decompose(1) }
+            });
+          });
+        });
+
+        // Test a case when there are a lot of small partitions (more than a page size)
+        // and there is a clustering key in base table.
+        cquery_nofail(e, "CREATE TABLE t2 (pk1 int, pk2 int, ck int, primary key((pk1, pk2), ck))");
+        cquery_nofail(e, "CREATE INDEX ON t2(pk2)");
+
+        test_with_different_page_scenarios([&](int current_row) {
+            cquery_nofail(e, format("INSERT INTO t2(pk1, pk2, ck) VALUES ({}, 1, {})", 
+                current_row, current_row % 20).c_str());
+        }, [&](int rows_inserted) {
+            assert_select_count_and_select_rows_has_size(e, "FROM t2 WHERE pk2 = 1", rows_inserted);
+          eventually([&] { 
+            auto res = cquery_nofail(e, "SELECT pk1, pk2 FROM t2 WHERE pk2 = 1 GROUP BY pk1, pk2");
+            assert_that(res).is_rows().with_size(rows_inserted);
+            require_rows(e, "SELECT max(pk1) FROM t2 WHERE pk2 = 1", {
+                { int32_type->decompose(int32_t(rows_inserted - 1)) }
+            });
+          });
+        });
+
+        // Test a case when there are a lot of small partitions (more than a page size)
+        // and there is NO clustering key in base table.
+        cquery_nofail(e, "CREATE TABLE t3 (pk1 int, pk2 int, primary key((pk1, pk2)))");
+        cquery_nofail(e, "CREATE INDEX ON t3(pk2)");
+
+        test_with_different_page_scenarios([&](int current_row) {
+            cquery_nofail(e, format("INSERT INTO t3(pk1, pk2) VALUES ({}, 1)", current_row).c_str());
+        }, [&](int rows_inserted) {
+            assert_select_count_and_select_rows_has_size(e, "FROM t3 WHERE pk2 = 1", rows_inserted);
+        });
+
+        cql3::statements::reset_internal_paging_size().get();
+    });
+}
+
+SEASTAR_TEST_CASE(test_secondary_index_on_non_pk_ck_column_and_aggregation) {
+    // Tests aggregation on table with secondary index on a column
+    // that is not a part of partition key and clustering key. 
+    // This is the non-"whole_partitions" and non-"partition_slices"
+    // case of indexed_table_select_statement::do_execute.
+
+    return do_with_cql_env_thread([] (cql_test_env& e) {
+        cql3::statements::set_internal_paging_size(page_scenarios_page_size).get();
+
+        // Test a case when there are a lot of small partitions (more than a page size)
+        // and there is a clustering key in base table.
+        cquery_nofail(e, "CREATE TABLE t (pk int, ck int, v int, primary key(pk, ck))");
+        cquery_nofail(e, "CREATE INDEX ON t(v)");
+
+        test_with_different_page_scenarios([&](int current_row) {
+            cquery_nofail(e, format("INSERT INTO t(pk, ck, v) VALUES ({}, {}, 1)", 
+                current_row, current_row % 20).c_str());
+        }, [&](int rows_inserted) {
+            assert_select_count_and_select_rows_has_size(e, "FROM t WHERE v = 1", rows_inserted);
+          eventually([&] { 
+            auto res = cquery_nofail(e, "SELECT pk FROM t WHERE v = 1 GROUP BY pk");
+            assert_that(res).is_rows().with_size(rows_inserted);
+            require_rows(e, "SELECT sum(v) FROM t WHERE v = 1", {
+                { int32_type->decompose(int32_t(rows_inserted)) }
+            });
+          });
+        });
+
+        // Test a case when there are a lot of small partitions (more than a page size)
+        // and there is NO clustering key in base table.
+        cquery_nofail(e, "CREATE TABLE t2 (pk int, v int, primary key(pk))");
+        cquery_nofail(e, "CREATE INDEX ON t2(v)");
+
+        test_with_different_page_scenarios([&](int current_row) {
+            cquery_nofail(e, format("INSERT INTO t2(pk, v) VALUES ({}, 1)", current_row).c_str());
+        }, [&](int rows_inserted) {
+            assert_select_count_and_select_rows_has_size(e, "FROM t2 WHERE v = 1", rows_inserted);
+          eventually([&] { 
+            auto res = cquery_nofail(e, "SELECT pk FROM t2 WHERE v = 1 GROUP BY pk");
+            assert_that(res).is_rows().with_size(rows_inserted);
+            require_rows(e, "SELECT sum(pk) FROM t2 WHERE v = 1", {
+                { int32_type->decompose(int32_t(rows_inserted * (rows_inserted - 1) / 2)) }
+            });
+          });
+        });
+
+        // Test a case when there is a single large partition (larger than a page size).
+        cquery_nofail(e, "CREATE TABLE t3 (pk int, ck int, v int, primary key(pk, ck))");
+        cquery_nofail(e, "CREATE INDEX ON t3(v)");
+
+        test_with_different_page_scenarios([&](int current_row) {
+            cquery_nofail(e, format("INSERT INTO t3(pk, ck, v) VALUES (1, {}, 1)", current_row).c_str());
+        }, [&](int rows_inserted) {
+            assert_select_count_and_select_rows_has_size(e, "FROM t3 WHERE v = 1", rows_inserted);
+          eventually([&] { 
+            auto res = cquery_nofail(e, "SELECT pk FROM t3 WHERE v = 1 GROUP BY pk");
+            assert_that(res).is_rows().with_size(1);
+            res = cquery_nofail(e, "SELECT pk, ck FROM t3 WHERE v = 1 GROUP BY pk, ck");
+            assert_that(res).is_rows().with_size(rows_inserted);
+            require_rows(e, "SELECT max(ck) FROM t3 WHERE v = 1", {
+                { int32_type->decompose(int32_t(rows_inserted - 1)) }
+            });
+          });
+        });
+
+        cql3::statements::reset_internal_paging_size().get();
+    });
+}
+
 SEASTAR_TEST_CASE(test_computed_columns) {
    return do_with_cql_env_thread([] (auto& e) {
        e.execute_cql("CREATE TABLE t (p1 int, p2 int, c1 int, c2 int, v int, PRIMARY KEY ((p1,p2),c1,c2))").get();
--- a/test/boost/types_test.cc
+++ b/test/boost/types_test.cc
@@ -100,6 +100,13 @@ BOOST_AUTO_TEST_CASE(test_byte_type_string_conversions) {
    BOOST_REQUIRE_EQUAL(byte_type->to_string(bytes()), "");
 }

+BOOST_AUTO_TEST_CASE(test_ascii_type_string_conversions) {
+    BOOST_REQUIRE(ascii_type->equal(ascii_type->from_string("ascii"), ascii_type->decompose("ascii")));
+    BOOST_REQUIRE_EQUAL(ascii_type->to_string(ascii_type->decompose("ascii")), "ascii");
+
+    test_parsing_fails(ascii_type, "¡Hola!");
+}
+
 BOOST_AUTO_TEST_CASE(test_short_type_string_conversions) {
    BOOST_REQUIRE(short_type->equal(short_type->from_string("12345"), short_type->decompose(int16_t(12345))));
    BOOST_REQUIRE_EQUAL(short_type->to_string(short_type->decompose(int16_t(12345))), "12345");
--- a/test/cql-pytest/test_large_cells_rows.py
+++ b/test/cql-pytest/test_large_cells_rows.py
@@ -0,0 +1,43 @@
+# Copyright 2020 ScyllaDB
+#
+# This file is part of Scylla.
+#
+# Scylla is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Scylla is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+
+from util import new_test_table
+
+import requests
+
+def test_create_large_static_cells_and_rows(cql, test_keyspace):
+    '''Test that `large_data_handler` successfully reports large static cells
+    and static rows and this doesn't cause a crash of Scylla server.
+
+    This is a regression test for https://github.com/scylladb/scylla/issues/6780'''
+    schema = "pk int, ck int, user_ids set<text> static, PRIMARY KEY (pk, ck)"
+    with new_test_table(cql, test_keyspace, schema) as table:
+        insert_stmt = cql.prepare(f"INSERT INTO {table} (pk, ck, user_ids) VALUES (?, ?, ?) USING TIMEOUT 5m")
+        # Default large data threshold for cells is 1 mb, for rows it is 10 mb.
+        # Take 10 mb cell to trigger large data reporting code both for
+        # static cells and static rows simultaneously.
+        large_set = {'x' * 1024 * 1024 * 10}
+        cql.execute(insert_stmt, [1, 1, large_set])
+
+        # REST API endpoint address for test scylla node
+        node_address = f'http://{cql.cluster.contact_points[0]}:10000'
+        # Execute force flush of test table to persistent storage, which is necessary to trigger
+        # `large_data_handler` execution.
+        table_without_ks = table[table.find('.') + 1:] # strip keyspace part from the table name
+        requests.post(f'{node_address}/storage_service/keyspace_flush/{test_keyspace}', params={'cf' : table_without_ks})
+        # No need to check that the Scylla server is running here, since the test will
+        # fail automatically in case Scylla crashes.
--- a/test/lib/select_statement_utils.hh
+++ b/test/lib/select_statement_utils.hh
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2020 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <seastar/core/future.hh>
+
+namespace cql3 {
+
+namespace statements {
+
+	future<> set_internal_paging_size(int internal_paging_size);
+	future<> reset_internal_paging_size();
+
+}
+
+}
--- a/types.cc
+++ b/types.cc
@@ -2350,6 +2350,14 @@ struct from_string_visitor {
    sstring_view s;
    bytes operator()(const reversed_type_impl& r) { return r.underlying_type()->from_string(s); }
    template <typename T> bytes operator()(const integer_type_impl<T>& t) { return decompose_value(parse_int(t, s)); }
+    bytes operator()(const ascii_type_impl&) {
+        auto bv = bytes_view(reinterpret_cast<const int8_t*>(s.begin()), s.size());
+        if (utils::ascii::validate(bv)) {
+            return to_bytes(bv);
+        } else {
+            throw marshal_exception(format("Value not compatible with type {}: '{}'", ascii_type_name, s));
+        }
+    }
    bytes operator()(const string_type_impl&) {
        return to_bytes(bytes_view(reinterpret_cast<const int8_t*>(s.begin()), s.size()));
    }
--- a/types.hh
+++ b/types.hh
@@ -380,6 +380,14 @@ public:
    data_value(const std::string&);
    data_value(const sstring&);

+    // Do not allow construction of a data_value from nullptr. The reason is
+    // that this is error prone, for example: it conflicts with `const char*` overload
+    // which tries to allocate a value from it and will cause UB.
+    //
+    // We want the null value semantics here instead. So the user will be forced
+    // to explicitly call `make_null()` instead.
+    data_value(std::nullptr_t) = delete;
+
    data_value(ascii_native_type);
    data_value(bool);
    data_value(int8_t);