Compare commits
109 Commits
next
...
scylla-4.3
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d737d56a08 | ||
|
|
e1c993fc13 | ||
|
|
0a6e38bf18 | ||
|
|
55bca74e90 | ||
|
|
162d466034 | ||
|
|
3e6d8c3fa7 | ||
|
|
5d3ff1e8a1 | ||
|
|
5358eaf1d6 | ||
|
|
e78b96ee49 | ||
|
|
add245a27e | ||
|
|
108f56c6ed | ||
|
|
d01ce491c0 | ||
|
|
7b2f65191c | ||
|
|
add5ffa787 | ||
|
|
32a1f2dcd9 | ||
|
|
f2072665d1 | ||
|
|
beb2bcb8bd | ||
|
|
8255b7984d | ||
|
|
28f5e0bd20 | ||
|
|
09f3bb93a3 | ||
|
|
76642eb00d | ||
|
|
a60f394d9a | ||
|
|
f2af68850c | ||
|
|
c7781f8c9e | ||
|
|
8f37924694 | ||
|
|
8588eef807 | ||
|
|
c50a2898cf | ||
|
|
44f7251809 | ||
|
|
fc070d3dc6 | ||
|
|
901784e122 | ||
|
|
2ccda04d57 | ||
|
|
e8facb1932 | ||
|
|
6f338e7656 | ||
|
|
7bb9230cfa | ||
|
|
2898e98733 | ||
|
|
2796b0050d | ||
|
|
6bc005643e | ||
|
|
d591ff5422 | ||
|
|
acb1c3eebf | ||
|
|
a04242ea62 | ||
|
|
7131c7c523 | ||
|
|
6af7cf8a39 | ||
|
|
e2d4940b6d | ||
|
|
09f9ff3f96 | ||
|
|
d671185828 | ||
|
|
8d1784805a | ||
|
|
1d4ce229eb | ||
|
|
ba9897a34e | ||
|
|
5cdc1fa662 | ||
|
|
81347037d3 | ||
|
|
49c3b812b9 | ||
|
|
6ffd23a957 | ||
|
|
a0b78956e8 | ||
|
|
74941f67e6 | ||
|
|
8c9c0807ef | ||
|
|
f316e1db54 | ||
|
|
675db3e65e | ||
|
|
5a45c2b947 | ||
|
|
b446cbad97 | ||
|
|
da2c5fd549 | ||
|
|
b44b814d94 | ||
|
|
46650adcd0 | ||
|
|
baeddc3cb5 | ||
|
|
33831c49cc | ||
|
|
47fc8389fb | ||
|
|
a7a979b794 | ||
|
|
413e03ce5e | ||
|
|
000585522e | ||
|
|
47b121130a | ||
|
|
15f55141ec | ||
|
|
69fbeaa27e | ||
|
|
a366de2a63 | ||
|
|
5bd52e4dba | ||
|
|
8a3a69bc3e | ||
|
|
50c01f7331 | ||
|
|
ecfe466e7b | ||
|
|
69e5caadb6 | ||
|
|
0ff3c0dcb5 | ||
|
|
2148a194c2 | ||
|
|
77ab7b1221 | ||
|
|
59bcd7f029 | ||
|
|
bc5008b165 | ||
|
|
dd7e3d3eab | ||
|
|
3b617164dc | ||
|
|
bb99d7ced6 | ||
|
|
9877246251 | ||
|
|
d966e2d500 | ||
|
|
81831d93d2 | ||
|
|
542a7d28a3 | ||
|
|
1310e6cb48 | ||
|
|
99a6ecb25d | ||
|
|
bc922a743f | ||
|
|
1ec4f50e3c | ||
|
|
9c7ff01c5d | ||
|
|
da29b65e04 | ||
|
|
8c3e8350d6 | ||
|
|
708588bf8b | ||
|
|
b2271800a5 | ||
|
|
209c3512e7 | ||
|
|
4896ce0fd4 | ||
|
|
9d84b1f13d | ||
|
|
a8e372bf94 | ||
|
|
17e5ac9ab1 | ||
|
|
d1d968c6e9 | ||
|
|
e186f66bfe | ||
|
|
78a39e8364 | ||
|
|
bbef05ae3c | ||
|
|
6f324cb732 | ||
|
|
239499a35a |
2
.gitmodules
vendored
2
.gitmodules
vendored
@@ -1,6 +1,6 @@
|
||||
[submodule "seastar"]
|
||||
path = seastar
|
||||
url = ../seastar
|
||||
url = ../scylla-seastar
|
||||
ignore = dirty
|
||||
[submodule "swagger-ui"]
|
||||
path = swagger-ui
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#!/bin/sh
|
||||
|
||||
PRODUCT=scylla
|
||||
VERSION=666.development
|
||||
VERSION=4.3.4
|
||||
|
||||
if test -f version
|
||||
then
|
||||
|
||||
@@ -159,23 +159,40 @@ static bool check_NE(const rjson::value* v1, const rjson::value& v2) {
|
||||
}
|
||||
|
||||
// Check if two JSON-encoded values match with the BEGINS_WITH relation
|
||||
static bool check_BEGINS_WITH(const rjson::value* v1, const rjson::value& v2) {
|
||||
// BEGINS_WITH requires that its single operand (v2) be a string or
|
||||
// binary - otherwise it's a validation error. However, problems with
|
||||
// the stored attribute (v1) will just return false (no match).
|
||||
if (!v2.IsObject() || v2.MemberCount() != 1) {
|
||||
throw api_error::validation(format("BEGINS_WITH operator encountered malformed AttributeValue: {}", v2));
|
||||
}
|
||||
auto it2 = v2.MemberBegin();
|
||||
if (it2->name != "S" && it2->name != "B") {
|
||||
throw api_error::validation(format("BEGINS_WITH operator requires String or Binary type in AttributeValue, got {}", it2->name));
|
||||
}
|
||||
|
||||
|
||||
bool check_BEGINS_WITH(const rjson::value* v1, const rjson::value& v2,
|
||||
bool v1_from_query, bool v2_from_query) {
|
||||
bool bad = false;
|
||||
if (!v1 || !v1->IsObject() || v1->MemberCount() != 1) {
|
||||
if (v1_from_query) {
|
||||
throw api_error::validation("begins_with() encountered malformed argument");
|
||||
} else {
|
||||
bad = true;
|
||||
}
|
||||
} else if (v1->MemberBegin()->name != "S" && v1->MemberBegin()->name != "B") {
|
||||
if (v1_from_query) {
|
||||
throw api_error::validation(format("begins_with supports only string or binary type, got: {}", *v1));
|
||||
} else {
|
||||
bad = true;
|
||||
}
|
||||
}
|
||||
if (!v2.IsObject() || v2.MemberCount() != 1) {
|
||||
if (v2_from_query) {
|
||||
throw api_error::validation("begins_with() encountered malformed argument");
|
||||
} else {
|
||||
bad = true;
|
||||
}
|
||||
} else if (v2.MemberBegin()->name != "S" && v2.MemberBegin()->name != "B") {
|
||||
if (v2_from_query) {
|
||||
throw api_error::validation(format("begins_with() supports only string or binary type, got: {}", v2));
|
||||
} else {
|
||||
bad = true;
|
||||
}
|
||||
}
|
||||
if (bad) {
|
||||
return false;
|
||||
}
|
||||
auto it1 = v1->MemberBegin();
|
||||
auto it2 = v2.MemberBegin();
|
||||
if (it1->name != it2->name) {
|
||||
return false;
|
||||
}
|
||||
@@ -279,24 +296,38 @@ static bool check_NOT_NULL(const rjson::value* val) {
|
||||
return val != nullptr;
|
||||
}
|
||||
|
||||
// Only types S, N or B (string, number or bytes) may be compared by the
|
||||
// various comparion operators - lt, le, gt, ge, and between.
|
||||
static bool check_comparable_type(const rjson::value& v) {
|
||||
if (!v.IsObject() || v.MemberCount() != 1) {
|
||||
return false;
|
||||
}
|
||||
const rjson::value& type = v.MemberBegin()->name;
|
||||
return type == "S" || type == "N" || type == "B";
|
||||
}
|
||||
|
||||
// Check if two JSON-encoded values match with cmp.
|
||||
template <typename Comparator>
|
||||
bool check_compare(const rjson::value* v1, const rjson::value& v2, const Comparator& cmp) {
|
||||
if (!v2.IsObject() || v2.MemberCount() != 1) {
|
||||
throw api_error::validation(
|
||||
format("{} requires a single AttributeValue of type String, Number, or Binary",
|
||||
cmp.diagnostic));
|
||||
bool check_compare(const rjson::value* v1, const rjson::value& v2, const Comparator& cmp,
|
||||
bool v1_from_query, bool v2_from_query) {
|
||||
bool bad = false;
|
||||
if (!v1 || !check_comparable_type(*v1)) {
|
||||
if (v1_from_query) {
|
||||
throw api_error::validation(format("{} allow only the types String, Number, or Binary", cmp.diagnostic));
|
||||
}
|
||||
bad = true;
|
||||
}
|
||||
const auto& kv2 = *v2.MemberBegin();
|
||||
if (kv2.name != "S" && kv2.name != "N" && kv2.name != "B") {
|
||||
throw api_error::validation(
|
||||
format("{} requires a single AttributeValue of type String, Number, or Binary",
|
||||
cmp.diagnostic));
|
||||
if (!check_comparable_type(v2)) {
|
||||
if (v2_from_query) {
|
||||
throw api_error::validation(format("{} allow only the types String, Number, or Binary", cmp.diagnostic));
|
||||
}
|
||||
bad = true;
|
||||
}
|
||||
if (!v1 || !v1->IsObject() || v1->MemberCount() != 1) {
|
||||
if (bad) {
|
||||
return false;
|
||||
}
|
||||
const auto& kv1 = *v1->MemberBegin();
|
||||
const auto& kv2 = *v2.MemberBegin();
|
||||
if (kv1.name != kv2.name) {
|
||||
return false;
|
||||
}
|
||||
@@ -310,7 +341,8 @@ bool check_compare(const rjson::value* v1, const rjson::value& v2, const Compara
|
||||
if (kv1.name == "B") {
|
||||
return cmp(base64_decode(kv1.value), base64_decode(kv2.value));
|
||||
}
|
||||
clogger.error("check_compare panic: LHS type equals RHS type, but one is in {N,S,B} while the other isn't");
|
||||
// cannot reach here, as check_comparable_type() verifies the type is one
|
||||
// of the above options.
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -341,56 +373,71 @@ struct cmp_gt {
|
||||
static constexpr const char* diagnostic = "GT operator";
|
||||
};
|
||||
|
||||
// True if v is between lb and ub, inclusive. Throws if lb > ub.
|
||||
// True if v is between lb and ub, inclusive. Throws or returns false
|
||||
// (depending on bounds_from_query parameter) if lb > ub.
|
||||
template <typename T>
|
||||
static bool check_BETWEEN(const T& v, const T& lb, const T& ub) {
|
||||
static bool check_BETWEEN(const T& v, const T& lb, const T& ub, bool bounds_from_query) {
|
||||
if (cmp_lt()(ub, lb)) {
|
||||
throw api_error::validation(
|
||||
format("BETWEEN operator requires lower_bound <= upper_bound, but {} > {}", lb, ub));
|
||||
if (bounds_from_query) {
|
||||
throw api_error::validation(
|
||||
format("BETWEEN operator requires lower_bound <= upper_bound, but {} > {}", lb, ub));
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return cmp_ge()(v, lb) && cmp_le()(v, ub);
|
||||
}
|
||||
|
||||
static bool check_BETWEEN(const rjson::value* v, const rjson::value& lb, const rjson::value& ub) {
|
||||
if (!v) {
|
||||
static bool check_BETWEEN(const rjson::value* v, const rjson::value& lb, const rjson::value& ub,
|
||||
bool v_from_query, bool lb_from_query, bool ub_from_query) {
|
||||
if ((v && v_from_query && !check_comparable_type(*v)) ||
|
||||
(lb_from_query && !check_comparable_type(lb)) ||
|
||||
(ub_from_query && !check_comparable_type(ub))) {
|
||||
throw api_error::validation("between allow only the types String, Number, or Binary");
|
||||
|
||||
}
|
||||
if (!v || !v->IsObject() || v->MemberCount() != 1 ||
|
||||
!lb.IsObject() || lb.MemberCount() != 1 ||
|
||||
!ub.IsObject() || ub.MemberCount() != 1) {
|
||||
return false;
|
||||
}
|
||||
if (!v->IsObject() || v->MemberCount() != 1) {
|
||||
throw api_error::validation(format("BETWEEN operator encountered malformed AttributeValue: {}", *v));
|
||||
}
|
||||
if (!lb.IsObject() || lb.MemberCount() != 1) {
|
||||
throw api_error::validation(format("BETWEEN operator encountered malformed AttributeValue: {}", lb));
|
||||
}
|
||||
if (!ub.IsObject() || ub.MemberCount() != 1) {
|
||||
throw api_error::validation(format("BETWEEN operator encountered malformed AttributeValue: {}", ub));
|
||||
}
|
||||
|
||||
const auto& kv_v = *v->MemberBegin();
|
||||
const auto& kv_lb = *lb.MemberBegin();
|
||||
const auto& kv_ub = *ub.MemberBegin();
|
||||
bool bounds_from_query = lb_from_query && ub_from_query;
|
||||
if (kv_lb.name != kv_ub.name) {
|
||||
throw api_error::validation(
|
||||
if (bounds_from_query) {
|
||||
throw api_error::validation(
|
||||
format("BETWEEN operator requires the same type for lower and upper bound; instead got {} and {}",
|
||||
kv_lb.name, kv_ub.name));
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (kv_v.name != kv_lb.name) { // Cannot compare different types, so v is NOT between lb and ub.
|
||||
return false;
|
||||
}
|
||||
if (kv_v.name == "N") {
|
||||
const char* diag = "BETWEEN operator";
|
||||
return check_BETWEEN(unwrap_number(*v, diag), unwrap_number(lb, diag), unwrap_number(ub, diag));
|
||||
return check_BETWEEN(unwrap_number(*v, diag), unwrap_number(lb, diag), unwrap_number(ub, diag), bounds_from_query);
|
||||
}
|
||||
if (kv_v.name == "S") {
|
||||
return check_BETWEEN(std::string_view(kv_v.value.GetString(), kv_v.value.GetStringLength()),
|
||||
std::string_view(kv_lb.value.GetString(), kv_lb.value.GetStringLength()),
|
||||
std::string_view(kv_ub.value.GetString(), kv_ub.value.GetStringLength()));
|
||||
std::string_view(kv_ub.value.GetString(), kv_ub.value.GetStringLength()),
|
||||
bounds_from_query);
|
||||
}
|
||||
if (kv_v.name == "B") {
|
||||
return check_BETWEEN(base64_decode(kv_v.value), base64_decode(kv_lb.value), base64_decode(kv_ub.value));
|
||||
return check_BETWEEN(base64_decode(kv_v.value), base64_decode(kv_lb.value), base64_decode(kv_ub.value), bounds_from_query);
|
||||
}
|
||||
throw api_error::validation(
|
||||
format("BETWEEN operator requires AttributeValueList elements to be of type String, Number, or Binary; instead got {}",
|
||||
if (v_from_query) {
|
||||
throw api_error::validation(
|
||||
format("BETWEEN operator requires AttributeValueList elements to be of type String, Number, or Binary; instead got {}",
|
||||
kv_lb.name));
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Verify one Expect condition on one attribute (whose content is "got")
|
||||
@@ -437,19 +484,19 @@ static bool verify_expected_one(const rjson::value& condition, const rjson::valu
|
||||
return check_NE(got, (*attribute_value_list)[0]);
|
||||
case comparison_operator_type::LT:
|
||||
verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
|
||||
return check_compare(got, (*attribute_value_list)[0], cmp_lt{});
|
||||
return check_compare(got, (*attribute_value_list)[0], cmp_lt{}, false, true);
|
||||
case comparison_operator_type::LE:
|
||||
verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
|
||||
return check_compare(got, (*attribute_value_list)[0], cmp_le{});
|
||||
return check_compare(got, (*attribute_value_list)[0], cmp_le{}, false, true);
|
||||
case comparison_operator_type::GT:
|
||||
verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
|
||||
return check_compare(got, (*attribute_value_list)[0], cmp_gt{});
|
||||
return check_compare(got, (*attribute_value_list)[0], cmp_gt{}, false, true);
|
||||
case comparison_operator_type::GE:
|
||||
verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
|
||||
return check_compare(got, (*attribute_value_list)[0], cmp_ge{});
|
||||
return check_compare(got, (*attribute_value_list)[0], cmp_ge{}, false, true);
|
||||
case comparison_operator_type::BEGINS_WITH:
|
||||
verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
|
||||
return check_BEGINS_WITH(got, (*attribute_value_list)[0]);
|
||||
return check_BEGINS_WITH(got, (*attribute_value_list)[0], false, true);
|
||||
case comparison_operator_type::IN:
|
||||
verify_operand_count(attribute_value_list, nonempty(), *comparison_operator);
|
||||
return check_IN(got, *attribute_value_list);
|
||||
@@ -461,7 +508,8 @@ static bool verify_expected_one(const rjson::value& condition, const rjson::valu
|
||||
return check_NOT_NULL(got);
|
||||
case comparison_operator_type::BETWEEN:
|
||||
verify_operand_count(attribute_value_list, exact_size(2), *comparison_operator);
|
||||
return check_BETWEEN(got, (*attribute_value_list)[0], (*attribute_value_list)[1]);
|
||||
return check_BETWEEN(got, (*attribute_value_list)[0], (*attribute_value_list)[1],
|
||||
false, true, true);
|
||||
case comparison_operator_type::CONTAINS:
|
||||
{
|
||||
verify_operand_count(attribute_value_list, exact_size(1), *comparison_operator);
|
||||
@@ -573,7 +621,8 @@ static bool calculate_primitive_condition(const parsed::primitive_condition& con
|
||||
// Shouldn't happen unless we have a bug in the parser
|
||||
throw std::logic_error(format("Wrong number of values {} in BETWEEN primitive_condition", cond._values.size()));
|
||||
}
|
||||
return check_BETWEEN(&calculated_values[0], calculated_values[1], calculated_values[2]);
|
||||
return check_BETWEEN(&calculated_values[0], calculated_values[1], calculated_values[2],
|
||||
cond._values[0].is_constant(), cond._values[1].is_constant(), cond._values[2].is_constant());
|
||||
case parsed::primitive_condition::type::IN:
|
||||
return check_IN(calculated_values);
|
||||
case parsed::primitive_condition::type::VALUE:
|
||||
@@ -604,13 +653,17 @@ static bool calculate_primitive_condition(const parsed::primitive_condition& con
|
||||
case parsed::primitive_condition::type::NE:
|
||||
return check_NE(&calculated_values[0], calculated_values[1]);
|
||||
case parsed::primitive_condition::type::GT:
|
||||
return check_compare(&calculated_values[0], calculated_values[1], cmp_gt{});
|
||||
return check_compare(&calculated_values[0], calculated_values[1], cmp_gt{},
|
||||
cond._values[0].is_constant(), cond._values[1].is_constant());
|
||||
case parsed::primitive_condition::type::GE:
|
||||
return check_compare(&calculated_values[0], calculated_values[1], cmp_ge{});
|
||||
return check_compare(&calculated_values[0], calculated_values[1], cmp_ge{},
|
||||
cond._values[0].is_constant(), cond._values[1].is_constant());
|
||||
case parsed::primitive_condition::type::LT:
|
||||
return check_compare(&calculated_values[0], calculated_values[1], cmp_lt{});
|
||||
return check_compare(&calculated_values[0], calculated_values[1], cmp_lt{},
|
||||
cond._values[0].is_constant(), cond._values[1].is_constant());
|
||||
case parsed::primitive_condition::type::LE:
|
||||
return check_compare(&calculated_values[0], calculated_values[1], cmp_le{});
|
||||
return check_compare(&calculated_values[0], calculated_values[1], cmp_le{},
|
||||
cond._values[0].is_constant(), cond._values[1].is_constant());
|
||||
default:
|
||||
// Shouldn't happen unless we have a bug in the parser
|
||||
throw std::logic_error(format("Unknown type {} in primitive_condition object", (int)(cond._op)));
|
||||
|
||||
@@ -52,6 +52,7 @@ bool verify_expected(const rjson::value& req, const rjson::value* previous_item)
|
||||
bool verify_condition(const rjson::value& condition, bool require_all, const rjson::value* previous_item);
|
||||
|
||||
bool check_CONTAINS(const rjson::value* v1, const rjson::value& v2);
|
||||
bool check_BEGINS_WITH(const rjson::value* v1, const rjson::value& v2, bool v1_from_query, bool v2_from_query);
|
||||
|
||||
bool verify_condition_expression(
|
||||
const parsed::condition_expression& condition_expression,
|
||||
|
||||
@@ -1881,7 +1881,8 @@ static std::string get_item_type_string(const rjson::value& v) {
|
||||
|
||||
// calculate_attrs_to_get() takes either AttributesToGet or
|
||||
// ProjectionExpression parameters (having both is *not* allowed),
|
||||
// and returns the list of cells we need to read.
|
||||
// and returns the list of cells we need to read, or an empty set when
|
||||
// *all* attributes are to be returned.
|
||||
// In our current implementation, only top-level attributes are stored
|
||||
// as cells, and nested documents are stored serialized as JSON.
|
||||
// So this function currently returns only the the top-level attributes
|
||||
@@ -2243,19 +2244,30 @@ update_item_operation::apply(std::unique_ptr<rjson::value> previous_item, api::t
|
||||
rjson::value v1 = calculate_value(base, calculate_value_caller::UpdateExpression, previous_item.get());
|
||||
rjson::value v2 = calculate_value(addition, calculate_value_caller::UpdateExpression, previous_item.get());
|
||||
rjson::value result;
|
||||
std::string v1_type = get_item_type_string(v1);
|
||||
if (v1_type == "N") {
|
||||
if (get_item_type_string(v2) != "N") {
|
||||
throw api_error::validation(format("Incorrect operand type for operator or function. Expected {}: {}", v1_type, rjson::print(v2)));
|
||||
// An ADD can be used to create a new attribute (when
|
||||
// v1.IsNull()) or to add to a pre-existing attribute:
|
||||
if (v1.IsNull()) {
|
||||
std::string v2_type = get_item_type_string(v2);
|
||||
if (v2_type == "N" || v2_type == "SS" || v2_type == "NS" || v2_type == "BS") {
|
||||
result = v2;
|
||||
} else {
|
||||
throw api_error::validation(format("An operand in the update expression has an incorrect data type: {}", v2));
|
||||
}
|
||||
result = number_add(v1, v2);
|
||||
} else if (v1_type == "SS" || v1_type == "NS" || v1_type == "BS") {
|
||||
if (get_item_type_string(v2) != v1_type) {
|
||||
throw api_error::validation(format("Incorrect operand type for operator or function. Expected {}: {}", v1_type, rjson::print(v2)));
|
||||
}
|
||||
result = set_sum(v1, v2);
|
||||
} else {
|
||||
throw api_error::validation(format("An operand in the update expression has an incorrect data type: {}", v1));
|
||||
std::string v1_type = get_item_type_string(v1);
|
||||
if (v1_type == "N") {
|
||||
if (get_item_type_string(v2) != "N") {
|
||||
throw api_error::validation(format("Incorrect operand type for operator or function. Expected {}: {}", v1_type, rjson::print(v2)));
|
||||
}
|
||||
result = number_add(v1, v2);
|
||||
} else if (v1_type == "SS" || v1_type == "NS" || v1_type == "BS") {
|
||||
if (get_item_type_string(v2) != v1_type) {
|
||||
throw api_error::validation(format("Incorrect operand type for operator or function. Expected {}: {}", v1_type, rjson::print(v2)));
|
||||
}
|
||||
result = set_sum(v1, v2);
|
||||
} else {
|
||||
throw api_error::validation(format("An operand in the update expression has an incorrect data type: {}", v1));
|
||||
}
|
||||
}
|
||||
do_update(to_bytes(column_name), result);
|
||||
},
|
||||
@@ -2571,6 +2583,10 @@ public:
|
||||
std::unordered_set<std::string>& used_attribute_values);
|
||||
bool check(const rjson::value& item) const;
|
||||
bool filters_on(std::string_view attribute) const;
|
||||
// for_filters_on() runs the given function on the attributes that the
|
||||
// filter works on. It may run for the same attribute more than once if
|
||||
// used more than once in the filter.
|
||||
void for_filters_on(const noncopyable_function<void(std::string_view)>& func) const;
|
||||
operator bool() const { return bool(_imp); }
|
||||
};
|
||||
|
||||
@@ -2651,10 +2667,26 @@ bool filter::filters_on(std::string_view attribute) const {
|
||||
}, *_imp);
|
||||
}
|
||||
|
||||
void filter::for_filters_on(const noncopyable_function<void(std::string_view)>& func) const {
|
||||
if (_imp) {
|
||||
std::visit(overloaded_functor {
|
||||
[&] (const conditions_filter& f) -> void {
|
||||
for (auto it = f.conditions.MemberBegin(); it != f.conditions.MemberEnd(); ++it) {
|
||||
func(rjson::to_string_view(it->name));
|
||||
}
|
||||
},
|
||||
[&] (const expression_filter& f) -> void {
|
||||
return for_condition_expression_on(f.expression, func);
|
||||
}
|
||||
}, *_imp);
|
||||
}
|
||||
}
|
||||
|
||||
class describe_items_visitor {
|
||||
typedef std::vector<const column_definition*> columns_t;
|
||||
const columns_t& _columns;
|
||||
const std::unordered_set<std::string>& _attrs_to_get;
|
||||
std::unordered_set<std::string> _extra_filter_attrs;
|
||||
const filter& _filter;
|
||||
typename columns_t::const_iterator _column_it;
|
||||
rjson::value _item;
|
||||
@@ -2670,7 +2702,20 @@ public:
|
||||
, _item(rjson::empty_object())
|
||||
, _items(rjson::empty_array())
|
||||
, _scanned_count(0)
|
||||
{ }
|
||||
{
|
||||
// _filter.check() may need additional attributes not listed in
|
||||
// _attrs_to_get (i.e., not requested as part of the output).
|
||||
// We list those in _extra_filter_attrs. We will include them in
|
||||
// the JSON but take them out before finally returning the JSON.
|
||||
if (!_attrs_to_get.empty()) {
|
||||
_filter.for_filters_on([&] (std::string_view attr) {
|
||||
std::string a(attr); // no heterogenous maps searches :-(
|
||||
if (!_attrs_to_get.contains(a)) {
|
||||
_extra_filter_attrs.emplace(std::move(a));
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
void start_row() {
|
||||
_column_it = _columns.begin();
|
||||
@@ -2684,7 +2729,7 @@ public:
|
||||
result_bytes_view->with_linearized([this] (bytes_view bv) {
|
||||
std::string column_name = (*_column_it)->name_as_text();
|
||||
if (column_name != executor::ATTRS_COLUMN_NAME) {
|
||||
if (_attrs_to_get.empty() || _attrs_to_get.contains(column_name)) {
|
||||
if (_attrs_to_get.empty() || _attrs_to_get.contains(column_name) || _extra_filter_attrs.contains(column_name)) {
|
||||
if (!_item.HasMember(column_name.c_str())) {
|
||||
rjson::set_with_string_name(_item, column_name, rjson::empty_object());
|
||||
}
|
||||
@@ -2696,7 +2741,7 @@ public:
|
||||
auto keys_and_values = value_cast<map_type_impl::native_type>(deserialized);
|
||||
for (auto entry : keys_and_values) {
|
||||
std::string attr_name = value_cast<sstring>(entry.first);
|
||||
if (_attrs_to_get.empty() || _attrs_to_get.contains(attr_name)) {
|
||||
if (_attrs_to_get.empty() || _attrs_to_get.contains(attr_name) || _extra_filter_attrs.contains(attr_name)) {
|
||||
bytes value = value_cast<bytes>(entry.second);
|
||||
rjson::set_with_string_name(_item, attr_name, deserialize_item(value));
|
||||
}
|
||||
@@ -2708,6 +2753,11 @@ public:
|
||||
|
||||
void end_row() {
|
||||
if (_filter.check(_item)) {
|
||||
// Remove the extra attributes _extra_filter_attrs which we had
|
||||
// to add just for the filter, and not requested to be returned:
|
||||
for (const auto& attr : _extra_filter_attrs) {
|
||||
rjson::remove_member(_item, attr);
|
||||
}
|
||||
rjson::push_back(_items, std::move(_item));
|
||||
}
|
||||
_item = rjson::empty_object();
|
||||
@@ -2742,7 +2792,7 @@ static rjson::value encode_paging_state(const schema& schema, const service::pag
|
||||
for (const column_definition& cdef : schema.partition_key_columns()) {
|
||||
rjson::set_with_string_name(last_evaluated_key, std::string_view(cdef.name_as_text()), rjson::empty_object());
|
||||
rjson::value& key_entry = last_evaluated_key[cdef.name_as_text()];
|
||||
rjson::set_with_string_name(key_entry, type_to_string(cdef.type), rjson::parse(to_json_string(*cdef.type, *exploded_pk_it)));
|
||||
rjson::set_with_string_name(key_entry, type_to_string(cdef.type), json_key_column_value(*exploded_pk_it, cdef));
|
||||
++exploded_pk_it;
|
||||
}
|
||||
auto ck = paging_state.get_clustering_key();
|
||||
@@ -2752,7 +2802,7 @@ static rjson::value encode_paging_state(const schema& schema, const service::pag
|
||||
for (const column_definition& cdef : schema.clustering_key_columns()) {
|
||||
rjson::set_with_string_name(last_evaluated_key, std::string_view(cdef.name_as_text()), rjson::empty_object());
|
||||
rjson::value& key_entry = last_evaluated_key[cdef.name_as_text()];
|
||||
rjson::set_with_string_name(key_entry, type_to_string(cdef.type), rjson::parse(to_json_string(*cdef.type, *exploded_ck_it)));
|
||||
rjson::set_with_string_name(key_entry, type_to_string(cdef.type), json_key_column_value(*exploded_ck_it, cdef));
|
||||
++exploded_ck_it;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -348,6 +348,39 @@ bool condition_expression_on(const parsed::condition_expression& ce, std::string
|
||||
}, ce._expression);
|
||||
}
|
||||
|
||||
// for_condition_expression_on() runs a given function over all the attributes
|
||||
// mentioned in the expression. If the same attribute is mentioned more than
|
||||
// once, the function will be called more than once for the same attribute.
|
||||
|
||||
static void for_value_on(const parsed::value& v, const noncopyable_function<void(std::string_view)>& func) {
|
||||
std::visit(overloaded_functor {
|
||||
[&] (const parsed::constant& c) { },
|
||||
[&] (const parsed::value::function_call& f) {
|
||||
for (const parsed::value& value : f._parameters) {
|
||||
for_value_on(value, func);
|
||||
}
|
||||
},
|
||||
[&] (const parsed::path& p) {
|
||||
func(p.root());
|
||||
}
|
||||
}, v._value);
|
||||
}
|
||||
|
||||
void for_condition_expression_on(const parsed::condition_expression& ce, const noncopyable_function<void(std::string_view)>& func) {
|
||||
std::visit(overloaded_functor {
|
||||
[&] (const parsed::primitive_condition& cond) {
|
||||
for (const parsed::value& value : cond._values) {
|
||||
for_value_on(value, func);
|
||||
}
|
||||
},
|
||||
[&] (const parsed::condition_expression::condition_list& list) {
|
||||
for (const parsed::condition_expression& cond : list.conditions) {
|
||||
for_condition_expression_on(cond, func);
|
||||
}
|
||||
}
|
||||
}, ce._expression);
|
||||
}
|
||||
|
||||
// The following calculate_value() functions calculate, or evaluate, a parsed
|
||||
// expression. The parsed expression is assumed to have been "resolved", with
|
||||
// the matching resolve_* function.
|
||||
@@ -570,52 +603,8 @@ std::unordered_map<std::string_view, function_handler_type*> function_handlers {
|
||||
}
|
||||
rjson::value v1 = calculate_value(f._parameters[0], caller, previous_item);
|
||||
rjson::value v2 = calculate_value(f._parameters[1], caller, previous_item);
|
||||
// TODO: There's duplication here with check_BEGINS_WITH().
|
||||
// But unfortunately, the two functions differ a bit.
|
||||
|
||||
// If one of v1 or v2 is malformed or has an unsupported type
|
||||
// (not B or S), what we do depends on whether it came from
|
||||
// the user's query (is_constant()), or the item. Unsupported
|
||||
// values in the query result in an error, but if they are in
|
||||
// the item, we silently return false (no match).
|
||||
bool bad = false;
|
||||
if (!v1.IsObject() || v1.MemberCount() != 1) {
|
||||
bad = true;
|
||||
if (f._parameters[0].is_constant()) {
|
||||
throw api_error::validation(format("{}: begins_with() encountered malformed AttributeValue: {}", caller, v1));
|
||||
}
|
||||
} else if (v1.MemberBegin()->name != "S" && v1.MemberBegin()->name != "B") {
|
||||
bad = true;
|
||||
if (f._parameters[0].is_constant()) {
|
||||
throw api_error::validation(format("{}: begins_with() supports only string or binary in AttributeValue: {}", caller, v1));
|
||||
}
|
||||
}
|
||||
if (!v2.IsObject() || v2.MemberCount() != 1) {
|
||||
bad = true;
|
||||
if (f._parameters[1].is_constant()) {
|
||||
throw api_error::validation(format("{}: begins_with() encountered malformed AttributeValue: {}", caller, v2));
|
||||
}
|
||||
} else if (v2.MemberBegin()->name != "S" && v2.MemberBegin()->name != "B") {
|
||||
bad = true;
|
||||
if (f._parameters[1].is_constant()) {
|
||||
throw api_error::validation(format("{}: begins_with() supports only string or binary in AttributeValue: {}", caller, v2));
|
||||
}
|
||||
}
|
||||
bool ret = false;
|
||||
if (!bad) {
|
||||
auto it1 = v1.MemberBegin();
|
||||
auto it2 = v2.MemberBegin();
|
||||
if (it1->name == it2->name) {
|
||||
if (it2->name == "S") {
|
||||
std::string_view val1 = rjson::to_string_view(it1->value);
|
||||
std::string_view val2 = rjson::to_string_view(it2->value);
|
||||
ret = val1.starts_with(val2);
|
||||
} else /* it2->name == "B" */ {
|
||||
ret = base64_begins_with(rjson::to_string_view(it1->value), rjson::to_string_view(it2->value));
|
||||
}
|
||||
}
|
||||
}
|
||||
return to_bool_json(ret);
|
||||
return to_bool_json(check_BEGINS_WITH(v1.IsNull() ? nullptr : &v1, v2,
|
||||
f._parameters[0].is_constant(), f._parameters[1].is_constant()));
|
||||
}
|
||||
},
|
||||
{"contains", [] (calculate_value_caller caller, const rjson::value* previous_item, const parsed::value::function_call& f) {
|
||||
|
||||
@@ -27,6 +27,8 @@
|
||||
#include <unordered_set>
|
||||
#include <string_view>
|
||||
|
||||
#include <seastar/util/noncopyable_function.hh>
|
||||
|
||||
#include "expressions_types.hh"
|
||||
#include "utils/rjson.hh"
|
||||
|
||||
@@ -59,6 +61,11 @@ void validate_value(const rjson::value& v, const char* caller);
|
||||
|
||||
bool condition_expression_on(const parsed::condition_expression& ce, std::string_view attribute);
|
||||
|
||||
// for_condition_expression_on() runs the given function on the attributes
|
||||
// that the expression uses. It may run for the same attribute more than once
|
||||
// if the same attribute is used more than once in the expression.
|
||||
void for_condition_expression_on(const parsed::condition_expression& ce, const noncopyable_function<void(std::string_view)>& func);
|
||||
|
||||
// calculate_value() behaves slightly different (especially, different
|
||||
// functions supported) when used in different types of expressions, as
|
||||
// enumerated in this enum:
|
||||
|
||||
@@ -849,6 +849,7 @@ future<executor::request_return_type> executor::get_records(client_state& client
|
||||
|
||||
static const bytes timestamp_column_name = cdc::log_meta_column_name_bytes("time");
|
||||
static const bytes op_column_name = cdc::log_meta_column_name_bytes("operation");
|
||||
static const bytes eor_column_name = cdc::log_meta_column_name_bytes("end_of_batch");
|
||||
|
||||
auto key_names = boost::copy_range<std::unordered_set<std::string>>(
|
||||
boost::range::join(std::move(base->partition_key_columns()), std::move(base->clustering_key_columns()))
|
||||
@@ -872,7 +873,7 @@ future<executor::request_return_type> executor::get_records(client_state& client
|
||||
std::transform(cks.begin(), cks.end(), std::back_inserter(columns), [](auto& c) { return &c; });
|
||||
|
||||
auto regular_columns = boost::copy_range<query::column_id_vector>(schema->regular_columns()
|
||||
| boost::adaptors::filtered([](const column_definition& cdef) { return cdef.name() == op_column_name || !cdc::is_cdc_metacolumn_name(cdef.name_as_text()); })
|
||||
| boost::adaptors::filtered([](const column_definition& cdef) { return cdef.name() == op_column_name || cdef.name() == eor_column_name || !cdc::is_cdc_metacolumn_name(cdef.name_as_text()); })
|
||||
| boost::adaptors::transformed([&] (const column_definition& cdef) { columns.emplace_back(&cdef); return cdef.id; })
|
||||
);
|
||||
|
||||
@@ -905,6 +906,11 @@ future<executor::request_return_type> executor::get_records(client_state& client
|
||||
return cdef->name->name() == timestamp_column_name;
|
||||
})
|
||||
);
|
||||
auto eor_index = std::distance(metadata.get_names().begin(),
|
||||
std::find_if(metadata.get_names().begin(), metadata.get_names().end(), [](const lw_shared_ptr<cql3::column_specification>& cdef) {
|
||||
return cdef->name->name() == eor_column_name;
|
||||
})
|
||||
);
|
||||
|
||||
std::optional<utils::UUID> timestamp;
|
||||
auto dynamodb = rjson::empty_object();
|
||||
@@ -930,15 +936,7 @@ future<executor::request_return_type> executor::get_records(client_state& client
|
||||
for (auto& row : result_set->rows()) {
|
||||
auto op = static_cast<cdc::operation>(value_cast<op_utype>(data_type_for<op_utype>()->deserialize(*row[op_index])));
|
||||
auto ts = value_cast<utils::UUID>(data_type_for<utils::UUID>()->deserialize(*row[ts_index]));
|
||||
|
||||
if (timestamp && timestamp != ts) {
|
||||
maybe_add_record();
|
||||
if (limit == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
timestamp = ts;
|
||||
auto eor = row[eor_index].has_value() ? value_cast<bool>(boolean_type->deserialize(*row[eor_index])) : false;
|
||||
|
||||
if (!dynamodb.HasMember("Keys")) {
|
||||
auto keys = rjson::empty_object();
|
||||
@@ -991,9 +989,13 @@ future<executor::request_return_type> executor::get_records(client_state& client
|
||||
rjson::set(record, "eventName", "REMOVE");
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (limit > 0 && timestamp) {
|
||||
maybe_add_record();
|
||||
if (eor) {
|
||||
maybe_add_record();
|
||||
timestamp = ts;
|
||||
if (limit == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto ret = rjson::empty_object();
|
||||
@@ -1047,6 +1049,9 @@ void executor::add_stream_options(const rjson::value& stream_specification, sche
|
||||
if (!db.features().cluster_supports_cdc()) {
|
||||
throw api_error::validation("StreamSpecification: streams (CDC) feature not enabled in cluster.");
|
||||
}
|
||||
if (!db.features().cluster_supports_alternator_streams()) {
|
||||
throw api_error::validation("StreamSpecification: alternator streams feature not enabled in cluster.");
|
||||
}
|
||||
|
||||
cdc::options opts;
|
||||
opts.enabled(true);
|
||||
|
||||
@@ -656,7 +656,7 @@ void set_column_family(http_context& ctx, routes& r) {
|
||||
cf::get_bloom_filter_disk_space_used.set(r, [&ctx] (std::unique_ptr<request> req) {
|
||||
return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (column_family& cf) {
|
||||
return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
||||
return sst->filter_size();
|
||||
return s + sst->filter_size();
|
||||
});
|
||||
}, std::plus<uint64_t>());
|
||||
});
|
||||
@@ -664,7 +664,7 @@ void set_column_family(http_context& ctx, routes& r) {
|
||||
cf::get_all_bloom_filter_disk_space_used.set(r, [&ctx] (std::unique_ptr<request> req) {
|
||||
return map_reduce_cf(ctx, uint64_t(0), [] (column_family& cf) {
|
||||
return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
||||
return sst->filter_size();
|
||||
return s + sst->filter_size();
|
||||
});
|
||||
}, std::plus<uint64_t>());
|
||||
});
|
||||
@@ -672,7 +672,7 @@ void set_column_family(http_context& ctx, routes& r) {
|
||||
cf::get_bloom_filter_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) {
|
||||
return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (column_family& cf) {
|
||||
return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
||||
return sst->filter_memory_size();
|
||||
return s + sst->filter_memory_size();
|
||||
});
|
||||
}, std::plus<uint64_t>());
|
||||
});
|
||||
@@ -680,7 +680,7 @@ void set_column_family(http_context& ctx, routes& r) {
|
||||
cf::get_all_bloom_filter_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) {
|
||||
return map_reduce_cf(ctx, uint64_t(0), [] (column_family& cf) {
|
||||
return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
||||
return sst->filter_memory_size();
|
||||
return s + sst->filter_memory_size();
|
||||
});
|
||||
}, std::plus<uint64_t>());
|
||||
});
|
||||
@@ -688,7 +688,7 @@ void set_column_family(http_context& ctx, routes& r) {
|
||||
cf::get_index_summary_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) {
|
||||
return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (column_family& cf) {
|
||||
return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
||||
return sst->get_summary().memory_footprint();
|
||||
return s + sst->get_summary().memory_footprint();
|
||||
});
|
||||
}, std::plus<uint64_t>());
|
||||
});
|
||||
@@ -696,7 +696,7 @@ void set_column_family(http_context& ctx, routes& r) {
|
||||
cf::get_all_index_summary_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) {
|
||||
return map_reduce_cf(ctx, uint64_t(0), [] (column_family& cf) {
|
||||
return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
||||
return sst->get_summary().memory_footprint();
|
||||
return s + sst->get_summary().memory_footprint();
|
||||
});
|
||||
}, std::plus<uint64_t>());
|
||||
});
|
||||
|
||||
@@ -20,10 +20,16 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
|
||||
#include <seastar/core/sstring.hh>
|
||||
|
||||
#include "bytes.hh"
|
||||
#include "serializer.hh"
|
||||
#include "db/extensions.hh"
|
||||
#include "cdc/cdc_options.hh"
|
||||
#include "schema.hh"
|
||||
#include "serializer_impl.hh"
|
||||
|
||||
namespace cdc {
|
||||
|
||||
|
||||
@@ -23,6 +23,7 @@
|
||||
#include <random>
|
||||
#include <unordered_set>
|
||||
#include <seastar/core/sleep.hh>
|
||||
#include <algorithm>
|
||||
|
||||
#include "keys.hh"
|
||||
#include "schema_builder.hh"
|
||||
@@ -174,10 +175,29 @@ bool topology_description::operator==(const topology_description& o) const {
|
||||
return _entries == o._entries;
|
||||
}
|
||||
|
||||
const std::vector<token_range_description>& topology_description::entries() const {
|
||||
const std::vector<token_range_description>& topology_description::entries() const& {
|
||||
return _entries;
|
||||
}
|
||||
|
||||
std::vector<token_range_description>&& topology_description::entries() && {
|
||||
return std::move(_entries);
|
||||
}
|
||||
|
||||
static std::vector<stream_id> create_stream_ids(
|
||||
size_t index, dht::token start, dht::token end, size_t shard_count, uint8_t ignore_msb) {
|
||||
std::vector<stream_id> result;
|
||||
result.reserve(shard_count);
|
||||
dht::sharder sharder(shard_count, ignore_msb);
|
||||
for (size_t shard_idx = 0; shard_idx < shard_count; ++shard_idx) {
|
||||
auto t = dht::find_first_token_for_shard(sharder, start, end, shard_idx);
|
||||
// compose the id from token and the "index" of the range end owning vnode
|
||||
// as defined by token sort order. Basically grouping within this
|
||||
// shard set.
|
||||
result.emplace_back(stream_id(t, index));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
class topology_description_generator final {
|
||||
const db::config& _cfg;
|
||||
const std::unordered_set<dht::token>& _bootstrap_tokens;
|
||||
@@ -217,18 +237,9 @@ class topology_description_generator final {
|
||||
desc.token_range_end = end;
|
||||
|
||||
auto [shard_count, ignore_msb] = get_sharding_info(end);
|
||||
desc.streams.reserve(shard_count);
|
||||
desc.streams = create_stream_ids(index, start, end, shard_count, ignore_msb);
|
||||
desc.sharding_ignore_msb = ignore_msb;
|
||||
|
||||
dht::sharder sharder(shard_count, ignore_msb);
|
||||
for (size_t shard_idx = 0; shard_idx < shard_count; ++shard_idx) {
|
||||
auto t = dht::find_first_token_for_shard(sharder, start, end, shard_idx);
|
||||
// compose the id from token and the "index" of the range end owning vnode
|
||||
// as defined by token sort order. Basically grouping within this
|
||||
// shard set.
|
||||
desc.streams.emplace_back(stream_id(t, index));
|
||||
}
|
||||
|
||||
return desc;
|
||||
}
|
||||
public:
|
||||
@@ -294,6 +305,38 @@ future<db_clock::time_point> get_local_streams_timestamp() {
|
||||
});
|
||||
}
|
||||
|
||||
// Upper bound on the total number of streams in one topology description.
// Not declared static so that tests can call it.
size_t limit_of_streams_in_topology_description() {
    // A stream takes 16B and the description should not exceed 4MB, which
    // leaves room for at most 262144 streams. (The "at least 1 per vnode"
    // part of the rule is applied by the caller, not here.)
    const size_t max_description_bytes = 4 * 1024 * 1024;
    const size_t bytes_per_stream = 16;
    return max_description_bytes / bytes_per_stream;
}
|
||||
|
||||
// non-static for testing
|
||||
topology_description limit_number_of_streams_if_needed(topology_description&& desc) {
|
||||
int64_t streams_count = 0;
|
||||
for (auto& tr_desc : desc.entries()) {
|
||||
streams_count += tr_desc.streams.size();
|
||||
}
|
||||
|
||||
size_t limit = std::max(limit_of_streams_in_topology_description(), desc.entries().size());
|
||||
if (limit >= size_t(streams_count)) {
|
||||
return std::move(desc);
|
||||
}
|
||||
size_t streams_per_vnode_limit = limit / desc.entries().size();
|
||||
auto entries = std::move(desc).entries();
|
||||
auto start = entries.back().token_range_end;
|
||||
for (size_t idx = 0; idx < entries.size(); ++idx) {
|
||||
auto end = entries[idx].token_range_end;
|
||||
if (entries[idx].streams.size() > streams_per_vnode_limit) {
|
||||
entries[idx].streams =
|
||||
create_stream_ids(idx, start, end, streams_per_vnode_limit, entries[idx].sharding_ignore_msb);
|
||||
}
|
||||
start = end;
|
||||
}
|
||||
return topology_description(std::move(entries));
|
||||
}
|
||||
|
||||
// Run inside seastar::async context.
|
||||
db_clock::time_point make_new_cdc_generation(
|
||||
const db::config& cfg,
|
||||
@@ -306,6 +349,18 @@ db_clock::time_point make_new_cdc_generation(
|
||||
using namespace std::chrono;
|
||||
auto gen = topology_description_generator(cfg, bootstrap_tokens, tm, g).generate();
|
||||
|
||||
// If the cluster is large we may end up with a generation that contains
|
||||
// large number of streams. This is problematic because we store the
|
||||
// generation in a single row. For a generation with a large number of streams
|
||||
// this will lead to a row that can be as big as 32MB. This is much more
|
||||
// than the limit imposed by commitlog_segment_size_in_mb. If the size of
|
||||
// the row that describes a new generation grows above
|
||||
// commitlog_segment_size_in_mb, the write will fail and the new node won't
|
||||
// be able to join. To avoid such problem we make sure that such row is
|
||||
// always smaller than 4MB. We do that by removing some CDC streams from
|
||||
// each vnode if the total number of streams is too large.
|
||||
gen = limit_number_of_streams_if_needed(std::move(gen));
|
||||
|
||||
// Begin the race.
|
||||
auto ts = db_clock::now() + (
|
||||
(for_testing || ring_delay == milliseconds(0)) ? milliseconds(0) : (
|
||||
|
||||
@@ -68,6 +68,7 @@ public:
|
||||
|
||||
stream_id() = default;
|
||||
stream_id(bytes);
|
||||
stream_id(dht::token, size_t);
|
||||
|
||||
bool is_set() const;
|
||||
bool operator==(const stream_id&) const;
|
||||
@@ -81,9 +82,6 @@ public:
|
||||
|
||||
partition_key to_partition_key(const schema& log_schema) const;
|
||||
static int64_t token_from_bytes(bytes_view);
|
||||
private:
|
||||
friend class topology_description_generator;
|
||||
stream_id(dht::token, size_t);
|
||||
};
|
||||
|
||||
/* Describes a mapping of tokens to CDC streams in a token range.
|
||||
@@ -116,7 +114,8 @@ public:
|
||||
topology_description(std::vector<token_range_description> entries);
|
||||
bool operator==(const topology_description&) const;
|
||||
|
||||
const std::vector<token_range_description>& entries() const;
|
||||
const std::vector<token_range_description>& entries() const&;
|
||||
std::vector<token_range_description>&& entries() &&;
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -154,7 +153,7 @@ bool should_propose_first_generation(const gms::inet_address& me, const gms::gos
|
||||
future<db_clock::time_point> get_local_streams_timestamp();
|
||||
|
||||
/* Generate a new set of CDC streams and insert it into the distributed cdc_generation_descriptions table.
|
||||
* Returns the timestamp of this new generation.
|
||||
* Returns the timestamp of this new generation
|
||||
*
|
||||
* Should be called when starting the node for the first time (i.e., joining the ring).
|
||||
*
|
||||
|
||||
32
cdc/log.cc
32
cdc/log.cc
@@ -519,6 +519,7 @@ static schema_ptr create_log_schema(const schema& s, std::optional<utils::UUID>
|
||||
b.with_column(log_meta_column_name_bytes("batch_seq_no"), int32_type, column_kind::clustering_key);
|
||||
b.with_column(log_meta_column_name_bytes("operation"), data_type_for<operation_native_type>());
|
||||
b.with_column(log_meta_column_name_bytes("ttl"), long_type);
|
||||
b.with_column(log_meta_column_name_bytes("end_of_batch"), boolean_type);
|
||||
b.set_caching_options(caching_options::get_disabled_caching_options());
|
||||
auto add_columns = [&] (const schema::const_iterator_range_type& columns, bool is_data_col = false) {
|
||||
for (const auto& column : columns) {
|
||||
@@ -880,14 +881,26 @@ public:
|
||||
return _base_schema;
|
||||
}
|
||||
|
||||
clustering_key create_ck(int batch) const {
|
||||
return clustering_key::from_exploded(_log_schema, { _tuuid, int32_type->decompose(batch) });
|
||||
}
|
||||
|
||||
// Creates a new clustering row in the mutation, assigning it the next `cdc$batch_seq_no`.
|
||||
// The numbering of batch sequence numbers starts from 0.
|
||||
clustering_key allocate_new_log_row() {
|
||||
auto log_ck = clustering_key::from_exploded(_log_schema, { _tuuid, int32_type->decompose(_batch_no++) });
|
||||
auto log_ck = create_ck(_batch_no++);
|
||||
set_key_columns(log_ck, _base_schema.partition_key_columns(), _base_pk);
|
||||
return log_ck;
|
||||
}
|
||||
|
||||
bool has_rows() const {
|
||||
return _batch_no != 0;
|
||||
}
|
||||
|
||||
clustering_key last_row_key() const {
|
||||
return create_ck(_batch_no - 1);
|
||||
}
|
||||
|
||||
// A common pattern is to allocate a row and then immediately set its `cdc$operation` column.
|
||||
clustering_key allocate_new_log_row(operation op) {
|
||||
auto log_ck = allocate_new_log_row();
|
||||
@@ -944,6 +957,11 @@ public:
|
||||
_log_mut.set_cell(log_ck, log_cdef, atomic_cell::make_live(*log_cdef.type, _ts, deleted_elements, _ttl));
|
||||
}
|
||||
|
||||
void end_record() {
|
||||
if (has_rows()) {
|
||||
_log_mut.set_cell(last_row_key(), log_meta_column_name_bytes("end_of_batch"), data_value(true), _ts, _ttl);
|
||||
}
|
||||
}
|
||||
private:
|
||||
void set_key_columns(const clustering_key& log_ck, schema::const_iterator_range_type columns, const std::vector<bytes>& key) {
|
||||
size_t pos = 0;
|
||||
@@ -1272,6 +1290,13 @@ struct process_change_visitor {
|
||||
_clustering_row_states, _generate_delta_values);
|
||||
visit_row_cells(v);
|
||||
|
||||
if (_enable_updating_state) {
|
||||
// #7716: if there are no regular columns, our visitor would not have visited any cells,
|
||||
// hence it would not have created a row_state for this row. In effect, postimage wouldn't be produced.
|
||||
// Ensure that the row state exists.
|
||||
_clustering_row_states.try_emplace(ckey);
|
||||
}
|
||||
|
||||
_builder.set_operation(log_ck, v._cdc_op);
|
||||
_builder.set_ttl(log_ck, v._ttl_column);
|
||||
}
|
||||
@@ -1519,6 +1544,11 @@ public:
|
||||
cdc::inspect_mutation(m, v);
|
||||
}
|
||||
|
||||
void end_record() override {
|
||||
assert(_builder);
|
||||
_builder->end_record();
|
||||
}
|
||||
|
||||
// Takes and returns generated cdc log mutations and associated statistics about parts touched during transformer's lifetime.
|
||||
// The `transformer` object on which this method was called should not be used anymore.
|
||||
std::tuple<std::vector<mutation>, stats::part_type_set> finish() && {
|
||||
|
||||
@@ -51,7 +51,8 @@ static cdc::stream_id get_stream(
|
||||
return entry.streams[shard_id];
|
||||
}
|
||||
|
||||
static cdc::stream_id get_stream(
|
||||
// non-static for testing
|
||||
cdc::stream_id get_stream(
|
||||
const std::vector<cdc::token_range_description>& entries,
|
||||
dht::token tok) {
|
||||
if (entries.empty()) {
|
||||
|
||||
@@ -684,6 +684,8 @@ void process_changes_with_splitting(const mutation& base_mutation, change_proces
|
||||
processor.produce_postimage(&ck);
|
||||
}
|
||||
}
|
||||
|
||||
processor.end_record();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -731,6 +733,8 @@ void process_changes_without_splitting(const mutation& base_mutation, change_pro
|
||||
processor.produce_postimage(&cr.key());
|
||||
}
|
||||
}
|
||||
|
||||
processor.end_record();
|
||||
}
|
||||
|
||||
} // namespace cdc
|
||||
|
||||
@@ -77,6 +77,10 @@ public:
|
||||
// both columns have different timestamp or TTL set.
|
||||
// m - the small mutation to be converted into CDC log rows.
|
||||
virtual void process_change(const mutation& m) = 0;
|
||||
|
||||
// Tells processor we have reached end of record - last part
|
||||
// of a given timestamp batch
|
||||
virtual void end_record() = 0;
|
||||
};
|
||||
|
||||
bool should_split(const mutation& base_mutation);
|
||||
|
||||
@@ -275,6 +275,7 @@ modes = {
|
||||
|
||||
scylla_tests = set([
|
||||
'test/boost/UUID_test',
|
||||
'test/boost/cdc_generation_test',
|
||||
'test/boost/aggregate_fcts_test',
|
||||
'test/boost/allocation_strategy_test',
|
||||
'test/boost/alternator_base64_test',
|
||||
@@ -854,6 +855,7 @@ scylla_core = (['database.cc',
|
||||
'utils/error_injection.cc',
|
||||
'mutation_writer/timestamp_based_splitting_writer.cc',
|
||||
'mutation_writer/shard_based_splitting_writer.cc',
|
||||
'mutation_writer/feed_writers.cc',
|
||||
'lua.cc',
|
||||
] + [Antlr3Grammar('cql3/Cql.g')] + [Thrift('interface/cassandra.thrift', 'Cassandra')]
|
||||
)
|
||||
|
||||
@@ -192,9 +192,12 @@ public:
|
||||
|
||||
virtual ::shared_ptr<terminal> bind(const query_options& options) override {
|
||||
auto bytes = bind_and_get(options);
|
||||
if (!bytes) {
|
||||
if (bytes.is_null()) {
|
||||
return ::shared_ptr<terminal>{};
|
||||
}
|
||||
if (bytes.is_unset_value()) {
|
||||
return UNSET_VALUE;
|
||||
}
|
||||
return ::make_shared<constants::value>(std::move(cql3::raw_value::make_value(to_bytes(*bytes))));
|
||||
}
|
||||
};
|
||||
|
||||
@@ -27,7 +27,9 @@
|
||||
#include <fmt/ostream.h>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "cql3/constants.hh"
|
||||
#include "cql3/lists.hh"
|
||||
#include "cql3/statements/request_validations.hh"
|
||||
#include "cql3/tuples.hh"
|
||||
#include "index/secondary_index_manager.hh"
|
||||
#include "types/list.hh"
|
||||
@@ -417,6 +419,8 @@ bool is_one_of(const column_value& col, term& rhs, const column_value_eval_bag&
|
||||
} else if (auto mkr = dynamic_cast<lists::marker*>(&rhs)) {
|
||||
// This is `a IN ?`. RHS elements are values representable as bytes_opt.
|
||||
const auto values = static_pointer_cast<lists::value>(mkr->bind(bag.options));
|
||||
statements::request_validations::check_not_null(
|
||||
values, "Invalid null value for column %s", col.col->name_as_text());
|
||||
return boost::algorithm::any_of(values->get_elements(), [&] (const bytes_opt& b) {
|
||||
return equal(b, col, bag);
|
||||
});
|
||||
@@ -568,7 +572,8 @@ const auto deref = boost::adaptors::transformed([] (const bytes_opt& b) { return
|
||||
|
||||
/// Returns possible values from t, which must be RHS of IN.
|
||||
value_list get_IN_values(
|
||||
const ::shared_ptr<term>& t, const query_options& options, const serialized_compare& comparator) {
|
||||
const ::shared_ptr<term>& t, const query_options& options, const serialized_compare& comparator,
|
||||
sstring_view column_name) {
|
||||
// RHS is prepared differently for different CQL cases. Cast it dynamically to discern which case this is.
|
||||
if (auto dv = dynamic_pointer_cast<lists::delayed_value>(t)) {
|
||||
// Case `a IN (1,2,3)`.
|
||||
@@ -578,8 +583,12 @@ value_list get_IN_values(
|
||||
return to_sorted_vector(std::move(result_range), comparator);
|
||||
} else if (auto mkr = dynamic_pointer_cast<lists::marker>(t)) {
|
||||
// Case `a IN ?`. Collect all list-element values.
|
||||
const auto val = static_pointer_cast<lists::value>(mkr->bind(options));
|
||||
return to_sorted_vector(val->get_elements() | non_null | deref, comparator);
|
||||
const auto val = mkr->bind(options);
|
||||
if (val == constants::UNSET_VALUE) {
|
||||
throw exceptions::invalid_request_exception(format("Invalid unset value for column {}", column_name));
|
||||
}
|
||||
statements::request_validations::check_not_null(val, "Invalid null value for IN tuple");
|
||||
return to_sorted_vector(static_pointer_cast<lists::value>(val)->get_elements() | non_null | deref, comparator);
|
||||
}
|
||||
throw std::logic_error(format("get_IN_values(single column) on invalid term {}", *t));
|
||||
}
|
||||
@@ -686,7 +695,7 @@ value_set possible_lhs_values(const column_definition* cdef, const expression& e
|
||||
return oper.op == oper_t::EQ ? value_set(value_list{*val})
|
||||
: to_range(oper.op, *val);
|
||||
} else if (oper.op == oper_t::IN) {
|
||||
return get_IN_values(oper.rhs, options, type->as_less_comparator());
|
||||
return get_IN_values(oper.rhs, options, type->as_less_comparator(), cdef->name_as_text());
|
||||
}
|
||||
throw std::logic_error(format("possible_lhs_values: unhandled operator {}", oper));
|
||||
},
|
||||
|
||||
@@ -305,6 +305,12 @@ maps::setter_by_key::execute(mutation& m, const clustering_key_prefix& prefix, c
|
||||
assert(column.type->is_multi_cell()); // "Attempted to set a value for a single key on a frozen map"m
|
||||
auto key = _k->bind_and_get(params._options);
|
||||
auto value = _t->bind_and_get(params._options);
|
||||
if (value.is_unset_value()) {
|
||||
return;
|
||||
}
|
||||
if (key.is_unset_value() || value.is_unset_value()) {
|
||||
throw invalid_request_exception("Invalid unset map key");
|
||||
}
|
||||
if (!key) {
|
||||
throw invalid_request_exception("Invalid null map key");
|
||||
}
|
||||
|
||||
@@ -315,7 +315,7 @@ sets::discarder::execute(mutation& m, const clustering_key_prefix& row_key, cons
|
||||
assert(column.type->is_multi_cell()); // "Attempted to remove items from a frozen set";
|
||||
|
||||
auto&& value = _t->bind(params._options);
|
||||
if (!value) {
|
||||
if (!value || value == constants::UNSET_VALUE) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -59,6 +59,7 @@
|
||||
#include "db/timeout_clock.hh"
|
||||
#include "db/consistency_level_validations.hh"
|
||||
#include "database.hh"
|
||||
#include "test/lib/select_statement_utils.hh"
|
||||
#include <boost/algorithm/cxx11/any_of.hpp>
|
||||
|
||||
bool is_system_keyspace(const sstring& name);
|
||||
@@ -67,6 +68,8 @@ namespace cql3 {
|
||||
|
||||
namespace statements {
|
||||
|
||||
static constexpr int DEFAULT_INTERNAL_PAGING_SIZE = select_statement::DEFAULT_COUNT_PAGE_SIZE;
|
||||
thread_local int internal_paging_size = DEFAULT_INTERNAL_PAGING_SIZE;
|
||||
thread_local const lw_shared_ptr<const select_statement::parameters> select_statement::_default_parameters = make_lw_shared<select_statement::parameters>();
|
||||
|
||||
select_statement::parameters::parameters()
|
||||
@@ -338,7 +341,7 @@ select_statement::do_execute(service::storage_proxy& proxy,
|
||||
const bool aggregate = _selection->is_aggregate() || has_group_by();
|
||||
const bool nonpaged_filtering = restrictions_need_filtering && page_size <= 0;
|
||||
if (aggregate || nonpaged_filtering) {
|
||||
page_size = DEFAULT_COUNT_PAGE_SIZE;
|
||||
page_size = internal_paging_size;
|
||||
}
|
||||
|
||||
auto key_ranges = _restrictions->get_partition_key_ranges(options);
|
||||
@@ -453,7 +456,7 @@ generate_base_key_from_index_pk(const partition_key& index_pk, const std::option
|
||||
if (!view_col) {
|
||||
throw std::runtime_error(format("Base key column not found in the view: {}", base_col.name_as_text()));
|
||||
}
|
||||
if (base_col.type != view_col->type) {
|
||||
if (base_col.type->without_reversed() != *view_col->type) {
|
||||
throw std::runtime_error(format("Mismatched types for base and view columns {}: {} and {}",
|
||||
base_col.name_as_text(), base_col.type->cql3_type_name(), view_col->type->cql3_type_name()));
|
||||
}
|
||||
@@ -541,13 +544,29 @@ indexed_table_select_statement::do_execute_base_query(
|
||||
if (old_paging_state && concurrency == 1) {
|
||||
auto base_pk = generate_base_key_from_index_pk<partition_key>(old_paging_state->get_partition_key(),
|
||||
old_paging_state->get_clustering_key(), *_schema, *_view_schema);
|
||||
auto row_ranges = command->slice.default_row_ranges();
|
||||
if (old_paging_state->get_clustering_key() && _schema->clustering_key_size() > 0) {
|
||||
auto base_ck = generate_base_key_from_index_pk<clustering_key>(old_paging_state->get_partition_key(),
|
||||
old_paging_state->get_clustering_key(), *_schema, *_view_schema);
|
||||
command->slice.set_range(*_schema, base_pk,
|
||||
std::vector<query::clustering_range>{query::clustering_range::make_starting_with(range_bound<clustering_key>(base_ck, false))});
|
||||
|
||||
query::trim_clustering_row_ranges_to(*_schema, row_ranges, base_ck, false);
|
||||
command->slice.set_range(*_schema, base_pk, row_ranges);
|
||||
} else {
|
||||
command->slice.set_range(*_schema, base_pk, std::vector<query::clustering_range>{query::clustering_range::make_open_ended_both_sides()});
|
||||
// There is no clustering key in old_paging_state and/or no clustering key in
|
||||
// _schema, therefore read an entire partition (whole clustering range).
|
||||
//
|
||||
// The only exception to applying no restrictions on clustering key
|
||||
// is a case when we have a secondary index on the first column
|
||||
// of clustering key. In such a case we should not read the
|
||||
// entire clustering range - only a range in which first column
|
||||
// of clustering key has the correct value.
|
||||
//
|
||||
// This means that we should not set an open_ended_both_sides
|
||||
// clustering range on base_pk, instead intersect it with
|
||||
// _row_ranges (which contains the restrictions necessary for the
|
||||
// case described above). The result of such intersection is just
|
||||
// _row_ranges, which we explicitly set on base_pk.
|
||||
command->slice.set_range(*_schema, base_pk, row_ranges);
|
||||
}
|
||||
}
|
||||
concurrency *= 2;
|
||||
@@ -992,12 +1011,16 @@ indexed_table_select_statement::do_execute(service::storage_proxy& proxy,
|
||||
const bool aggregate = _selection->is_aggregate() || has_group_by();
|
||||
if (aggregate) {
|
||||
const bool restrictions_need_filtering = _restrictions->need_filtering();
|
||||
return do_with(cql3::selection::result_set_builder(*_selection, now, options.get_cql_serialization_format()), std::make_unique<cql3::query_options>(cql3::query_options(options)),
|
||||
return do_with(cql3::selection::result_set_builder(*_selection, now, options.get_cql_serialization_format(), *_group_by_cell_indices), std::make_unique<cql3::query_options>(cql3::query_options(options)),
|
||||
[this, &options, &proxy, &state, now, whole_partitions, partition_slices, restrictions_need_filtering] (cql3::selection::result_set_builder& builder, std::unique_ptr<cql3::query_options>& internal_options) {
|
||||
// page size is set to the internal count page size, regardless of the user-provided value
|
||||
internal_options.reset(new cql3::query_options(std::move(internal_options), options.get_paging_state(), DEFAULT_COUNT_PAGE_SIZE));
|
||||
internal_options.reset(new cql3::query_options(std::move(internal_options), options.get_paging_state(), internal_paging_size));
|
||||
return repeat([this, &builder, &options, &internal_options, &proxy, &state, now, whole_partitions, partition_slices, restrictions_need_filtering] () {
|
||||
auto consume_results = [this, &builder, &options, &internal_options, restrictions_need_filtering] (foreign_ptr<lw_shared_ptr<query::result>> results, lw_shared_ptr<query::read_command> cmd) {
|
||||
auto consume_results = [this, &builder, &options, &internal_options, &proxy, &state, restrictions_need_filtering] (foreign_ptr<lw_shared_ptr<query::result>> results, lw_shared_ptr<query::read_command> cmd, lw_shared_ptr<const service::pager::paging_state> paging_state) {
|
||||
if (paging_state) {
|
||||
paging_state = generate_view_paging_state_from_base_query_results(paging_state, results, proxy, state, options);
|
||||
}
|
||||
internal_options.reset(new cql3::query_options(std::move(internal_options), paging_state ? make_lw_shared<service::pager::paging_state>(*paging_state) : nullptr));
|
||||
if (restrictions_need_filtering) {
|
||||
_stats.filtered_rows_read_total += *results->row_count();
|
||||
query::result_view::consume(*results, cmd->slice, cql3::selection::result_set_builder::visitor(builder, *_schema, *_selection,
|
||||
@@ -1005,24 +1028,24 @@ indexed_table_select_statement::do_execute(service::storage_proxy& proxy,
|
||||
} else {
|
||||
query::result_view::consume(*results, cmd->slice, cql3::selection::result_set_builder::visitor(builder, *_schema, *_selection));
|
||||
}
|
||||
bool has_more_pages = paging_state && paging_state->get_remaining() > 0;
|
||||
return stop_iteration(!has_more_pages);
|
||||
};
|
||||
|
||||
if (whole_partitions || partition_slices) {
|
||||
return find_index_partition_ranges(proxy, state, *internal_options).then_unpack(
|
||||
[this, now, &state, &internal_options, &proxy, consume_results = std::move(consume_results)] (dht::partition_range_vector partition_ranges, lw_shared_ptr<const service::pager::paging_state> paging_state) {
|
||||
bool has_more_pages = paging_state && paging_state->get_remaining() > 0;
|
||||
internal_options.reset(new cql3::query_options(std::move(internal_options), paging_state ? make_lw_shared<service::pager::paging_state>(*paging_state) : nullptr));
|
||||
return do_execute_base_query(proxy, std::move(partition_ranges), state, *internal_options, now, std::move(paging_state)).then_unpack(consume_results).then([has_more_pages] {
|
||||
return stop_iteration(!has_more_pages);
|
||||
return do_execute_base_query(proxy, std::move(partition_ranges), state, *internal_options, now, paging_state)
|
||||
.then_unpack([paging_state, consume_results = std::move(consume_results)](foreign_ptr<lw_shared_ptr<query::result>> results, lw_shared_ptr<query::read_command> cmd) {
|
||||
return consume_results(std::move(results), std::move(cmd), std::move(paging_state));
|
||||
});
|
||||
});
|
||||
} else {
|
||||
return find_index_clustering_rows(proxy, state, *internal_options).then_unpack(
|
||||
[this, now, &state, &internal_options, &proxy, consume_results = std::move(consume_results)] (std::vector<primary_key> primary_keys, lw_shared_ptr<const service::pager::paging_state> paging_state) {
|
||||
bool has_more_pages = paging_state && paging_state->get_remaining() > 0;
|
||||
internal_options.reset(new cql3::query_options(std::move(internal_options), paging_state ? make_lw_shared<service::pager::paging_state>(*paging_state) : nullptr));
|
||||
return this->do_execute_base_query(proxy, std::move(primary_keys), state, *internal_options, now, std::move(paging_state)).then_unpack(consume_results).then([has_more_pages] {
|
||||
return stop_iteration(!has_more_pages);
|
||||
return this->do_execute_base_query(proxy, std::move(primary_keys), state, *internal_options, now, paging_state)
|
||||
.then_unpack([paging_state, consume_results = std::move(consume_results)](foreign_ptr<lw_shared_ptr<query::result>> results, lw_shared_ptr<query::read_command> cmd) {
|
||||
return consume_results(std::move(results), std::move(cmd), std::move(paging_state));
|
||||
});
|
||||
});
|
||||
}
|
||||
@@ -1687,6 +1710,16 @@ std::vector<size_t> select_statement::prepare_group_by(const schema& schema, sel
|
||||
|
||||
}
|
||||
|
||||
/// Sets `internal_paging_size` to `paging_size` by running the assignment
/// through seastar::smp::invoke_on_all.
///
/// \param paging_size the new value to store in `internal_paging_size`
/// \return a future that resolves once invoke_on_all has completed
// NOTE(review): presumably `internal_paging_size` is a per-shard variable, which
// is why the assignment is dispatched to all shards -- confirm at its declaration.
future<> set_internal_paging_size(int paging_size) {
    auto assign_on_this_shard = [paging_size] {
        internal_paging_size = paging_size;
    };
    return seastar::smp::invoke_on_all(std::move(assign_on_this_shard));
}
|
||||
|
||||
future<> reset_internal_paging_size() {
|
||||
return set_internal_paging_size(DEFAULT_INTERNAL_PAGING_SIZE);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
namespace util {
|
||||
|
||||
47
database.cc
47
database.cc
@@ -572,9 +572,6 @@ void database::set_format_by_config() {
|
||||
}
|
||||
|
||||
database::~database() {
|
||||
_read_concurrency_sem.clear_inactive_reads();
|
||||
_streaming_concurrency_sem.clear_inactive_reads();
|
||||
_system_read_concurrency_sem.clear_inactive_reads();
|
||||
}
|
||||
|
||||
void database::update_version(const utils::UUID& version) {
|
||||
@@ -662,11 +659,22 @@ future<> database::parse_system_tables(distributed<service::storage_proxy>& prox
|
||||
});
|
||||
}).then([&proxy, &mm, this] {
|
||||
return do_parse_schema_tables(proxy, db::schema_tables::VIEWS, [this, &proxy, &mm] (schema_result_value_type &v) {
|
||||
return create_views_from_schema_partition(proxy, v.second).then([this, &mm] (std::vector<view_ptr> views) {
|
||||
return parallel_for_each(views.begin(), views.end(), [this, &mm] (auto&& v) {
|
||||
return this->add_column_family_and_make_directory(v).then([this, &mm, v] {
|
||||
return maybe_update_legacy_secondary_index_mv_schema(mm.local(), *this, v);
|
||||
});
|
||||
return create_views_from_schema_partition(proxy, v.second).then([this, &mm, &proxy] (std::vector<view_ptr> views) {
|
||||
return parallel_for_each(views.begin(), views.end(), [this, &mm, &proxy] (auto&& v) {
|
||||
// TODO: Remove once computed columns are guaranteed to be featured in the whole cluster.
|
||||
// We fix the schema in place here in order to avoid races (write commands coming from other coordinators).
|
||||
view_ptr fixed_v = maybe_fix_legacy_secondary_index_mv_schema(*this, v, nullptr, preserve_version::yes);
|
||||
view_ptr v_to_add = fixed_v ? fixed_v : v;
|
||||
future<> f = this->add_column_family_and_make_directory(v_to_add);
|
||||
if (bool(fixed_v)) {
|
||||
v_to_add = fixed_v;
|
||||
auto&& keyspace = find_keyspace(v->ks_name()).metadata();
|
||||
auto mutations = db::schema_tables::make_update_view_mutations(keyspace, view_ptr(v), fixed_v, api::new_timestamp(), true);
|
||||
f = f.then([this, &proxy, mutations = std::move(mutations)] {
|
||||
return db::schema_tables::merge_schema(proxy, _feat, std::move(mutations));
|
||||
});
|
||||
}
|
||||
return f;
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -801,7 +809,7 @@ future<> database::drop_column_family(const sstring& ks_name, const sstring& cf_
|
||||
remove(*cf);
|
||||
cf->clear_views();
|
||||
auto& ks = find_keyspace(ks_name);
|
||||
return when_all_succeed(cf->await_pending_writes(), cf->await_pending_reads()).then_unpack([this, &ks, cf, tsf = std::move(tsf), snapshot] {
|
||||
return cf->await_pending_ops().then([this, &ks, cf, tsf = std::move(tsf), snapshot] {
|
||||
return truncate(ks, *cf, std::move(tsf), snapshot).finally([this, cf] {
|
||||
return cf->stop();
|
||||
});
|
||||
@@ -1808,6 +1816,13 @@ future<>
|
||||
database::stop() {
|
||||
assert(!_large_data_handler->running());
|
||||
|
||||
// Inactive reads might hold on to sstables, blocking the
|
||||
// `sstables_manager::close()` calls below. No one will come back for these
|
||||
// reads at this point so clear them before proceeding with the shutdown.
|
||||
_read_concurrency_sem.clear_inactive_reads();
|
||||
_streaming_concurrency_sem.clear_inactive_reads();
|
||||
_system_read_concurrency_sem.clear_inactive_reads();
|
||||
|
||||
// try to ensure that CL has done disk flushing
|
||||
future<> maybe_shutdown_commitlog = _commitlog != nullptr ? _commitlog->shutdown() : make_ready_future<>();
|
||||
return maybe_shutdown_commitlog.then([this] {
|
||||
@@ -1859,26 +1874,28 @@ future<> database::truncate(const keyspace& ks, column_family& cf, timestamp_fun
|
||||
|
||||
return cf.run_with_compaction_disabled([this, &cf, should_flush, auto_snapshot, tsf = std::move(tsf), low_mark]() mutable {
|
||||
future<> f = make_ready_future<>();
|
||||
if (should_flush) {
|
||||
bool did_flush = false;
|
||||
if (should_flush && cf.can_flush()) {
|
||||
// TODO:
|
||||
// this is not really a guarantee at all that we've actually
|
||||
// gotten all things to disk. Again, need queue-ish or something.
|
||||
f = cf.flush();
|
||||
did_flush = true;
|
||||
} else {
|
||||
f = cf.clear();
|
||||
}
|
||||
return f.then([this, &cf, auto_snapshot, tsf = std::move(tsf), low_mark, should_flush] {
|
||||
return f.then([this, &cf, auto_snapshot, tsf = std::move(tsf), low_mark, should_flush, did_flush] {
|
||||
dblog.debug("Discarding sstable data for truncated CF + indexes");
|
||||
// TODO: notify truncation
|
||||
|
||||
return tsf().then([this, &cf, auto_snapshot, low_mark, should_flush](db_clock::time_point truncated_at) {
|
||||
return tsf().then([this, &cf, auto_snapshot, low_mark, should_flush, did_flush](db_clock::time_point truncated_at) {
|
||||
future<> f = make_ready_future<>();
|
||||
if (auto_snapshot) {
|
||||
auto name = format("{:d}-{}", truncated_at.time_since_epoch().count(), cf.schema()->cf_name());
|
||||
f = cf.snapshot(*this, name);
|
||||
}
|
||||
return f.then([this, &cf, truncated_at, low_mark, should_flush] {
|
||||
return cf.discard_sstables(truncated_at).then([this, &cf, truncated_at, low_mark, should_flush](db::replay_position rp) {
|
||||
return f.then([this, &cf, truncated_at, low_mark, should_flush, did_flush] {
|
||||
return cf.discard_sstables(truncated_at).then([this, &cf, truncated_at, low_mark, should_flush, did_flush](db::replay_position rp) {
|
||||
// TODO: indexes.
|
||||
// Note: since discard_sstables was changed to only count tables owned by this shard,
|
||||
// we can get zero rp back. Changed assert, and ensure we save at least low_mark.
|
||||
@@ -1886,7 +1903,7 @@ future<> database::truncate(const keyspace& ks, column_family& cf, timestamp_fun
|
||||
// We nowadays do not flush tables with sstables but autosnapshot=false. This means
|
||||
// the low_mark assertion does not hold, because we maybe/probably never got around to
|
||||
// creating the sstables that would create them.
|
||||
assert(!should_flush || low_mark <= rp || rp == db::replay_position());
|
||||
assert(!did_flush || low_mark <= rp || rp == db::replay_position());
|
||||
rp = std::max(low_mark, rp);
|
||||
return truncate_views(cf, truncated_at, should_flush).then([&cf, truncated_at, rp] {
|
||||
// save_truncation_record() may actually fail after we cached the truncation time
|
||||
|
||||
16
database.hh
16
database.hh
@@ -224,6 +224,10 @@ public:
|
||||
return bool(_seal_immediate_fn);
|
||||
}
|
||||
|
||||
bool can_flush() const {
|
||||
return may_flush() && !empty();
|
||||
}
|
||||
|
||||
bool empty() const {
|
||||
for (auto& m : _memtables) {
|
||||
if (!m->empty()) {
|
||||
@@ -505,6 +509,8 @@ private:
|
||||
utils::phased_barrier _pending_reads_phaser;
|
||||
// Corresponding phaser for in-progress streams
|
||||
utils::phased_barrier _pending_streams_phaser;
|
||||
// Corresponding phaser for in-progress flushes
|
||||
utils::phased_barrier _pending_flushes_phaser;
|
||||
|
||||
// This field caches the last truncation time for the table.
|
||||
// The master resides in system.truncated table
|
||||
@@ -780,6 +786,8 @@ public:
|
||||
// to them, and then pass that + 1 as "start".
|
||||
future<std::vector<sstables::entry_descriptor>> reshuffle_sstables(std::set<int64_t> all_generations, int64_t start);
|
||||
|
||||
bool can_flush() const;
|
||||
|
||||
// FIXME: this is just an example, should be changed to something more
|
||||
// general. compact_all_sstables() starts a compaction of all sstables.
|
||||
// It doesn't flush the current memtable first. It's just an ad-hoc method,
|
||||
@@ -932,6 +940,14 @@ public:
|
||||
return _pending_streams_phaser.advance_and_await();
|
||||
}
|
||||
|
||||
future<> await_pending_flushes() {
|
||||
return _pending_flushes_phaser.advance_and_await();
|
||||
}
|
||||
|
||||
// Waits for all in-progress operations tracked by this table: reads, writes,
// streams and flushes. Each category is awaited through its own
// await_pending_*() helper; the four futures are combined with when_all()
// and the combined result is discarded.
future<> await_pending_ops() {
    auto reads_done = await_pending_reads();
    auto writes_done = await_pending_writes();
    auto streams_done = await_pending_streams();
    auto flushes_done = await_pending_flushes();
    return when_all(std::move(reads_done),
                    std::move(writes_done),
                    std::move(streams_done),
                    std::move(flushes_done)).discard_result();
}
|
||||
|
||||
void add_or_update_view(view_ptr v);
|
||||
void remove_view(view_ptr v);
|
||||
void clear_views();
|
||||
|
||||
15
db/config.cc
15
db/config.cc
@@ -31,6 +31,7 @@
|
||||
#include <seastar/core/print.hh>
|
||||
#include <seastar/util/log.hh>
|
||||
|
||||
#include "cdc/cdc_extension.hh"
|
||||
#include "config.hh"
|
||||
#include "extensions.hh"
|
||||
#include "log.hh"
|
||||
@@ -694,7 +695,7 @@ db::config::config(std::shared_ptr<db::extensions> exts)
|
||||
, replace_address(this, "replace_address", value_status::Used, "", "The listen_address or broadcast_address of the dead node to replace. Same as -Dcassandra.replace_address.")
|
||||
, replace_address_first_boot(this, "replace_address_first_boot", value_status::Used, "", "Like replace_address option, but if the node has been bootstrapped successfully it will be ignored. Same as -Dcassandra.replace_address_first_boot.")
|
||||
, override_decommission(this, "override_decommission", value_status::Used, false, "Set true to force a decommissioned node to join the cluster")
|
||||
, enable_repair_based_node_ops(this, "enable_repair_based_node_ops", liveness::LiveUpdate, value_status::Used, true, "Set true to use enable repair based node operations instead of streaming based")
|
||||
, enable_repair_based_node_ops(this, "enable_repair_based_node_ops", liveness::LiveUpdate, value_status::Used, false, "Set true to use enable repair based node operations instead of streaming based")
|
||||
, ring_delay_ms(this, "ring_delay_ms", value_status::Used, 30 * 1000, "Time a node waits to hear from other nodes before joining the ring in milliseconds. Same as -Dcassandra.ring_delay_ms in cassandra.")
|
||||
, shadow_round_ms(this, "shadow_round_ms", value_status::Used, 300 * 1000, "The maximum gossip shadow round time. Can be used to reduce the gossip feature check time during node boot up.")
|
||||
, fd_max_interval_ms(this, "fd_max_interval_ms", value_status::Used, 2 * 1000, "The maximum failure_detector interval time in milliseconds. Interval larger than the maximum will be ignored. Larger cluster may need to increase the default.")
|
||||
@@ -792,6 +793,10 @@ db::config::config()
|
||||
db::config::~config()
|
||||
{}
|
||||
|
||||
void db::config::add_cdc_extension() {
|
||||
_extensions->add_schema_extension<cdc::cdc_extension>(cdc::cdc_extension::NAME);
|
||||
}
|
||||
|
||||
void db::config::setup_directories() {
|
||||
maybe_in_workdir(commitlog_directory, "commitlog");
|
||||
maybe_in_workdir(data_file_directories, "data");
|
||||
@@ -874,7 +879,7 @@ db::fs::path db::config::get_conf_sub(db::fs::path sub) {
|
||||
}
|
||||
|
||||
bool db::config::check_experimental(experimental_features_t::feature f) const {
|
||||
if (experimental() && f != experimental_features_t::UNUSED) {
|
||||
if (experimental() && f != experimental_features_t::UNUSED && f != experimental_features_t::UNUSED_CDC) {
|
||||
return true;
|
||||
}
|
||||
const auto& optval = experimental_features();
|
||||
@@ -928,11 +933,13 @@ std::unordered_map<sstring, db::experimental_features_t::feature> db::experiment
|
||||
// https://github.com/scylladb/scylla/pull/5369#discussion_r353614807
|
||||
// Lightweight transactions are no longer experimental. Map them
|
||||
// to UNUSED switch for a while, then remove altogether.
|
||||
return {{"lwt", UNUSED}, {"udf", UDF}, {"cdc", CDC}};
|
||||
// Change Data Capture is no longer experimental. Map it
|
||||
// to UNUSED_CDC switch for a while, then remove altogether.
|
||||
return {{"lwt", UNUSED}, {"udf", UDF}, {"cdc", UNUSED_CDC}, {"alternator-streams", ALTERNATOR_STREAMS}};
|
||||
}
|
||||
|
||||
std::vector<enum_option<db::experimental_features_t>> db::experimental_features_t::all() {
|
||||
return {UDF, CDC};
|
||||
return {UDF, ALTERNATOR_STREAMS};
|
||||
}
|
||||
|
||||
template struct utils::config_file::named_value<seastar::log_level>;
|
||||
|
||||
@@ -81,7 +81,7 @@ namespace db {
|
||||
|
||||
/// Enumeration of all valid values for the `experimental` config entry.
|
||||
struct experimental_features_t {
|
||||
enum feature { UNUSED, UDF, CDC };
|
||||
enum feature { UNUSED, UDF, UNUSED_CDC, ALTERNATOR_STREAMS };
|
||||
static std::unordered_map<sstring, feature> map(); // See enum_option.
|
||||
static std::vector<enum_option<experimental_features_t>> all();
|
||||
};
|
||||
@@ -92,6 +92,9 @@ public:
|
||||
config(std::shared_ptr<db::extensions>);
|
||||
~config();
|
||||
|
||||
// For testing only
|
||||
void add_cdc_extension();
|
||||
|
||||
/// True iff the feature is enabled.
|
||||
bool check_experimental(experimental_features_t::feature f) const;
|
||||
|
||||
|
||||
@@ -113,7 +113,7 @@ future<> cql_table_large_data_handler::record_large_cells(const sstables::sstabl
|
||||
auto ck_str = key_to_str(*clustering_key, s);
|
||||
return try_record("cell", sst, partition_key, int64_t(cell_size), cell_type, format("{} {}", ck_str, column_name), extra_fields, ck_str, column_name);
|
||||
} else {
|
||||
return try_record("cell", sst, partition_key, int64_t(cell_size), cell_type, column_name, extra_fields, nullptr, column_name);
|
||||
return try_record("cell", sst, partition_key, int64_t(cell_size), cell_type, column_name, extra_fields, data_value::make_null(utf8_type), column_name);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -125,7 +125,7 @@ future<> cql_table_large_data_handler::record_large_rows(const sstables::sstable
|
||||
std::string ck_str = key_to_str(*clustering_key, s);
|
||||
return try_record("row", sst, partition_key, int64_t(row_size), "row", ck_str, extra_fields, ck_str);
|
||||
} else {
|
||||
return try_record("row", sst, partition_key, int64_t(row_size), "static row", "", extra_fields, nullptr);
|
||||
return try_record("row", sst, partition_key, int64_t(row_size), "static row", "", extra_fields, data_value::make_null(utf8_type));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -111,27 +111,12 @@ public:
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
future<> maybe_delete_large_data_entries(const schema& s, sstring filename, uint64_t data_size) {
|
||||
future<> maybe_delete_large_data_entries(const schema& /*s*/, sstring /*filename*/, uint64_t /*data_size*/) {
|
||||
assert(running());
|
||||
future<> large_partitions = make_ready_future<>();
|
||||
if (__builtin_expect(data_size > _partition_threshold_bytes, false)) {
|
||||
large_partitions = with_sem([&s, filename, this] () mutable {
|
||||
return delete_large_data_entries(s, std::move(filename), db::system_keyspace::LARGE_PARTITIONS);
|
||||
});
|
||||
}
|
||||
future<> large_rows = make_ready_future<>();
|
||||
if (__builtin_expect(data_size > _row_threshold_bytes, false)) {
|
||||
large_rows = with_sem([&s, filename, this] () mutable {
|
||||
return delete_large_data_entries(s, std::move(filename), db::system_keyspace::LARGE_ROWS);
|
||||
});
|
||||
}
|
||||
future<> large_cells = make_ready_future<>();
|
||||
if (__builtin_expect(data_size > _cell_threshold_bytes, false)) {
|
||||
large_cells = with_sem([&s, filename, this] () mutable {
|
||||
return delete_large_data_entries(s, std::move(filename), db::system_keyspace::LARGE_CELLS);
|
||||
});
|
||||
}
|
||||
return when_all(std::move(large_partitions), std::move(large_rows), std::move(large_cells)).discard_result();
|
||||
|
||||
// Deletion of large data entries is disabled due to #7668
|
||||
// They will eventually expire based on the 30-day TTL.
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
const large_data_handler::stats& stats() const { return _stats; }
|
||||
|
||||
@@ -58,6 +58,7 @@
|
||||
#include "schema_registry.hh"
|
||||
#include "mutation_query.hh"
|
||||
#include "system_keyspace.hh"
|
||||
#include "system_distributed_keyspace.hh"
|
||||
#include "cql3/cql3_type.hh"
|
||||
#include "cql3/functions/functions.hh"
|
||||
#include "cql3/util.hh"
|
||||
@@ -104,6 +105,11 @@ using namespace std::chrono_literals;
|
||||
|
||||
static logging::logger diff_logger("schema_diff");
|
||||
|
||||
static bool is_extra_durable(const sstring& ks_name, const sstring& cf_name) {
|
||||
return (is_system_keyspace(ks_name) && db::system_keyspace::is_extra_durable(cf_name))
|
||||
|| (ks_name == db::system_distributed_keyspace::NAME && db::system_distributed_keyspace::is_extra_durable(cf_name));
|
||||
}
|
||||
|
||||
|
||||
/** system.schema_* tables used to store keyspace/table/type attributes prior to C* 3.0 */
|
||||
namespace db {
|
||||
@@ -1202,7 +1208,42 @@ static void merge_tables_and_views(distributed<service::storage_proxy>& proxy,
|
||||
return create_table_from_mutations(proxy, std::move(sm));
|
||||
});
|
||||
auto views_diff = diff_table_or_view(proxy, std::move(views_before), std::move(views_after), [&] (schema_mutations sm) {
|
||||
return create_view_from_mutations(proxy, std::move(sm));
|
||||
// The view schema mutation should be created with reference to the base table schema because we definitely know it by now.
|
||||
// If we don't do it we are leaving a window where write commands to this schema are illegal.
|
||||
// There are 3 possibilities:
|
||||
// 1. The table was altered - in this case we want the view to correspond to this new table schema.
|
||||
// 2. The table was just created - the table is guaranteed to be published with the view in that case.
|
||||
// 3. The view itself was altered - in that case we already know the base table so we can take it from
|
||||
// the database object.
|
||||
view_ptr vp = create_view_from_mutations(proxy, std::move(sm));
|
||||
schema_ptr base_schema;
|
||||
for (auto&& s : tables_diff.altered) {
|
||||
if (s.new_schema.get()->ks_name() == vp->ks_name() && s.new_schema.get()->cf_name() == vp->view_info()->base_name() ) {
|
||||
base_schema = s.new_schema;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!base_schema) {
|
||||
for (auto&& s : tables_diff.created) {
|
||||
if (s.get()->ks_name() == vp->ks_name() && s.get()->cf_name() == vp->view_info()->base_name() ) {
|
||||
base_schema = s;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!base_schema) {
|
||||
base_schema = proxy.local().local_db().find_schema(vp->ks_name(), vp->view_info()->base_name());
|
||||
}
|
||||
|
||||
// Now when we have a referenced base - just in case we are registering an old view (this can happen in a mixed cluster)
|
||||
// let's make it write enabled by updating its computed columns.
|
||||
view_ptr fixed_vp = maybe_fix_legacy_secondary_index_mv_schema(proxy.local().get_db().local(), vp, base_schema, preserve_version::yes);
|
||||
if(fixed_vp) {
|
||||
vp = fixed_vp;
|
||||
}
|
||||
vp->view_info()->set_base_info(vp->view_info()->make_base_dependent_view_info(*base_schema));
|
||||
return vp;
|
||||
});
|
||||
|
||||
proxy.local().get_db().invoke_on_all([&] (database& db) {
|
||||
@@ -2499,7 +2540,7 @@ schema_ptr create_table_from_mutations(const schema_ctxt& ctxt, schema_mutations
|
||||
builder.with_sharder(smp::count, ctxt.murmur3_partitioner_ignore_msb_bits());
|
||||
}
|
||||
|
||||
if (is_system_keyspace(ks_name) && is_extra_durable(cf_name)) {
|
||||
if (is_extra_durable(ks_name, cf_name)) {
|
||||
builder.set_wait_for_sync_to_commitlog(true);
|
||||
}
|
||||
|
||||
@@ -3027,39 +3068,40 @@ std::vector<sstring> all_table_names(schema_features features) {
|
||||
boost::adaptors::transformed([] (auto schema) { return schema->cf_name(); }));
|
||||
}
|
||||
|
||||
future<> maybe_update_legacy_secondary_index_mv_schema(service::migration_manager& mm, database& db, view_ptr v) {
|
||||
// TODO(sarna): Remove once computed columns are guaranteed to be featured in the whole cluster.
|
||||
view_ptr maybe_fix_legacy_secondary_index_mv_schema(database& db, const view_ptr& v, schema_ptr base_schema, preserve_version preserve_version) {
|
||||
// Legacy format for a secondary index used a hardcoded "token" column, which ensured a proper
|
||||
// order for indexed queries. This "token" column is now implemented as a computed column,
|
||||
// but for the sake of compatibility we assume that there might be indexes created in the legacy
|
||||
// format, where "token" is not marked as computed. Once we're sure that all indexes have their
|
||||
// columns marked as computed (because they were either created on a node that supports computed
|
||||
// columns or were fixed by this utility function), it's safe to remove this function altogether.
|
||||
if (!db.features().cluster_supports_computed_columns()) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
if (v->clustering_key_size() == 0) {
|
||||
return make_ready_future<>();
|
||||
return view_ptr(nullptr);
|
||||
}
|
||||
const column_definition& first_view_ck = v->clustering_key_columns().front();
|
||||
if (first_view_ck.is_computed()) {
|
||||
return make_ready_future<>();
|
||||
return view_ptr(nullptr);
|
||||
}
|
||||
|
||||
if (!base_schema) {
|
||||
base_schema = db.find_schema(v->view_info()->base_id());
|
||||
}
|
||||
|
||||
table& base = db.find_column_family(v->view_info()->base_id());
|
||||
schema_ptr base_schema = base.schema();
|
||||
// If the first clustering key part of a view is a column with name not found in base schema,
|
||||
// it implies it might be backing an index created before computed columns were introduced,
|
||||
// and as such it must be recreated properly.
|
||||
if (!base_schema->columns_by_name().contains(first_view_ck.name())) {
|
||||
schema_builder builder{schema_ptr(v)};
|
||||
builder.mark_column_computed(first_view_ck.name(), std::make_unique<token_column_computation>());
|
||||
return mm.announce_view_update(view_ptr(builder.build()), true);
|
||||
if (preserve_version) {
|
||||
builder.with_version(v->version());
|
||||
}
|
||||
return view_ptr(builder.build());
|
||||
}
|
||||
return make_ready_future<>();
|
||||
return view_ptr(nullptr);
|
||||
}
|
||||
|
||||
|
||||
namespace legacy {
|
||||
|
||||
table_schema_version schema_mutations::digest() const {
|
||||
|
||||
@@ -238,7 +238,9 @@ std::vector<mutation> make_update_view_mutations(lw_shared_ptr<keyspace_metadata
|
||||
|
||||
std::vector<mutation> make_drop_view_mutations(lw_shared_ptr<keyspace_metadata> keyspace, view_ptr view, api::timestamp_type timestamp);
|
||||
|
||||
future<> maybe_update_legacy_secondary_index_mv_schema(service::migration_manager& mm, database& db, view_ptr v);
|
||||
class preserve_version_tag {};
|
||||
using preserve_version = bool_class<preserve_version_tag>;
|
||||
view_ptr maybe_fix_legacy_secondary_index_mv_schema(database& db, const view_ptr& v, schema_ptr base_schema, preserve_version preserve_version);
|
||||
|
||||
sstring serialize_kind(column_kind kind);
|
||||
column_kind deserialize_kind(sstring kind);
|
||||
|
||||
@@ -201,10 +201,10 @@ static future<std::vector<token_range>> get_local_ranges(database& db) {
|
||||
// All queries will be on that table, where all entries are text and there's no notion of
|
||||
// token ranges from the CQL point of view.
|
||||
auto left_inf = boost::find_if(ranges, [] (auto&& r) {
|
||||
return !r.start() || r.start()->value() == dht::minimum_token();
|
||||
return r.end() && (!r.start() || r.start()->value() == dht::minimum_token());
|
||||
});
|
||||
auto right_inf = boost::find_if(ranges, [] (auto&& r) {
|
||||
return !r.end() || r.start()->value() == dht::maximum_token();
|
||||
return r.start() && (!r.end() || r.end()->value() == dht::maximum_token());
|
||||
});
|
||||
if (left_inf != right_inf && left_inf != ranges.end() && right_inf != ranges.end()) {
|
||||
local_ranges.push_back(token_range{to_bytes(right_inf->start()), to_bytes(left_inf->end())});
|
||||
|
||||
@@ -113,6 +113,10 @@ static std::vector<schema_ptr> all_tables() {
|
||||
};
|
||||
}
|
||||
|
||||
bool system_distributed_keyspace::is_extra_durable(const sstring& cf_name) {
|
||||
return cf_name == CDC_TOPOLOGY_DESCRIPTION;
|
||||
}
|
||||
|
||||
system_distributed_keyspace::system_distributed_keyspace(cql3::query_processor& qp, service::migration_manager& mm)
|
||||
: _qp(qp)
|
||||
, _mm(mm) {
|
||||
|
||||
@@ -64,6 +64,10 @@ private:
|
||||
service::migration_manager& _mm;
|
||||
|
||||
public:
|
||||
/* Should writes to the given table always be synchronized by commitlog (flushed to disk)
|
||||
* before being acknowledged? */
|
||||
static bool is_extra_durable(const sstring& cf_name);
|
||||
|
||||
system_distributed_keyspace(cql3::query_processor&, service::migration_manager&);
|
||||
|
||||
future<> start();
|
||||
|
||||
@@ -1241,6 +1241,14 @@ future<> mutate_MV(
|
||||
}
|
||||
}
|
||||
}
|
||||
// It's still possible that a target endpoint is duplicated in the remote endpoints list,
|
||||
// so let's get rid of the duplicate if it exists
|
||||
if (target_endpoint) {
|
||||
auto remote_it = std::find(remote_endpoints.begin(), remote_endpoints.end(), *target_endpoint);
|
||||
if (remote_it != remote_endpoints.end()) {
|
||||
remote_endpoints.erase(remote_it);
|
||||
}
|
||||
}
|
||||
|
||||
if (target_endpoint && *target_endpoint == my_address) {
|
||||
++stats.view_updates_pushed_local;
|
||||
|
||||
5
dist/common/scripts/node_exporter_install
vendored
5
dist/common/scripts/node_exporter_install
vendored
@@ -24,6 +24,8 @@ import os
|
||||
import sys
|
||||
import tempfile
|
||||
import tarfile
|
||||
import shutil
|
||||
import glob
|
||||
from scylla_util import *
|
||||
import argparse
|
||||
|
||||
@@ -61,6 +63,9 @@ if __name__ == '__main__':
|
||||
f.write(data)
|
||||
with tarfile.open('/var/tmp/node_exporter-{version}.linux-amd64.tar.gz'.format(version=VERSION)) as tf:
|
||||
tf.extractall(INSTALL_DIR)
|
||||
shutil.chown(f'{INSTALL_DIR}/node_exporter-{VERSION}.linux-amd64', 'root', 'root')
|
||||
for f in glob.glob(f'{INSTALL_DIR}/node_exporter-{VERSION}.linux-amd64/*'):
|
||||
shutil.chown(f, 'root', 'root')
|
||||
os.remove('/var/tmp/node_exporter-{version}.linux-amd64.tar.gz'.format(version=VERSION))
|
||||
if node_exporter_p.exists():
|
||||
node_exporter_p.unlink()
|
||||
|
||||
4
dist/common/scripts/scylla_io_setup
vendored
4
dist/common/scripts/scylla_io_setup
vendored
@@ -244,12 +244,12 @@ if __name__ == "__main__":
|
||||
# and https://cloud.google.com/compute/docs/disks/local-ssd#nvme
|
||||
# note that scylla iotune might measure more, this is GCP recommended
|
||||
mbs=1024*1024
|
||||
if nr_disks >= 1 & nr_disks < 4:
|
||||
if nr_disks >= 1 and nr_disks < 4:
|
||||
disk_properties["read_iops"] = 170000 * nr_disks
|
||||
disk_properties["read_bandwidth"] = 660 * mbs * nr_disks
|
||||
disk_properties["write_iops"] = 90000 * nr_disks
|
||||
disk_properties["write_bandwidth"] = 350 * mbs * nr_disks
|
||||
elif nr_disks >= 4 & nr_disks <= 8:
|
||||
elif nr_disks >= 4 and nr_disks <= 8:
|
||||
disk_properties["read_iops"] = 680000
|
||||
disk_properties["read_bandwidth"] = 2650 * mbs
|
||||
disk_properties["write_iops"] = 360000
|
||||
|
||||
13
dist/common/scripts/scylla_prepare
vendored
13
dist/common/scripts/scylla_prepare
vendored
@@ -27,6 +27,7 @@ import platform
|
||||
import distro
|
||||
|
||||
from scylla_util import *
|
||||
from multiprocessing import cpu_count
|
||||
|
||||
def get_mode_cpuset(nic, mode):
|
||||
mode_cpu_mask = out('/opt/scylladb/scripts/perftune.py --tune net --nic {} --mode {} --get-cpu-mask-quiet'.format(nic, mode))
|
||||
@@ -97,6 +98,16 @@ def verify_cpu():
|
||||
print('\nIf this is a virtual machine, please update its CPU feature configuration or upgrade to a newer hypervisor.')
|
||||
sys.exit(1)
|
||||
|
||||
def configure_aio_slots():
    """Raise /proc/sys/fs/aio-max-nr when it is below the required slot count.

    The required count is 11026 slots per CPU (10000 + 1024 + 2, for scylla)
    plus 65536 slots left for other applications. The file is only rewritten
    when the value currently stored there is smaller than that.
    """
    aio_max_nr_path = '/proc/sys/fs/aio-max-nr'
    with open(aio_max_nr_path) as limit_file:
        current_limit = int(limit_file.read())

    # (10000 + 1024 + 2) * ncpus for scylla,
    # 65536 for other apps
    wanted_limit = cpu_count() * 11026 + 65536

    if current_limit >= wanted_limit:
        # Already large enough; leave the setting untouched.
        return

    with open(aio_max_nr_path, 'w') as limit_file:
        limit_file.write(str(wanted_limit))
|
||||
|
||||
if __name__ == '__main__':
|
||||
verify_cpu()
|
||||
|
||||
@@ -114,6 +125,8 @@ if __name__ == '__main__':
|
||||
os.remove('/etc/scylla/ami_disabled')
|
||||
sys.exit(1)
|
||||
|
||||
configure_aio_slots()
|
||||
|
||||
if mode == 'virtio':
|
||||
tap = cfg.get('TAP')
|
||||
user = cfg.get('USER')
|
||||
|
||||
33
dist/common/scripts/scylla_raid_setup
vendored
33
dist/common/scripts/scylla_raid_setup
vendored
@@ -36,7 +36,7 @@ if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser(description='Configure RAID volume for Scylla.')
|
||||
parser.add_argument('--disks', required=True,
|
||||
help='specify disks for RAID')
|
||||
parser.add_argument('--raiddev', default='/dev/md0',
|
||||
parser.add_argument('--raiddev',
|
||||
help='MD device name for RAID')
|
||||
parser.add_argument('--enable-on-nextboot', '--update-fstab', action='store_true', default=False,
|
||||
help='mount RAID on next boot')
|
||||
@@ -73,9 +73,25 @@ if __name__ == '__main__':
|
||||
print('{} is busy'.format(disk))
|
||||
sys.exit(1)
|
||||
|
||||
if os.path.exists(args.raiddev):
|
||||
print('{} is already using'.format(args.raiddev))
|
||||
sys.exit(1)
|
||||
if len(disks) == 1 and not args.force_raid:
|
||||
raid = False
|
||||
fsdev = disks[0]
|
||||
else:
|
||||
raid = True
|
||||
if args.raiddev is None:
|
||||
raiddevs_to_try = [f'/dev/md{i}' for i in range(10)]
|
||||
else:
|
||||
raiddevs_to_try = [args.raiddev, ]
|
||||
for fsdev in raiddevs_to_try:
|
||||
raiddevname = os.path.basename(fsdev)
|
||||
if not os.path.exists(f'/sys/block/{raiddevname}/md/array_state'):
|
||||
break
|
||||
print(f'{fsdev} is already using')
|
||||
else:
|
||||
if args.raiddev is None:
|
||||
print("Can't find unused /dev/mdX")
|
||||
sys.exit(1)
|
||||
print(f'{fsdev} will be used to setup a RAID')
|
||||
|
||||
if os.path.ismount(mount_at):
|
||||
print('{} is already mounted'.format(mount_at))
|
||||
@@ -94,13 +110,6 @@ if __name__ == '__main__':
|
||||
except SystemdException:
|
||||
md_service = systemd_unit('mdadm.service')
|
||||
|
||||
if len(disks) == 1 and not args.force_raid:
|
||||
raid = False
|
||||
fsdev = disks[0]
|
||||
else:
|
||||
raid = True
|
||||
fsdev = args.raiddev
|
||||
|
||||
print('Creating {type} for scylla using {nr_disk} disk(s): {disks}'.format(type='RAID0' if raid else 'XFS volume', nr_disk=len(disks), disks=args.disks))
|
||||
if distro.name() == 'Ubuntu' and distro.version() == '14.04':
|
||||
if raid:
|
||||
@@ -151,7 +160,7 @@ Before=scylla-server.service
|
||||
After={after}
|
||||
|
||||
[Mount]
|
||||
What=UUID={uuid}
|
||||
What=/dev/disk/by-uuid/{uuid}
|
||||
Where={mount_at}
|
||||
Type=xfs
|
||||
Options=noatime
|
||||
|
||||
57
dist/common/scripts/scylla_util.py
vendored
57
dist/common/scripts/scylla_util.py
vendored
@@ -92,7 +92,7 @@ def scyllabindir():
|
||||
|
||||
|
||||
# @param headers dict of k:v
|
||||
def curl(url, headers=None, byte=False, timeout=3, max_retries=5):
|
||||
def curl(url, headers=None, byte=False, timeout=3, max_retries=5, retry_interval=5):
|
||||
retries = 0
|
||||
while True:
|
||||
try:
|
||||
@@ -102,9 +102,8 @@ def curl(url, headers=None, byte=False, timeout=3, max_retries=5):
|
||||
return res.read()
|
||||
else:
|
||||
return res.read().decode('utf-8')
|
||||
except urllib.error.HTTPError:
|
||||
logging.warning("Failed to grab %s..." % url)
|
||||
time.sleep(5)
|
||||
except urllib.error.URLError:
|
||||
time.sleep(retry_interval)
|
||||
retries += 1
|
||||
if retries >= max_retries:
|
||||
raise
|
||||
@@ -188,7 +187,7 @@ class gcp_instance:
|
||||
"""get list of nvme disks from metadata server"""
|
||||
import json
|
||||
try:
|
||||
disksREST=self.__instance_metadata("disks")
|
||||
disksREST=self.__instance_metadata("disks", True)
|
||||
disksobj=json.loads(disksREST)
|
||||
nvmedisks=list(filter(self.isNVME, disksobj))
|
||||
except Exception as e:
|
||||
@@ -236,7 +235,8 @@ class gcp_instance:
|
||||
|
||||
def instance_size(self):
|
||||
"""Returns the size of the instance we are running in. i.e.: 2"""
|
||||
return self.instancetype.split("-")[2]
|
||||
instancetypesplit = self.instancetype.split("-")
|
||||
return instancetypesplit[2] if len(instancetypesplit)>2 else 0
|
||||
|
||||
def instance_class(self):
|
||||
"""Returns the class of the instance we are running in. i.e.: n2"""
|
||||
@@ -298,22 +298,31 @@ class gcp_instance:
|
||||
return self.__firstNvmeSize
|
||||
|
||||
def is_recommended_instance(self):
|
||||
if self.is_recommended_instance_size() and not self.is_unsupported_instance_class() and self.is_supported_instance_class():
|
||||
if not self.is_unsupported_instance_class() and self.is_supported_instance_class() and self.is_recommended_instance_size():
|
||||
# at least 1:2GB cpu:ram ratio , GCP is at 1:4, so this should be fine
|
||||
if self.cpu/self.memoryGB < 0.5:
|
||||
# 30:1 Disk/RAM ratio must be kept at least(AWS), we relax this a little bit
|
||||
# on GCP we are OK with 50:1 , n1-standard-2 can cope with 1 disk, not more
|
||||
diskCount = self.nvmeDiskCount
|
||||
# to reach max performance for > 16 disks we mandate 32 or more vcpus
|
||||
# https://cloud.google.com/compute/docs/disks/local-ssd#performance
|
||||
if diskCount >= 16 and self.cpu < 32:
|
||||
return False
|
||||
diskSize= self.firstNvmeSize
|
||||
if diskCount < 1:
|
||||
return False
|
||||
disktoramratio = (diskCount*diskSize)/self.memoryGB
|
||||
if (disktoramratio <= 50) and (disktoramratio > 0):
|
||||
return True
|
||||
diskCount = self.nvmeDiskCount
|
||||
# to reach max performance for > 16 disks we mandate 32 or more vcpus
|
||||
# https://cloud.google.com/compute/docs/disks/local-ssd#performance
|
||||
if diskCount >= 16 and self.cpu < 32:
|
||||
logging.warning(
|
||||
"This machine doesn't have enough CPUs for allocated number of NVMEs (at least 32 cpus for >=16 disks). Performance will suffer.")
|
||||
return False
|
||||
if diskCount < 1:
|
||||
logging.warning("No ephemeral disks were found.")
|
||||
return False
|
||||
diskSize = self.firstNvmeSize
|
||||
max_disktoramratio = 105
|
||||
# 30:1 Disk/RAM ratio must be kept at least(AWS), we relax this a little bit
|
||||
# on GCP we are OK with {max_disktoramratio}:1 , n1-standard-2 can cope with 1 disk, not more
|
||||
disktoramratio = (diskCount * diskSize) / self.memoryGB
|
||||
if (disktoramratio > max_disktoramratio):
|
||||
logging.warning(
|
||||
f"Instance disk-to-RAM ratio is {disktoramratio}, which is higher than the recommended ratio {max_disktoramratio}. Performance may suffer.")
|
||||
return False
|
||||
return True
|
||||
else:
|
||||
logging.warning("At least 2G of RAM per CPU is needed. Performance will suffer.")
|
||||
return False
|
||||
|
||||
def private_ipv4(self):
|
||||
@@ -365,6 +374,8 @@ class aws_instance:
|
||||
raise Exception("found more than one disk mounted at root'".format(root_dev_candidates))
|
||||
|
||||
root_dev = root_dev_candidates[0].device
|
||||
if root_dev == '/dev/root':
|
||||
root_dev = run('findmnt -n -o SOURCE /', shell=True, check=True, capture_output=True, encoding='utf-8').stdout.strip()
|
||||
nvmes_present = list(filter(nvme_re.match, os.listdir("/dev")))
|
||||
return {"root": [ root_dev ], "ephemeral": [ x for x in nvmes_present if not root_dev.startswith(os.path.join("/dev/", x)) ] }
|
||||
|
||||
@@ -398,7 +409,7 @@ class aws_instance:
|
||||
def is_aws_instance(cls):
|
||||
"""Check if it's AWS instance via query to metadata server."""
|
||||
try:
|
||||
curl(cls.META_DATA_BASE_URL, max_retries=2)
|
||||
curl(cls.META_DATA_BASE_URL, max_retries=2, retry_interval=1)
|
||||
return True
|
||||
except (urllib.error.URLError, urllib.error.HTTPError):
|
||||
return False
|
||||
@@ -462,7 +473,7 @@ class aws_instance:
|
||||
|
||||
def ebs_disks(self):
|
||||
"""Returns all EBS disks"""
|
||||
return set(self._disks["ephemeral"])
|
||||
return set(self._disks["ebs"])
|
||||
|
||||
def public_ipv4(self):
|
||||
"""Returns the public IPv4 address of this instance"""
|
||||
@@ -490,9 +501,7 @@ class aws_instance:
|
||||
return curl(self.META_DATA_BASE_URL + "user-data")
|
||||
|
||||
|
||||
# When a CLI tool is not installed, use relocatable CLI tool provided by Scylla
|
||||
scylla_env = os.environ.copy()
|
||||
scylla_env['PATH'] = '{}:{}'.format(scyllabindir(), scylla_env['PATH'])
|
||||
scylla_env['DEBIAN_FRONTEND'] = 'noninteractive'
|
||||
|
||||
def run(cmd, shell=False, silent=False, exception=True):
|
||||
|
||||
2
dist/common/sysctl.d/99-scylla-aio.conf
vendored
2
dist/common/sysctl.d/99-scylla-aio.conf
vendored
@@ -1,2 +0,0 @@
|
||||
# Raise max AIO events
|
||||
fs.aio-max-nr = 1048576
|
||||
4
dist/common/sysctl.d/99-scylla-inotify.conf
vendored
Normal file
4
dist/common/sysctl.d/99-scylla-inotify.conf
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
# allocate enough inotify instances for large machines
|
||||
# each tls instance needs 1 inotify instance, and there can be
|
||||
# multiple tls instances per shard.
|
||||
fs.inotify.max_user_instances = 1200
|
||||
@@ -9,8 +9,8 @@ if [[ $KVER =~ 3\.13\.0\-([0-9]+)-generic ]]; then
|
||||
else
|
||||
# expect failures in virtualized environments
|
||||
sysctl -p/usr/lib/sysctl.d/99-scylla-sched.conf || :
|
||||
sysctl -p/usr/lib/sysctl.d/99-scylla-aio.conf || :
|
||||
sysctl -p/usr/lib/sysctl.d/99-scylla-vm.conf || :
|
||||
sysctl -p/usr/lib/sysctl.d/99-scylla-inotify.conf || :
|
||||
fi
|
||||
|
||||
#DEBHELPER#
|
||||
|
||||
4
dist/docker/redhat/Dockerfile
vendored
4
dist/docker/redhat/Dockerfile
vendored
@@ -5,8 +5,8 @@ MAINTAINER Avi Kivity <avi@cloudius-systems.com>
|
||||
ENV container docker
|
||||
|
||||
# The SCYLLA_REPO_URL argument specifies the URL to the RPM repository this Docker image uses to install Scylla. The default value is the Scylla's unstable RPM repository, which contains the daily build.
|
||||
ARG SCYLLA_REPO_URL=http://downloads.scylladb.com/rpm/unstable/centos/master/latest/scylla.repo
|
||||
ARG VERSION=666.development
|
||||
ARG SCYLLA_REPO_URL=http://downloads.scylladb.com/rpm/unstable/centos/scylla-4.3/latest/scylla.repo
|
||||
ARG VERSION=4.3.rc0
|
||||
|
||||
ADD scylla_bashrc /scylla_bashrc
|
||||
|
||||
|
||||
18
dist/redhat/scylla.spec
vendored
18
dist/redhat/scylla.spec
vendored
@@ -76,13 +76,18 @@ getent passwd scylla || /usr/sbin/useradd -g scylla -s /sbin/nologin -r -d %{_sh
|
||||
%post server
|
||||
/opt/scylladb/scripts/scylla_post_install.sh
|
||||
|
||||
%systemd_post scylla-server.service
|
||||
if [ $1 -eq 1 ] ; then
|
||||
/usr/bin/systemctl preset scylla-server.service ||:
|
||||
fi
|
||||
|
||||
%preun server
|
||||
%systemd_preun scylla-server.service
|
||||
if [ $1 -eq 0 ] ; then
|
||||
/usr/bin/systemctl --no-reload disable scylla-server.service ||:
|
||||
/usr/bin/systemctl stop scylla-server.service ||:
|
||||
fi
|
||||
|
||||
%postun server
|
||||
%systemd_postun scylla-server.service
|
||||
/usr/bin/systemctl daemon-reload ||:
|
||||
|
||||
%posttrans server
|
||||
if [ -d /tmp/%{name}-%{version}-%{release} ]; then
|
||||
@@ -129,10 +134,9 @@ rm -rf $RPM_BUILD_ROOT
|
||||
%attr(0755,scylla,scylla) %dir %{_sharedstatedir}/scylla-housekeeping
|
||||
%ghost /etc/systemd/system/scylla-helper.slice.d/
|
||||
%ghost /etc/systemd/system/scylla-helper.slice.d/memory.conf
|
||||
%ghost /etc/systemd/system/scylla-server.service.d/
|
||||
%ghost /etc/systemd/system/scylla-server.service.d/capabilities.conf
|
||||
%ghost /etc/systemd/system/scylla-server.service.d/mounts.conf
|
||||
%ghost /etc/systemd/system/scylla-server.service.d/dependencies.conf
|
||||
/etc/systemd/system/scylla-server.service.d/dependencies.conf
|
||||
%ghost /etc/systemd/system/var-lib-systemd-coredump.mount
|
||||
%ghost /etc/systemd/system/scylla-cpupower.service
|
||||
%ghost /etc/systemd/system/var-lib-scylla.mount
|
||||
@@ -190,6 +194,8 @@ Summary: Scylla configuration package for the Linux kernel
|
||||
License: AGPLv3
|
||||
URL: http://www.scylladb.com/
|
||||
Requires: kmod
|
||||
# tuned overwrites our sysctl settings
|
||||
Obsoletes: tuned
|
||||
|
||||
%description kernel-conf
|
||||
This package contains Linux kernel configuration changes for the Scylla database. Install this package
|
||||
@@ -199,8 +205,8 @@ if Scylla is the main application on your server and you wish to optimize its la
|
||||
# We cannot use the sysctl_apply rpm macro because it is not present in 7.0
|
||||
# following is a "manual" expansion
|
||||
/usr/lib/systemd/systemd-sysctl 99-scylla-sched.conf >/dev/null 2>&1 || :
|
||||
/usr/lib/systemd/systemd-sysctl 99-scylla-aio.conf >/dev/null 2>&1 || :
|
||||
/usr/lib/systemd/systemd-sysctl 99-scylla-vm.conf >/dev/null 2>&1 || :
|
||||
/usr/lib/systemd/systemd-sysctl 99-scylla-inotify.conf >/dev/null 2>&1 || :
|
||||
|
||||
%files kernel-conf
|
||||
%defattr(-,root,root)
|
||||
|
||||
@@ -143,6 +143,7 @@ extern const std::string_view LWT;
|
||||
extern const std::string_view PER_TABLE_PARTITIONERS;
|
||||
extern const std::string_view PER_TABLE_CACHING;
|
||||
extern const std::string_view DIGEST_FOR_NULL_VALUES;
|
||||
extern const std::string_view ALTERNATOR_STREAMS;
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -62,6 +62,7 @@ constexpr std::string_view features::LWT = "LWT";
|
||||
constexpr std::string_view features::PER_TABLE_PARTITIONERS = "PER_TABLE_PARTITIONERS";
|
||||
constexpr std::string_view features::PER_TABLE_CACHING = "PER_TABLE_CACHING";
|
||||
constexpr std::string_view features::DIGEST_FOR_NULL_VALUES = "DIGEST_FOR_NULL_VALUES";
|
||||
constexpr std::string_view features::ALTERNATOR_STREAMS = "ALTERNATOR_STREAMS";
|
||||
|
||||
static logging::logger logger("features");
|
||||
|
||||
@@ -86,6 +87,7 @@ feature_service::feature_service(feature_config cfg) : _config(cfg)
|
||||
, _per_table_partitioners_feature(*this, features::PER_TABLE_PARTITIONERS)
|
||||
, _per_table_caching_feature(*this, features::PER_TABLE_CACHING)
|
||||
, _digest_for_null_values_feature(*this, features::DIGEST_FOR_NULL_VALUES)
|
||||
, _alternator_streams_feature(*this, features::ALTERNATOR_STREAMS)
|
||||
{}
|
||||
|
||||
feature_config feature_config_from_db_config(db::config& cfg, std::set<sstring> disabled) {
|
||||
@@ -116,8 +118,8 @@ feature_config feature_config_from_db_config(db::config& cfg, std::set<sstring>
|
||||
}
|
||||
}
|
||||
|
||||
if (!cfg.check_experimental(db::experimental_features_t::CDC)) {
|
||||
fcfg._disabled_features.insert(sstring(gms::features::CDC));
|
||||
if (!cfg.check_experimental(db::experimental_features_t::ALTERNATOR_STREAMS)) {
|
||||
fcfg._disabled_features.insert(sstring(gms::features::ALTERNATOR_STREAMS));
|
||||
}
|
||||
|
||||
return fcfg;
|
||||
@@ -187,6 +189,7 @@ std::set<std::string_view> feature_service::known_feature_set() {
|
||||
gms::features::UDF,
|
||||
gms::features::CDC,
|
||||
gms::features::DIGEST_FOR_NULL_VALUES,
|
||||
gms::features::ALTERNATOR_STREAMS,
|
||||
};
|
||||
|
||||
for (const sstring& s : _config._disabled_features) {
|
||||
@@ -266,6 +269,7 @@ void feature_service::enable(const std::set<std::string_view>& list) {
|
||||
std::ref(_per_table_partitioners_feature),
|
||||
std::ref(_per_table_caching_feature),
|
||||
std::ref(_digest_for_null_values_feature),
|
||||
std::ref(_alternator_streams_feature),
|
||||
})
|
||||
{
|
||||
if (list.contains(f.name())) {
|
||||
|
||||
@@ -92,6 +92,7 @@ private:
|
||||
gms::feature _per_table_partitioners_feature;
|
||||
gms::feature _per_table_caching_feature;
|
||||
gms::feature _digest_for_null_values_feature;
|
||||
gms::feature _alternator_streams_feature;
|
||||
|
||||
public:
|
||||
bool cluster_supports_user_defined_functions() const {
|
||||
@@ -160,6 +161,10 @@ public:
|
||||
bool cluster_supports_lwt() const {
|
||||
return bool(_lwt_feature);
|
||||
}
|
||||
|
||||
bool cluster_supports_alternator_streams() const {
|
||||
return bool(_alternator_streams_feature);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace gms
|
||||
|
||||
@@ -1774,6 +1774,8 @@ future<> gossiper::do_shadow_round(std::unordered_set<gms::inet_address> nodes,
|
||||
}).handle_exception_type([node, &fall_back_to_syn_msg] (seastar::rpc::unknown_verb_error&) {
|
||||
logger.warn("Node {} does not support get_endpoint_states verb", node);
|
||||
fall_back_to_syn_msg = true;
|
||||
}).handle_exception_type([node, &nodes_down] (seastar::rpc::timeout_error&) {
|
||||
logger.warn("The get_endpoint_states verb to node {} was timeout", node);
|
||||
}).handle_exception_type([node, &nodes_down] (seastar::rpc::closed_error&) {
|
||||
nodes_down++;
|
||||
logger.warn("Node {} is down for get_endpoint_states verb", node);
|
||||
|
||||
22
install.sh
22
install.sh
@@ -142,11 +142,15 @@ DEBIAN_SSL_CERT_FILE="/etc/ssl/certs/ca-certificates.crt"
|
||||
if [ -f "\${DEBIAN_SSL_CERT_FILE}" ]; then
|
||||
c=\${DEBIAN_SSL_CERT_FILE}
|
||||
fi
|
||||
PYTHONPATH="\${d}:\${d}/libexec:\$PYTHONPATH" PATH="\${d}/$pythonpath:\${PATH}" SSL_CERT_FILE="\${c}" exec -a "\$0" "\${d}/libexec/\${b}" "\$@"
|
||||
PYTHONPATH="\${d}:\${d}/libexec:\$PYTHONPATH" PATH="\${d}/../bin:\${d}/$pythonpath:\${PATH}" SSL_CERT_FILE="\${c}" exec -a "\$0" "\${d}/libexec/\${b}" "\$@"
|
||||
EOF
|
||||
chmod +x "$install"
|
||||
}
|
||||
|
||||
install() {
|
||||
command install -Z "$@"
|
||||
}
|
||||
|
||||
installconfig() {
|
||||
local perm="$1"
|
||||
local src="$2"
|
||||
@@ -197,13 +201,13 @@ if [ -z "$python3" ]; then
|
||||
fi
|
||||
rpython3=$(realpath -m "$root/$python3")
|
||||
if ! $nonroot; then
|
||||
retc="$root/etc"
|
||||
rsysconfdir="$root/$sysconfdir"
|
||||
rusr="$root/usr"
|
||||
rsystemd="$rusr/lib/systemd/system"
|
||||
retc=$(realpath -m "$root/etc")
|
||||
rsysconfdir=$(realpath -m "$root/$sysconfdir")
|
||||
rusr=$(realpath -m "$root/usr")
|
||||
rsystemd=$(realpath -m "$rusr/lib/systemd/system")
|
||||
rdoc="$rprefix/share/doc"
|
||||
rdata="$root/var/lib/scylla"
|
||||
rhkdata="$root/var/lib/scylla-housekeeping"
|
||||
rdata=$(realpath -m "$root/var/lib/scylla")
|
||||
rhkdata=$(realpath -m "$root/var/lib/scylla-housekeeping")
|
||||
else
|
||||
retc="$rprefix/etc"
|
||||
rsysconfdir="$rprefix/$sysconfdir"
|
||||
@@ -412,6 +416,10 @@ elif ! $packaging; then
|
||||
chown -R scylla:scylla $rdata
|
||||
chown -R scylla:scylla $rhkdata
|
||||
|
||||
for file in dist/common/sysctl.d/*.conf; do
|
||||
bn=$(basename "$file")
|
||||
sysctl -p "$rusr"/lib/sysctl.d/"$bn"
|
||||
done
|
||||
$rprefix/scripts/scylla_post_install.sh
|
||||
echo "Scylla offline install completed."
|
||||
fi
|
||||
|
||||
3
main.cc
3
main.cc
@@ -1023,8 +1023,7 @@ int main(int ac, char** av) {
|
||||
proxy.invoke_on_all([] (service::storage_proxy& local_proxy) {
|
||||
auto& ss = service::get_local_storage_service();
|
||||
ss.register_subscriber(&local_proxy);
|
||||
//FIXME: discarded future
|
||||
(void)local_proxy.start_hints_manager(gms::get_local_gossiper().shared_from_this(), ss.shared_from_this());
|
||||
return local_proxy.start_hints_manager(gms::get_local_gossiper().shared_from_this(), ss.shared_from_this());
|
||||
}).get();
|
||||
|
||||
supervisor::notify("starting messaging service");
|
||||
|
||||
@@ -2044,11 +2044,13 @@ public:
|
||||
}
|
||||
}
|
||||
void abort(std::exception_ptr ep) {
|
||||
_end_of_stream = true;
|
||||
_ex = std::move(ep);
|
||||
if (_full) {
|
||||
_full->set_exception(_ex);
|
||||
_full.reset();
|
||||
} else if (_not_full) {
|
||||
_not_full->set_exception(_ex);
|
||||
_not_full.reset();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
52
mutation_writer/feed_writers.cc
Normal file
52
mutation_writer/feed_writers.cc
Normal file
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright (C) 2021 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "feed_writers.hh"
|
||||
|
||||
namespace mutation_writer {
|
||||
|
||||
bucket_writer::bucket_writer(schema_ptr schema, std::pair<flat_mutation_reader, queue_reader_handle> queue_reader, reader_consumer& consumer)
|
||||
: _schema(schema)
|
||||
, _handle(std::move(queue_reader.second))
|
||||
, _consume_fut(consumer(std::move(queue_reader.first)))
|
||||
{ }
|
||||
|
||||
bucket_writer::bucket_writer(schema_ptr schema, reader_permit permit, reader_consumer& consumer)
|
||||
: bucket_writer(schema, make_queue_reader(schema, std::move(permit)), consumer)
|
||||
{ }
|
||||
|
||||
future<> bucket_writer::consume(mutation_fragment mf) {
|
||||
return _handle.push(std::move(mf));
|
||||
}
|
||||
|
||||
void bucket_writer::consume_end_of_stream() {
|
||||
_handle.push_end_of_stream();
|
||||
}
|
||||
|
||||
void bucket_writer::abort(std::exception_ptr ep) noexcept {
|
||||
_handle.abort(std::move(ep));
|
||||
}
|
||||
|
||||
future<> bucket_writer::close() noexcept {
|
||||
return std::move(_consume_fut);
|
||||
}
|
||||
|
||||
} // mutation_writer
|
||||
@@ -22,10 +22,31 @@
|
||||
#pragma once
|
||||
|
||||
#include "flat_mutation_reader.hh"
|
||||
#include "mutation_reader.hh"
|
||||
|
||||
namespace mutation_writer {
|
||||
using reader_consumer = noncopyable_function<future<> (flat_mutation_reader)>;
|
||||
|
||||
class bucket_writer {
|
||||
schema_ptr _schema;
|
||||
queue_reader_handle _handle;
|
||||
future<> _consume_fut;
|
||||
|
||||
private:
|
||||
bucket_writer(schema_ptr schema, std::pair<flat_mutation_reader, queue_reader_handle> queue_reader, reader_consumer& consumer);
|
||||
|
||||
public:
|
||||
bucket_writer(schema_ptr schema, reader_permit permit, reader_consumer& consumer);
|
||||
|
||||
future<> consume(mutation_fragment mf);
|
||||
|
||||
void consume_end_of_stream();
|
||||
|
||||
void abort(std::exception_ptr ep) noexcept;
|
||||
|
||||
future<> close() noexcept;
|
||||
};
|
||||
|
||||
template <typename Writer>
|
||||
requires MutationFragmentConsumer<Writer, future<>>
|
||||
future<> feed_writer(flat_mutation_reader&& rd, Writer&& wr) {
|
||||
@@ -36,8 +57,22 @@ future<> feed_writer(flat_mutation_reader&& rd, Writer&& wr) {
|
||||
auto f2 = rd.is_buffer_empty() ? rd.fill_buffer(db::no_timeout) : make_ready_future<>();
|
||||
return when_all_succeed(std::move(f1), std::move(f2)).discard_result();
|
||||
});
|
||||
}).finally([&wr] {
|
||||
return wr.consume_end_of_stream();
|
||||
}).then_wrapped([&wr] (future<> f) {
|
||||
if (f.failed()) {
|
||||
auto ex = f.get_exception();
|
||||
wr.abort(ex);
|
||||
return wr.close().then_wrapped([ex = std::move(ex)] (future<> f) mutable {
|
||||
if (f.failed()) {
|
||||
// The consumer is expected to fail when aborted,
|
||||
// so just ignore any exception.
|
||||
(void)f.get_exception();
|
||||
}
|
||||
return make_exception_future<>(std::move(ex));
|
||||
});
|
||||
} else {
|
||||
wr.consume_end_of_stream();
|
||||
return wr.close();
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
@@ -31,33 +31,7 @@
|
||||
namespace mutation_writer {
|
||||
|
||||
class shard_based_splitting_mutation_writer {
|
||||
class shard_writer {
|
||||
queue_reader_handle _handle;
|
||||
future<> _consume_fut;
|
||||
private:
|
||||
shard_writer(schema_ptr schema, std::pair<flat_mutation_reader, queue_reader_handle> queue_reader, reader_consumer& consumer)
|
||||
: _handle(std::move(queue_reader.second))
|
||||
, _consume_fut(consumer(std::move(queue_reader.first))) {
|
||||
}
|
||||
|
||||
public:
|
||||
shard_writer(schema_ptr schema, reader_permit permit, reader_consumer& consumer)
|
||||
: shard_writer(schema, make_queue_reader(schema, std::move(permit)), consumer) {
|
||||
}
|
||||
future<> consume(mutation_fragment mf) {
|
||||
return _handle.push(std::move(mf));
|
||||
}
|
||||
future<> consume_end_of_stream() {
|
||||
// consume_end_of_stream is always called from a finally block,
|
||||
// and that's because we wait for _consume_fut to return. We
|
||||
// don't want to generate another exception here if the read was
|
||||
// aborted.
|
||||
if (!_handle.is_terminated()) {
|
||||
_handle.push_end_of_stream();
|
||||
}
|
||||
return std::move(_consume_fut);
|
||||
}
|
||||
};
|
||||
using shard_writer = bucket_writer;
|
||||
|
||||
private:
|
||||
schema_ptr _schema;
|
||||
@@ -102,12 +76,23 @@ public:
|
||||
return write_to_shard(mutation_fragment(*_schema, _permit, std::move(pe)));
|
||||
}
|
||||
|
||||
future<> consume_end_of_stream() {
|
||||
return parallel_for_each(_shards, [] (std::optional<shard_writer>& shard) {
|
||||
if (!shard) {
|
||||
return make_ready_future<>();
|
||||
void consume_end_of_stream() {
|
||||
for (auto& shard : _shards) {
|
||||
if (shard) {
|
||||
shard->consume_end_of_stream();
|
||||
}
|
||||
return shard->consume_end_of_stream();
|
||||
}
|
||||
}
|
||||
void abort(std::exception_ptr ep) {
|
||||
for (auto&& shard : _shards) {
|
||||
if (shard) {
|
||||
shard->abort(ep);
|
||||
}
|
||||
}
|
||||
}
|
||||
future<> close() noexcept {
|
||||
return parallel_for_each(_shards, [] (std::optional<shard_writer>& shard) {
|
||||
return shard ? shard->close() : make_ready_future<>();
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
@@ -109,22 +109,12 @@ small_flat_map<Key, Value, Size>::find(const key_type& k) {
|
||||
class timestamp_based_splitting_mutation_writer {
|
||||
using bucket_id = int64_t;
|
||||
|
||||
class bucket_writer {
|
||||
schema_ptr _schema;
|
||||
queue_reader_handle _handle;
|
||||
future<> _consume_fut;
|
||||
class timestamp_bucket_writer : public bucket_writer {
|
||||
bool _has_current_partition = false;
|
||||
|
||||
private:
|
||||
bucket_writer(schema_ptr schema, std::pair<flat_mutation_reader, queue_reader_handle> queue_reader, reader_consumer& consumer)
|
||||
: _schema(std::move(schema))
|
||||
, _handle(std::move(queue_reader.second))
|
||||
, _consume_fut(consumer(std::move(queue_reader.first))) {
|
||||
}
|
||||
|
||||
public:
|
||||
bucket_writer(schema_ptr schema, reader_permit permit, reader_consumer& consumer)
|
||||
: bucket_writer(schema, make_queue_reader(schema, std::move(permit)), consumer) {
|
||||
timestamp_bucket_writer(schema_ptr schema, reader_permit permit, reader_consumer& consumer)
|
||||
: bucket_writer(schema, std::move(permit), consumer) {
|
||||
}
|
||||
void set_has_current_partition() {
|
||||
_has_current_partition = true;
|
||||
@@ -135,15 +125,6 @@ class timestamp_based_splitting_mutation_writer {
|
||||
bool has_current_partition() const {
|
||||
return _has_current_partition;
|
||||
}
|
||||
future<> consume(mutation_fragment mf) {
|
||||
return _handle.push(std::move(mf));
|
||||
}
|
||||
future<> consume_end_of_stream() {
|
||||
if (!_handle.is_terminated()) {
|
||||
_handle.push_end_of_stream();
|
||||
}
|
||||
return std::move(_consume_fut);
|
||||
}
|
||||
};
|
||||
|
||||
private:
|
||||
@@ -152,7 +133,7 @@ private:
|
||||
classify_by_timestamp _classifier;
|
||||
reader_consumer _consumer;
|
||||
partition_start _current_partition_start;
|
||||
std::unordered_map<bucket_id, bucket_writer> _buckets;
|
||||
std::unordered_map<bucket_id, timestamp_bucket_writer> _buckets;
|
||||
std::vector<bucket_id> _buckets_used_for_current_partition;
|
||||
|
||||
private:
|
||||
@@ -183,9 +164,19 @@ public:
|
||||
future<> consume(range_tombstone&& rt);
|
||||
future<> consume(partition_end&& pe);
|
||||
|
||||
future<> consume_end_of_stream() {
|
||||
return parallel_for_each(_buckets, [] (std::pair<const bucket_id, bucket_writer>& bucket) {
|
||||
return bucket.second.consume_end_of_stream();
|
||||
void consume_end_of_stream() {
|
||||
for (auto& b : _buckets) {
|
||||
b.second.consume_end_of_stream();
|
||||
}
|
||||
}
|
||||
void abort(std::exception_ptr ep) {
|
||||
for (auto&& b : _buckets) {
|
||||
b.second.abort(ep);
|
||||
}
|
||||
}
|
||||
future<> close() noexcept {
|
||||
return parallel_for_each(_buckets, [] (std::pair<const bucket_id, timestamp_bucket_writer>& b) {
|
||||
return b.second.close();
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
@@ -542,12 +542,12 @@ partition_snapshot_ptr partition_entry::read(logalloc::region& r,
|
||||
return partition_snapshot_ptr(std::move(snp));
|
||||
}
|
||||
|
||||
std::vector<range_tombstone>
|
||||
partition_snapshot::range_tombstone_result
|
||||
partition_snapshot::range_tombstones(position_in_partition_view start, position_in_partition_view end)
|
||||
{
|
||||
partition_version* v = &*version();
|
||||
if (!v->next()) {
|
||||
return boost::copy_range<std::vector<range_tombstone>>(
|
||||
return boost::copy_range<range_tombstone_result>(
|
||||
v->partition().row_tombstones().slice(*_schema, start, end));
|
||||
}
|
||||
range_tombstone_list list(*_schema);
|
||||
@@ -557,10 +557,10 @@ partition_snapshot::range_tombstones(position_in_partition_view start, position_
|
||||
}
|
||||
v = v->next();
|
||||
}
|
||||
return boost::copy_range<std::vector<range_tombstone>>(list.slice(*_schema, start, end));
|
||||
return boost::copy_range<range_tombstone_result>(list.slice(*_schema, start, end));
|
||||
}
|
||||
|
||||
std::vector<range_tombstone>
|
||||
partition_snapshot::range_tombstone_result
|
||||
partition_snapshot::range_tombstones()
|
||||
{
|
||||
return range_tombstones(
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#include "utils/anchorless_list.hh"
|
||||
#include "utils/logalloc.hh"
|
||||
#include "utils/coroutine.hh"
|
||||
#include "utils/chunked_vector.hh"
|
||||
|
||||
#include <boost/intrusive/parent_from_member.hpp>
|
||||
#include <boost/intrusive/slist.hpp>
|
||||
@@ -400,10 +401,13 @@ public:
|
||||
::static_row static_row(bool digest_requested) const;
|
||||
bool static_row_continuous() const;
|
||||
mutation_partition squashed() const;
|
||||
|
||||
using range_tombstone_result = utils::chunked_vector<range_tombstone>;
|
||||
|
||||
// Returns range tombstones overlapping with [start, end)
|
||||
std::vector<range_tombstone> range_tombstones(position_in_partition_view start, position_in_partition_view end);
|
||||
range_tombstone_result range_tombstones(position_in_partition_view start, position_in_partition_view end);
|
||||
// Returns all range tombstones
|
||||
std::vector<range_tombstone> range_tombstones();
|
||||
range_tombstone_result range_tombstones();
|
||||
};
|
||||
|
||||
class partition_snapshot_ptr {
|
||||
|
||||
@@ -205,6 +205,10 @@ public:
|
||||
auto to_block = std::min(_used_memory - _blocked_bytes, n);
|
||||
_blocked_bytes += to_block;
|
||||
stop = (_limiter->update_and_check(to_block) && _stop_on_global_limit) || stop;
|
||||
if (stop && !_short_read_allowed) {
|
||||
// If we are here we stopped because of the global limit.
|
||||
throw std::runtime_error("Maximum amount of memory for building query results is exhausted, unpaged query cannot be finished");
|
||||
}
|
||||
}
|
||||
return stop;
|
||||
}
|
||||
|
||||
@@ -75,7 +75,7 @@ class reader_permit::impl : public boost::intrusive::list_base_hook<boost::intru
|
||||
sstring _op_name;
|
||||
std::string_view _op_name_view;
|
||||
reader_resources _resources;
|
||||
reader_permit::state _state = reader_permit::state::registered;
|
||||
reader_permit::state _state = reader_permit::state::active;
|
||||
|
||||
public:
|
||||
struct value_tag {};
|
||||
@@ -123,22 +123,17 @@ public:
|
||||
}
|
||||
|
||||
void on_admission() {
|
||||
_state = reader_permit::state::admitted;
|
||||
_semaphore.consume(_resources);
|
||||
_state = reader_permit::state::active;
|
||||
}
|
||||
|
||||
void consume(reader_resources res) {
|
||||
_resources += res;
|
||||
if (_state == reader_permit::state::admitted) {
|
||||
_semaphore.consume(res);
|
||||
}
|
||||
_semaphore.consume(res);
|
||||
}
|
||||
|
||||
void signal(reader_resources res) {
|
||||
_resources -= res;
|
||||
if (_state == reader_permit::state::admitted) {
|
||||
_semaphore.signal(res);
|
||||
}
|
||||
_semaphore.signal(res);
|
||||
}
|
||||
|
||||
reader_resources resources() const {
|
||||
@@ -205,14 +200,11 @@ reader_resources reader_permit::consumed_resources() const {
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, reader_permit::state s) {
|
||||
switch (s) {
|
||||
case reader_permit::state::registered:
|
||||
os << "registered";
|
||||
break;
|
||||
case reader_permit::state::waiting:
|
||||
os << "waiting";
|
||||
break;
|
||||
case reader_permit::state::admitted:
|
||||
os << "admitted";
|
||||
case reader_permit::state::active:
|
||||
os << "active";
|
||||
break;
|
||||
}
|
||||
return os;
|
||||
@@ -249,7 +241,7 @@ struct permit_group_key_hash {
|
||||
|
||||
using permit_groups = std::unordered_map<permit_group_key, permit_stats, permit_group_key_hash>;
|
||||
|
||||
static permit_stats do_dump_reader_permit_diagnostics(std::ostream& os, const permit_groups& permits, reader_permit::state state, bool sort_by_memory) {
|
||||
static permit_stats do_dump_reader_permit_diagnostics(std::ostream& os, const permit_groups& permits, reader_permit::state state) {
|
||||
struct permit_summary {
|
||||
const schema* s;
|
||||
std::string_view op_name;
|
||||
@@ -265,25 +257,17 @@ static permit_stats do_dump_reader_permit_diagnostics(std::ostream& os, const pe
|
||||
}
|
||||
}
|
||||
|
||||
std::ranges::sort(permit_summaries, [sort_by_memory] (const permit_summary& a, const permit_summary& b) {
|
||||
if (sort_by_memory) {
|
||||
return a.memory < b.memory;
|
||||
} else {
|
||||
return a.count < b.count;
|
||||
}
|
||||
std::ranges::sort(permit_summaries, [] (const permit_summary& a, const permit_summary& b) {
|
||||
return a.memory < b.memory;
|
||||
});
|
||||
|
||||
permit_stats total;
|
||||
|
||||
auto print_line = [&os, sort_by_memory] (auto col1, auto col2, auto col3) {
|
||||
if (sort_by_memory) {
|
||||
fmt::print(os, "{}\t{}\t{}\n", col2, col1, col3);
|
||||
} else {
|
||||
fmt::print(os, "{}\t{}\t{}\n", col1, col2, col3);
|
||||
}
|
||||
auto print_line = [&os] (auto col1, auto col2, auto col3) {
|
||||
fmt::print(os, "{}\t{}\t{}\n", col2, col1, col3);
|
||||
};
|
||||
|
||||
fmt::print(os, "Permits with state {}, sorted by {}\n", state, sort_by_memory ? "memory" : "count");
|
||||
fmt::print(os, "Permits with state {}\n", state);
|
||||
print_line("count", "memory", "name");
|
||||
for (const auto& summary : permit_summaries) {
|
||||
total.count += summary.count;
|
||||
@@ -309,11 +293,9 @@ static void do_dump_reader_permit_diagnostics(std::ostream& os, const reader_con
|
||||
permit_stats total;
|
||||
|
||||
fmt::print(os, "Semaphore {}: {}, dumping permit diagnostics:\n", semaphore.name(), problem);
|
||||
total += do_dump_reader_permit_diagnostics(os, permits, reader_permit::state::admitted, true);
|
||||
total += do_dump_reader_permit_diagnostics(os, permits, reader_permit::state::active);
|
||||
fmt::print(os, "\n");
|
||||
total += do_dump_reader_permit_diagnostics(os, permits, reader_permit::state::waiting, false);
|
||||
fmt::print(os, "\n");
|
||||
total += do_dump_reader_permit_diagnostics(os, permits, reader_permit::state::registered, false);
|
||||
total += do_dump_reader_permit_diagnostics(os, permits, reader_permit::state::waiting);
|
||||
fmt::print(os, "\n");
|
||||
fmt::print(os, "Total: permits: {}, memory: {}\n", total.count, utils::to_hr_size(total.memory));
|
||||
}
|
||||
@@ -374,7 +356,7 @@ reader_concurrency_semaphore::~reader_concurrency_semaphore() {
|
||||
reader_concurrency_semaphore::inactive_read_handle reader_concurrency_semaphore::register_inactive_read(std::unique_ptr<inactive_read> ir) {
|
||||
// Implies _inactive_reads.empty(), we don't queue new readers before
|
||||
// evicting all inactive reads.
|
||||
if (_wait_list.empty()) {
|
||||
if (_wait_list.empty() && _resources.memory > 0) {
|
||||
const auto [it, _] = _inactive_reads.emplace(_next_id++, std::move(ir));
|
||||
(void)_;
|
||||
++_stats.inactive_reads;
|
||||
@@ -424,13 +406,13 @@ bool reader_concurrency_semaphore::try_evict_one_inactive_read() {
|
||||
}
|
||||
|
||||
bool reader_concurrency_semaphore::has_available_units(const resources& r) const {
|
||||
return bool(_resources) && _resources >= r;
|
||||
// Special case: when there is no active reader (based on count) admit one
|
||||
// regardless of availability of memory.
|
||||
return (bool(_resources) && _resources >= r) || _resources.count == _initial_resources.count;
|
||||
}
|
||||
|
||||
bool reader_concurrency_semaphore::may_proceed(const resources& r) const {
|
||||
// Special case: when there is no active reader (based on count) admit one
|
||||
// regardless of availability of memory.
|
||||
return _wait_list.empty() && (has_available_units(r) || _resources.count == _initial_resources.count);
|
||||
return _wait_list.empty() && has_available_units(r);
|
||||
}
|
||||
|
||||
future<reader_permit::resource_units> reader_concurrency_semaphore::do_wait_admission(reader_permit permit, size_t memory,
|
||||
@@ -480,6 +462,12 @@ void reader_concurrency_semaphore::broken(std::exception_ptr ex) {
|
||||
}
|
||||
}
|
||||
|
||||
std::string reader_concurrency_semaphore::dump_diagnostics() const {
|
||||
std::ostringstream os;
|
||||
do_dump_reader_permit_diagnostics(os, *this, *_permit_list, "user request");
|
||||
return os.str();
|
||||
}
|
||||
|
||||
// A file that tracks the memory usage of buffers resulting from read
|
||||
// operations.
|
||||
class tracking_file_impl : public file_impl {
|
||||
|
||||
@@ -231,4 +231,6 @@ public:
|
||||
}
|
||||
|
||||
void broken(std::exception_ptr ex);
|
||||
|
||||
std::string dump_diagnostics() const;
|
||||
};
|
||||
|
||||
@@ -91,9 +91,8 @@ public:
|
||||
class resource_units;
|
||||
|
||||
enum class state {
|
||||
registered, // read is registered, but didn't attempt admission yet
|
||||
waiting, // waiting for admission
|
||||
admitted,
|
||||
active,
|
||||
};
|
||||
|
||||
class impl;
|
||||
|
||||
@@ -509,7 +509,7 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
class repair_writer {
|
||||
class repair_writer : public enable_lw_shared_from_this<repair_writer> {
|
||||
schema_ptr _schema;
|
||||
reader_permit _permit;
|
||||
uint64_t _estimated_partitions;
|
||||
@@ -569,6 +569,7 @@ public:
|
||||
table& t = db.local().find_column_family(_schema->id());
|
||||
auto [queue_reader, queue_handle] = make_queue_reader(_schema, _permit);
|
||||
_mq[node_idx] = std::move(queue_handle);
|
||||
auto writer = shared_from_this();
|
||||
_writer_done[node_idx] = mutation_writer::distribute_reader_and_consume_on_shards(_schema, std::move(queue_reader),
|
||||
[&db, reason = this->_reason, estimated_partitions = this->_estimated_partitions] (flat_mutation_reader reader) {
|
||||
auto& t = db.local().find_column_family(reader.schema());
|
||||
@@ -598,13 +599,13 @@ public:
|
||||
return consumer(std::move(reader));
|
||||
});
|
||||
},
|
||||
t.stream_in_progress()).then([this, node_idx] (uint64_t partitions) {
|
||||
t.stream_in_progress()).then([node_idx, writer] (uint64_t partitions) {
|
||||
rlogger.debug("repair_writer: keyspace={}, table={}, managed to write partitions={} to sstable",
|
||||
_schema->ks_name(), _schema->cf_name(), partitions);
|
||||
}).handle_exception([this, node_idx] (std::exception_ptr ep) {
|
||||
writer->_schema->ks_name(), writer->_schema->cf_name(), partitions);
|
||||
}).handle_exception([node_idx, writer] (std::exception_ptr ep) {
|
||||
rlogger.warn("repair_writer: keyspace={}, table={}, multishard_writer failed: {}",
|
||||
_schema->ks_name(), _schema->cf_name(), ep);
|
||||
_mq[node_idx]->abort(ep);
|
||||
writer->_schema->ks_name(), writer->_schema->cf_name(), ep);
|
||||
writer->_mq[node_idx]->abort(ep);
|
||||
return make_exception_future<>(std::move(ep));
|
||||
});
|
||||
}
|
||||
@@ -718,7 +719,7 @@ private:
|
||||
size_t _nr_peer_nodes= 1;
|
||||
repair_stats _stats;
|
||||
repair_reader _repair_reader;
|
||||
repair_writer _repair_writer;
|
||||
lw_shared_ptr<repair_writer> _repair_writer;
|
||||
// Contains rows read from disk
|
||||
std::list<repair_row> _row_buf;
|
||||
// Contains rows we are working on to sync between peers
|
||||
@@ -822,7 +823,7 @@ public:
|
||||
_seed,
|
||||
repair_reader::is_local_reader(_repair_master || _same_sharding_config)
|
||||
)
|
||||
, _repair_writer(_schema, _permit, _estimated_partitions, _nr_peer_nodes, _reason)
|
||||
, _repair_writer(make_lw_shared<repair_writer>(_schema, _permit, _estimated_partitions, _nr_peer_nodes, _reason))
|
||||
, _sink_source_for_get_full_row_hashes(_repair_meta_id, _nr_peer_nodes,
|
||||
[&ms] (uint32_t repair_meta_id, netw::messaging_service::msg_addr addr) {
|
||||
return ms.local().make_sink_and_source_for_repair_get_full_row_hashes_with_rpc_stream(repair_meta_id, addr);
|
||||
@@ -855,7 +856,7 @@ public:
|
||||
auto f2 = _sink_source_for_get_row_diff.close();
|
||||
auto f3 = _sink_source_for_put_row_diff.close();
|
||||
return when_all_succeed(std::move(gate_future), std::move(f1), std::move(f2), std::move(f3)).discard_result().finally([this] {
|
||||
return _repair_writer.wait_for_writer_done();
|
||||
return _repair_writer->wait_for_writer_done();
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1340,8 +1341,8 @@ private:
|
||||
|
||||
future<> do_apply_rows(std::list<repair_row>&& row_diff, unsigned node_idx, update_working_row_buf update_buf) {
|
||||
return do_with(std::move(row_diff), [this, node_idx, update_buf] (std::list<repair_row>& row_diff) {
|
||||
return with_semaphore(_repair_writer.sem(), 1, [this, node_idx, update_buf, &row_diff] {
|
||||
_repair_writer.create_writer(_db, node_idx);
|
||||
return with_semaphore(_repair_writer->sem(), 1, [this, node_idx, update_buf, &row_diff] {
|
||||
_repair_writer->create_writer(_db, node_idx);
|
||||
return repeat([this, node_idx, update_buf, &row_diff] () mutable {
|
||||
if (row_diff.empty()) {
|
||||
return make_ready_future<stop_iteration>(stop_iteration::yes);
|
||||
@@ -1355,7 +1356,7 @@ private:
|
||||
// to_repair_rows_list above where the repair_row is created.
|
||||
mutation_fragment mf = std::move(r.get_mutation_fragment());
|
||||
auto dk_with_hash = r.get_dk_with_hash();
|
||||
return _repair_writer.do_write(node_idx, std::move(dk_with_hash), std::move(mf)).then([&row_diff] {
|
||||
return _repair_writer->do_write(node_idx, std::move(dk_with_hash), std::move(mf)).then([&row_diff] {
|
||||
row_diff.pop_front();
|
||||
return make_ready_future<stop_iteration>(stop_iteration::no);
|
||||
});
|
||||
|
||||
@@ -1263,7 +1263,9 @@ flat_mutation_reader cache_entry::read(row_cache& rc, read_context& reader, row_
|
||||
// Assumes reader is in the corresponding partition
|
||||
flat_mutation_reader cache_entry::do_read(row_cache& rc, read_context& reader) {
|
||||
auto snp = _pe.read(rc._tracker.region(), rc._tracker.cleaner(), _schema, &rc._tracker, reader.phase());
|
||||
auto ckr = query::clustering_key_filter_ranges::get_ranges(*_schema, reader.slice(), _key.key());
|
||||
auto ckr = with_linearized_managed_bytes([&] {
|
||||
return query::clustering_key_filter_ranges::get_ranges(*_schema, reader.slice(), _key.key());
|
||||
});
|
||||
auto r = make_cache_flat_mutation_reader(_schema, _key, std::move(ckr), rc, reader.shared_from_this(), std::move(snp));
|
||||
r.upgrade_schema(rc.schema());
|
||||
r.upgrade_schema(reader.schema());
|
||||
|
||||
@@ -456,6 +456,9 @@ schema::schema(const schema& o)
|
||||
rebuild();
|
||||
if (o.is_view()) {
|
||||
_view_info = std::make_unique<::view_info>(*this, o.view_info()->raw());
|
||||
if (o.view_info()->base_info()) {
|
||||
_view_info->set_base_info(o.view_info()->base_info());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -859,7 +862,7 @@ std::ostream& schema::describe(database& db, std::ostream& os) const {
|
||||
os << "}";
|
||||
os << "\n AND comment = '" << comment()<< "'";
|
||||
os << "\n AND compaction = {'class': '" << sstables::compaction_strategy::name(compaction_strategy()) << "'";
|
||||
map_as_cql_param(os, compaction_strategy_options()) << "}";
|
||||
map_as_cql_param(os, compaction_strategy_options(), false) << "}";
|
||||
os << "\n AND compression = {";
|
||||
map_as_cql_param(os, get_compressor_params().get_options());
|
||||
os << "}";
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
#include "schema_registry.hh"
|
||||
#include "log.hh"
|
||||
#include "db/schema_tables.hh"
|
||||
#include "view_info.hh"
|
||||
|
||||
static logging::logger slogger("schema_registry");
|
||||
|
||||
@@ -274,22 +275,43 @@ global_schema_ptr::global_schema_ptr(global_schema_ptr&& o) noexcept {
|
||||
assert(o._cpu_of_origin == current);
|
||||
_ptr = std::move(o._ptr);
|
||||
_cpu_of_origin = current;
|
||||
_base_schema = std::move(o._base_schema);
|
||||
}
|
||||
|
||||
schema_ptr global_schema_ptr::get() const {
|
||||
if (this_shard_id() == _cpu_of_origin) {
|
||||
return _ptr;
|
||||
} else {
|
||||
// 'e' points to a foreign entry, but we know it won't be evicted
|
||||
// because _ptr is preventing this.
|
||||
const schema_registry_entry& e = *_ptr->registry_entry();
|
||||
schema_ptr s = local_schema_registry().get_or_null(e.version());
|
||||
if (!s) {
|
||||
s = local_schema_registry().get_or_load(e.version(), [&e](table_schema_version) {
|
||||
return e.frozen();
|
||||
});
|
||||
auto registered_schema = [](const schema_registry_entry& e) {
|
||||
schema_ptr ret = local_schema_registry().get_or_null(e.version());
|
||||
if (!ret) {
|
||||
ret = local_schema_registry().get_or_load(e.version(), [&e](table_schema_version) {
|
||||
return e.frozen();
|
||||
});
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
|
||||
schema_ptr registered_bs;
|
||||
// the following code contains registry entry dereference of a foreign shard
|
||||
// however, it is guaranteed to succeed since we made sure in the constructor
|
||||
// that _base_schema and _ptr will have a registry on the foreign shard where this
|
||||
// object originated so as long as this object lives the registry entries live too
|
||||
// and it is safe to reference them on foreign shards.
|
||||
if (_base_schema) {
|
||||
registered_bs = registered_schema(*_base_schema->registry_entry());
|
||||
if (_base_schema->registry_entry()->is_synced()) {
|
||||
registered_bs->registry_entry()->mark_synced();
|
||||
}
|
||||
}
|
||||
if (e.is_synced()) {
|
||||
schema_ptr s = registered_schema(*_ptr->registry_entry());
|
||||
if (s->is_view()) {
|
||||
if (!s->view_info()->base_info()) {
|
||||
// we know that registered_bs is valid here because we make sure of it in the constructors.
|
||||
s->view_info()->set_base_info(s->view_info()->make_base_dependent_view_info(*registered_bs));
|
||||
}
|
||||
}
|
||||
if (_ptr->registry_entry()->is_synced()) {
|
||||
s->registry_entry()->mark_synced();
|
||||
}
|
||||
return s;
|
||||
@@ -297,16 +319,33 @@ schema_ptr global_schema_ptr::get() const {
|
||||
}
|
||||
|
||||
global_schema_ptr::global_schema_ptr(const schema_ptr& ptr)
|
||||
: _ptr([&ptr]() {
|
||||
// _ptr must always have an associated registry entry,
|
||||
// if ptr doesn't, we need to load it into the registry.
|
||||
schema_registry_entry* e = ptr->registry_entry();
|
||||
: _cpu_of_origin(this_shard_id()) {
|
||||
// _ptr must always have an associated registry entry,
|
||||
// if ptr doesn't, we need to load it into the registry.
|
||||
auto ensure_registry_entry = [] (const schema_ptr& s) {
|
||||
schema_registry_entry* e = s->registry_entry();
|
||||
if (e) {
|
||||
return ptr;
|
||||
}
|
||||
return local_schema_registry().get_or_load(ptr->version(), [&ptr] (table_schema_version) {
|
||||
return frozen_schema(ptr);
|
||||
return s;
|
||||
} else {
|
||||
return local_schema_registry().get_or_load(s->version(), [&s] (table_schema_version) {
|
||||
return frozen_schema(s);
|
||||
});
|
||||
}())
|
||||
, _cpu_of_origin(this_shard_id())
|
||||
{ }
|
||||
}
|
||||
};
|
||||
|
||||
schema_ptr s = ensure_registry_entry(ptr);
|
||||
if (s->is_view()) {
|
||||
if (s->view_info()->base_info()) {
|
||||
_base_schema = ensure_registry_entry(s->view_info()->base_info()->base_schema());
|
||||
} else if (ptr->view_info()->base_info()) {
|
||||
_base_schema = ensure_registry_entry(ptr->view_info()->base_info()->base_schema());
|
||||
} else {
|
||||
on_internal_error(slogger, format("Tried to build a global schema for view {}.{} with an uninitialized base info", s->ks_name(), s->cf_name()));
|
||||
}
|
||||
|
||||
if (!s->view_info()->base_info() || !s->view_info()->base_info()->base_schema()->registry_entry()) {
|
||||
s->view_info()->set_base_info(s->view_info()->make_base_dependent_view_info(*_base_schema));
|
||||
}
|
||||
}
|
||||
_ptr = s;
|
||||
}
|
||||
|
||||
@@ -165,6 +165,7 @@ schema_registry& local_schema_registry();
|
||||
// chain will last.
|
||||
class global_schema_ptr {
|
||||
schema_ptr _ptr;
|
||||
schema_ptr _base_schema;
|
||||
unsigned _cpu_of_origin;
|
||||
public:
|
||||
// Note: the schema_ptr must come from the current shard and can't be nullptr.
|
||||
|
||||
2
seastar
2
seastar
Submodule seastar updated: 6973080cd1...b70b444924
@@ -53,6 +53,7 @@
|
||||
#include "database.hh"
|
||||
#include "db/schema_tables.hh"
|
||||
#include "types/user.hh"
|
||||
#include "db/schema_tables.hh"
|
||||
|
||||
namespace service {
|
||||
|
||||
@@ -1096,8 +1097,19 @@ future<schema_ptr> get_schema_definition(table_schema_version v, netw::messaging
|
||||
// referenced by the incoming request.
|
||||
// That means the column mapping for the schema should always be inserted
|
||||
// with TTL (refresh TTL in case column mapping already existed prior to that).
|
||||
return db::schema_tables::store_column_mapping(proxy, s.unfreeze(db::schema_ctxt(proxy)), true).then([s] {
|
||||
return s;
|
||||
auto us = s.unfreeze(db::schema_ctxt(proxy));
|
||||
// if this is a view - we might need to fix its schema before registering it.
|
||||
if (us->is_view()) {
|
||||
auto& db = proxy.local().local_db();
|
||||
schema_ptr base_schema = db.find_schema(us->view_info()->base_id());
|
||||
auto fixed_view = db::schema_tables::maybe_fix_legacy_secondary_index_mv_schema(db, view_ptr(us), base_schema,
|
||||
db::schema_tables::preserve_version::yes);
|
||||
if (fixed_view) {
|
||||
us = fixed_view;
|
||||
}
|
||||
}
|
||||
return db::schema_tables::store_column_mapping(proxy, us, true).then([us] {
|
||||
return frozen_schema{us};
|
||||
});
|
||||
});
|
||||
}).then([] (schema_ptr s) {
|
||||
@@ -1105,7 +1117,7 @@ future<schema_ptr> get_schema_definition(table_schema_version v, netw::messaging
|
||||
// table.
|
||||
if (s->is_view()) {
|
||||
if (!s->view_info()->base_info()) {
|
||||
auto& db = service::get_local_storage_proxy().get_db().local();
|
||||
auto& db = service::get_local_storage_proxy().local_db();
|
||||
// This line might throw a no_such_column_family
|
||||
// It should be fine since if we tried to register a view for which
|
||||
// we don't know the base table, our registry is broken.
|
||||
|
||||
@@ -3624,6 +3624,11 @@ protected:
|
||||
|
||||
public:
|
||||
virtual future<foreign_ptr<lw_shared_ptr<query::result>>> execute(storage_proxy::clock_type::time_point timeout) {
|
||||
if (_targets.empty()) {
|
||||
// We may have no targets to read from if a DC with zero replication is queried with LOCAL_QUORUM.
|
||||
// Return an empty result in this case
|
||||
return make_ready_future<foreign_ptr<lw_shared_ptr<query::result>>>(make_foreign(make_lw_shared(query::result())));
|
||||
}
|
||||
digest_resolver_ptr digest_resolver = ::make_shared<digest_read_resolver>(_schema, _cl, _block_for,
|
||||
db::is_datacenter_local(_cl) ? db::count_local_endpoints(_targets): _targets.size(), timeout);
|
||||
auto exec = shared_from_this();
|
||||
@@ -4933,10 +4938,12 @@ void storage_proxy::init_messaging_service() {
|
||||
tracing::trace(trace_state_ptr, "read_data: message received from /{}", src_addr.addr);
|
||||
}
|
||||
auto da = oda.value_or(query::digest_algorithm::MD5);
|
||||
auto sp = get_local_shared_storage_proxy();
|
||||
if (!cmd.max_result_size) {
|
||||
cmd.max_result_size.emplace(cinfo.retrieve_auxiliary<uint64_t>("max_result_size"));
|
||||
auto& cfg = sp->_db.local().get_config();
|
||||
cmd.max_result_size.emplace(cfg.max_memory_for_unlimited_query_soft_limit(), cfg.max_memory_for_unlimited_query_hard_limit());
|
||||
}
|
||||
return do_with(std::move(pr), get_local_shared_storage_proxy(), std::move(trace_state_ptr), [&cinfo, cmd = make_lw_shared<query::read_command>(std::move(cmd)), src_addr = std::move(src_addr), da, t] (::compat::wrapping_partition_range& pr, shared_ptr<storage_proxy>& p, tracing::trace_state_ptr& trace_state_ptr) mutable {
|
||||
return do_with(std::move(pr), std::move(sp), std::move(trace_state_ptr), [&cinfo, cmd = make_lw_shared<query::read_command>(std::move(cmd)), src_addr = std::move(src_addr), da, t] (::compat::wrapping_partition_range& pr, shared_ptr<storage_proxy>& p, tracing::trace_state_ptr& trace_state_ptr) mutable {
|
||||
p->get_stats().replica_data_reads++;
|
||||
auto src_ip = src_addr.addr;
|
||||
return get_schema_for_read(cmd->schema_version, std::move(src_addr), p->_messaging).then([cmd, da, &pr, &p, &trace_state_ptr, t] (schema_ptr s) {
|
||||
|
||||
@@ -446,6 +446,12 @@ public:
|
||||
distributed<database>& get_db() {
|
||||
return _db;
|
||||
}
|
||||
const database& local_db() const noexcept {
|
||||
return _db.local();
|
||||
}
|
||||
database& local_db() noexcept {
|
||||
return _db.local();
|
||||
}
|
||||
|
||||
void set_cdc_service(cdc::cdc_service* cdc) {
|
||||
_cdc = cdc;
|
||||
|
||||
@@ -298,7 +298,7 @@ void storage_service::prepare_to_join(
|
||||
_token_metadata.update_normal_tokens(my_tokens, get_broadcast_address());
|
||||
|
||||
_cdc_streams_ts = db::system_keyspace::get_saved_cdc_streams_timestamp().get0();
|
||||
if (!_cdc_streams_ts && db().local().get_config().check_experimental(db::experimental_features_t::CDC)) {
|
||||
if (!_cdc_streams_ts) {
|
||||
// We could not have completed joining if we didn't generate and persist a CDC streams timestamp,
|
||||
// unless we are restarting after upgrading from non-CDC supported version.
|
||||
// In that case we won't begin a CDC generation: it should be done by one of the nodes
|
||||
@@ -550,7 +550,7 @@ void storage_service::join_token_ring(int delay) {
|
||||
assert(should_bootstrap() || db().local().is_replacing() || !_cdc_streams_ts);
|
||||
}
|
||||
|
||||
if (!_cdc_streams_ts && db().local().get_config().check_experimental(db::experimental_features_t::CDC)) {
|
||||
if (!_cdc_streams_ts) {
|
||||
// If we didn't choose a CDC streams timestamp at this point, then either
|
||||
// 1. we're replacing a node which didn't gossip a CDC streams timestamp for whatever reason,
|
||||
// 2. we've already bootstrapped, but are upgrading from a non-CDC version,
|
||||
@@ -570,10 +570,15 @@ void storage_service::join_token_ring(int delay) {
|
||||
if (!db().local().is_replacing()
|
||||
&& (!db::system_keyspace::bootstrap_complete()
|
||||
|| cdc::should_propose_first_generation(get_broadcast_address(), _gossiper))) {
|
||||
|
||||
_cdc_streams_ts = cdc::make_new_cdc_generation(db().local().get_config(),
|
||||
_bootstrap_tokens, _token_metadata, _gossiper,
|
||||
_sys_dist_ks.local(), get_ring_delay(), _for_testing);
|
||||
try {
|
||||
_cdc_streams_ts = cdc::make_new_cdc_generation(db().local().get_config(),
|
||||
_bootstrap_tokens, _token_metadata, _gossiper,
|
||||
_sys_dist_ks.local(), get_ring_delay(), _for_testing);
|
||||
} catch (...) {
|
||||
cdc_log.warn(
|
||||
"Could not create a new CDC generation: {}. This may make it impossible to use CDC. Use nodetool checkAndRepairCdcStreams to fix CDC generation",
|
||||
std::current_exception());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -893,24 +898,18 @@ void storage_service::bootstrap() {
|
||||
// It doesn't hurt: other nodes will (potentially) just do more generation switches.
|
||||
// We do this because with this new attempt at bootstrapping we picked a different set of tokens.
|
||||
|
||||
if (db().local().get_config().check_experimental(db::experimental_features_t::CDC)) {
|
||||
// Update pending ranges now, so we correctly count ourselves as a pending replica
|
||||
// when inserting the new CDC generation.
|
||||
_token_metadata.add_bootstrap_tokens(_bootstrap_tokens, get_broadcast_address());
|
||||
update_pending_ranges().get();
|
||||
// Update pending ranges now, so we correctly count ourselves as a pending replica
|
||||
// when inserting the new CDC generation.
|
||||
_token_metadata.add_bootstrap_tokens(_bootstrap_tokens, get_broadcast_address());
|
||||
update_pending_ranges().get();
|
||||
|
||||
// After we pick a generation timestamp, we start gossiping it, and we stick with it.
|
||||
// We don't do any other generation switches (unless we crash before completing bootstrap).
|
||||
assert(!_cdc_streams_ts);
|
||||
// After we pick a generation timestamp, we start gossiping it, and we stick with it.
|
||||
// We don't do any other generation switches (unless we crash before completing bootstrap).
|
||||
assert(!_cdc_streams_ts);
|
||||
|
||||
_cdc_streams_ts = cdc::make_new_cdc_generation(db().local().get_config(),
|
||||
_bootstrap_tokens, _token_metadata, _gossiper,
|
||||
_sys_dist_ks.local(), get_ring_delay(), _for_testing);
|
||||
} else {
|
||||
// We should not be able to join the cluster if other nodes support CDC but we don't.
|
||||
// The check should have been made somewhere in prepare_to_join (`check_knows_remote_features`).
|
||||
assert(!_feature_service.cluster_supports_cdc());
|
||||
}
|
||||
_cdc_streams_ts = cdc::make_new_cdc_generation(db().local().get_config(),
|
||||
_bootstrap_tokens, _token_metadata, _gossiper,
|
||||
_sys_dist_ks.local(), get_ring_delay(), _for_testing);
|
||||
|
||||
_gossiper.add_local_application_state({
|
||||
// Order is important: both the CDC streams timestamp and tokens must be known when a node handles our status.
|
||||
@@ -2036,9 +2035,8 @@ future<> storage_service::start_gossiping(bind_messaging_port do_bind) {
|
||||
return seastar::async([&ss, do_bind] {
|
||||
if (!ss._initialized) {
|
||||
slogger.warn("Starting gossip by operator request");
|
||||
bool cdc_enabled = ss.db().local().get_config().check_experimental(db::experimental_features_t::CDC);
|
||||
ss.set_gossip_tokens(db::system_keyspace::get_local_tokens().get0(),
|
||||
cdc_enabled ? std::make_optional(cdc::get_local_streams_timestamp().get0()) : std::nullopt);
|
||||
std::make_optional(cdc::get_local_streams_timestamp().get0()));
|
||||
ss._gossiper.force_newer_generation();
|
||||
ss._gossiper.start_gossiping(utils::get_generation_number(), gms::bind_messaging_port(bool(do_bind))).then([&ss] {
|
||||
ss._initialized = true;
|
||||
@@ -2338,7 +2336,7 @@ future<> storage_service::rebuild(sstring source_dc) {
|
||||
slogger.info("Streaming for rebuild successful");
|
||||
}).handle_exception([] (auto ep) {
|
||||
// This is used exclusively through JMX, so log the full trace but only throw a simple RTE
|
||||
slogger.warn("Error while rebuilding node: {}", std::current_exception());
|
||||
slogger.warn("Error while rebuilding node: {}", ep);
|
||||
return make_exception_future<>(std::move(ep));
|
||||
});
|
||||
});
|
||||
|
||||
@@ -212,16 +212,18 @@ public:
|
||||
};
|
||||
|
||||
struct compaction_writer {
|
||||
shared_sstable sst;
|
||||
// We use a ptr for pointer stability and so that it can be null
|
||||
// when using a noop monitor.
|
||||
sstable_writer writer;
|
||||
// The order in here is important. A monitor must be destroyed before the writer it is monitoring since it has a
|
||||
// periodic timer that checks the writer.
|
||||
// The writer must be destroyed before the shared_sstable since it may depend on the sstable
|
||||
// (as in the mx::writer over compressed_file_data_sink_impl case that depends on sstables::compression).
|
||||
std::unique_ptr<compaction_write_monitor> monitor;
|
||||
shared_sstable sst;
|
||||
|
||||
compaction_writer(std::unique_ptr<compaction_write_monitor> monitor, sstable_writer writer, shared_sstable sst)
|
||||
: writer(std::move(writer)), monitor(std::move(monitor)), sst(std::move(sst)) {}
|
||||
: sst(std::move(sst)), writer(std::move(writer)), monitor(std::move(monitor)) {}
|
||||
compaction_writer(sstable_writer writer, shared_sstable sst)
|
||||
: compaction_writer(nullptr, std::move(writer), std::move(sst)) {}
|
||||
};
|
||||
@@ -609,10 +611,12 @@ private:
|
||||
std::move(gc_consumer));
|
||||
|
||||
return seastar::async([cfc = std::move(cfc), reader = std::move(reader), this] () mutable {
|
||||
reader.consume_in_thread(std::move(cfc), make_partition_filter(), db::no_timeout);
|
||||
reader.consume_in_thread(std::move(cfc), db::no_timeout);
|
||||
});
|
||||
});
|
||||
return consumer(make_sstable_reader());
|
||||
// producer will filter out a partition before it reaches the consumer(s)
|
||||
auto producer = make_filtering_reader(make_sstable_reader(), make_partition_filter());
|
||||
return consumer(std::move(producer));
|
||||
}
|
||||
|
||||
virtual reader_consumer make_interposer_consumer(reader_consumer end_consumer) {
|
||||
|
||||
@@ -311,6 +311,7 @@ future<> compaction_manager::run_custom_job(column_family* cf, sstring name, non
|
||||
cmlog.info("{} was abruptly stopped, reason: {}", name, e.what());
|
||||
} catch (...) {
|
||||
cmlog.error("{} failed: {}", name, std::current_exception());
|
||||
throw;
|
||||
}
|
||||
});
|
||||
return task->compaction_done.get_future().then([task] {});
|
||||
@@ -629,10 +630,11 @@ future<> compaction_manager::rewrite_sstables(column_family* cf, sstables::compa
|
||||
_tasks.push_back(task);
|
||||
|
||||
auto sstables = std::make_unique<std::vector<sstables::shared_sstable>>(get_func(*cf));
|
||||
auto compacting = make_lw_shared<compacting_sstable_registration>(this, *sstables);
|
||||
auto sstables_ptr = sstables.get();
|
||||
_stats.pending_tasks += sstables->size();
|
||||
|
||||
task->compaction_done = do_until([sstables_ptr] { return sstables_ptr->empty(); }, [this, task, options, sstables_ptr] () mutable {
|
||||
task->compaction_done = do_until([sstables_ptr] { return sstables_ptr->empty(); }, [this, task, options, sstables_ptr, compacting] () mutable {
|
||||
|
||||
// FIXME: lock cf here
|
||||
if (!can_proceed(task)) {
|
||||
@@ -642,7 +644,7 @@ future<> compaction_manager::rewrite_sstables(column_family* cf, sstables::compa
|
||||
auto sst = sstables_ptr->back();
|
||||
sstables_ptr->pop_back();
|
||||
|
||||
return repeat([this, task, options, sst = std::move(sst)] () mutable {
|
||||
return repeat([this, task, options, sst = std::move(sst), compacting] () mutable {
|
||||
column_family& cf = *task->compacting_cf;
|
||||
auto sstable_level = sst->get_sstable_level();
|
||||
auto run_identifier = sst->run_identifier();
|
||||
@@ -650,21 +652,22 @@ future<> compaction_manager::rewrite_sstables(column_family* cf, sstables::compa
|
||||
auto descriptor = sstables::compaction_descriptor({ sst }, cf.get_sstable_set(), service::get_local_compaction_priority(),
|
||||
sstable_level, sstables::compaction_descriptor::default_max_sstable_bytes, run_identifier, options);
|
||||
|
||||
auto compacting = make_lw_shared<compacting_sstable_registration>(this, descriptor.sstables);
|
||||
// Releases reference to cleaned sstable such that respective used disk space can be freed.
|
||||
descriptor.release_exhausted = [compacting] (const std::vector<sstables::shared_sstable>& exhausted_sstables) {
|
||||
compacting->release_compacting(exhausted_sstables);
|
||||
};
|
||||
|
||||
_stats.pending_tasks--;
|
||||
_stats.active_tasks++;
|
||||
task->compaction_running = true;
|
||||
compaction_backlog_tracker user_initiated(std::make_unique<user_initiated_backlog_tracker>(_compaction_controller.backlog_of_shares(200), _available_memory));
|
||||
return do_with(std::move(user_initiated), [this, &cf, descriptor = std::move(descriptor)] (compaction_backlog_tracker& bt) mutable {
|
||||
return with_scheduling_group(_scheduling_group, [this, &cf, descriptor = std::move(descriptor)] () mutable {
|
||||
return cf.run_compaction(std::move(descriptor));
|
||||
return with_semaphore(_rewrite_sstables_sem, 1, [this, task, &cf, descriptor = std::move(descriptor)] () mutable {
|
||||
_stats.pending_tasks--;
|
||||
_stats.active_tasks++;
|
||||
task->compaction_running = true;
|
||||
compaction_backlog_tracker user_initiated(std::make_unique<user_initiated_backlog_tracker>(_compaction_controller.backlog_of_shares(200), _available_memory));
|
||||
return do_with(std::move(user_initiated), [this, &cf, descriptor = std::move(descriptor)] (compaction_backlog_tracker& bt) mutable {
|
||||
return with_scheduling_group(_scheduling_group, [this, &cf, descriptor = std::move(descriptor)]() mutable {
|
||||
return cf.run_compaction(std::move(descriptor));
|
||||
});
|
||||
});
|
||||
}).then_wrapped([this, task, compacting = std::move(compacting)] (future<> f) mutable {
|
||||
}).then_wrapped([this, task, compacting] (future<> f) mutable {
|
||||
task->compaction_running = false;
|
||||
_stats.active_tasks--;
|
||||
if (!can_proceed(task)) {
|
||||
|
||||
@@ -111,6 +111,7 @@ private:
|
||||
std::unordered_map<column_family*, rwlock> _compaction_locks;
|
||||
|
||||
semaphore _custom_job_sem{1};
|
||||
seastar::named_semaphore _rewrite_sstables_sem = {1, named_semaphore_exception_factory{"rewrite sstables"}};
|
||||
|
||||
std::function<void()> compaction_submission_callback();
|
||||
// all registered column families are submitted for compaction at a constant interval.
|
||||
|
||||
@@ -315,8 +315,8 @@ void sstable_writer_k_l::write_collection(file_writer& out, const composite& clu
|
||||
void sstable_writer_k_l::write_clustered_row(file_writer& out, const schema& schema, const clustering_row& clustered_row) {
|
||||
auto clustering_key = composite::from_clustering_element(schema, clustered_row.key());
|
||||
|
||||
maybe_write_row_marker(out, schema, clustered_row.marker(), clustering_key);
|
||||
maybe_write_row_tombstone(out, clustering_key, clustered_row);
|
||||
maybe_write_row_marker(out, schema, clustered_row.marker(), clustering_key);
|
||||
|
||||
_collector.update_min_max_components(clustered_row.key());
|
||||
|
||||
|
||||
@@ -178,7 +178,7 @@ leveled_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input
|
||||
|
||||
unsigned max_filled_level = 0;
|
||||
|
||||
size_t offstrategy_threshold = std::max(schema->min_compaction_threshold(), 4);
|
||||
size_t offstrategy_threshold = (mode == reshape_mode::strict) ? std::max(schema->min_compaction_threshold(), 4) : std::max(schema->max_compaction_threshold(), 32);
|
||||
size_t max_sstables = std::max(schema->max_compaction_threshold(), int(offstrategy_threshold));
|
||||
auto tolerance = [mode] (unsigned level) -> unsigned {
|
||||
if (mode == reshape_mode::strict) {
|
||||
|
||||
@@ -378,6 +378,7 @@ private:
|
||||
_fwd_end = _fwd ? position_in_partition::before_all_clustered_rows() : position_in_partition::after_all_clustered_rows();
|
||||
_out_of_range = false;
|
||||
_range_tombstones.reset();
|
||||
_ready = {};
|
||||
_first_row_encountered = false;
|
||||
}
|
||||
public:
|
||||
@@ -1144,7 +1145,11 @@ public:
|
||||
setup_for_partition(pk);
|
||||
auto dk = dht::decorate_key(*_schema, pk);
|
||||
_reader->on_next_partition(std::move(dk), tombstone(deltime));
|
||||
return proceed::yes;
|
||||
// Only partition start will be consumed if processing a large run of partition tombstones,
|
||||
// so let's stop the consumer if buffer is full.
|
||||
// Otherwise, partition tombstones will keep accumulating in memory till other fragment type
|
||||
// is found which can stop the consumer (perhaps there's none if sstable is full of tombstones).
|
||||
return proceed(!_reader->is_buffer_full());
|
||||
}
|
||||
|
||||
virtual consumer_m::row_processing_result consume_row_start(const std::vector<temporary_buffer<char>>& ecp) override {
|
||||
|
||||
@@ -162,7 +162,7 @@ time_window_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> i
|
||||
for (auto& pair : all_buckets.first) {
|
||||
auto ssts = std::move(pair.second);
|
||||
if (ssts.size() > offstrategy_threshold) {
|
||||
ssts.resize(std::min(multi_window.size(), max_sstables));
|
||||
ssts.resize(std::min(ssts.size(), max_sstables));
|
||||
compaction_descriptor desc(std::move(ssts), std::optional<sstables::sstable_set>(), iop);
|
||||
desc.options = compaction_options::make_reshape();
|
||||
return desc;
|
||||
|
||||
@@ -101,7 +101,8 @@ class time_window_compaction_strategy : public compaction_strategy_impl {
|
||||
time_window_compaction_strategy_options _options;
|
||||
int64_t _estimated_remaining_tasks = 0;
|
||||
db_clock::time_point _last_expired_check;
|
||||
timestamp_type _highest_window_seen;
|
||||
// As timestamp_type is an int64_t, a primitive type, it must be initialized here.
|
||||
timestamp_type _highest_window_seen = 0;
|
||||
// Keep track of all recent active windows that still need to be compacted into a single SSTable
|
||||
std::unordered_set<timestamp_type> _recent_active_windows;
|
||||
size_tiered_compaction_strategy_options _stcs_options;
|
||||
|
||||
@@ -403,7 +403,7 @@ future<prepare_message> stream_session::prepare(std::vector<stream_request> requ
|
||||
try {
|
||||
db.find_column_family(ks, cf);
|
||||
} catch (no_such_column_family&) {
|
||||
auto err = format("[Stream #{{}}] prepare requested ks={{}} cf={{}} does not exist", ks, cf);
|
||||
auto err = format("[Stream #{{}}] prepare requested ks={{}} cf={{}} does not exist", plan_id, ks, cf);
|
||||
sslog.warn(err.c_str());
|
||||
throw std::runtime_error(err);
|
||||
}
|
||||
|
||||
9
table.cc
9
table.cc
@@ -832,7 +832,7 @@ table::stop() {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
return _async_gate.close().then([this] {
|
||||
return when_all(await_pending_writes(), await_pending_reads(), await_pending_streams()).discard_result().finally([this] {
|
||||
return await_pending_ops().finally([this] {
|
||||
return _memtables->request_flush().finally([this] {
|
||||
return _compaction_manager.remove(this).then([this] {
|
||||
// Nest, instead of using when_all, so we don't lose any exceptions.
|
||||
@@ -1532,7 +1532,8 @@ future<std::unordered_map<sstring, table::snapshot_details>> table::get_snapshot
|
||||
}
|
||||
|
||||
future<> table::flush() {
|
||||
return _memtables->request_flush();
|
||||
auto op = _pending_flushes_phaser.start();
|
||||
return _memtables->request_flush().then([op = std::move(op)] {});
|
||||
}
|
||||
|
||||
// FIXME: We can do much better than this in terms of cache management. Right
|
||||
@@ -1550,6 +1551,10 @@ future<> table::flush_streaming_mutations(utils::UUID plan_id, dht::partition_ra
|
||||
});
|
||||
}
|
||||
|
||||
bool table::can_flush() const {
|
||||
return _memtables->can_flush();
|
||||
}
|
||||
|
||||
future<> table::clear() {
|
||||
if (_commitlog) {
|
||||
_commitlog->discard_completed_segments(_schema->id());
|
||||
|
||||
@@ -80,7 +80,7 @@ def dynamodb(request):
|
||||
verify = not request.config.getoption('https')
|
||||
return boto3.resource('dynamodb', endpoint_url=local_url, verify=verify,
|
||||
region_name='us-east-1', aws_access_key_id='alternator', aws_secret_access_key='secret_pass',
|
||||
config=botocore.client.Config(retries={"max_attempts": 3}))
|
||||
config=botocore.client.Config(retries={"max_attempts": 0}, read_timeout=300))
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def dynamodbstreams(request):
|
||||
|
||||
@@ -86,7 +86,7 @@ ln -s "$SCYLLA" "$SCYLLA_LINK"
|
||||
--alternator-write-isolation=always_use_lwt \
|
||||
--alternator-streams-time-window-s=0 \
|
||||
--developer-mode=1 \
|
||||
--experimental-features=cdc \
|
||||
--experimental-features=alternator-streams \
|
||||
--ring-delay-ms 0 --collectd 0 \
|
||||
--smp 2 -m 1G \
|
||||
--overprovisioned --unsafe-bypass-fsync 1 \
|
||||
|
||||
@@ -136,7 +136,7 @@ def test_update_condition_eq_different(test_table_s):
|
||||
ConditionExpression='a = :val2',
|
||||
ExpressionAttributeValues={':val1': val1, ':val2': val2})
|
||||
|
||||
# Also check an actual case of same time, but inequality.
|
||||
# Also check an actual case of same type, but inequality.
|
||||
def test_update_condition_eq_unequal(test_table_s):
|
||||
p = random_string()
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
@@ -146,6 +146,13 @@ def test_update_condition_eq_unequal(test_table_s):
|
||||
UpdateExpression='SET a = :val1',
|
||||
ConditionExpression='a = :oldval',
|
||||
ExpressionAttributeValues={':val1': 3, ':oldval': 2})
|
||||
# If the attribute being compared doesn't exist, it's considered a failed
|
||||
# condition, not an error:
|
||||
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET a = :val1',
|
||||
ConditionExpression='q = :oldval',
|
||||
ExpressionAttributeValues={':val1': 3, ':oldval': 2})
|
||||
|
||||
# Check that set equality is checked correctly. Unlike string equality (for
|
||||
# example), it cannot be done with just naive string comparison of the JSON
|
||||
@@ -269,15 +276,44 @@ def test_update_condition_lt(test_table_s):
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression='a < :oldval',
|
||||
ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
|
||||
# Trying to compare an unsupported type - e.g., in the following test
|
||||
# a boolean, is unfortunately caught by boto3 and cannot be tested here...
|
||||
#test_table_s.update_item(Key={'p': p},
|
||||
# AttributeUpdates={'d': {'Value': False, 'Action': 'PUT'}})
|
||||
#with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
|
||||
# test_table_s.update_item(Key={'p': p},
|
||||
# UpdateExpression='SET z = :newval',
|
||||
# ConditionExpression='d < :oldval',
|
||||
# ExpressionAttributeValues={':newval': 2, ':oldval': True})
|
||||
# If the attribute being compared doesn't even exist, this is also
|
||||
# considered as a false condition - not an error.
|
||||
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression='q < :oldval',
|
||||
ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
|
||||
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression=':oldval < q',
|
||||
ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
|
||||
# If a comparison parameter comes from a constant specified in the query,
|
||||
# and it has a type not supported by the comparison (e.g., a list), it's
|
||||
# not just a failed comparison - it is considered a ValidationException
|
||||
with pytest.raises(ClientError, match='ValidationException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression='a < :oldval',
|
||||
ExpressionAttributeValues={':newval': 2, ':oldval': [1,2]})
|
||||
with pytest.raises(ClientError, match='ValidationException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression=':oldval < a',
|
||||
ExpressionAttributeValues={':newval': 2, ':oldval': [1,2]})
|
||||
# However, when the wrong type comes from an item attribute, not the
|
||||
# query, the comparison is simply false - not a ValidationException.
|
||||
test_table_s.update_item(Key={'p': p}, AttributeUpdates={'x': {'Value': [1,2,3], 'Action': 'PUT'}})
|
||||
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression='x < :oldval',
|
||||
ExpressionAttributeValues={':newval': 2, ':oldval': 1})
|
||||
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression=':oldval < x',
|
||||
ExpressionAttributeValues={':newval': 2, ':oldval': 1})
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 4
|
||||
|
||||
# Test for ConditionExpression with operator "<="
|
||||
@@ -341,6 +377,44 @@ def test_update_condition_le(test_table_s):
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression='a <= :oldval',
|
||||
ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
|
||||
# If the attribute being compared doesn't even exist, this is also
|
||||
# considered as a false condition - not an error.
|
||||
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression='q <= :oldval',
|
||||
ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
|
||||
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression=':oldval <= q',
|
||||
ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
|
||||
# If a comparison parameter comes from a constant specified in the query,
|
||||
# and it has a type not supported by the comparison (e.g., a list), it's
|
||||
# not just a failed comparison - it is considered a ValidationException
|
||||
with pytest.raises(ClientError, match='ValidationException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression='a <= :oldval',
|
||||
ExpressionAttributeValues={':newval': 2, ':oldval': [1,2]})
|
||||
with pytest.raises(ClientError, match='ValidationException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression=':oldval <= a',
|
||||
ExpressionAttributeValues={':newval': 2, ':oldval': [1,2]})
|
||||
# However, when the wrong type comes from an item attribute, not the
|
||||
# query, the comparison is simply false - not a ValidationException.
|
||||
test_table_s.update_item(Key={'p': p}, AttributeUpdates={'x': {'Value': [1,2,3], 'Action': 'PUT'}})
|
||||
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression='x <= :oldval',
|
||||
ExpressionAttributeValues={':newval': 2, ':oldval': 1})
|
||||
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression=':oldval <= x',
|
||||
ExpressionAttributeValues={':newval': 2, ':oldval': 1})
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 7
|
||||
|
||||
# Test for ConditionExpression with operator ">"
|
||||
@@ -404,6 +478,44 @@ def test_update_condition_gt(test_table_s):
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression='a > :oldval',
|
||||
ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
|
||||
# If the attribute being compared doesn't even exist, this is also
|
||||
# considered as a false condition - not an error.
|
||||
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression='q > :oldval',
|
||||
ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
|
||||
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression=':oldval > q',
|
||||
ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
|
||||
# If a comparison parameter comes from a constant specified in the query,
|
||||
# and it has a type not supported by the comparison (e.g., a list), it's
|
||||
# not just a failed comparison - it is considered a ValidationException
|
||||
with pytest.raises(ClientError, match='ValidationException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression='a > :oldval',
|
||||
ExpressionAttributeValues={':newval': 2, ':oldval': [1,2]})
|
||||
with pytest.raises(ClientError, match='ValidationException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression=':oldval > a',
|
||||
ExpressionAttributeValues={':newval': 2, ':oldval': [1,2]})
|
||||
# However, when the wrong type comes from an item attribute, not the
|
||||
# query, the comparison is simply false - not a ValidationException.
|
||||
test_table_s.update_item(Key={'p': p}, AttributeUpdates={'x': {'Value': [1,2,3], 'Action': 'PUT'}})
|
||||
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression='x > :oldval',
|
||||
ExpressionAttributeValues={':newval': 2, ':oldval': 1})
|
||||
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression=':oldval > x',
|
||||
ExpressionAttributeValues={':newval': 2, ':oldval': 1})
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 4
|
||||
|
||||
# Test for ConditionExpression with operator ">="
|
||||
@@ -467,6 +579,44 @@ def test_update_condition_ge(test_table_s):
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression='a >= :oldval',
|
||||
ExpressionAttributeValues={':newval': 2, ':oldval': '0'})
|
||||
# If the attribute being compared doesn't even exist, this is also
|
||||
# considered as a false condition - not an error.
|
||||
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression='q >= :oldval',
|
||||
ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
|
||||
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression=':oldval >= q',
|
||||
ExpressionAttributeValues={':newval': 2, ':oldval': '17'})
|
||||
# If a comparison parameter comes from a constant specified in the query,
|
||||
# and it has a type not supported by the comparison (e.g., a list), it's
|
||||
# not just a failed comparison - it is considered a ValidationException
|
||||
with pytest.raises(ClientError, match='ValidationException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression='a >= :oldval',
|
||||
ExpressionAttributeValues={':newval': 2, ':oldval': [1,2]})
|
||||
with pytest.raises(ClientError, match='ValidationException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression=':oldval >= a',
|
||||
ExpressionAttributeValues={':newval': 2, ':oldval': [1,2]})
|
||||
# However, when the wrong type comes from an item attribute, not the
|
||||
# query, the comparison is simply false - not a ValidationException.
|
||||
test_table_s.update_item(Key={'p': p}, AttributeUpdates={'x': {'Value': [1,2,3], 'Action': 'PUT'}})
|
||||
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression='x >= :oldval',
|
||||
ExpressionAttributeValues={':newval': 2, ':oldval': 1})
|
||||
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression=':oldval >= x',
|
||||
ExpressionAttributeValues={':newval': 2, ':oldval': 1})
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 7
|
||||
|
||||
# Test for ConditionExpression with ternary operator "BETWEEN" (checking
|
||||
@@ -548,6 +698,60 @@ def test_update_condition_between(test_table_s):
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression='a BETWEEN :oldval1 AND :oldval2',
|
||||
ExpressionAttributeValues={':newval': 2, ':oldval1': '0', ':oldval2': '2'})
|
||||
# If the attribute being compared doesn't even exist, this is also
|
||||
# considered as a false condition - not an error.
|
||||
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression='q BETWEEN :oldval1 AND :oldval2',
|
||||
ExpressionAttributeValues={':newval': 2, ':oldval1': b'dog', ':oldval2': b'zebra'})
|
||||
# If an operand comes from the query and has a type not supported by the
|
||||
# comparison (e.g., a list), it's not just a failed condition - it is
|
||||
# considered a ValidationException
|
||||
with pytest.raises(ClientError, match='ValidationException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression='a BETWEEN :oldval1 AND :oldval2',
|
||||
ExpressionAttributeValues={':newval': 2, ':oldval1': [1,2], ':oldval2': [2,3]})
|
||||
# However, when the wrong type comes from an item attribute, not the
|
||||
# query, the comparison is simply false - not a ValidationException.
|
||||
test_table_s.update_item(Key={'p': p}, AttributeUpdates={'x': {'Value': [1,2,3], 'Action': 'PUT'},
|
||||
'y': {'Value': [2,3,4], 'Action': 'PUT'}})
|
||||
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression='a BETWEEN x and y',
|
||||
ExpressionAttributeValues={':newval': 2})
|
||||
# If the two operands come from the query (":val" references) then if they
|
||||
# have different types or the wrong order, this is a ValidationException.
|
||||
# But if one or more of the operands come from the item, this only causes
|
||||
# a false condition - not a ValidationException.
|
||||
with pytest.raises(ClientError, match='ValidationException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression='a BETWEEN :oldval1 AND :oldval2',
|
||||
ExpressionAttributeValues={':newval': 2, ':oldval1': 2, ':oldval2': 1})
|
||||
with pytest.raises(ClientError, match='ValidationException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression='a BETWEEN :oldval1 AND :oldval2',
|
||||
ExpressionAttributeValues={':newval': 2, ':oldval1': 2, ':oldval2': 'dog'})
|
||||
test_table_s.update_item(Key={'p': p}, AttributeUpdates={'two': {'Value': 2, 'Action': 'PUT'}})
|
||||
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression='a BETWEEN two AND :oldval',
|
||||
ExpressionAttributeValues={':newval': 2, ':oldval': 1})
|
||||
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression='a BETWEEN :oldval AND two',
|
||||
ExpressionAttributeValues={':newval': 2, ':oldval': 3})
|
||||
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression='a BETWEEN two AND :oldval',
|
||||
ExpressionAttributeValues={':newval': 2, ':oldval': 'dog'})
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 9
|
||||
|
||||
# Test for ConditionExpression with multi-operand operator "IN", checking
|
||||
@@ -605,6 +809,13 @@ def test_update_condition_in(test_table_s):
|
||||
UpdateExpression='SET c = :val37',
|
||||
ConditionExpression='a IN ()',
|
||||
ExpressionAttributeValues=values)
|
||||
# If the attribute being compared doesn't even exist, this is also
|
||||
# considered as a false condition - not an error.
|
||||
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET c = :val37',
|
||||
ConditionExpression='q IN ({})'.format(','.join(values.keys())),
|
||||
ExpressionAttributeValues=values)
|
||||
|
||||
# Beyond the above operators, there are also test functions supported -
|
||||
# attribute_exists, attribute_not_exists, attribute_type, begins_with,
|
||||
|
||||
@@ -237,6 +237,30 @@ def test_update_expected_1_le(test_table_s):
|
||||
'AttributeValueList': [2, 3]}}
|
||||
)
|
||||
|
||||
# Comparison operators like le work only on numbers, strings or bytes.
|
||||
# As noted in issue #8043, if any other type is included in *the query*,
|
||||
# the result should be a ValidationException, but if the wrong type appears
|
||||
# in the item, not the query, the result is a failed condition.
|
||||
def test_update_expected_1_le_validation(test_table_s):
|
||||
p = random_string()
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
AttributeUpdates={'a': {'Value': 1, 'Action': 'PUT'},
|
||||
'b': {'Value': [1,2], 'Action': 'PUT'}})
|
||||
# Bad type (a list) in the query. Result is ValidationException.
|
||||
with pytest.raises(ClientError, match='ValidationException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
AttributeUpdates={'z': {'Value': 17, 'Action': 'PUT'}},
|
||||
Expected={'a': {'ComparisonOperator': 'LE',
|
||||
'AttributeValueList': [[1,2,3]]}}
|
||||
)
|
||||
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
AttributeUpdates={'z': {'Value': 17, 'Action': 'PUT'}},
|
||||
Expected={'b': {'ComparisonOperator': 'LE',
|
||||
'AttributeValueList': [3]}}
|
||||
)
|
||||
assert not 'z' in test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']
|
||||
|
||||
# Tests for Expected with ComparisonOperator = "LT":
|
||||
def test_update_expected_1_lt(test_table_s):
|
||||
p = random_string()
|
||||
@@ -894,6 +918,34 @@ def test_update_expected_1_between(test_table_s):
|
||||
AttributeUpdates={'z': {'Value': 2, 'Action': 'PUT'}},
|
||||
Expected={'d': {'ComparisonOperator': 'BETWEEN', 'AttributeValueList': [set([1]), set([2])]}})
|
||||
|
||||
# BETWEEN works only on numbers, strings or bytes. As noted in issue #8043,
|
||||
# if any other type is included in *the query*, the result should be a
|
||||
# ValidationException, but if the wrong type appears in the item, not the
|
||||
# query, the result is a failed condition.
|
||||
# BETWEEN should also generate ValidationException if the two ends of the
|
||||
# range are not of the same type or not in the correct order, but this
|
||||
# already is tested in the test above (test_update_expected_1_between).
|
||||
def test_update_expected_1_between_validation(test_table_s):
|
||||
p = random_string()
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
AttributeUpdates={'a': {'Value': 1, 'Action': 'PUT'},
|
||||
'b': {'Value': [1,2], 'Action': 'PUT'}})
|
||||
# Bad type (a list) in the query. Result is ValidationException.
|
||||
with pytest.raises(ClientError, match='ValidationException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
AttributeUpdates={'z': {'Value': 17, 'Action': 'PUT'}},
|
||||
Expected={'a': {'ComparisonOperator': 'BETWEEN',
|
||||
'AttributeValueList': [[1,2,3], [2,3,4]]}}
|
||||
)
|
||||
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
AttributeUpdates={'z': {'Value': 17, 'Action': 'PUT'}},
|
||||
Expected={'b': {'ComparisonOperator': 'BETWEEN',
|
||||
'AttributeValueList': [1,2]}}
|
||||
)
|
||||
assert not 'z' in test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']
|
||||
|
||||
|
||||
##############################################################################
|
||||
# Instead of ComparisonOperator and AttributeValueList, one can specify either
|
||||
# Value or Exists:
|
||||
|
||||
@@ -235,6 +235,30 @@ def test_filter_expression_ge(test_table_sn_with_data):
|
||||
expected_items = [item for item in items if item[xn] >= xv]
|
||||
assert(got_items == expected_items)
|
||||
|
||||
# Comparison operators such as >= or BETWEEN only work on numbers, strings or
|
||||
# bytes. When an expression's operands come from the item and have a wrong type
|
||||
# (e.g., a list), the result is that the item is skipped - aborting the scan
|
||||
# with a ValidationException is a bug (this was issue #8043).
|
||||
def test_filter_expression_le_bad_type(test_table_sn_with_data):
|
||||
table, p, items = test_table_sn_with_data
|
||||
got_items = full_query(table, KeyConditionExpression='p=:p', FilterExpression='l <= :xv',
|
||||
ExpressionAttributeValues={':p': p, ':xv': 3})
|
||||
assert got_items == []
|
||||
got_items = full_query(table, KeyConditionExpression='p=:p', FilterExpression=':xv <= l',
|
||||
ExpressionAttributeValues={':p': p, ':xv': 3})
|
||||
assert got_items == []
|
||||
def test_filter_expression_between_bad_type(test_table_sn_with_data):
|
||||
table, p, items = test_table_sn_with_data
|
||||
got_items = full_query(table, KeyConditionExpression='p=:p', FilterExpression='s between :xv and l',
|
||||
ExpressionAttributeValues={':p': p, ':xv': 'cat'})
|
||||
assert got_items == []
|
||||
got_items = full_query(table, KeyConditionExpression='p=:p', FilterExpression='s between l and :xv',
|
||||
ExpressionAttributeValues={':p': p, ':xv': 'cat'})
|
||||
assert got_items == []
|
||||
got_items = full_query(table, KeyConditionExpression='p=:p', FilterExpression='s between i and :xv',
|
||||
ExpressionAttributeValues={':p': p, ':xv': 'cat'})
|
||||
assert got_items == []
|
||||
|
||||
# Test the "BETWEEN/AND" ternary operator on a numeric, string and bytes
|
||||
# attribute. These keywords are case-insensitive.
|
||||
def test_filter_expression_between(test_table_sn_with_data):
|
||||
@@ -658,7 +682,6 @@ def test_filter_expression_and_sort_key_condition(test_table_sn_with_data):
|
||||
# In particular, test that FilterExpression may inspect attributes which will
|
||||
# not be returned by the query, because of the ProjectionExpression.
|
||||
# This test reproduces issue #6951.
|
||||
@pytest.mark.xfail(reason="issue #6951: cannot filter on non-returned attributes")
|
||||
def test_filter_expression_and_projection_expression(test_table):
|
||||
p = random_string()
|
||||
test_table.put_item(Item={'p': p, 'c': 'hi', 'x': 'dog', 'y': 'cat'})
|
||||
|
||||
@@ -386,3 +386,38 @@ def test_query_missing_key(test_table):
|
||||
full_query(test_table, KeyConditions={})
|
||||
with pytest.raises(ClientError, match='ValidationException'):
|
||||
full_query(test_table)
|
||||
|
||||
# The paging tests above used a numeric sort key. Let's now also test paging
|
||||
# with a bytes sort key. We already have above a test that bytes sort keys
|
||||
# work and are sorted correctly (test_query_sort_order_bytes), but the
|
||||
# following test adds a check that *paging* works correctly for such keys.
|
||||
# We used to have a bug in this (issue #7768) - the returned LastEvaluatedKey
|
||||
# was incorrectly formatted, breaking the boto3's parsing of the response.
|
||||
# Note we only check the case of bytes *sort* keys in this test. For bytes
|
||||
# *partition* keys, see test_scan_paging_bytes().
|
||||
def test_query_paging_bytes(test_table_sb):
|
||||
p = random_string()
|
||||
items = [{'p': p, 'c': random_bytes()} for i in range(10)]
|
||||
with test_table_sb.batch_writer() as batch:
|
||||
for item in items:
|
||||
batch.put_item(item)
|
||||
# Deliberately pass Limit=1 to enforce paging even though we have
|
||||
# just 10 items in the partition.
|
||||
got_items = full_query(test_table_sb, Limit=1,
|
||||
KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}})
|
||||
got_sort_keys = [x['c'] for x in got_items]
|
||||
expected_sort_keys = sorted(x['c'] for x in items)
|
||||
assert got_sort_keys == expected_sort_keys
|
||||
|
||||
# Similar test for string clustering keys
|
||||
def test_query_paging_string(test_table_ss):
|
||||
p = random_string()
|
||||
items = [{'p': p, 'c': random_string()} for i in range(10)]
|
||||
with test_table_ss.batch_writer() as batch:
|
||||
for item in items:
|
||||
batch.put_item(item)
|
||||
got_items = full_query(test_table_ss, Limit=1,
|
||||
KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}})
|
||||
got_sort_keys = [x['c'] for x in got_items]
|
||||
expected_sort_keys = sorted(x['c'] for x in items)
|
||||
assert got_sort_keys == expected_sort_keys
|
||||
|
||||
@@ -539,7 +539,6 @@ def test_query_filter_paging(test_table_sn_with_data):
|
||||
# In particular, test that QueryFilter may inspect attributes which will
|
||||
# not be returned by the query, because of the AttributesToGet.
|
||||
# This test reproduces issue #6951.
|
||||
@pytest.mark.xfail(reason="issue #6951: cannot filter on non-returned attributes")
|
||||
def test_query_filter_and_attributes_to_get(test_table):
|
||||
p = random_string()
|
||||
test_table.put_item(Item={'p': p, 'c': 'hi', 'x': 'dog', 'y': 'cat'})
|
||||
|
||||
@@ -19,7 +19,7 @@
|
||||
|
||||
import pytest
|
||||
from botocore.exceptions import ClientError
|
||||
from util import random_string, full_scan, full_scan_and_count, multiset
|
||||
from util import random_string, random_bytes, full_scan, full_scan_and_count, multiset
|
||||
from boto3.dynamodb.conditions import Attr
|
||||
|
||||
# Test that scanning works fine with/without pagination
|
||||
@@ -264,3 +264,20 @@ def test_scan_parallel_incorrect(filled_test_table):
|
||||
for segment in [7, 9]:
|
||||
with pytest.raises(ClientError, match='ValidationException.*Segment'):
|
||||
full_scan(test_table, TotalSegments=5, Segment=segment)
|
||||
|
||||
# We used to have a bug with formatting of LastEvaluatedKey in the response
|
||||
# of Query and Scan with bytes keys (issue #7768). In test_query_paging_byte()
|
||||
# (test_query.py) we tested the case of bytes *sort* keys. In the following
|
||||
# test we check bytes *partition* keys.
|
||||
def test_scan_paging_bytes(test_table_b):
    # The table may already hold unknown data, so we do not scan all of it.
    # It is enough to make sure it holds at least two items, ask Scan for a
    # single page with Limit=1, and look at that one response. Before #7768
    # was fixed, this failed because the LastEvaluatedKey in the response
    # could not be parsed.
    rows = [{'p': random_bytes()} for _ in range(2)]
    with test_table_b.batch_writer() as writer:
        for row in rows:
            writer.put_item(row)
    first_page = test_table_b.scan(ConsistentRead=True, Limit=1)
    assert 'LastEvaluatedKey' in first_page
|
||||
|
||||
@@ -41,8 +41,10 @@ def test_fetch_from_system_tables(scylla_only, dynamodb):
|
||||
|
||||
key_columns = [item['column_name'] for item in col_response['Items'] if item['kind'] == 'clustering' or item['kind'] == 'partition_key']
|
||||
qualified_name = "{}{}.{}".format(internal_prefix, ks_name, table_name)
|
||||
response = client.scan(TableName=qualified_name, AttributesToGet=key_columns)
|
||||
print(ks_name, table_name, response)
|
||||
import time
|
||||
start = time.time()
|
||||
response = client.scan(TableName=qualified_name, AttributesToGet=key_columns, Limit=50)
|
||||
print(ks_name, table_name, len(str(response)), time.time()-start)
|
||||
|
||||
def test_block_access_to_non_system_tables_with_virtual_interface(scylla_only, test_table_s, dynamodb):
|
||||
client = dynamodb.meta.client
|
||||
|
||||
@@ -659,6 +659,24 @@ def test_update_expression_add_numbers(test_table_s):
|
||||
UpdateExpression='ADD b :val1',
|
||||
ExpressionAttributeValues={':val1': 1})
|
||||
|
||||
# In test_update_expression_add_numbers() above we tested ADDing a number to
|
||||
# an existing number. The following test checks that ADD can be used to
|
||||
# create a *new* number, as if it was added to zero.
|
||||
def test_update_expression_add_numbers_new(test_table_s):
    # Case 1: the item exists but has no 'b' attribute; ADD must create it.
    key = {'p': random_string()}
    test_table_s.put_item(Item={'p': key['p'], 'a': 'hello'})
    test_table_s.update_item(
        Key=key,
        UpdateExpression='ADD b :val1',
        ExpressionAttributeValues={':val1': 7})
    stored = test_table_s.get_item(Key=key, ConsistentRead=True)['Item']
    assert stored['b'] == 7
    # Case 2: the item does not exist at all; ADD must create the whole item.
    key = {'p': random_string()}
    test_table_s.update_item(
        Key=key,
        UpdateExpression='ADD b :val1',
        ExpressionAttributeValues={':val1': 8})
    stored = test_table_s.get_item(Key=key, ConsistentRead=True)['Item']
    assert stored['b'] == 8
|
||||
|
||||
# Test "ADD" operation for sets
|
||||
def test_update_expression_add_sets(test_table_s):
|
||||
p = random_string()
|
||||
@@ -687,6 +705,24 @@ def test_update_expression_add_sets(test_table_s):
|
||||
UpdateExpression='ADD a :val1',
|
||||
ExpressionAttributeValues={':val1': 'hello'})
|
||||
|
||||
# In test_update_expression_add_sets() above we tested ADDing elements to an
|
||||
# existing set. The following test checks that ADD can be used to create a
|
||||
# *new* set, by adding its first item.
|
||||
def test_update_expression_add_sets_new(test_table_s):
    # Case 1: the item exists but has no 'b' attribute; ADD must create the
    # set from its first element.
    key = {'p': random_string()}
    test_table_s.put_item(Item={'p': key['p'], 'a': 'hello'})
    test_table_s.update_item(
        Key=key,
        UpdateExpression='ADD b :val1',
        ExpressionAttributeValues={':val1': {'dog'}})
    stored = test_table_s.get_item(Key=key, ConsistentRead=True)['Item']
    assert stored['b'] == {'dog'}
    # Case 2: the item does not exist at all; ADD must create the whole item.
    key = {'p': random_string()}
    test_table_s.update_item(
        Key=key,
        UpdateExpression='ADD b :val1',
        ExpressionAttributeValues={':val1': {'cat'}})
    stored = test_table_s.get_item(Key=key, ConsistentRead=True)['Item']
    assert stored['b'] == {'cat'}
|
||||
|
||||
# Test "DELETE" operation for sets
|
||||
def test_update_expression_delete_sets(test_table_s):
|
||||
p = random_string()
|
||||
|
||||
165
test/boost/cdc_generation_test.cc
Normal file
165
test/boost/cdc_generation_test.cc
Normal file
@@ -0,0 +1,165 @@
|
||||
/*
|
||||
* Copyright (C) 2021 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#define BOOST_TEST_MODULE core
|
||||
|
||||
#include <boost/test/unit_test.hpp>
|
||||
#include <vector>
|
||||
|
||||
#include "cdc/generation.hh"
|
||||
#include "test/lib/random_utils.hh"
|
||||
|
||||
namespace cdc {
|
||||
|
||||
size_t limit_of_streams_in_topology_description();
|
||||
topology_description limit_number_of_streams_if_needed(topology_description&& desc);
|
||||
|
||||
} // namespace cdc
|
||||
|
||||
// Builds a topology description with one token range per element of
// streams_count_per_vnode; the i-th range holds streams_count_per_vnode[i]
// streams. Tokens are handed out in strictly increasing order, and each
// range's end token is placed after the tokens of that range's streams.
static cdc::topology_description create_description(const std::vector<size_t>& streams_count_per_vnode) {
    std::vector<cdc::token_range_description> entries;
    entries.reserve(streams_count_per_vnode.size());

    int64_t next_token = std::numeric_limits<int64_t>::min() + 100;
    for (size_t vnode = 0; vnode < streams_count_per_vnode.size(); ++vnode) {
        std::vector<cdc::stream_id> vnode_streams(streams_count_per_vnode[vnode]);
        next_token += 500;
        for (auto& id : vnode_streams) {
            id = cdc::stream_id{dht::token::from_int64(next_token), vnode};
            next_token += 100;
        }
        next_token += 10000;
        // sharding_ignore_msb should not matter for limit_number_of_streams_if_needed
        // so we're using sharding_ignore_msb equal to 12.
        entries.push_back(
                cdc::token_range_description{dht::token::from_int64(next_token), std::move(vnode_streams), uint8_t{12}});
    }
    return cdc::topology_description(std::move(entries));
}
|
||||
|
||||
// Requires that desc has exactly expected_count.size() entries and that the
// i-th entry holds expected_count[i] streams.
static void assert_streams_count(const cdc::topology_description& desc, const std::vector<size_t>& expected_count) {
    const auto& entries = desc.entries();
    BOOST_REQUIRE_EQUAL(expected_count.size(), entries.size());

    size_t position = 0;
    for (const size_t wanted : expected_count) {
        BOOST_REQUIRE_EQUAL(wanted, entries[position].streams.size());
        ++position;
    }
}
|
||||
|
||||
// Requires that the token of every stream lies inside the token range of the
// entry that lists it. For entries after the first, the range is
// (previous entry's end, this entry's end]. The first entry is treated as
// wrapping around: a token above the first entry's end is accepted only if
// it is greater than the last entry's end.
static void assert_stream_ids_in_right_token_ranges(const cdc::topology_description& desc) {
    const auto& entries = desc.entries();

    const dht::token last_end = entries.back().token_range_end;
    const dht::token first_end = entries.front().token_range_end;
    for (const auto& id : entries.front().streams) {
        const dht::token tok = id.token();
        if (tok > first_end) {
            BOOST_REQUIRE(last_end < tok);
        } else {
            BOOST_REQUIRE(tok <= first_end);
        }
    }

    for (size_t idx = 1; idx < entries.size(); ++idx) {
        const auto& lower = entries[idx - 1].token_range_end;
        const auto& upper = entries[idx].token_range_end;
        for (const auto& id : entries[idx].streams) {
            BOOST_REQUIRE(lower < id.token());
            BOOST_REQUIRE(id.token() <= upper);
        }
    }

}
|
||||
|
||||
cdc::stream_id get_stream(const std::vector<cdc::token_range_description>& entries, dht::token tok);
|
||||
|
||||
// Draws 100 random tokens and, for each, requires that the stream returned by
// get_stream() is listed in the entry selected by the stream's index. When
// that index is not 0, it also requires that the token lies in
// (previous entry's end, selected entry's end].
static void assert_random_tokens_mapped_to_streams_with_tokens_in_the_same_token_range(const cdc::topology_description& desc) {
    const auto& entries = desc.entries();
    for (size_t sample = 0; sample < 100; ++sample) {
        const int64_t raw = tests::random::get_int(std::numeric_limits<int64_t>::min(), std::numeric_limits<int64_t>::max());
        const dht::token tok = dht::token::from_int64(raw);

        const auto chosen = get_stream(entries, tok);
        const auto vnode = chosen.index();
        const auto& owner = entries.at(vnode);

        const bool listed = std::find(owner.streams.begin(), owner.streams.end(), chosen) != owner.streams.end();
        BOOST_REQUIRE(listed);

        // No range check is made for index 0.
        if (vnode == 0) {
            continue;
        }
        BOOST_REQUIRE(tok <= owner.token_range_end);
        BOOST_REQUIRE(tok > entries.at(vnode - 1).token_range_end);
    }
}
|
||||
|
||||
// A single vnode holding exactly the limit of streams must keep all of them.
BOOST_AUTO_TEST_CASE(test_cdc_generation_limitting_single_vnode_should_not_limit) {
    const size_t limit = cdc::limit_of_streams_in_topology_description();

    cdc::topology_description result = cdc::limit_number_of_streams_if_needed(create_description({limit}));

    assert_streams_count(result, {limit});
    assert_stream_ids_in_right_token_ranges(result);
    assert_random_tokens_mapped_to_streams_with_tokens_in_the_same_token_range(result);
}
|
||||
|
||||
// A single vnode holding one stream more than the limit must come back with
// exactly the limit of streams.
BOOST_AUTO_TEST_CASE(test_cdc_generation_limitting_single_vnode_should_limit) {
    const size_t limit = cdc::limit_of_streams_in_topology_description();

    cdc::topology_description result = cdc::limit_number_of_streams_if_needed(create_description({limit + 1}));

    assert_streams_count(result, {limit});
    assert_stream_ids_in_right_token_ranges(result);
    assert_random_tokens_mapped_to_streams_with_tokens_in_the_same_token_range(result);
}
|
||||
|
||||
// Vnodes holding 1, 2, 3, ... streams, for as long as the running total stays
// within the limit, must come back with their stream counts unchanged.
BOOST_AUTO_TEST_CASE(test_cdc_generation_limitting_multiple_vnodes_should_not_limit) {
    const size_t limit = cdc::limit_of_streams_in_topology_description();

    std::vector<size_t> streams_count_per_vnode;
    size_t total = 0;
    for (size_t next = 1; total + next <= limit; ++next) {
        streams_count_per_vnode.push_back(next);
        total += next;
    }

    cdc::topology_description result = cdc::limit_number_of_streams_if_needed(create_description(streams_count_per_vnode));

    assert_streams_count(result, streams_count_per_vnode);
    assert_stream_ids_in_right_token_ranges(result);
    assert_random_tokens_mapped_to_streams_with_tokens_in_the_same_token_range(result);
}
|
||||
|
||||
// Vnodes holding 1, 2, 3, ... streams up to the limit, plus one extra vnode
// sized so that the grand total exceeds the limit by exactly one stream.
// The expected outcome is that every vnode is capped at
// limit / number_of_vnodes streams, while vnodes already at or below that cap
// keep their count.
BOOST_AUTO_TEST_CASE(test_cdc_generation_limitting_multiple_vnodes_should_limit) {
    size_t total = 0;
    std::vector<size_t> streams_count_per_vnode;
    size_t count_for_next_vnode = 1;
    while (total + count_for_next_vnode <= cdc::limit_of_streams_in_topology_description()) {
        streams_count_per_vnode.push_back(count_for_next_vnode);
        total += count_for_next_vnode;
        ++count_for_next_vnode;
    }
    // One more vnode pushes the total to limit + 1.
    streams_count_per_vnode.push_back(cdc::limit_of_streams_in_topology_description() - total + 1);
    cdc::topology_description given = create_description(streams_count_per_vnode);

    cdc::topology_description result = cdc::limit_number_of_streams_if_needed(std::move(given));

    // The per-vnode cap computed below is only non-zero when there are no
    // more vnodes than the limit. Check this through the test framework
    // rather than with assert(), so the check cannot be compiled out and a
    // failure is reported as a test failure instead of aborting the process.
    BOOST_REQUIRE_LE(streams_count_per_vnode.size(), cdc::limit_of_streams_in_topology_description());
    size_t per_vnode_limit = cdc::limit_of_streams_in_topology_description() / streams_count_per_vnode.size();
    for (auto& count : streams_count_per_vnode) {
        count = std::min(count, per_vnode_limit);
    }

    assert_streams_count(result, streams_count_per_vnode);
    assert_stream_ids_in_right_token_ranges(result);
    assert_random_tokens_mapped_to_streams_with_tokens_in_the_same_token_range(result);
}
|
||||
|
||||
@@ -42,16 +42,6 @@
|
||||
|
||||
using namespace std::string_literals;
|
||||
|
||||
// Builds a cql_test_config whose db::config has the CDC schema extension
// registered and db::experimental_features_t::CDC appended to its
// experimental features.
static cql_test_config mk_cdc_test_config() {
    auto extensions = std::make_shared<db::extensions>();
    extensions->add_schema_extension<cdc::cdc_extension>(cdc::cdc_extension::NAME);

    auto config = ::make_shared<db::config>(std::move(extensions));
    auto enabled = config->experimental_features();
    enabled.emplace_back(db::experimental_features_t::CDC);
    config->experimental_features(enabled);

    return cql_test_config(std::move(config));
};
|
||||
|
||||
namespace cdc {
|
||||
api::timestamp_type find_timestamp(const mutation&);
|
||||
utils::UUID generate_timeuuid(api::timestamp_type);
|
||||
@@ -131,7 +121,7 @@ SEASTAR_THREAD_TEST_CASE(test_find_mutation_timestamp) {
|
||||
check_stmt("DELETE vut.b FROM t WHERE pk = 0 AND ck = 0");
|
||||
check_stmt("DELETE vfut FROM t WHERE pk = 0 AND ck = 0");
|
||||
check_stmt("DELETE vstatic FROM t WHERE pk = 0");
|
||||
}, mk_cdc_test_config()).get();
|
||||
}).get();
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_generate_timeuuid) {
|
||||
@@ -199,7 +189,7 @@ SEASTAR_THREAD_TEST_CASE(test_with_cdc_parameter) {
|
||||
test("WITH cdc = {'enabled':'false'}", "{'enabled':'true'}", "{'enabled':'false'}", {false}, {true}, {false});
|
||||
test("", "{'enabled':'true','preimage':'true','postimage':'true','ttl':'1'}", "{'enabled':'false'}", {false}, {true, true, true, 1}, {false});
|
||||
test("WITH cdc = {'enabled':'true','preimage':'true','postimage':'true','ttl':'1'}", "{'enabled':'false'}", "{'enabled':'true','preimage':'false','postimage':'true','ttl':'2'}", {true, true, true, 1}, {false}, {true, false, true, 2});
|
||||
}, mk_cdc_test_config()).get();
|
||||
}).get();
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_detecting_conflict_of_cdc_log_table_with_existing_table) {
|
||||
@@ -213,7 +203,7 @@ SEASTAR_THREAD_TEST_CASE(test_detecting_conflict_of_cdc_log_table_with_existing_
|
||||
e.execute_cql("CREATE TABLE ks.tbl (a int PRIMARY KEY)").get();
|
||||
e.require_table_exists("ks", "tbl").get();
|
||||
BOOST_REQUIRE_THROW(e.execute_cql("ALTER TABLE ks.tbl WITH cdc = {'enabled': true}").get(), exceptions::invalid_request_exception);
|
||||
}, mk_cdc_test_config()).get();
|
||||
}).get();
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_permissions_of_cdc_log_table) {
|
||||
@@ -247,7 +237,7 @@ SEASTAR_THREAD_TEST_CASE(test_permissions_of_cdc_log_table) {
|
||||
|
||||
// Disallow DROP
|
||||
assert_unauthorized("DROP TABLE " + log_table);
|
||||
}, mk_cdc_test_config()).get();
|
||||
}).get();
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_disallow_cdc_on_materialized_view) {
|
||||
@@ -257,7 +247,7 @@ SEASTAR_THREAD_TEST_CASE(test_disallow_cdc_on_materialized_view) {
|
||||
|
||||
BOOST_REQUIRE_THROW(e.execute_cql("CREATE MATERIALIZED VIEW ks.mv AS SELECT a FROM ks.tbl PRIMARY KEY (a) WITH cdc = {'enabled': true}").get(), exceptions::invalid_request_exception);
|
||||
e.require_table_does_not_exist("ks", "mv").get();
|
||||
}, mk_cdc_test_config()).get();
|
||||
}).get();
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_permissions_of_cdc_description) {
|
||||
@@ -285,7 +275,7 @@ SEASTAR_THREAD_TEST_CASE(test_permissions_of_cdc_description) {
|
||||
|
||||
test_table("cdc_streams_descriptions");
|
||||
test_table("cdc_generation_descriptions");
|
||||
}, mk_cdc_test_config()).get();
|
||||
}).get();
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_cdc_log_schema) {
|
||||
@@ -326,6 +316,7 @@ SEASTAR_THREAD_TEST_CASE(test_cdc_log_schema) {
|
||||
// cdc log clustering key
|
||||
assert_has_column(cdc::log_meta_column_name("operation"), byte_type);
|
||||
assert_has_column(cdc::log_meta_column_name("ttl"), long_type);
|
||||
assert_has_column(cdc::log_meta_column_name("end_of_batch"), boolean_type);
|
||||
|
||||
// pk
|
||||
assert_has_column(cdc::log_data_column_name("pk"), int32_type);
|
||||
@@ -370,7 +361,7 @@ SEASTAR_THREAD_TEST_CASE(test_cdc_log_schema) {
|
||||
|
||||
// Check if we missed something
|
||||
BOOST_REQUIRE_EQUAL(required_column_count, log_schema->all_columns_count());
|
||||
}, mk_cdc_test_config()).get();
|
||||
}).get();
|
||||
}
|
||||
|
||||
static std::vector<std::vector<bytes_opt>> to_bytes(const cql_transport::messages::result_message::rows& rows) {
|
||||
@@ -512,7 +503,7 @@ SEASTAR_THREAD_TEST_CASE(test_primary_key_logging) {
|
||||
// DELETE FROM ks.tbl WHERE pk = 1 AND pk2 = 11
|
||||
assert_row(1, 11);
|
||||
BOOST_REQUIRE(actual_i == actual_end);
|
||||
}, mk_cdc_test_config()).get();
|
||||
}).get();
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_pre_post_image_logging) {
|
||||
@@ -534,6 +525,7 @@ SEASTAR_THREAD_TEST_CASE(test_pre_post_image_logging) {
|
||||
auto val_index = column_index(*rows, cdc::log_data_column_name("val"));
|
||||
auto val2_index = column_index(*rows, cdc::log_data_column_name("val2"));
|
||||
auto ttl_index = column_index(*rows, cdc::log_meta_column_name("ttl"));
|
||||
auto eor_index = column_index(*rows, cdc::log_meta_column_name("end_of_batch"));
|
||||
|
||||
auto val_type = int32_type;
|
||||
auto val = *first[0][val_index];
|
||||
@@ -567,7 +559,7 @@ SEASTAR_THREAD_TEST_CASE(test_pre_post_image_logging) {
|
||||
BOOST_REQUIRE_EQUAL(pre_image.size(), i + 1);
|
||||
|
||||
val = *pre_image.back()[val_index];
|
||||
// note: no val2 in pre-image, because we are not modifying it.
|
||||
// note: no val2 in pre-image, because we are not modifying it.
|
||||
BOOST_REQUIRE_EQUAL(int32_type->decompose(1111), *pre_image.back()[ck2_index]);
|
||||
BOOST_REQUIRE_EQUAL(data_value(last), val_type->deserialize(bytes_view(val)));
|
||||
BOOST_REQUIRE_EQUAL(bytes_opt(), pre_image.back()[ttl_index]);
|
||||
@@ -583,10 +575,12 @@ SEASTAR_THREAD_TEST_CASE(test_pre_post_image_logging) {
|
||||
if (post_enabled) {
|
||||
val = *post_image.back()[val_index];
|
||||
val2 = *post_image.back()[val2_index];
|
||||
auto eor = *post_image.back()[eor_index];
|
||||
|
||||
BOOST_REQUIRE_EQUAL(int32_type->decompose(1111), *post_image.back()[ck2_index]);
|
||||
BOOST_REQUIRE_EQUAL(data_value(nv), val_type->deserialize(bytes_view(val)));
|
||||
BOOST_REQUIRE_EQUAL(data_value(22222), val_type->deserialize(bytes_view(val2)));
|
||||
BOOST_REQUIRE_EQUAL(data_value(true), boolean_type->deserialize(bytes_view(eor)));
|
||||
}
|
||||
|
||||
const auto& ttl_cell = second[second.size() - 2][ttl_index];
|
||||
@@ -608,7 +602,7 @@ SEASTAR_THREAD_TEST_CASE(test_pre_post_image_logging) {
|
||||
}
|
||||
}
|
||||
}
|
||||
}, mk_cdc_test_config()).get();
|
||||
}).get();
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_pre_post_image_logging_static_row) {
|
||||
@@ -682,7 +676,7 @@ SEASTAR_THREAD_TEST_CASE(test_pre_post_image_logging_static_row) {
|
||||
test(true, false);
|
||||
test(false, true);
|
||||
test(false, false);
|
||||
}, mk_cdc_test_config()).get();
|
||||
}).get();
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_range_deletion) {
|
||||
@@ -691,7 +685,7 @@ SEASTAR_THREAD_TEST_CASE(test_range_deletion) {
|
||||
cquery_nofail(e, "DELETE FROM ks.tbl WHERE pk = 123 AND ck > 1 AND ck < 23");
|
||||
cquery_nofail(e, "DELETE FROM ks.tbl WHERE pk = 123 AND ck >= 4 AND ck <= 56");
|
||||
|
||||
auto msg = e.execute_cql(format("SELECT \"{}\", \"{}\", \"{}\", \"{}\" FROM ks.{}",
|
||||
auto msg = e.execute_cql(format("SELECT \"{}\", \"{}\", \"{}\", \"{}\" FROM ks.{}",
|
||||
cdc::log_meta_column_name("time"),
|
||||
cdc::log_data_column_name("pk"),
|
||||
cdc::log_data_column_name("ck"),
|
||||
@@ -726,7 +720,7 @@ SEASTAR_THREAD_TEST_CASE(test_range_deletion) {
|
||||
// ck >= 4 AND ck <= 56
|
||||
check_row(4, cdc::operation::range_delete_start_inclusive);
|
||||
check_row(56, cdc::operation::range_delete_end_inclusive);
|
||||
}, mk_cdc_test_config()).get();
|
||||
}).get();
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_add_columns) {
|
||||
@@ -750,11 +744,11 @@ SEASTAR_THREAD_TEST_CASE(test_add_columns) {
|
||||
auto kokos = *inserts.back()[kokos_index];
|
||||
|
||||
BOOST_REQUIRE_EQUAL(data_value("kaka"), kokos_type->deserialize(bytes_view(kokos)));
|
||||
}, mk_cdc_test_config()).get();
|
||||
}).get();
|
||||
}
|
||||
|
||||
// #5582 - just quickly test that we can create the cdc enabled table on a different shard
|
||||
// and still get the logs proper.
|
||||
// #5582 - just quickly test that we can create the cdc enabled table on a different shard
|
||||
// and still get the logs proper.
|
||||
SEASTAR_THREAD_TEST_CASE(test_cdc_across_shards) {
|
||||
do_with_cql_env_thread([](cql_test_env& e) {
|
||||
if (smp::count < 2) {
|
||||
@@ -772,7 +766,7 @@ SEASTAR_THREAD_TEST_CASE(test_cdc_across_shards) {
|
||||
auto rows = select_log(e, "tbl");
|
||||
|
||||
BOOST_REQUIRE(!to_bytes_filtered(*rows, cdc::operation::insert).empty());
|
||||
}, mk_cdc_test_config()).get();
|
||||
}).get();
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_negative_ttl_fail) {
|
||||
@@ -780,7 +774,7 @@ SEASTAR_THREAD_TEST_CASE(test_negative_ttl_fail) {
|
||||
BOOST_REQUIRE_EXCEPTION(e.execute_cql("CREATE TABLE ks.fail (a int PRIMARY KEY, b int) WITH cdc = {'enabled':true,'ttl':'-1'}").get0(),
|
||||
exceptions::configuration_exception,
|
||||
exception_predicate::message_contains("ttl"));
|
||||
}, mk_cdc_test_config()).get();
|
||||
}).get();
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_ttls) {
|
||||
@@ -830,11 +824,11 @@ SEASTAR_THREAD_TEST_CASE(test_ttls) {
|
||||
auto cell_ttl_seconds = value_cast<int32_t>(cell_ttl);
|
||||
// 30% tolerance in case of slow execution (a little flaky...)
|
||||
BOOST_REQUIRE_CLOSE((float)cell_ttl_seconds, (float)ttl_seconds, 30.f);
|
||||
}
|
||||
}
|
||||
};
|
||||
test_ttl(0);
|
||||
test_ttl(10);
|
||||
}, mk_cdc_test_config()).get();
|
||||
}).get();
|
||||
}
|
||||
|
||||
// helper funcs + structs for collection testing
|
||||
@@ -851,13 +845,13 @@ struct col_test {
|
||||
data_value post = data_value::make_null(int32_type); // whatever
|
||||
};
|
||||
|
||||
// iterate a set of updates and verify pre and delta values.
|
||||
// iterate a set of updates and verify pre and delta values.
|
||||
static void test_collection(cql_test_env& e, data_type val_type, data_type del_type, std::vector<col_test> tests, translate_func f = [](data_value v) { return v; }) {
|
||||
auto col_type = val_type;
|
||||
|
||||
for (auto& t : tests) {
|
||||
cquery_nofail(e, t.update);
|
||||
|
||||
|
||||
auto rows = select_log(e, "tbl");
|
||||
auto pre_image = to_bytes_filtered(*rows, cdc::operation::pre_image);
|
||||
auto updates = to_bytes_filtered(*rows, cdc::operation::update);
|
||||
@@ -918,7 +912,7 @@ SEASTAR_THREAD_TEST_CASE(test_map_logging) {
|
||||
auto map_keys_type = set_type_impl::get_instance(utf8_type, false);
|
||||
|
||||
test_collection(e, map_type, map_keys_type, {
|
||||
{
|
||||
{
|
||||
"UPDATE ks.tbl set val = { 'apa':'ko' } where pk=1 and pk2=11 and ck=111",
|
||||
data_value::make_null(map_type), // no previous value
|
||||
{
|
||||
@@ -930,7 +924,7 @@ SEASTAR_THREAD_TEST_CASE(test_map_logging) {
|
||||
},
|
||||
::make_map_value(map_type, { { "apa", "ko" } })
|
||||
},
|
||||
{
|
||||
{
|
||||
"UPDATE ks.tbl set val = val + { 'ninja':'mission' } where pk=1 and pk2=11 and ck=111",
|
||||
::make_map_value(map_type, { { "apa", "ko" } }),
|
||||
{
|
||||
@@ -941,9 +935,9 @@ SEASTAR_THREAD_TEST_CASE(test_map_logging) {
|
||||
},
|
||||
::make_map_value(map_type, { { "apa", "ko" }, { "ninja", "mission" } })
|
||||
},
|
||||
{
|
||||
{
|
||||
"UPDATE ks.tbl set val['ninja'] = 'shuriken' where pk=1 and pk2=11 and ck=111",
|
||||
::make_map_value(map_type, { { "apa", "ko" }, { "ninja", "mission" } }),
|
||||
::make_map_value(map_type, { { "apa", "ko" }, { "ninja", "mission" } }),
|
||||
{
|
||||
{
|
||||
::make_map_value(map_type, { { "ninja", "shuriken" } }),
|
||||
@@ -952,9 +946,9 @@ SEASTAR_THREAD_TEST_CASE(test_map_logging) {
|
||||
},
|
||||
::make_map_value(map_type, { { "apa", "ko" }, { "ninja", "shuriken" } })
|
||||
},
|
||||
{
|
||||
{
|
||||
"UPDATE ks.tbl set val['apa'] = null where pk=1 and pk2=11 and ck=111",
|
||||
::make_map_value(map_type, { { "apa", "ko" }, { "ninja", "shuriken" } }),
|
||||
::make_map_value(map_type, { { "apa", "ko" }, { "ninja", "shuriken" } }),
|
||||
{
|
||||
{
|
||||
data_value::make_null(map_type),
|
||||
@@ -963,9 +957,9 @@ SEASTAR_THREAD_TEST_CASE(test_map_logging) {
|
||||
},
|
||||
::make_map_value(map_type, { { "ninja", "shuriken" } })
|
||||
},
|
||||
{
|
||||
{
|
||||
"UPDATE ks.tbl set val['ninja'] = null, val['ola'] = 'kokos' where pk=1 and pk2=11 and ck=111",
|
||||
::make_map_value(map_type, { { "ninja", "shuriken" } }),
|
||||
::make_map_value(map_type, { { "ninja", "shuriken" } }),
|
||||
{
|
||||
{
|
||||
::make_map_value(map_type, { { "ola", "kokos" } }),
|
||||
@@ -974,9 +968,9 @@ SEASTAR_THREAD_TEST_CASE(test_map_logging) {
|
||||
},
|
||||
::make_map_value(map_type, { { "ola", "kokos" } })
|
||||
},
|
||||
{
|
||||
{
|
||||
"UPDATE ks.tbl set val = { 'bolla':'trolla', 'kork':'skruv' } where pk=1 and pk2=11 and ck=111",
|
||||
::make_map_value(map_type, { { "ola", "kokos" } }),
|
||||
::make_map_value(map_type, { { "ola", "kokos" } }),
|
||||
{
|
||||
{
|
||||
::make_map_value(map_type, { { "bolla", "trolla" }, { "kork", "skruv" } }),
|
||||
@@ -988,7 +982,7 @@ SEASTAR_THREAD_TEST_CASE(test_map_logging) {
|
||||
}
|
||||
|
||||
});
|
||||
}, mk_cdc_test_config()).get();
|
||||
}).get();
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_set_logging) {
|
||||
@@ -999,7 +993,7 @@ SEASTAR_THREAD_TEST_CASE(test_set_logging) {
|
||||
});
|
||||
|
||||
auto set_type = set_type_impl::get_instance(utf8_type, false);
|
||||
|
||||
|
||||
test_collection(e, set_type, set_type, {
|
||||
{
|
||||
"UPDATE ks.tbl set val = { 'apa', 'ko' } where pk=1 and pk2=11 and ck=111",
|
||||
@@ -1026,7 +1020,7 @@ SEASTAR_THREAD_TEST_CASE(test_set_logging) {
|
||||
},
|
||||
{
|
||||
"UPDATE ks.tbl set val = val - { 'apa' } where pk=1 and pk2=11 and ck=111",
|
||||
::make_set_value(set_type, { "apa", "ko", "mission", "ninja" }),
|
||||
::make_set_value(set_type, { "apa", "ko", "mission", "ninja" }),
|
||||
{
|
||||
{
|
||||
data_value::make_null(set_type),
|
||||
@@ -1037,7 +1031,7 @@ SEASTAR_THREAD_TEST_CASE(test_set_logging) {
|
||||
},
|
||||
{
|
||||
"UPDATE ks.tbl set val = val - { 'mission' }, val = val + { 'nils' } where pk=1 and pk2=11 and ck=111",
|
||||
::make_set_value(set_type, { "ko", "mission", "ninja" }),
|
||||
::make_set_value(set_type, { "ko", "mission", "ninja" }),
|
||||
{
|
||||
{
|
||||
::make_set_value(set_type, { "nils" }),
|
||||
@@ -1059,7 +1053,7 @@ SEASTAR_THREAD_TEST_CASE(test_set_logging) {
|
||||
::make_set_value(set_type, { "bolla", "trolla" })
|
||||
}
|
||||
});
|
||||
}, mk_cdc_test_config()).get();
|
||||
}).get();
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_list_logging) {
|
||||
@@ -1072,11 +1066,11 @@ SEASTAR_THREAD_TEST_CASE(test_list_logging) {
|
||||
auto list_type = list_type_impl::get_instance(utf8_type, false);
|
||||
auto uuids_type = set_type_impl::get_instance(timeuuid_type, false);
|
||||
auto val_type = map_type_impl::get_instance(list_type->name_comparator(), list_type->value_comparator(), false);
|
||||
|
||||
|
||||
test_collection(e, val_type, uuids_type, {
|
||||
{
|
||||
"UPDATE ks.tbl set val = [ 'apa', 'ko' ] where pk=1 and pk2=11 and ck=111",
|
||||
data_value::make_null(list_type),
|
||||
data_value::make_null(list_type),
|
||||
{
|
||||
{
|
||||
::make_list_value(list_type, { "apa", "ko" }),
|
||||
@@ -1121,7 +1115,7 @@ SEASTAR_THREAD_TEST_CASE(test_list_logging) {
|
||||
},
|
||||
{
|
||||
"UPDATE ks.tbl set val[0] = 'babar' where pk=1 and pk2=11 and ck=111",
|
||||
::make_list_value(list_type, { "apa", "ko", "ninja", "mission" }),
|
||||
::make_list_value(list_type, { "apa", "ko", "ninja", "mission" }),
|
||||
{
|
||||
{
|
||||
::make_list_value(list_type, { "babar" }),
|
||||
@@ -1151,7 +1145,7 @@ SEASTAR_THREAD_TEST_CASE(test_list_logging) {
|
||||
}
|
||||
return ::make_list_value(list_type, std::move(cpy));
|
||||
});
|
||||
}, mk_cdc_test_config()).get();
|
||||
}).get();
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_udt_logging) {
|
||||
@@ -1163,7 +1157,7 @@ SEASTAR_THREAD_TEST_CASE(test_udt_logging) {
|
||||
e.execute_cql("DROP TYPE ks.mytype").get();
|
||||
});
|
||||
|
||||
auto udt_type = user_type_impl::get_instance("ks", to_bytes("mytype"),
|
||||
auto udt_type = user_type_impl::get_instance("ks", to_bytes("mytype"),
|
||||
{ to_bytes("field0"), to_bytes("field1") },
|
||||
{ int32_type, utf8_type },
|
||||
false
|
||||
@@ -1171,18 +1165,18 @@ SEASTAR_THREAD_TEST_CASE(test_udt_logging) {
|
||||
auto index_set_type = set_type_impl::get_instance(short_type, false);
|
||||
auto f0_type = int32_type;
|
||||
auto f1_type = utf8_type;
|
||||
|
||||
|
||||
auto make_tuple = [&](std::optional<std::optional<int32_t>> i, std::optional<std::optional<sstring>> s) {
|
||||
return ::make_user_value(udt_type, {
|
||||
i ? ::data_value(*i) : data_value::make_null(f0_type),
|
||||
s ? ::data_value(*s) : data_value::make_null(f1_type),
|
||||
});
|
||||
};
|
||||
|
||||
|
||||
test_collection(e, udt_type, index_set_type, {
|
||||
{
|
||||
"UPDATE ks.tbl set val = { field0: 12, field1: 'ko' } where pk=1 and pk2=11 and ck=111",
|
||||
data_value::make_null(udt_type),
|
||||
data_value::make_null(udt_type),
|
||||
{
|
||||
{
|
||||
make_tuple(12, "ko"),
|
||||
@@ -1238,7 +1232,7 @@ SEASTAR_THREAD_TEST_CASE(test_udt_logging) {
|
||||
make_tuple(1, "bolla")
|
||||
},
|
||||
});
|
||||
}, mk_cdc_test_config()).get();
|
||||
}).get();
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_frozen_logging) {
|
||||
@@ -1289,7 +1283,7 @@ SEASTAR_THREAD_TEST_CASE(test_frozen_logging) {
|
||||
test_frozen("frozen<set<text>>", "{'a', 'bb', 'ccc'}");
|
||||
test_frozen("frozen<map<text, text>>", "{'a': 'bb', 'ccc': 'dddd'}");
|
||||
test_frozen("frozen<udt>", "{a: 'bb', ccc: 'dddd'}");
|
||||
}, mk_cdc_test_config()).get();
|
||||
}).get();
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_update_insert_delete_distinction) {
|
||||
@@ -1321,7 +1315,32 @@ SEASTAR_THREAD_TEST_CASE(test_update_insert_delete_distinction) {
|
||||
|
||||
BOOST_REQUIRE_EQUAL(results[3].size(), 1);
|
||||
BOOST_REQUIRE_EQUAL(*results[3].front(), data_value(static_cast<int8_t>(cdc::operation::row_delete)).serialize_nonnull()); // log entry from (3)
|
||||
}, mk_cdc_test_config()).get();
|
||||
}).get();
|
||||
}
|
||||
|
||||
// Executes `query` and returns its rows, decoding the first col_types.size()
// cells of every row with the corresponding type from col_types. A missing
// cell becomes a null data_value of that type. Requires that the result is a
// rows message and that every row has at least col_types.size() cells.
static std::vector<std::vector<data_value>> get_result(cql_test_env& e,
        const std::vector<data_type>& col_types, const sstring& query) {
    auto response = e.execute_cql(query).get0();
    auto rows = dynamic_pointer_cast<cql_transport::messages::result_message::rows>(response);
    BOOST_REQUIRE(rows);

    std::vector<std::vector<data_value>> decoded_rows;
    for (auto&& raw_row : to_bytes(*rows)) {
        BOOST_REQUIRE_LE(col_types.size(), raw_row.size());
        std::vector<data_value> decoded;
        for (size_t col = 0; col < col_types.size(); ++col) {
            const data_type& type = col_types[col];
            const bytes_opt& cell = raw_row[col];
            if (cell) {
                decoded.push_back(type->deserialize(*cell));
            } else {
                decoded.push_back(data_value::make_null(type));
            }
        }
        decoded_rows.push_back(std::move(decoded));
    }
    return decoded_rows;
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_change_splitting) {
|
||||
@@ -1346,28 +1365,8 @@ SEASTAR_THREAD_TEST_CASE(test_change_splitting) {
|
||||
return make_set_value(keys_type, std::move(s));
|
||||
};
|
||||
|
||||
auto deser = [] (const data_type& t, const bytes_opt& b) -> data_value {
|
||||
if (!b) {
|
||||
return data_value::make_null(t);
|
||||
}
|
||||
return t->deserialize(*b);
|
||||
};
|
||||
|
||||
auto get_result = [&] (const std::vector<data_type>& col_types, const sstring& s) -> std::vector<std::vector<data_value>> {
|
||||
auto msg = e.execute_cql(s).get0();
|
||||
auto rows = dynamic_pointer_cast<cql_transport::messages::result_message::rows>(msg);
|
||||
BOOST_REQUIRE(rows);
|
||||
|
||||
std::vector<std::vector<data_value>> res;
|
||||
for (auto&& r: to_bytes(*rows)) {
|
||||
BOOST_REQUIRE_LE(col_types.size(), r.size());
|
||||
std::vector<data_value> res_r;
|
||||
for (size_t i = 0; i < col_types.size(); ++i) {
|
||||
res_r.push_back(deser(col_types[i], r[i]));
|
||||
}
|
||||
res.push_back(std::move(res_r));
|
||||
}
|
||||
return res;
|
||||
return ::get_result(e, col_types, s);
|
||||
};
|
||||
|
||||
cquery_nofail(e, "create table ks.t (pk int, ck int, s int static, v1 int, v2 int, m map<int, int>, primary key (pk, ck)) with cdc = {'enabled':true}");
|
||||
@@ -1566,7 +1565,7 @@ SEASTAR_THREAD_TEST_CASE(test_change_splitting) {
|
||||
};
|
||||
BOOST_REQUIRE_EQUAL(expected, result);
|
||||
}
|
||||
}, mk_cdc_test_config()).get();
|
||||
}).get();
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_batch_with_row_delete) {
|
||||
@@ -1630,7 +1629,7 @@ SEASTAR_THREAD_TEST_CASE(test_batch_with_row_delete) {
|
||||
BOOST_REQUIRE_EQUAL(deser(s_type, r[3]), er[3]);
|
||||
BOOST_REQUIRE_EQUAL(deser(oper_type, r[4]), er[4]);
|
||||
}
|
||||
}, mk_cdc_test_config()).get();
|
||||
}).get();
|
||||
}
|
||||
|
||||
struct image_set {
|
||||
@@ -1939,7 +1938,7 @@ void test_batch_images(bool preimage, bool postimage) {
|
||||
}
|
||||
}
|
||||
}, preimage, postimage);
|
||||
}, mk_cdc_test_config()).get();
|
||||
}).get();
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_batch_pre_image) {
|
||||
@@ -1953,3 +1952,24 @@ SEASTAR_THREAD_TEST_CASE(test_batch_post_image) {
|
||||
SEASTAR_THREAD_TEST_CASE(test_batch_pre_post_image) {
|
||||
test_batch_images(true, true);
|
||||
}
|
||||
|
||||
// Regression test for #7716
|
||||
SEASTAR_THREAD_TEST_CASE(test_postimage_with_no_regular_columns) {
|
||||
do_with_cql_env_thread([] (cql_test_env& e) {
|
||||
using oper_ut = std::underlying_type_t<cdc::operation>;
|
||||
|
||||
cquery_nofail(e, "create table ks.t (pk int, ck int, primary key (pk, ck)) with cdc = {'enabled': true, 'postimage': true}");
|
||||
cquery_nofail(e, "insert into ks.t (pk, ck) values (1, 2)");
|
||||
|
||||
auto result = get_result(e,
|
||||
{data_type_for<oper_ut>(), int32_type, int32_type},
|
||||
"select \"cdc$operation\", pk, ck from ks.t_scylla_cdc_log");
|
||||
|
||||
std::vector<std::vector<data_value>> expected = {
|
||||
{ oper_ut(cdc::operation::insert), int32_t(1), int32_t(2) },
|
||||
{ oper_ut(cdc::operation::post_image), int32_t(1), int32_t(2) },
|
||||
};
|
||||
|
||||
BOOST_REQUIRE_EQUAL(expected, result);
|
||||
}).get();
|
||||
}
|
||||
|
||||
@@ -931,10 +931,11 @@ SEASTAR_TEST_CASE(test_parse_experimental_features_cdc) {
|
||||
auto cfg_ptr = std::make_unique<config>();
|
||||
config& cfg = *cfg_ptr;
|
||||
cfg.read_from_yaml("experimental_features:\n - cdc\n", throw_on_error);
|
||||
BOOST_CHECK_EQUAL(cfg.experimental_features(), features{ef::CDC});
|
||||
BOOST_CHECK(cfg.check_experimental(ef::CDC));
|
||||
BOOST_CHECK_EQUAL(cfg.experimental_features(), features{ef::UNUSED_CDC});
|
||||
BOOST_CHECK(cfg.check_experimental(ef::UNUSED_CDC));
|
||||
BOOST_CHECK(!cfg.check_experimental(ef::UNUSED));
|
||||
BOOST_CHECK(!cfg.check_experimental(ef::UDF));
|
||||
BOOST_CHECK(!cfg.check_experimental(ef::ALTERNATOR_STREAMS));
|
||||
return make_ready_future();
|
||||
}
|
||||
|
||||
@@ -943,9 +944,10 @@ SEASTAR_TEST_CASE(test_parse_experimental_features_unused) {
|
||||
config& cfg = *cfg_ptr;
|
||||
cfg.read_from_yaml("experimental_features:\n - lwt\n", throw_on_error);
|
||||
BOOST_CHECK_EQUAL(cfg.experimental_features(), features{ef::UNUSED});
|
||||
BOOST_CHECK(!cfg.check_experimental(ef::CDC));
|
||||
BOOST_CHECK(!cfg.check_experimental(ef::UNUSED_CDC));
|
||||
BOOST_CHECK(cfg.check_experimental(ef::UNUSED));
|
||||
BOOST_CHECK(!cfg.check_experimental(ef::UDF));
|
||||
BOOST_CHECK(!cfg.check_experimental(ef::ALTERNATOR_STREAMS));
|
||||
return make_ready_future();
|
||||
}
|
||||
|
||||
@@ -954,9 +956,22 @@ SEASTAR_TEST_CASE(test_parse_experimental_features_udf) {
|
||||
config& cfg = *cfg_ptr;
|
||||
cfg.read_from_yaml("experimental_features:\n - udf\n", throw_on_error);
|
||||
BOOST_CHECK_EQUAL(cfg.experimental_features(), features{ef::UDF});
|
||||
BOOST_CHECK(!cfg.check_experimental(ef::CDC));
|
||||
BOOST_CHECK(!cfg.check_experimental(ef::UNUSED_CDC));
|
||||
BOOST_CHECK(!cfg.check_experimental(ef::UNUSED));
|
||||
BOOST_CHECK(cfg.check_experimental(ef::UDF));
|
||||
BOOST_CHECK(!cfg.check_experimental(ef::ALTERNATOR_STREAMS));
|
||||
return make_ready_future();
|
||||
}
|
||||
|
||||
// Parses a YAML config that lists only `alternator-streams` under
// `experimental_features` and checks that ALTERNATOR_STREAMS is the sole
// feature reported as enabled.
SEASTAR_TEST_CASE(test_parse_experimental_features_alternator_streams) {
    // The config object is kept on the heap, as in the sibling tests.
    const auto cfg_holder = std::make_unique<config>();
    config& cfg = *cfg_holder;

    cfg.read_from_yaml("experimental_features:\n - alternator-streams\n", throw_on_error);

    BOOST_CHECK_EQUAL(cfg.experimental_features(), features{ef::ALTERNATOR_STREAMS});

    // Everything except the requested feature must stay disabled.
    BOOST_CHECK(!cfg.check_experimental(ef::UNUSED_CDC));
    BOOST_CHECK(!cfg.check_experimental(ef::UNUSED));
    BOOST_CHECK(!cfg.check_experimental(ef::UDF));
    BOOST_CHECK(cfg.check_experimental(ef::ALTERNATOR_STREAMS));

    return make_ready_future();
}
|
||||
|
||||
@@ -964,10 +979,11 @@ SEASTAR_TEST_CASE(test_parse_experimental_features_multiple) {
|
||||
auto cfg_ptr = std::make_unique<config>();
|
||||
config& cfg = *cfg_ptr;
|
||||
cfg.read_from_yaml("experimental_features:\n - cdc\n - lwt\n - cdc\n", throw_on_error);
|
||||
BOOST_CHECK_EQUAL(cfg.experimental_features(), (features{ef::CDC, ef::UNUSED, ef::CDC}));
|
||||
BOOST_CHECK(cfg.check_experimental(ef::CDC));
|
||||
BOOST_CHECK_EQUAL(cfg.experimental_features(), (features{ef::UNUSED_CDC, ef::UNUSED, ef::UNUSED_CDC}));
|
||||
BOOST_CHECK(cfg.check_experimental(ef::UNUSED_CDC));
|
||||
BOOST_CHECK(cfg.check_experimental(ef::UNUSED));
|
||||
BOOST_CHECK(!cfg.check_experimental(ef::UDF));
|
||||
BOOST_CHECK(!cfg.check_experimental(ef::ALTERNATOR_STREAMS));
|
||||
return make_ready_future();
|
||||
}
|
||||
|
||||
@@ -979,9 +995,10 @@ SEASTAR_TEST_CASE(test_parse_experimental_features_invalid) {
|
||||
[&cfg] (const sstring& opt, const sstring& msg, std::optional<value_status> status) {
|
||||
BOOST_REQUIRE_EQUAL(opt, "experimental_features");
|
||||
BOOST_REQUIRE_NE(msg.find("line 2, column 7"), msg.npos);
|
||||
BOOST_CHECK(!cfg.check_experimental(ef::CDC));
|
||||
BOOST_CHECK(!cfg.check_experimental(ef::UNUSED_CDC));
|
||||
BOOST_CHECK(!cfg.check_experimental(ef::UNUSED));
|
||||
BOOST_CHECK(!cfg.check_experimental(ef::UDF));
|
||||
BOOST_CHECK(!cfg.check_experimental(ef::ALTERNATOR_STREAMS));
|
||||
});
|
||||
return make_ready_future();
|
||||
}
|
||||
@@ -990,9 +1007,10 @@ SEASTAR_TEST_CASE(test_parse_experimental_true) {
|
||||
auto cfg_ptr = std::make_unique<config>();
|
||||
config& cfg = *cfg_ptr;
|
||||
cfg.read_from_yaml("experimental: true", throw_on_error);
|
||||
BOOST_CHECK(cfg.check_experimental(ef::CDC));
|
||||
BOOST_CHECK(!cfg.check_experimental(ef::UNUSED_CDC));
|
||||
BOOST_CHECK(!cfg.check_experimental(ef::UNUSED));
|
||||
BOOST_CHECK(cfg.check_experimental(ef::UDF));
|
||||
BOOST_CHECK(cfg.check_experimental(ef::ALTERNATOR_STREAMS));
|
||||
return make_ready_future();
|
||||
}
|
||||
|
||||
@@ -1000,8 +1018,9 @@ SEASTAR_TEST_CASE(test_parse_experimental_false) {
|
||||
auto cfg_ptr = std::make_unique<config>();
|
||||
config& cfg = *cfg_ptr;
|
||||
cfg.read_from_yaml("experimental: false", throw_on_error);
|
||||
BOOST_CHECK(!cfg.check_experimental(ef::CDC));
|
||||
BOOST_CHECK(!cfg.check_experimental(ef::UNUSED_CDC));
|
||||
BOOST_CHECK(!cfg.check_experimental(ef::UNUSED));
|
||||
BOOST_CHECK(!cfg.check_experimental(ef::UDF));
|
||||
BOOST_CHECK(!cfg.check_experimental(ef::ALTERNATOR_STREAMS));
|
||||
return make_ready_future();
|
||||
}
|
||||
|
||||
@@ -128,12 +128,14 @@ SEASTAR_THREAD_TEST_CASE(test_large_data) {
|
||||
});
|
||||
}).get();
|
||||
|
||||
// Since deletion of large data entries has been deleted,
|
||||
// expect the record to be present.
|
||||
assert_that(e.execute_cql("select partition_key from system.large_rows where table_name = 'tbl' allow filtering;").get0())
|
||||
.is_rows()
|
||||
.is_empty();
|
||||
.with_size(1);
|
||||
assert_that(e.execute_cql("select partition_key from system.large_cells where table_name = 'tbl' allow filtering;").get0())
|
||||
.is_rows()
|
||||
.is_empty();
|
||||
.with_size(1);
|
||||
|
||||
return make_ready_future<>();
|
||||
}, cfg).get();
|
||||
|
||||
@@ -550,3 +550,71 @@ SEASTAR_THREAD_TEST_CASE(read_max_size) {
|
||||
}
|
||||
}).get();
|
||||
}
|
||||
|
||||
// Check that mutation queries, those that are stopped when the memory
|
||||
// consumed by their results reach the local/global limit, are aborted
|
||||
// instead of silently terminated when this happens.
|
||||
SEASTAR_THREAD_TEST_CASE(unpaged_mutation_read_global_limit) {
|
||||
auto cfg = cql_test_config{};
|
||||
cfg.dbcfg.emplace();
|
||||
// The memory available to the result memory limiter (global limit) is
|
||||
// configured based on the available memory, so give a small amount to
|
||||
// the "node", so we don't have to work with large amount of data.
|
||||
cfg.dbcfg->available_memory = 2 * 1024 * 1024;
|
||||
do_with_cql_env_thread([] (cql_test_env& e) {
|
||||
e.execute_cql("CREATE TABLE test (pk text, ck int, v text, PRIMARY KEY (pk, ck));").get();
|
||||
auto id = e.prepare("INSERT INTO test (pk, ck, v) VALUES (?, ?, ?);").get0();
|
||||
|
||||
auto& db = e.local_db();
|
||||
auto& tab = db.find_column_family("ks", "test");
|
||||
auto s = tab.schema();
|
||||
|
||||
auto pk = make_local_key(s);
|
||||
const auto raw_pk = utf8_type->decompose(data_value(pk));
|
||||
const auto cql3_pk = cql3::raw_value::make_value(raw_pk);
|
||||
|
||||
const auto value = sstring(1024, 'a');
|
||||
const auto raw_value = utf8_type->decompose(data_value(value));
|
||||
const auto cql3_value = cql3::raw_value::make_value(raw_value);
|
||||
|
||||
const int num_rows = 1024;
|
||||
const auto max_size = 1024u * 1024u * 1024u;
|
||||
|
||||
for (int i = 0; i != num_rows; ++i) {
|
||||
const auto cql3_ck = cql3::raw_value::make_value(int32_type->decompose(data_value(i)));
|
||||
e.execute_prepared(id, {cql3_pk, cql3_ck, cql3_value}).get();
|
||||
}
|
||||
|
||||
const auto partition_ranges = std::vector<dht::partition_range>{query::full_partition_range};
|
||||
|
||||
const std::vector<std::pair<sstring, std::function<future<size_t>(schema_ptr, const query::read_command&)>>> query_methods{
|
||||
{"query_mutations()", [&db, &partition_ranges] (schema_ptr s, const query::read_command& cmd) -> future<size_t> {
|
||||
return db.query_mutations(s, cmd, partition_ranges.front(), {}, db::no_timeout).then(
|
||||
[] (const std::tuple<reconcilable_result, cache_temperature>& res) {
|
||||
return std::get<0>(res).memory_usage();
|
||||
});
|
||||
}},
|
||||
{"query_mutations_on_all_shards()", [&e, &partition_ranges] (schema_ptr s, const query::read_command& cmd) -> future<size_t> {
|
||||
return query_mutations_on_all_shards(e.db(), s, cmd, partition_ranges, {}, db::no_timeout).then(
|
||||
[] (const std::tuple<foreign_ptr<lw_shared_ptr<reconcilable_result>>, cache_temperature>& res) {
|
||||
return std::get<0>(res)->memory_usage();
|
||||
});
|
||||
}}
|
||||
};
|
||||
|
||||
for (auto [query_method_name, query_method] : query_methods) {
|
||||
testlog.info("checking: query_method={}", query_method_name);
|
||||
auto slice = s->full_slice();
|
||||
slice.options.remove<query::partition_slice::option::allow_short_read>();
|
||||
query::read_command cmd(s->id(), s->version(), slice, query::max_result_size(max_size));
|
||||
try {
|
||||
auto size = query_method(s, cmd).get0();
|
||||
// Just to ensure we are not interpreting empty results as success.
|
||||
BOOST_REQUIRE(size != 0);
|
||||
BOOST_FAIL("Expected exception, but none was thrown.");
|
||||
} catch (std::runtime_error& e) {
|
||||
testlog.trace("Exception thrown, as expected: {}", e);
|
||||
}
|
||||
}
|
||||
}, std::move(cfg)).get();
|
||||
}
|
||||
|
||||
@@ -118,7 +118,6 @@ SEASTAR_TEST_CASE(cdc_schema_extension) {
|
||||
// Extensions have to be registered here - config needs to have them before construction of test env.
|
||||
ext->add_schema_extension<cdc::cdc_extension>(cdc::cdc_extension::NAME);
|
||||
auto cfg = ::make_shared<db::config>(ext);
|
||||
cfg->experimental_features({db::experimental_features_t::feature::CDC});
|
||||
|
||||
return do_with_cql_env([] (cql_test_env& e) {
|
||||
auto assert_ext_correctness = [] (cql_test_env& e, cdc::cdc_extension expected_ext) {
|
||||
|
||||
@@ -974,14 +974,7 @@ SEASTAR_THREAD_TEST_CASE(fuzzy_test) {
|
||||
|
||||
const auto& partitions = pop_desc.partitions;
|
||||
smp::invoke_on_all([cfg, db = &env.db(), gs = global_schema_ptr(pop_desc.schema), &partitions] {
|
||||
auto s = gs.get();
|
||||
auto& sem = db->local().get_reader_concurrency_semaphore();
|
||||
|
||||
auto resources = sem.available_resources();
|
||||
resources -= reader_concurrency_semaphore::resources{1, 0};
|
||||
auto permit = sem.make_permit(s.get(), "fuzzy-test");
|
||||
|
||||
return run_fuzzy_test_workload(cfg, *db, std::move(s), partitions).finally([units = permit.consume_resources(resources)] {});
|
||||
return run_fuzzy_test_workload(cfg, *db, gs.get(), partitions);
|
||||
}).handle_exception([seed] (std::exception_ptr e) {
|
||||
testlog.error("Test workload failed with exception {}."
|
||||
" To repeat this particular run, replace the random seed of the test, with that of this run ({})."
|
||||
|
||||
@@ -894,6 +894,232 @@ sstables::shared_sstable create_sstable(sstables::test_env& env, simple_schema&
|
||||
, mutations);
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
class generic_inactive_read : public reader_concurrency_semaphore::inactive_read {
|
||||
flat_mutation_reader_opt _reader;
|
||||
|
||||
private:
|
||||
explicit generic_inactive_read(flat_mutation_reader&& rd) : _reader(std::move(rd)) { }
|
||||
|
||||
virtual void evict() override {
|
||||
_reader = {};
|
||||
}
|
||||
|
||||
public:
|
||||
static std::unique_ptr<inactive_read> make(flat_mutation_reader&& rd) {
|
||||
return std::make_unique<generic_inactive_read>(generic_inactive_read(std::move(rd)));
|
||||
}
|
||||
|
||||
static flat_mutation_reader_opt get_reader(std::unique_ptr<inactive_read>&& ir) {
|
||||
if (!ir) {
|
||||
return {};
|
||||
}
|
||||
auto gir = dynamic_cast<generic_inactive_read*>(ir.get());
|
||||
BOOST_REQUIRE(gir);
|
||||
return std::move(gir->_reader);
|
||||
}
|
||||
};
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
// This unit test passes a read through admission again-and-again, just
|
||||
// like an evictable reader would be during its lifetime. When readmitted
|
||||
// the read sometimes has to wait and sometimes not. This is to check that
|
||||
// the readmitting a previously admitted reader doesn't leak any units.
|
||||
SEASTAR_THREAD_TEST_CASE(test_reader_concurrency_semaphore_readmission_preserves_units) {
|
||||
simple_schema s;
|
||||
const auto initial_resources = reader_concurrency_semaphore::resources{10, 1024 * 1024};
|
||||
reader_concurrency_semaphore semaphore(initial_resources.count, initial_resources.memory, get_name());
|
||||
|
||||
auto permit = semaphore.make_permit(s.schema().get(), get_name());
|
||||
|
||||
std::optional<reader_permit::resource_units> residue_units;
|
||||
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
const auto have_residue_units = bool(residue_units);
|
||||
|
||||
auto current_resources = initial_resources;
|
||||
if (have_residue_units) {
|
||||
current_resources -= residue_units->resources();
|
||||
}
|
||||
BOOST_REQUIRE(semaphore.available_resources() == current_resources);
|
||||
|
||||
std::optional<reader_permit::resource_units> admitted_units;
|
||||
if (i % 2) {
|
||||
const auto consumed_resources = semaphore.available_resources();
|
||||
semaphore.consume(consumed_resources);
|
||||
|
||||
auto units_fut = permit.wait_admission(1024, db::no_timeout);
|
||||
BOOST_REQUIRE(!units_fut.available());
|
||||
|
||||
semaphore.signal(consumed_resources);
|
||||
admitted_units = units_fut.get();
|
||||
} else {
|
||||
admitted_units = permit.wait_admission(1024, db::no_timeout).get();
|
||||
}
|
||||
|
||||
current_resources -= admitted_units->resources();
|
||||
BOOST_REQUIRE(semaphore.available_resources() == current_resources);
|
||||
|
||||
residue_units.emplace(permit.consume_resources(reader_resources(0, 100)));
|
||||
if (!have_residue_units) {
|
||||
current_resources -= residue_units->resources();
|
||||
}
|
||||
BOOST_REQUIRE(semaphore.available_resources() == current_resources);
|
||||
|
||||
auto handle = semaphore.register_inactive_read(generic_inactive_read::make(make_empty_flat_reader(s.schema(), permit)));
|
||||
(void)handle;
|
||||
BOOST_REQUIRE(semaphore.try_evict_one_inactive_read());
|
||||
}
|
||||
|
||||
BOOST_REQUIRE(semaphore.available_resources() == initial_resources - residue_units->resources());
|
||||
|
||||
residue_units.reset();
|
||||
|
||||
BOOST_REQUIRE(semaphore.available_resources() == initial_resources);
|
||||
}
|
||||
|
||||
// This unit test checks that the semaphore doesn't get into a deadlock
|
||||
// when contended, in the presence of many memory-only reads (that don't
|
||||
// wait for admission). This is tested by simulating the 3 kind of reads we
|
||||
// currently have in the system:
|
||||
// * memory-only: reads that don't pass admission and only own memory.
|
||||
// * admitted: reads that pass admission.
|
||||
// * evictable: admitted reads that are furthermore evictable.
|
||||
//
|
||||
// The test creates and runs a large number of these reads in parallel,
|
||||
// read kinds being selected randomly, then creates a watchdog which
|
||||
// kills the test if no progress is being made.
|
||||
SEASTAR_THREAD_TEST_CASE(test_reader_concurrency_semaphore_forward_progress) {
|
||||
class reader {
|
||||
class skeleton_reader : public flat_mutation_reader::impl {
|
||||
reader_permit::resource_units _base_resources;
|
||||
std::optional<reader_permit::resource_units> _resources;
|
||||
public:
|
||||
skeleton_reader(schema_ptr s, reader_permit permit, reader_permit::resource_units res)
|
||||
: impl(std::move(s), std::move(permit)), _base_resources(std::move(res)) { }
|
||||
virtual future<> fill_buffer(db::timeout_clock::time_point timeout) override {
|
||||
_resources.emplace(_permit.consume_resources(reader_resources(0, tests::random::get_int(1024, 2048))));
|
||||
return make_ready_future<>();
|
||||
}
|
||||
virtual void next_partition() override { }
|
||||
virtual future<> fast_forward_to(const dht::partition_range& pr, db::timeout_clock::time_point timeout) override { return make_ready_future<>(); }
|
||||
virtual future<> fast_forward_to(position_range, db::timeout_clock::time_point timeout) override { return make_ready_future<>(); }
|
||||
};
|
||||
struct reader_visitor {
|
||||
reader& r;
|
||||
future<> operator()(std::monostate& ms) { return r.tick(ms); }
|
||||
future<> operator()(flat_mutation_reader& reader) { return r.tick(reader); }
|
||||
future<> operator()(reader_concurrency_semaphore::inactive_read_handle& handle) { return r.tick(handle); }
|
||||
};
|
||||
|
||||
private:
|
||||
schema_ptr _schema;
|
||||
reader_permit _permit;
|
||||
bool _memory_only = true;
|
||||
bool _evictable = false;
|
||||
std::optional<reader_permit::resource_units> _units;
|
||||
std::variant<std::monostate, flat_mutation_reader, reader_concurrency_semaphore::inactive_read_handle> _reader;
|
||||
|
||||
private:
|
||||
future<> make_reader() {
|
||||
return async([this] {
|
||||
auto res = _permit.consume_memory();
|
||||
if (!_memory_only) {
|
||||
res = _permit.wait_admission(1024, db::no_timeout).get0();
|
||||
}
|
||||
_reader = make_flat_mutation_reader<skeleton_reader>(_schema, _permit, std::move(res));
|
||||
});
|
||||
}
|
||||
future<> tick(std::monostate&) {
|
||||
return async([this] {
|
||||
make_reader().get();
|
||||
tick(std::get<flat_mutation_reader>(_reader)).get();
|
||||
});
|
||||
}
|
||||
future<> tick(flat_mutation_reader& reader) {
|
||||
return async([this, &reader] {
|
||||
reader.fill_buffer(db::no_timeout).get();
|
||||
if (_evictable) {
|
||||
_reader = _permit.semaphore().register_inactive_read(generic_inactive_read::make(std::move(reader)));
|
||||
}
|
||||
});
|
||||
}
|
||||
future<> tick(reader_concurrency_semaphore::inactive_read_handle& handle) {
|
||||
return async([this, &handle] () mutable {
|
||||
if (auto reader = generic_inactive_read::get_reader(_permit.semaphore().unregister_inactive_read(std::move(handle))); reader) {
|
||||
_reader = std::move(*reader);
|
||||
} else {
|
||||
make_reader().get();
|
||||
}
|
||||
tick(std::get<flat_mutation_reader>(_reader)).get();
|
||||
});
|
||||
}
|
||||
|
||||
public:
|
||||
reader(schema_ptr s, reader_permit permit, bool memory_only, bool evictable)
|
||||
: _schema(std::move(s))
|
||||
, _permit(std::move(permit))
|
||||
, _memory_only(memory_only)
|
||||
, _evictable(evictable)
|
||||
, _units(_permit.consume_memory(tests::random::get_int(128, 1024)))
|
||||
{
|
||||
}
|
||||
future<> tick() {
|
||||
return std::visit(reader_visitor{*this}, _reader);
|
||||
}
|
||||
};
|
||||
|
||||
const auto count = 10;
|
||||
const auto num_readers = 512;
|
||||
const auto ticks = 1000;
|
||||
|
||||
simple_schema s;
|
||||
reader_concurrency_semaphore semaphore(count, count * 1024, get_name());
|
||||
|
||||
std::list<std::optional<reader>> readers;
|
||||
unsigned nr_memory_only = 0;
|
||||
unsigned nr_admitted = 0;
|
||||
unsigned nr_evictable = 0;
|
||||
|
||||
for (auto i = 0; i < num_readers; ++i) {
|
||||
const auto memory_only = tests::random::get_bool();
|
||||
const auto evictable = !memory_only && tests::random::get_bool();
|
||||
if (memory_only) {
|
||||
++nr_memory_only;
|
||||
} else if (evictable) {
|
||||
++nr_evictable;
|
||||
} else {
|
||||
++nr_admitted;
|
||||
}
|
||||
readers.emplace_back(reader(s.schema(), semaphore.make_permit(s.schema().get(), fmt::format("reader{}", i)), memory_only, evictable));
|
||||
}
|
||||
|
||||
testlog.info("Created {} readers, memory_only={}, admitted={}, evictable={}", readers.size(), nr_memory_only, nr_admitted, nr_evictable);
|
||||
|
||||
bool watchdog_touched = false;
|
||||
auto watchdog = timer<db::timeout_clock>([&semaphore, &watchdog_touched] {
|
||||
if (!watchdog_touched) {
|
||||
testlog.error("Watchdog detected a deadlock, dumping diagnostics before killing the test: {}", semaphore.dump_diagnostics());
|
||||
semaphore.broken(std::make_exception_ptr(std::runtime_error("test killed by watchdog")));
|
||||
}
|
||||
watchdog_touched = false;
|
||||
});
|
||||
watchdog.arm_periodic(std::chrono::seconds(30));
|
||||
|
||||
parallel_for_each(readers, [&] (std::optional<reader>& r) -> future<> {
|
||||
return async([this, &watchdog_touched, &r] {
|
||||
for (auto i = 0; i < ticks; ++i) {
|
||||
watchdog_touched = true;
|
||||
r->tick().get();
|
||||
}
|
||||
r.reset();
|
||||
watchdog_touched = true;
|
||||
});
|
||||
}).get();
|
||||
}
|
||||
|
||||
static
|
||||
sstables::shared_sstable create_sstable(sstables::test_env& env, schema_ptr s, std::vector<mutation> mutations) {
|
||||
static thread_local auto tmp = tmpdir();
|
||||
@@ -2715,7 +2941,7 @@ SEASTAR_THREAD_TEST_CASE(test_queue_reader) {
|
||||
}
|
||||
}
|
||||
|
||||
// abort()
|
||||
// abort() -- check that consumer is aborted
|
||||
{
|
||||
auto [reader, handle] = make_queue_reader(gen.schema(), tests::make_permit());
|
||||
auto fill_buffer_fut = reader.fill_buffer(db::no_timeout);
|
||||
@@ -2730,6 +2956,28 @@ SEASTAR_THREAD_TEST_CASE(test_queue_reader) {
|
||||
|
||||
BOOST_REQUIRE_THROW(fill_buffer_fut.get(), std::runtime_error);
|
||||
BOOST_REQUIRE_THROW(handle.push(mutation_fragment(*gen.schema(), tests::make_permit(), partition_end{})).get(), std::runtime_error);
|
||||
BOOST_REQUIRE(!reader.is_end_of_stream());
|
||||
}
|
||||
|
||||
// abort() -- check that producer is aborted
|
||||
{
|
||||
auto [reader, handle] = make_queue_reader(gen.schema(), tests::make_permit());
|
||||
reader.set_max_buffer_size(1);
|
||||
|
||||
auto expected_reader = flat_mutation_reader_from_mutations(tests::make_permit(), expected_muts);
|
||||
|
||||
auto push_fut = make_ready_future<>();
|
||||
while (push_fut.available()) {
|
||||
push_fut = handle.push(std::move(*expected_reader(db::no_timeout).get0()));
|
||||
}
|
||||
|
||||
BOOST_REQUIRE(!push_fut.available());
|
||||
|
||||
handle.abort(std::make_exception_ptr<std::runtime_error>(std::runtime_error("error")));
|
||||
|
||||
BOOST_REQUIRE_THROW(reader.fill_buffer(db::no_timeout).get(), std::runtime_error);
|
||||
BOOST_REQUIRE_THROW(push_fut.get(), std::runtime_error);
|
||||
BOOST_REQUIRE(!reader.is_end_of_stream());
|
||||
}
|
||||
|
||||
// Detached handle
|
||||
|
||||
@@ -166,7 +166,7 @@ SEASTAR_TEST_CASE(test_multishard_writer_producer_aborts) {
|
||||
|
||||
namespace {
|
||||
|
||||
class bucket_writer {
|
||||
class test_bucket_writer {
|
||||
schema_ptr _schema;
|
||||
classify_by_timestamp _classify;
|
||||
std::unordered_map<int64_t, std::vector<mutation>>& _buckets;
|
||||
@@ -175,6 +175,17 @@ class bucket_writer {
|
||||
mutation_opt _current_mutation;
|
||||
bool _is_first_mutation = true;
|
||||
|
||||
size_t _throw_after;
|
||||
size_t _mutation_consumed = 0;
|
||||
|
||||
public:
|
||||
class expected_exception : public std::exception {
|
||||
public:
|
||||
virtual const char* what() const noexcept override {
|
||||
return "expected_exception";
|
||||
}
|
||||
};
|
||||
|
||||
private:
|
||||
void check_timestamp(api::timestamp_type ts) {
|
||||
const auto bucket_id = _classify(ts);
|
||||
@@ -223,40 +234,53 @@ private:
|
||||
check_timestamp(rt.tomb.timestamp);
|
||||
}
|
||||
|
||||
void maybe_throw() {
|
||||
if (_mutation_consumed++ >= _throw_after) {
|
||||
throw(expected_exception());
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
bucket_writer(schema_ptr schema, classify_by_timestamp classify, std::unordered_map<int64_t, std::vector<mutation>>& buckets)
|
||||
test_bucket_writer(schema_ptr schema, classify_by_timestamp classify, std::unordered_map<int64_t, std::vector<mutation>>& buckets, size_t throw_after = std::numeric_limits<size_t>::max())
|
||||
: _schema(std::move(schema))
|
||||
, _classify(std::move(classify))
|
||||
, _buckets(buckets) {
|
||||
}
|
||||
, _buckets(buckets)
|
||||
, _throw_after(throw_after)
|
||||
{ }
|
||||
void consume_new_partition(const dht::decorated_key& dk) {
|
||||
maybe_throw();
|
||||
BOOST_REQUIRE(!_current_mutation);
|
||||
_current_mutation = mutation(_schema, dk);
|
||||
}
|
||||
void consume(tombstone partition_tombstone) {
|
||||
maybe_throw();
|
||||
BOOST_REQUIRE(_current_mutation);
|
||||
verify_partition_tombstone(partition_tombstone);
|
||||
_current_mutation->partition().apply(partition_tombstone);
|
||||
}
|
||||
stop_iteration consume(static_row&& sr) {
|
||||
maybe_throw();
|
||||
BOOST_REQUIRE(_current_mutation);
|
||||
verify_static_row(sr);
|
||||
_current_mutation->apply(mutation_fragment(*_schema, tests::make_permit(), std::move(sr)));
|
||||
return stop_iteration::no;
|
||||
}
|
||||
stop_iteration consume(clustering_row&& cr) {
|
||||
maybe_throw();
|
||||
BOOST_REQUIRE(_current_mutation);
|
||||
verify_clustering_row(cr);
|
||||
_current_mutation->apply(mutation_fragment(*_schema, tests::make_permit(), std::move(cr)));
|
||||
return stop_iteration::no;
|
||||
}
|
||||
stop_iteration consume(range_tombstone&& rt) {
|
||||
maybe_throw();
|
||||
BOOST_REQUIRE(_current_mutation);
|
||||
verify_range_tombstone(rt);
|
||||
_current_mutation->apply(mutation_fragment(*_schema, tests::make_permit(), std::move(rt)));
|
||||
return stop_iteration::no;
|
||||
}
|
||||
stop_iteration consume_end_of_partition() {
|
||||
maybe_throw();
|
||||
BOOST_REQUIRE(_current_mutation);
|
||||
BOOST_REQUIRE(_bucket_id);
|
||||
auto& bucket = _buckets[*_bucket_id];
|
||||
@@ -311,7 +335,7 @@ SEASTAR_THREAD_TEST_CASE(test_timestamp_based_splitting_mutation_writer) {
|
||||
|
||||
auto consumer = [&] (flat_mutation_reader bucket_reader) {
|
||||
return do_with(std::move(bucket_reader), [&] (flat_mutation_reader& rd) {
|
||||
return rd.consume(bucket_writer(random_schema.schema(), classify_fn, buckets), db::no_timeout);
|
||||
return rd.consume(test_bucket_writer(random_schema.schema(), classify_fn, buckets), db::no_timeout);
|
||||
});
|
||||
};
|
||||
|
||||
@@ -342,3 +366,53 @@ SEASTAR_THREAD_TEST_CASE(test_timestamp_based_splitting_mutation_writer) {
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Checks that segregate_by_timestamp() copes with a consumer that throws
// part-way through: random mutations are generated, every bucket consumer is
// a test_bucket_writer configured to throw after a randomly chosen number of
// consume calls, and the resulting failure must surface either as the
// writer's expected_exception or (tolerated for now) as a broken_promise.
SEASTAR_THREAD_TEST_CASE(test_timestamp_based_splitting_mutation_writer_abort) {
    auto random_spec = tests::make_random_schema_specification(
            get_name(),
            std::uniform_int_distribution<size_t>(1, 4),
            std::uniform_int_distribution<size_t>(2, 4),
            std::uniform_int_distribution<size_t>(2, 8),
            std::uniform_int_distribution<size_t>(2, 8));
    auto random_schema = tests::random_schema{tests::random::get_int<uint32_t>(), *random_spec};

    testlog.info("Random schema:\n{}", random_schema.cql());

    // For tombstones and row markers, return api::missing_timestamp whenever
    // the random draw from [0, 10] is non-zero; otherwise (and for all other
    // destinations) defer to the default generator.
    auto ts_gen = [&, underlying = tests::default_timestamp_generator()] (std::mt19937& engine,
            tests::timestamp_destination ts_dest, api::timestamp_type min_timestamp) -> api::timestamp_type {
        if (ts_dest == tests::timestamp_destination::partition_tombstone ||
                ts_dest == tests::timestamp_destination::row_marker ||
                ts_dest == tests::timestamp_destination::row_tombstone ||
                ts_dest == tests::timestamp_destination::collection_tombstone) {
            if (tests::random::get_int<int>(0, 10, engine)) {
                return api::missing_timestamp;
            }
        }
        return underlying(engine, ts_dest, min_timestamp);
    };

    auto muts = tests::generate_random_mutations(random_schema, ts_gen).get0();

    // Buckets are keyed by the remainder of the timestamp divided by two.
    auto classify_fn = [] (api::timestamp_type ts) {
        return int64_t(ts % 2);
    };

    std::unordered_map<int64_t, std::vector<mutation>> buckets;

    // Keep the type returned by get_int() instead of squeezing it through an
    // int: the value is handed to a size_t parameter of test_bucket_writer.
    const auto throw_after = tests::random::get_int(muts.size() - 1);
    testlog.info("Will raise exception after {}/{} mutations", throw_after, muts.size());
    auto consumer = [&] (flat_mutation_reader bucket_reader) {
        return do_with(std::move(bucket_reader), [&] (flat_mutation_reader& rd) {
            return rd.consume(test_bucket_writer(random_schema.schema(), classify_fn, buckets, throw_after), db::no_timeout);
        });
    };

    try {
        segregate_by_timestamp(flat_mutation_reader_from_mutations(tests::make_permit(), muts), classify_fn, std::move(consumer)).get();
    } catch (const test_bucket_writer::expected_exception&) {
        BOOST_TEST_PASSPOINT();
    } catch (const seastar::broken_promise&) {
        // Tolerated until we properly abort readers
        BOOST_TEST_PASSPOINT();
    }
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user