Compare commits
92 Commits
scylla-4.0
...
next-4.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4ae9a56466 | ||
|
|
0374c1d040 | ||
|
|
9cb0fe3b33 | ||
|
|
a813ff4da2 | ||
|
|
d5936147f4 | ||
|
|
a3d3b4e185 | ||
|
|
4ca2576c98 | ||
|
|
e99a0c7b89 | ||
|
|
f8c7605657 | ||
|
|
7b9e33dcd4 | ||
|
|
d86a31097a | ||
|
|
bd9d6f8e45 | ||
|
|
11ef23e97a | ||
|
|
2c0eac09ae | ||
|
|
713a7269d0 | ||
|
|
1724301d4d | ||
|
|
9971f2f5db | ||
|
|
ee328c22ca | ||
|
|
3a9c9a8a12 | ||
|
|
c03445871a | ||
|
|
565ac1b092 | ||
|
|
7d1180b98f | ||
|
|
f258e6f6ee | ||
|
|
2708b0d664 | ||
|
|
e31ffbf2e6 | ||
|
|
801994e299 | ||
|
|
3b932078bf | ||
|
|
608f62a0e9 | ||
|
|
d8619d3320 | ||
|
|
4f0c99a187 | ||
|
|
ada79df082 | ||
|
|
1935f2b480 | ||
|
|
44a76ed231 | ||
|
|
aeb49f4915 | ||
|
|
8d6b35ad20 | ||
|
|
b123700ebe | ||
|
|
6786b521f9 | ||
|
|
fda0d1ae8e | ||
|
|
e7cffb978a | ||
|
|
79a1c74921 | ||
|
|
3ee854f9fc | ||
|
|
2b65984d14 | ||
|
|
52d1099d09 | ||
|
|
3a03906377 | ||
|
|
2395a240b4 | ||
|
|
d182c595a1 | ||
|
|
fe9c4611b3 | ||
|
|
29df416720 | ||
|
|
1d3c00572c | ||
|
|
9d6e2c5a71 | ||
|
|
386741e3b7 | ||
|
|
d0fdc3960a | ||
|
|
4035cf4f9f | ||
|
|
09367742b1 | ||
|
|
a18ff57b29 | ||
|
|
4734ba21a7 | ||
|
|
425af4c543 | ||
|
|
55f096d01b | ||
|
|
fc79da5912 | ||
|
|
da9e7080ca | ||
|
|
01b0195c22 | ||
|
|
d05b567a40 | ||
|
|
2c11efbbae | ||
|
|
c60d71dc69 | ||
|
|
79930048db | ||
|
|
82b4f4a6c2 | ||
|
|
5b99195d21 | ||
|
|
edde256228 | ||
|
|
3cf28ac18e | ||
|
|
58b65f61c0 | ||
|
|
466cfb0ca6 | ||
|
|
1cd6f50806 | ||
|
|
3f6fe7328a | ||
|
|
f9dd8608eb | ||
|
|
24a80cbf47 | ||
|
|
6e4edc97ad | ||
|
|
81df28b6f3 | ||
|
|
ea6620e9eb | ||
|
|
19be84dafd | ||
|
|
2ff897d351 | ||
|
|
8fc3300739 | ||
|
|
d2ac7d4b18 | ||
|
|
61706a6789 | ||
|
|
65aa531010 | ||
|
|
4bffd0f522 | ||
|
|
9409fc7290 | ||
|
|
86faf1b3ca | ||
|
|
426295bda9 | ||
|
|
c6fde0e562 | ||
|
|
d9f9e7455b | ||
|
|
e95bcd0f8f | ||
|
|
2ff6e2e122 |
3
.gitmodules
vendored
3
.gitmodules
vendored
@@ -15,3 +15,6 @@
|
||||
[submodule "zstd"]
|
||||
path = zstd
|
||||
url = ../zstd
|
||||
[submodule "abseil"]
|
||||
path = abseil
|
||||
url = ../abseil-cpp
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#!/bin/sh
|
||||
|
||||
PRODUCT=scylla
|
||||
VERSION=4.0.3
|
||||
VERSION=4.0.11
|
||||
|
||||
if test -f version
|
||||
then
|
||||
|
||||
1
abseil
Submodule
1
abseil
Submodule
Submodule abseil added at 2069dc796a
@@ -365,31 +365,35 @@ bool check_compare(const rjson::value* v1, const rjson::value& v2, const Compara
|
||||
|
||||
struct cmp_lt {
|
||||
template <typename T> bool operator()(const T& lhs, const T& rhs) const { return lhs < rhs; }
|
||||
// We cannot use the normal comparison operators like "<" on the bytes
|
||||
// type, because they treat individual bytes as signed but we need to
|
||||
// compare them as *unsigned*. So we need a specialization for bytes.
|
||||
bool operator()(const bytes& lhs, const bytes& rhs) const { return compare_unsigned(lhs, rhs) < 0; }
|
||||
static constexpr const char* diagnostic = "LT operator";
|
||||
};
|
||||
|
||||
struct cmp_le {
|
||||
// bytes only has <, so we cannot use <=.
|
||||
template <typename T> bool operator()(const T& lhs, const T& rhs) const { return lhs < rhs || lhs == rhs; }
|
||||
template <typename T> bool operator()(const T& lhs, const T& rhs) const { return lhs <= rhs; }
|
||||
bool operator()(const bytes& lhs, const bytes& rhs) const { return compare_unsigned(lhs, rhs) <= 0; }
|
||||
static constexpr const char* diagnostic = "LE operator";
|
||||
};
|
||||
|
||||
struct cmp_ge {
|
||||
// bytes only has <, so we cannot use >=.
|
||||
template <typename T> bool operator()(const T& lhs, const T& rhs) const { return rhs < lhs || lhs == rhs; }
|
||||
template <typename T> bool operator()(const T& lhs, const T& rhs) const { return lhs >= rhs; }
|
||||
bool operator()(const bytes& lhs, const bytes& rhs) const { return compare_unsigned(lhs, rhs) >= 0; }
|
||||
static constexpr const char* diagnostic = "GE operator";
|
||||
};
|
||||
|
||||
struct cmp_gt {
|
||||
// bytes only has <, so we cannot use >.
|
||||
template <typename T> bool operator()(const T& lhs, const T& rhs) const { return rhs < lhs; }
|
||||
template <typename T> bool operator()(const T& lhs, const T& rhs) const { return lhs > rhs; }
|
||||
bool operator()(const bytes& lhs, const bytes& rhs) const { return compare_unsigned(lhs, rhs) > 0; }
|
||||
static constexpr const char* diagnostic = "GT operator";
|
||||
};
|
||||
|
||||
// True if v is between lb and ub, inclusive. Throws if lb > ub.
|
||||
template <typename T>
|
||||
bool check_BETWEEN(const T& v, const T& lb, const T& ub) {
|
||||
if (ub < lb) {
|
||||
if (cmp_lt()(ub, lb)) {
|
||||
throw api_error("ValidationException",
|
||||
format("BETWEEN operator requires lower_bound <= upper_bound, but {} > {}", lb, ub));
|
||||
}
|
||||
|
||||
@@ -565,7 +565,7 @@ static void validate_tags(const std::map<sstring, sstring>& tags) {
|
||||
// to races during concurrent updates of the same table. Once Scylla schema updates
|
||||
// are fixed, this issue will automatically get fixed as well.
|
||||
enum class update_tags_action { add_tags, delete_tags };
|
||||
static future<> update_tags(const rjson::value& tags, schema_ptr schema, std::map<sstring, sstring>&& tags_map, update_tags_action action) {
|
||||
static future<> update_tags(service::migration_manager& mm, const rjson::value& tags, schema_ptr schema, std::map<sstring, sstring>&& tags_map, update_tags_action action) {
|
||||
if (action == update_tags_action::add_tags) {
|
||||
for (auto it = tags.Begin(); it != tags.End(); ++it) {
|
||||
const rjson::value& key = (*it)["Key"];
|
||||
@@ -592,24 +592,12 @@ static future<> update_tags(const rjson::value& tags, schema_ptr schema, std::ma
|
||||
}
|
||||
validate_tags(tags_map);
|
||||
|
||||
std::stringstream serialized_tags;
|
||||
serialized_tags << '{';
|
||||
for (auto& tag_entry : tags_map) {
|
||||
serialized_tags << format("'{}':'{}',", tag_entry.first, tag_entry.second);
|
||||
}
|
||||
std::string serialized_tags_str = serialized_tags.str();
|
||||
if (!tags_map.empty()) {
|
||||
serialized_tags_str[serialized_tags_str.size() - 1] = '}'; // trims the last ',' delimiter
|
||||
} else {
|
||||
serialized_tags_str.push_back('}');
|
||||
}
|
||||
|
||||
sstring req = format("ALTER TABLE \"{}\".\"{}\" WITH {} = {}",
|
||||
schema->ks_name(), schema->cf_name(), tags_extension::NAME, serialized_tags_str);
|
||||
return db::execute_cql(std::move(req)).discard_result();
|
||||
schema_builder builder(schema);
|
||||
builder.set_extensions(schema::extensions_map{{sstring(tags_extension::NAME), ::make_shared<tags_extension>(std::move(tags_map))}});
|
||||
return mm.announce_column_family_update(builder.build(), false, std::vector<view_ptr>(), false);
|
||||
}
|
||||
|
||||
static future<> add_tags(service::storage_proxy& proxy, schema_ptr schema, rjson::value& request_info) {
|
||||
static future<> add_tags(service::migration_manager& mm, service::storage_proxy& proxy, schema_ptr schema, rjson::value& request_info) {
|
||||
const rjson::value* tags = rjson::find(request_info, "Tags");
|
||||
if (!tags || !tags->IsArray()) {
|
||||
return make_exception_future<>(api_error("ValidationException", format("Cannot parse tags")));
|
||||
@@ -619,7 +607,7 @@ static future<> add_tags(service::storage_proxy& proxy, schema_ptr schema, rjson
|
||||
}
|
||||
|
||||
std::map<sstring, sstring> tags_map = get_tags_of_table(schema);
|
||||
return update_tags(rjson::copy(*tags), schema, std::move(tags_map), update_tags_action::add_tags);
|
||||
return update_tags(mm, rjson::copy(*tags), schema, std::move(tags_map), update_tags_action::add_tags);
|
||||
}
|
||||
|
||||
future<executor::request_return_type> executor::tag_resource(client_state& client_state, service_permit permit, rjson::value request) {
|
||||
@@ -631,7 +619,7 @@ future<executor::request_return_type> executor::tag_resource(client_state& clien
|
||||
return api_error("AccessDeniedException", "Incorrect resource identifier");
|
||||
}
|
||||
schema_ptr schema = get_table_from_arn(_proxy, std::string_view(arn->GetString(), arn->GetStringLength()));
|
||||
add_tags(_proxy, schema, request).get();
|
||||
add_tags(_mm, _proxy, schema, request).get();
|
||||
return json_string("");
|
||||
});
|
||||
}
|
||||
@@ -652,7 +640,7 @@ future<executor::request_return_type> executor::untag_resource(client_state& cli
|
||||
schema_ptr schema = get_table_from_arn(_proxy, std::string_view(arn->GetString(), arn->GetStringLength()));
|
||||
|
||||
std::map<sstring, sstring> tags_map = get_tags_of_table(schema);
|
||||
update_tags(*tags, schema, std::move(tags_map), update_tags_action::delete_tags).get();
|
||||
update_tags(_mm, *tags, schema, std::move(tags_map), update_tags_action::delete_tags).get();
|
||||
return json_string("");
|
||||
});
|
||||
}
|
||||
@@ -869,7 +857,7 @@ future<executor::request_return_type> executor::create_table(client_state& clien
|
||||
}).then([this, table_info = std::move(table_info), schema] () mutable {
|
||||
future<> f = make_ready_future<>();
|
||||
if (rjson::find(table_info, "Tags")) {
|
||||
f = add_tags(_proxy, schema, table_info);
|
||||
f = add_tags(_mm, _proxy, schema, table_info);
|
||||
}
|
||||
return f.then([table_info = std::move(table_info), schema] () mutable {
|
||||
rjson::value status = rjson::empty_object();
|
||||
@@ -899,15 +887,24 @@ class attribute_collector {
|
||||
void add(bytes&& name, atomic_cell&& cell) {
|
||||
collected.emplace(std::move(name), std::move(cell));
|
||||
}
|
||||
void add(const bytes& name, atomic_cell&& cell) {
|
||||
collected.emplace(name, std::move(cell));
|
||||
}
|
||||
public:
|
||||
attribute_collector() : collected(attrs_type()->get_keys_type()->as_less_comparator()) { }
|
||||
void put(bytes&& name, bytes&& val, api::timestamp_type ts) {
|
||||
add(std::move(name), atomic_cell::make_live(*bytes_type, ts, std::move(val), atomic_cell::collection_member::yes));
|
||||
void put(bytes&& name, const bytes& val, api::timestamp_type ts) {
|
||||
add(std::move(name), atomic_cell::make_live(*bytes_type, ts, val, atomic_cell::collection_member::yes));
|
||||
|
||||
}
|
||||
void put(const bytes& name, const bytes& val, api::timestamp_type ts) {
|
||||
add(name, atomic_cell::make_live(*bytes_type, ts, val, atomic_cell::collection_member::yes));
|
||||
}
|
||||
void del(bytes&& name, api::timestamp_type ts) {
|
||||
add(std::move(name), atomic_cell::make_dead(ts, gc_clock::now()));
|
||||
}
|
||||
void del(const bytes& name, api::timestamp_type ts) {
|
||||
add(name, atomic_cell::make_dead(ts, gc_clock::now()));
|
||||
}
|
||||
collection_mutation_description to_mut() {
|
||||
collection_mutation_description ret;
|
||||
for (auto&& e : collected) {
|
||||
@@ -987,7 +984,7 @@ public:
|
||||
put_or_delete_item(const rjson::value& item, schema_ptr schema, put_item);
|
||||
// put_or_delete_item doesn't keep a reference to schema (so it can be
|
||||
// moved between shards for LWT) so it needs to be given again to build():
|
||||
mutation build(schema_ptr schema, api::timestamp_type ts);
|
||||
mutation build(schema_ptr schema, api::timestamp_type ts) const;
|
||||
const partition_key& pk() const { return _pk; }
|
||||
const clustering_key& ck() const { return _ck; }
|
||||
};
|
||||
@@ -1016,7 +1013,7 @@ put_or_delete_item::put_or_delete_item(const rjson::value& item, schema_ptr sche
|
||||
}
|
||||
}
|
||||
|
||||
mutation put_or_delete_item::build(schema_ptr schema, api::timestamp_type ts) {
|
||||
mutation put_or_delete_item::build(schema_ptr schema, api::timestamp_type ts) const {
|
||||
mutation m(schema, _pk);
|
||||
// If there's no clustering key, a tombstone should be created directly
|
||||
// on a partition, not on a clustering row - otherwise it will look like
|
||||
@@ -1038,7 +1035,7 @@ mutation put_or_delete_item::build(schema_ptr schema, api::timestamp_type ts) {
|
||||
for (auto& c : *_cells) {
|
||||
const column_definition* cdef = schema->get_column_definition(c.column_name);
|
||||
if (!cdef) {
|
||||
attrs_collector.put(std::move(c.column_name), std::move(c.value), ts);
|
||||
attrs_collector.put(c.column_name, c.value, ts);
|
||||
} else {
|
||||
row.cells().apply(*cdef, atomic_cell::make_live(*cdef->type, ts, std::move(c.value)));
|
||||
}
|
||||
@@ -1338,7 +1335,7 @@ public:
|
||||
check_needs_read_before_write(_condition_expression) ||
|
||||
_returnvalues == returnvalues::ALL_OLD;
|
||||
}
|
||||
virtual std::optional<mutation> apply(std::unique_ptr<rjson::value> previous_item, api::timestamp_type ts) override {
|
||||
virtual std::optional<mutation> apply(std::unique_ptr<rjson::value> previous_item, api::timestamp_type ts) const override {
|
||||
std::unordered_set<std::string> used_attribute_values;
|
||||
std::unordered_set<std::string> used_attribute_names;
|
||||
if (!verify_expected(_request, previous_item) ||
|
||||
@@ -1350,6 +1347,7 @@ public:
|
||||
// efficient than throwing an exception.
|
||||
return {};
|
||||
}
|
||||
_return_attributes = {};
|
||||
if (_returnvalues == returnvalues::ALL_OLD && previous_item) {
|
||||
// previous_item is supposed to have been created with
|
||||
// describe_item(), so has the "Item" attribute:
|
||||
@@ -1416,7 +1414,7 @@ public:
|
||||
check_needs_read_before_write(_condition_expression) ||
|
||||
_returnvalues == returnvalues::ALL_OLD;
|
||||
}
|
||||
virtual std::optional<mutation> apply(std::unique_ptr<rjson::value> previous_item, api::timestamp_type ts) override {
|
||||
virtual std::optional<mutation> apply(std::unique_ptr<rjson::value> previous_item, api::timestamp_type ts) const override {
|
||||
std::unordered_set<std::string> used_attribute_values;
|
||||
std::unordered_set<std::string> used_attribute_names;
|
||||
if (!verify_expected(_request, previous_item) ||
|
||||
@@ -1428,6 +1426,7 @@ public:
|
||||
// efficient than throwing an exception.
|
||||
return {};
|
||||
}
|
||||
_return_attributes = {};
|
||||
if (_returnvalues == returnvalues::ALL_OLD && previous_item) {
|
||||
rjson::value* item = rjson::find(*previous_item, "Item");
|
||||
if (item) {
|
||||
@@ -1511,7 +1510,7 @@ public:
|
||||
virtual ~put_or_delete_item_cas_request() = default;
|
||||
virtual std::optional<mutation> apply(query::result& qr, const query::partition_slice& slice, api::timestamp_type ts) override {
|
||||
std::optional<mutation> ret;
|
||||
for (put_or_delete_item& mutation_builder : _mutation_builders) {
|
||||
for (const put_or_delete_item& mutation_builder : _mutation_builders) {
|
||||
// We assume all these builders have the same partition.
|
||||
if (ret) {
|
||||
ret->apply(mutation_builder.build(schema, ts));
|
||||
@@ -2336,7 +2335,7 @@ public:
|
||||
|
||||
update_item_operation(service::storage_proxy& proxy, rjson::value&& request);
|
||||
virtual ~update_item_operation() = default;
|
||||
virtual std::optional<mutation> apply(std::unique_ptr<rjson::value> previous_item, api::timestamp_type ts) override;
|
||||
virtual std::optional<mutation> apply(std::unique_ptr<rjson::value> previous_item, api::timestamp_type ts) const override;
|
||||
bool needs_read_before_write() const;
|
||||
};
|
||||
|
||||
@@ -2400,7 +2399,7 @@ update_item_operation::needs_read_before_write() const {
|
||||
}
|
||||
|
||||
std::optional<mutation>
|
||||
update_item_operation::apply(std::unique_ptr<rjson::value> previous_item, api::timestamp_type ts) {
|
||||
update_item_operation::apply(std::unique_ptr<rjson::value> previous_item, api::timestamp_type ts) const {
|
||||
std::unordered_set<std::string> used_attribute_values;
|
||||
std::unordered_set<std::string> used_attribute_names;
|
||||
if (!verify_expected(_request, previous_item) ||
|
||||
|
||||
@@ -83,7 +83,11 @@ protected:
|
||||
// When _returnvalues != NONE, apply() should store here, in JSON form,
|
||||
// the values which are to be returned in the "Attributes" field.
|
||||
// The default null JSON means do not return an Attributes field at all.
|
||||
rjson::value _return_attributes;
|
||||
// This field is marked "mutable" so that the const apply() can modify
|
||||
// it (see explanation below), but note that because apply() may be
|
||||
// called more than once, if apply() will sometimes set this field it
|
||||
// must set it (even if just to the default empty value) every time.
|
||||
mutable rjson::value _return_attributes;
|
||||
public:
|
||||
// The constructor of a rmw_operation subclass should parse the request
|
||||
// and try to discover as many input errors as it can before really
|
||||
@@ -96,7 +100,12 @@ public:
|
||||
// conditional expression, apply() should return an empty optional.
|
||||
// apply() may throw if it encounters input errors not discovered during
|
||||
// the constructor.
|
||||
virtual std::optional<mutation> apply(std::unique_ptr<rjson::value> previous_item, api::timestamp_type ts) = 0;
|
||||
// apply() may be called more than once in case of contention, so it must
|
||||
// not change the state saved in the object (issue #7218 was caused by
|
||||
// violating this). We mark apply() "const" to let the compiler validate
|
||||
// this for us. The output-only field _return_attributes is marked
|
||||
// "mutable" above so that apply() can still write to it.
|
||||
virtual std::optional<mutation> apply(std::unique_ptr<rjson::value> previous_item, api::timestamp_type ts) const = 0;
|
||||
// Convert the above apply() into the signature needed by cas_request:
|
||||
virtual std::optional<mutation> apply(query::result& qr, const query::partition_slice& slice, api::timestamp_type ts) override;
|
||||
virtual ~rmw_operation() = default;
|
||||
|
||||
@@ -252,8 +252,8 @@ void set_storage_service(http_context& ctx, routes& r) {
|
||||
for (auto cf : column_families) {
|
||||
column_families_vec.push_back(&db.find_column_family(keyspace, cf));
|
||||
}
|
||||
return parallel_for_each(column_families_vec, [&cm] (column_family* cf) {
|
||||
return cm.perform_cleanup(cf);
|
||||
return parallel_for_each(column_families_vec, [&cm, &db] (column_family* cf) {
|
||||
return cm.perform_cleanup(db, cf);
|
||||
});
|
||||
}).then([]{
|
||||
return make_ready_future<json::json_return_type>(0);
|
||||
|
||||
@@ -1135,7 +1135,7 @@ public:
|
||||
if (r.row().deleted_at()) {
|
||||
touched_parts.set<stats::part_type::ROW_DELETE>();
|
||||
cdc_op = operation::row_delete;
|
||||
if (pirow) {
|
||||
if (pirow && pikey) {
|
||||
for (const column_definition& column: _schema->regular_columns()) {
|
||||
assert(pirow->has(column.name_as_text()));
|
||||
auto& cdef = *_log_schema->get_column_definition(log_data_column_name_bytes(column.name()));
|
||||
|
||||
59
configure.py
59
configure.py
@@ -381,6 +381,7 @@ scylla_tests = set([
|
||||
'test/boost/view_schema_ckey_test',
|
||||
'test/boost/vint_serialization_test',
|
||||
'test/boost/virtual_reader_test',
|
||||
'test/boost/stall_free_test',
|
||||
'test/manual/ec2_snitch_test',
|
||||
'test/manual/gce_snitch_test',
|
||||
'test/manual/gossip',
|
||||
@@ -1265,9 +1266,9 @@ def query_seastar_flags(pc_file, link_static_cxx=False):
|
||||
return cflags, libs
|
||||
|
||||
for mode in build_modes:
|
||||
seastar_cflags, seastar_libs = query_seastar_flags(pc[mode], link_static_cxx=args.staticcxx)
|
||||
modes[mode]['seastar_cflags'] = seastar_cflags
|
||||
modes[mode]['seastar_libs'] = seastar_libs
|
||||
seastar_pc_cflags, seastar_pc_libs = query_seastar_flags(pc[mode], link_static_cxx=args.staticcxx)
|
||||
modes[mode]['seastar_cflags'] = seastar_pc_cflags
|
||||
modes[mode]['seastar_libs'] = seastar_pc_libs
|
||||
|
||||
# We need to use experimental features of the zstd library (to use our own allocators for the (de)compression context),
|
||||
# which are available only when the library is linked statically.
|
||||
@@ -1288,6 +1289,46 @@ def configure_zstd(build_dir, mode):
|
||||
os.makedirs(zstd_build_dir, exist_ok=True)
|
||||
subprocess.check_call(zstd_cmd, shell=False, cwd=zstd_build_dir)
|
||||
|
||||
def configure_abseil(build_dir, mode):
|
||||
abseil_build_dir = os.path.join(build_dir, mode, 'abseil')
|
||||
|
||||
abseil_cflags = seastar_cflags + ' ' + modes[mode]['cxx_ld_flags']
|
||||
cmake_mode = MODE_TO_CMAKE_BUILD_TYPE[mode]
|
||||
abseil_cmake_args = [
|
||||
'-DCMAKE_BUILD_TYPE={}'.format(cmake_mode),
|
||||
'-DCMAKE_INSTALL_PREFIX={}'.format(build_dir + '/inst'), # just to avoid a warning from absl
|
||||
'-DCMAKE_C_COMPILER={}'.format(args.cc),
|
||||
'-DCMAKE_CXX_COMPILER={}'.format(args.cxx),
|
||||
'-DCMAKE_CXX_FLAGS_{}={}'.format(cmake_mode.upper(), abseil_cflags),
|
||||
]
|
||||
|
||||
abseil_cmd = ['cmake', '-G', 'Ninja', os.path.relpath('abseil', abseil_build_dir)] + abseil_cmake_args
|
||||
|
||||
os.makedirs(abseil_build_dir, exist_ok=True)
|
||||
subprocess.check_call(abseil_cmd, shell=False, cwd=abseil_build_dir)
|
||||
|
||||
abseil_libs = ['absl/' + lib for lib in [
|
||||
'container/libabsl_hashtablez_sampler.a',
|
||||
'container/libabsl_raw_hash_set.a',
|
||||
'synchronization/libabsl_synchronization.a',
|
||||
'synchronization/libabsl_graphcycles_internal.a',
|
||||
'debugging/libabsl_stacktrace.a',
|
||||
'debugging/libabsl_symbolize.a',
|
||||
'debugging/libabsl_debugging_internal.a',
|
||||
'debugging/libabsl_demangle_internal.a',
|
||||
'time/libabsl_time.a',
|
||||
'time/libabsl_time_zone.a',
|
||||
'numeric/libabsl_int128.a',
|
||||
'hash/libabsl_city.a',
|
||||
'hash/libabsl_hash.a',
|
||||
'base/libabsl_malloc_internal.a',
|
||||
'base/libabsl_spinlock_wait.a',
|
||||
'base/libabsl_base.a',
|
||||
'base/libabsl_dynamic_annotations.a',
|
||||
'base/libabsl_raw_logging_internal.a',
|
||||
'base/libabsl_exponential_biased.a',
|
||||
'base/libabsl_throw_delegate.a']]
|
||||
|
||||
args.user_cflags += " " + pkg_config('jsoncpp', '--cflags')
|
||||
args.user_cflags += ' -march=' + args.target
|
||||
libs = ' '.join([maybe_static(args.staticyamlcpp, '-lyaml-cpp'), '-latomic', '-llz4', '-lz', '-lsnappy', pkg_config('jsoncpp', '--libs'),
|
||||
@@ -1316,6 +1357,7 @@ if any(filter(thrift_version.startswith, thrift_boost_versions)):
|
||||
for pkg in pkgs:
|
||||
args.user_cflags += ' ' + pkg_config(pkg, '--cflags')
|
||||
libs += ' ' + pkg_config(pkg, '--libs')
|
||||
args.user_cflags += '-I abseil'
|
||||
user_cflags = args.user_cflags + ' -fvisibility=hidden'
|
||||
user_ldflags = args.user_ldflags + ' -fvisibility=hidden'
|
||||
if args.staticcxx:
|
||||
@@ -1346,6 +1388,9 @@ else:
|
||||
for mode in build_modes:
|
||||
configure_zstd(outdir, mode)
|
||||
|
||||
for mode in build_modes:
|
||||
configure_abseil(outdir, mode)
|
||||
|
||||
# configure.py may run automatically from an already-existing build.ninja.
|
||||
# If the user interrupts configure.py in the middle, we need build.ninja
|
||||
# to remain in a valid state. So we write our output to a temporary
|
||||
@@ -1480,6 +1525,8 @@ with open(buildfile_tmp, 'w') as f:
|
||||
objs.extend(['$builddir/' + mode + '/' + artifact for artifact in [
|
||||
'libdeflate/libdeflate.a',
|
||||
'zstd/lib/libzstd.a',
|
||||
] + [
|
||||
'abseil/' + x for x in abseil_libs
|
||||
]])
|
||||
objs.append('$builddir/' + mode + '/gen/utils/gz/crc_combine_table.o')
|
||||
if binary in tests:
|
||||
@@ -1621,6 +1668,12 @@ with open(buildfile_tmp, 'w') as f:
|
||||
f.write(' subdir = build/{mode}/zstd\n'.format(**locals()))
|
||||
f.write(' target = libzstd.a\n'.format(**locals()))
|
||||
|
||||
for lib in abseil_libs:
|
||||
f.write('build build/{mode}/abseil/{lib}: ninja\n'.format(**locals()))
|
||||
f.write(' pool = submodule_pool\n')
|
||||
f.write(' subdir = build/{mode}/abseil\n'.format(**locals()))
|
||||
f.write(' target = {lib}\n'.format(**locals()))
|
||||
|
||||
mode = 'dev' if 'dev' in modes else modes[0]
|
||||
f.write('build checkheaders: phony || {}\n'.format(' '.join(['$builddir/{}/{}.o'.format(mode, hh) for hh in headers])))
|
||||
|
||||
|
||||
@@ -267,10 +267,13 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
/// The same as `impl_max_function_for' but without knowledge of `Type'.
|
||||
/// The same as `impl_max_function_for' but without compile-time dependency on `Type'.
|
||||
class impl_max_dynamic_function final : public aggregate_function::aggregate {
|
||||
data_type _io_type;
|
||||
opt_bytes _max;
|
||||
public:
|
||||
impl_max_dynamic_function(data_type io_type) : _io_type(std::move(io_type)) {}
|
||||
|
||||
virtual void reset() override {
|
||||
_max = {};
|
||||
}
|
||||
@@ -278,12 +281,11 @@ public:
|
||||
return _max.value_or(bytes{});
|
||||
}
|
||||
virtual void add_input(cql_serialization_format sf, const std::vector<opt_bytes>& values) override {
|
||||
if (!values[0]) {
|
||||
if (values.empty() || !values[0]) {
|
||||
return;
|
||||
}
|
||||
const auto val = *values[0];
|
||||
if (!_max || *_max < val) {
|
||||
_max = val;
|
||||
if (!_max || _io_type->less(*_max, *values[0])) {
|
||||
_max = values[0];
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -298,10 +300,13 @@ public:
|
||||
};
|
||||
|
||||
class max_dynamic_function final : public native_aggregate_function {
|
||||
data_type _io_type;
|
||||
public:
|
||||
max_dynamic_function(data_type io_type) : native_aggregate_function("max", io_type, { io_type }) {}
|
||||
max_dynamic_function(data_type io_type)
|
||||
: native_aggregate_function("max", io_type, { io_type })
|
||||
, _io_type(std::move(io_type)) {}
|
||||
virtual std::unique_ptr<aggregate> new_aggregate() override {
|
||||
return std::make_unique<impl_max_dynamic_function>();
|
||||
return std::make_unique<impl_max_dynamic_function>(_io_type);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -358,10 +363,13 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
/// The same as `impl_min_function_for' but without knowledge of `Type'.
|
||||
/// The same as `impl_min_function_for' but without compile-time dependency on `Type'.
|
||||
class impl_min_dynamic_function final : public aggregate_function::aggregate {
|
||||
data_type _io_type;
|
||||
opt_bytes _min;
|
||||
public:
|
||||
impl_min_dynamic_function(data_type io_type) : _io_type(std::move(io_type)) {}
|
||||
|
||||
virtual void reset() override {
|
||||
_min = {};
|
||||
}
|
||||
@@ -369,12 +377,11 @@ public:
|
||||
return _min.value_or(bytes{});
|
||||
}
|
||||
virtual void add_input(cql_serialization_format sf, const std::vector<opt_bytes>& values) override {
|
||||
if (!values[0]) {
|
||||
if (values.empty() || !values[0]) {
|
||||
return;
|
||||
}
|
||||
const auto val = *values[0];
|
||||
if (!_min || val < *_min) {
|
||||
_min = val;
|
||||
if (!_min || _io_type->less(*values[0], *_min)) {
|
||||
_min = values[0];
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -389,10 +396,13 @@ public:
|
||||
};
|
||||
|
||||
class min_dynamic_function final : public native_aggregate_function {
|
||||
data_type _io_type;
|
||||
public:
|
||||
min_dynamic_function(data_type io_type) : native_aggregate_function("min", io_type, { io_type }) {}
|
||||
min_dynamic_function(data_type io_type)
|
||||
: native_aggregate_function("min", io_type, { io_type })
|
||||
, _io_type(std::move(io_type)) {}
|
||||
virtual std::unique_ptr<aggregate> new_aggregate() override {
|
||||
return std::make_unique<impl_min_dynamic_function>();
|
||||
return std::make_unique<impl_min_dynamic_function>(_io_type);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -87,17 +87,14 @@ template<typename ToType>
|
||||
std::function<data_value(data_value)> make_castas_fctn_from_decimal_to_float() {
|
||||
return [](data_value from) -> data_value {
|
||||
auto val_from = value_cast<big_decimal>(from);
|
||||
boost::multiprecision::cpp_int ten(10);
|
||||
boost::multiprecision::cpp_rational r = val_from.unscaled_value();
|
||||
r /= boost::multiprecision::pow(ten, val_from.scale());
|
||||
return static_cast<ToType>(r);
|
||||
return static_cast<ToType>(val_from.as_rational());
|
||||
};
|
||||
}
|
||||
|
||||
static utils::multiprecision_int from_decimal_to_cppint(const data_value& from) {
|
||||
const auto& val_from = value_cast<big_decimal>(from);
|
||||
boost::multiprecision::cpp_int ten(10);
|
||||
return boost::multiprecision::cpp_int(val_from.unscaled_value() / boost::multiprecision::pow(ten, val_from.scale()));
|
||||
auto r = val_from.as_rational();
|
||||
return utils::multiprecision_int(numerator(r)/denominator(r));
|
||||
}
|
||||
|
||||
template<typename ToType>
|
||||
|
||||
@@ -357,7 +357,12 @@ lists::setter_by_uuid::execute(mutation& m, const clustering_key_prefix& prefix,
|
||||
|
||||
collection_mutation_description mut;
|
||||
mut.cells.reserve(1);
|
||||
mut.cells.emplace_back(to_bytes(*index), params.make_cell(*ltype->value_comparator(), *value, atomic_cell::collection_member::yes));
|
||||
|
||||
if (!value) {
|
||||
mut.cells.emplace_back(to_bytes(*index), params.make_dead_cell());
|
||||
} else {
|
||||
mut.cells.emplace_back(to_bytes(*index), params.make_cell(*ltype->value_comparator(), *value, atomic_cell::collection_member::yes));
|
||||
}
|
||||
|
||||
m.set_cell(prefix, column, mut.serialize(*ltype));
|
||||
}
|
||||
|
||||
@@ -49,7 +49,7 @@ relation::to_column_definition(const schema& schema, const column_identifier::ra
|
||||
auto id = entity.prepare_column_identifier(schema);
|
||||
auto def = get_column_definition(schema, *id);
|
||||
if (!def || def->is_hidden_from_cql()) {
|
||||
throw exceptions::unrecognized_entity_exception(id, shared_from_this());
|
||||
throw exceptions::unrecognized_entity_exception(*id, to_string());
|
||||
}
|
||||
return *def;
|
||||
}
|
||||
|
||||
@@ -417,7 +417,7 @@ std::vector<const column_definition*> statement_restrictions::get_column_defs_fo
|
||||
_clustering_columns_restrictions->num_prefix_columns_that_need_not_be_filtered();
|
||||
for (auto&& cdef : _clustering_columns_restrictions->get_column_defs()) {
|
||||
::shared_ptr<single_column_restriction> restr;
|
||||
if (single_pk_restrs) {
|
||||
if (single_ck_restrs) {
|
||||
auto it = single_ck_restrs->restrictions().find(cdef);
|
||||
if (it != single_ck_restrs->restrictions().end()) {
|
||||
restr = dynamic_pointer_cast<single_column_restriction>(it->second);
|
||||
@@ -624,9 +624,6 @@ bool single_column_restriction::EQ::is_satisfied_by(const schema& schema,
|
||||
const row& cells,
|
||||
const query_options& options,
|
||||
gc_clock::time_point now) const {
|
||||
if (_column_def.type->is_counter()) {
|
||||
fail(unimplemented::cause::COUNTERS);
|
||||
}
|
||||
auto operand = value(options);
|
||||
if (operand) {
|
||||
auto cell_value = get_value(schema, key, ckey, cells, now);
|
||||
@@ -641,9 +638,6 @@ bool single_column_restriction::EQ::is_satisfied_by(const schema& schema,
|
||||
}
|
||||
|
||||
bool single_column_restriction::EQ::is_satisfied_by(bytes_view data, const query_options& options) const {
|
||||
if (_column_def.type->is_counter()) {
|
||||
fail(unimplemented::cause::COUNTERS);
|
||||
}
|
||||
auto operand = value(options);
|
||||
return operand && _column_def.type->compare(*operand, data) == 0;
|
||||
}
|
||||
@@ -654,9 +648,6 @@ bool single_column_restriction::IN::is_satisfied_by(const schema& schema,
|
||||
const row& cells,
|
||||
const query_options& options,
|
||||
gc_clock::time_point now) const {
|
||||
if (_column_def.type->is_counter()) {
|
||||
fail(unimplemented::cause::COUNTERS);
|
||||
}
|
||||
auto cell_value = get_value(schema, key, ckey, cells, now);
|
||||
if (!cell_value) {
|
||||
return false;
|
||||
@@ -670,9 +661,6 @@ bool single_column_restriction::IN::is_satisfied_by(const schema& schema,
|
||||
}
|
||||
|
||||
bool single_column_restriction::IN::is_satisfied_by(bytes_view data, const query_options& options) const {
|
||||
if (_column_def.type->is_counter()) {
|
||||
fail(unimplemented::cause::COUNTERS);
|
||||
}
|
||||
auto operands = values(options);
|
||||
return boost::algorithm::any_of(operands, [this, &data] (const bytes_opt& operand) {
|
||||
return operand && _column_def.type->compare(*operand, data) == 0;
|
||||
@@ -697,6 +685,11 @@ static query::range<bytes_view> to_range(const term_slice& slice, const query_op
|
||||
extract_bound(statements::bound::END));
|
||||
}
|
||||
|
||||
static bool contains_without_wraparound(
|
||||
const query::range<bytes_view>& range, bytes_view value, const serialized_tri_compare& cmp) {
|
||||
return !range.is_wrap_around(cmp) && range.contains(value, cmp);
|
||||
}
|
||||
|
||||
bool single_column_restriction::slice::is_satisfied_by(const schema& schema,
|
||||
const partition_key& key,
|
||||
const clustering_key_prefix& ckey,
|
||||
@@ -711,15 +704,14 @@ bool single_column_restriction::slice::is_satisfied_by(const schema& schema,
|
||||
return false;
|
||||
}
|
||||
return cell_value->with_linearized([&] (bytes_view cell_value_bv) {
|
||||
return to_range(_slice, options).contains(cell_value_bv, _column_def.type->as_tri_comparator());
|
||||
return contains_without_wraparound(to_range(_slice, options),
|
||||
cell_value_bv, _column_def.type->as_tri_comparator());
|
||||
});
|
||||
}
|
||||
|
||||
bool single_column_restriction::slice::is_satisfied_by(bytes_view data, const query_options& options) const {
|
||||
if (_column_def.type->is_counter()) {
|
||||
fail(unimplemented::cause::COUNTERS);
|
||||
}
|
||||
return to_range(_slice, options).contains(data, _column_def.type->underlying_type()->as_tri_comparator());
|
||||
return contains_without_wraparound(to_range(_slice, options),
|
||||
data, _column_def.type->underlying_type()->as_tri_comparator());
|
||||
}
|
||||
|
||||
bool single_column_restriction::contains::is_satisfied_by(const schema& schema,
|
||||
@@ -728,9 +720,6 @@ bool single_column_restriction::contains::is_satisfied_by(const schema& schema,
|
||||
const row& cells,
|
||||
const query_options& options,
|
||||
gc_clock::time_point now) const {
|
||||
if (_column_def.type->is_counter()) {
|
||||
fail(unimplemented::cause::COUNTERS);
|
||||
}
|
||||
if (!_column_def.type->is_collection()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -207,6 +207,9 @@ void alter_table_statement::add_column(const schema& schema, const table& cf, sc
|
||||
"because a collection with the same name and a different type has already been used in the past", column_name));
|
||||
}
|
||||
}
|
||||
if (type->is_counter() && !schema.is_counter()) {
|
||||
throw exceptions::configuration_exception(format("Cannot add a counter column ({}) in a non counter column family", column_name));
|
||||
}
|
||||
|
||||
cfm.with_column(column_name.name(), type, is_static ? column_kind::static_column : column_kind::regular_column);
|
||||
|
||||
@@ -222,7 +225,7 @@ void alter_table_statement::add_column(const schema& schema, const table& cf, sc
|
||||
schema_builder builder(view);
|
||||
if (view->view_info()->include_all_columns()) {
|
||||
builder.with_column(column_name.name(), type);
|
||||
} else if (view->view_info()->base_non_pk_columns_in_view_pk().empty()) {
|
||||
} else if (!view->view_info()->has_base_non_pk_columns_in_view_pk()) {
|
||||
db::view::create_virtual_column(builder, column_name.name(), type);
|
||||
}
|
||||
view_updates.push_back(view_ptr(builder.build()));
|
||||
|
||||
@@ -255,7 +255,9 @@ void cf_prop_defs::apply_to_builder(schema_builder& builder, schema::extensions_
|
||||
}
|
||||
}
|
||||
|
||||
builder.set_default_time_to_live(gc_clock::duration(get_int(KW_DEFAULT_TIME_TO_LIVE, DEFAULT_DEFAULT_TIME_TO_LIVE)));
|
||||
if (has_property(KW_DEFAULT_TIME_TO_LIVE)) {
|
||||
builder.set_default_time_to_live(gc_clock::duration(get_int(KW_DEFAULT_TIME_TO_LIVE, DEFAULT_DEFAULT_TIME_TO_LIVE)));
|
||||
}
|
||||
|
||||
if (has_property(KW_SPECULATIVE_RETRY)) {
|
||||
builder.set_speculative_retry(get_string(KW_SPECULATIVE_RETRY, builder.get_speculative_retry().to_sstring()));
|
||||
|
||||
@@ -1367,8 +1367,8 @@ select_statement::prepare_restrictions(database& db,
|
||||
return ::make_shared<restrictions::statement_restrictions>(db, schema, statement_type::SELECT, std::move(_where_clause), bound_names,
|
||||
selection->contains_only_static_columns(), selection->contains_a_collection(), for_view, allow_filtering);
|
||||
} catch (const exceptions::unrecognized_entity_exception& e) {
|
||||
if (contains_alias(*e.entity)) {
|
||||
throw exceptions::invalid_request_exception(format("Aliases aren't allowed in the where clause ('{}')", e.relation->to_string()));
|
||||
if (contains_alias(e.entity)) {
|
||||
throw exceptions::invalid_request_exception(format("Aliases aren't allowed in the where clause ('{}')", e.relation_str));
|
||||
}
|
||||
throw;
|
||||
}
|
||||
|
||||
21
database.cc
21
database.cc
@@ -1323,7 +1323,7 @@ future<mutation> database::do_apply_counter_update(column_family& cf, const froz
|
||||
// counter state for each modified cell...
|
||||
|
||||
tracing::trace(trace_state, "Reading counter values from the CF");
|
||||
return counter_write_query(m_schema, cf.as_mutation_source(), m.decorated_key(), slice, trace_state)
|
||||
return counter_write_query(m_schema, cf.as_mutation_source(), m.decorated_key(), slice, trace_state, timeout)
|
||||
.then([this, &cf, &m, m_schema, timeout, trace_state] (auto mopt) {
|
||||
// ...now, that we got existing state of all affected counter
|
||||
// cells we can look for our shard in each of them, increment
|
||||
@@ -1827,7 +1827,11 @@ future<> database::truncate(const keyspace& ks, column_family& cf, timestamp_fun
|
||||
// TODO: indexes.
|
||||
// Note: since discard_sstables was changed to only count tables owned by this shard,
|
||||
// we can get zero rp back. Changed assert, and ensure we save at least low_mark.
|
||||
assert(low_mark <= rp || rp == db::replay_position());
|
||||
// #6995 - the assert below was broken in c2c6c71 and remained so for many years.
|
||||
// We nowadays do not flush tables with sstables but autosnapshot=false. This means
|
||||
// the low_mark assertion does not hold, because we maybe/probably never got around to
|
||||
// creating the sstables that would create them.
|
||||
assert(!should_flush || low_mark <= rp || rp == db::replay_position());
|
||||
rp = std::max(low_mark, rp);
|
||||
return truncate_views(cf, truncated_at, should_flush).then([&cf, truncated_at, rp] {
|
||||
// save_truncation_record() may actually fail after we cached the truncation time
|
||||
@@ -2005,9 +2009,10 @@ flat_mutation_reader make_multishard_streaming_reader(distributed<database>& db,
|
||||
reader_concurrency_semaphore* semaphore;
|
||||
};
|
||||
distributed<database>& _db;
|
||||
utils::UUID _table_id;
|
||||
std::vector<reader_context> _contexts;
|
||||
public:
|
||||
explicit streaming_reader_lifecycle_policy(distributed<database>& db) : _db(db), _contexts(smp::count) {
|
||||
streaming_reader_lifecycle_policy(distributed<database>& db, utils::UUID table_id) : _db(db), _table_id(table_id), _contexts(smp::count) {
|
||||
}
|
||||
virtual flat_mutation_reader create_reader(
|
||||
schema_ptr schema,
|
||||
@@ -2036,7 +2041,12 @@ flat_mutation_reader make_multishard_streaming_reader(distributed<database>& db,
|
||||
});
|
||||
}
|
||||
virtual reader_concurrency_semaphore& semaphore() override {
|
||||
return *_contexts[engine().cpu_id()].semaphore;
|
||||
const auto shard = engine().cpu_id();
|
||||
if (!_contexts[shard].semaphore) {
|
||||
auto& cf = _db.local().find_column_family(_table_id);
|
||||
_contexts[shard].semaphore = &cf.streaming_read_concurrency_semaphore();
|
||||
}
|
||||
return *_contexts[shard].semaphore;
|
||||
}
|
||||
};
|
||||
auto ms = mutation_source([&db] (schema_ptr s,
|
||||
@@ -2047,7 +2057,8 @@ flat_mutation_reader make_multishard_streaming_reader(distributed<database>& db,
|
||||
tracing::trace_state_ptr trace_state,
|
||||
streamed_mutation::forwarding,
|
||||
mutation_reader::forwarding fwd_mr) {
|
||||
return make_multishard_combining_reader(make_shared<streaming_reader_lifecycle_policy>(db), std::move(s), pr, ps, pc,
|
||||
auto table_id = s->id();
|
||||
return make_multishard_combining_reader(make_shared<streaming_reader_lifecycle_policy>(db, table_id), std::move(s), pr, ps, pc,
|
||||
std::move(trace_state), fwd_mr);
|
||||
});
|
||||
auto&& full_slice = schema->full_slice();
|
||||
|
||||
@@ -55,6 +55,7 @@
|
||||
#include <limits>
|
||||
#include <cstddef>
|
||||
#include "schema_fwd.hh"
|
||||
#include "db/view/view.hh"
|
||||
#include "db/schema_features.hh"
|
||||
#include "gms/feature.hh"
|
||||
#include "timestamp.hh"
|
||||
@@ -885,7 +886,7 @@ public:
|
||||
lw_shared_ptr<const sstable_list> get_sstables_including_compacted_undeleted() const;
|
||||
const std::vector<sstables::shared_sstable>& compacted_undeleted_sstables() const;
|
||||
std::vector<sstables::shared_sstable> select_sstables(const dht::partition_range& range) const;
|
||||
std::vector<sstables::shared_sstable> candidates_for_compaction() const;
|
||||
std::vector<sstables::shared_sstable> non_staging_sstables() const;
|
||||
std::vector<sstables::shared_sstable> sstables_need_rewrite() const;
|
||||
size_t sstables_count() const;
|
||||
std::vector<uint64_t> sstable_count_per_level() const;
|
||||
@@ -981,8 +982,9 @@ public:
|
||||
return *_config.sstables_manager;
|
||||
}
|
||||
|
||||
// Reader's schema must be the same as the base schema of each of the views.
|
||||
future<> populate_views(
|
||||
std::vector<view_ptr>,
|
||||
std::vector<db::view::view_and_base>,
|
||||
dht::token base_token,
|
||||
flat_mutation_reader&&);
|
||||
|
||||
@@ -998,7 +1000,7 @@ private:
|
||||
future<row_locker::lock_holder> do_push_view_replica_updates(const schema_ptr& s, mutation&& m, db::timeout_clock::time_point timeout, mutation_source&& source, const io_priority_class& io_priority) const;
|
||||
std::vector<view_ptr> affected_views(const schema_ptr& base, const mutation& update) const;
|
||||
future<> generate_and_propagate_view_updates(const schema_ptr& base,
|
||||
std::vector<view_ptr>&& views,
|
||||
std::vector<db::view::view_and_base>&& views,
|
||||
mutation&& m,
|
||||
flat_mutation_reader_opt existings) const;
|
||||
|
||||
|
||||
@@ -520,7 +520,7 @@ public:
|
||||
_segment_manager->totals.total_size_on_disk -= size_on_disk();
|
||||
_segment_manager->totals.total_size -= (size_on_disk() + _buffer.size_bytes());
|
||||
_segment_manager->add_file_to_delete(_file_name, _desc);
|
||||
} else {
|
||||
} else if (_segment_manager->cfg.warn_about_segments_left_on_disk_after_shutdown) {
|
||||
clogger.warn("Segment {} is dirty and is left on disk.", *this);
|
||||
}
|
||||
}
|
||||
@@ -1293,7 +1293,7 @@ future<db::commitlog::segment_manager::sseg_ptr> db::commitlog::segment_manager:
|
||||
auto fut = open_file_dma(filename, flags, opt);
|
||||
if (cfg.extensions && !cfg.extensions->commitlog_file_extensions().empty()) {
|
||||
for (auto * ext : cfg.extensions->commitlog_file_extensions()) {
|
||||
fut = fut.then([ext, filename, flags](file f) {
|
||||
fut = close_on_failure(std::move(fut), [ext, filename, flags](file f) {
|
||||
return ext->wrap_file(filename, f, flags).then([f](file nf) mutable {
|
||||
return nf ? nf : std::move(f);
|
||||
});
|
||||
|
||||
@@ -137,6 +137,7 @@ public:
|
||||
|
||||
bool reuse_segments = true;
|
||||
bool use_o_dsync = false;
|
||||
bool warn_about_segments_left_on_disk_after_shutdown = true;
|
||||
|
||||
const db::extensions * extensions = nullptr;
|
||||
};
|
||||
|
||||
@@ -299,7 +299,7 @@ future<> db::commitlog_replayer::impl::process(stats* s, commitlog::buffer_and_r
|
||||
mutation m(cf.schema(), fm.decorated_key(*cf.schema()));
|
||||
converting_mutation_partition_applier v(cm, *cf.schema(), m.partition());
|
||||
fm.partition().accept(cm, v);
|
||||
return do_with(std::move(m), [&db, &cf] (mutation m) {
|
||||
return do_with(std::move(m), [&db, &cf] (const mutation& m) {
|
||||
return db.apply_in_memory(m, cf, db::rp_handle(), db::no_timeout);
|
||||
});
|
||||
} else {
|
||||
|
||||
@@ -224,7 +224,9 @@ future<> manager::end_point_hints_manager::stop(drain should_drain) noexcept {
|
||||
with_lock(file_update_mutex(), [this] {
|
||||
if (_hints_store_anchor) {
|
||||
hints_store_ptr tmp = std::exchange(_hints_store_anchor, nullptr);
|
||||
return tmp->shutdown().finally([tmp] {});
|
||||
return tmp->shutdown().finally([tmp] {
|
||||
return tmp->release();
|
||||
}).finally([tmp] {});
|
||||
}
|
||||
return make_ready_future<>();
|
||||
}).handle_exception([&eptr] (auto e) { eptr = std::move(e); }).get();
|
||||
@@ -326,6 +328,10 @@ future<db::commitlog> manager::end_point_hints_manager::add_store() noexcept {
|
||||
// HH doesn't utilize the flow that benefits from reusing segments.
|
||||
// Therefore let's simply disable it to avoid any possible confusion.
|
||||
cfg.reuse_segments = false;
|
||||
// HH leaves segments on disk after commitlog shutdown, and later reads
|
||||
// them when commitlog is re-created. This is expected to happen regularly
|
||||
// during standard HH workload, so no need to print a warning about it.
|
||||
cfg.warn_about_segments_left_on_disk_after_shutdown = false;
|
||||
|
||||
return commitlog::create_commitlog(std::move(cfg)).then([this] (commitlog l) {
|
||||
// add_store() is triggered every time hint files are forcefully flushed to I/O (every hints_flush_period).
|
||||
@@ -352,7 +358,9 @@ future<> manager::end_point_hints_manager::flush_current_hints() noexcept {
|
||||
return futurize_apply([this] {
|
||||
return with_lock(file_update_mutex(), [this]() -> future<> {
|
||||
return get_or_load().then([] (hints_store_ptr cptr) {
|
||||
return cptr->shutdown();
|
||||
return cptr->shutdown().finally([cptr] {
|
||||
return cptr->release();
|
||||
}).finally([cptr] {});
|
||||
}).then([this] {
|
||||
// Un-hold the commitlog object. Since we are under the exclusive _file_update_mutex lock there are no
|
||||
// other hints_store_ptr copies and this would destroy the commitlog shared value.
|
||||
|
||||
@@ -822,6 +822,14 @@ future<> merge_schema(distributed<service::storage_proxy>& proxy, gms::feature_s
|
||||
});
|
||||
}
|
||||
|
||||
future<> recalculate_schema_version(distributed<service::storage_proxy>& proxy, gms::feature_service& feat) {
|
||||
return merge_lock().then([&proxy, &feat] {
|
||||
return update_schema_version_and_announce(proxy, feat.cluster_schema_features());
|
||||
}).finally([] {
|
||||
return merge_unlock();
|
||||
});
|
||||
}
|
||||
|
||||
future<> merge_schema(distributed<service::storage_proxy>& proxy, std::vector<mutation> mutations, bool do_flush)
|
||||
{
|
||||
return merge_lock().then([&proxy, mutations = std::move(mutations), do_flush] () mutable {
|
||||
|
||||
@@ -170,6 +170,13 @@ future<> merge_schema(distributed<service::storage_proxy>& proxy, gms::feature_s
|
||||
|
||||
future<> merge_schema(distributed<service::storage_proxy>& proxy, std::vector<mutation> mutations, bool do_flush);
|
||||
|
||||
// Recalculates the local schema version and publishes it in gossip.
|
||||
//
|
||||
// It is safe to call concurrently with recalculate_schema_version() and merge_schema() in which case it
|
||||
// is guaranteed that the schema version we end up with after all calls will reflect the most recent state
|
||||
// of feature_service and schema tables.
|
||||
future<> recalculate_schema_version(distributed<service::storage_proxy>& proxy, gms::feature_service& feat);
|
||||
|
||||
future<std::set<sstring>> merge_keyspaces(distributed<service::storage_proxy>& proxy, schema_result&& before, schema_result&& after);
|
||||
|
||||
std::vector<mutation> make_create_keyspace_mutations(lw_shared_ptr<keyspace_metadata> keyspace, api::timestamp_type timestamp, bool with_tables_and_types_and_functions = true);
|
||||
|
||||
@@ -203,6 +203,7 @@ schema_ptr batchlog() {
|
||||
// operations on resulting CFMetaData:
|
||||
// .compactionStrategyClass(LeveledCompactionStrategy.class);
|
||||
)));
|
||||
builder.set_gc_grace_seconds(0);
|
||||
builder.with_version(generate_schema_version(builder.uuid()));
|
||||
builder.set_wait_for_sync_to_commitlog(true);
|
||||
return builder.build(schema_builder::compact_storage::no);
|
||||
@@ -226,6 +227,7 @@ schema_ptr built_indexes() {
|
||||
// comment
|
||||
"built column indexes"
|
||||
)));
|
||||
builder.set_gc_grace_seconds(0);
|
||||
builder.with_version(generate_schema_version(builder.uuid()));
|
||||
return builder.build(schema_builder::compact_storage::yes);
|
||||
}();
|
||||
@@ -272,6 +274,7 @@ schema_ptr built_indexes() {
|
||||
// comment
|
||||
"information about the local node"
|
||||
)));
|
||||
builder.set_gc_grace_seconds(0);
|
||||
builder.with_version(generate_schema_version(builder.uuid()));
|
||||
builder.remove_column("scylla_cpu_sharding_algorithm");
|
||||
builder.remove_column("scylla_nr_shards");
|
||||
@@ -307,6 +310,7 @@ schema_ptr built_indexes() {
|
||||
// comment
|
||||
"information about known peers in the cluster"
|
||||
)));
|
||||
builder.set_gc_grace_seconds(0);
|
||||
builder.with_version(generate_schema_version(builder.uuid()));
|
||||
return builder.build(schema_builder::compact_storage::no);
|
||||
}();
|
||||
@@ -331,6 +335,7 @@ schema_ptr built_indexes() {
|
||||
// comment
|
||||
"events related to peers"
|
||||
)));
|
||||
builder.set_gc_grace_seconds(0);
|
||||
builder.with_version(generate_schema_version(builder.uuid()));
|
||||
return builder.build(schema_builder::compact_storage::no);
|
||||
}();
|
||||
@@ -353,6 +358,7 @@ schema_ptr built_indexes() {
|
||||
// comment
|
||||
"ranges requested for transfer"
|
||||
)));
|
||||
builder.set_gc_grace_seconds(0);
|
||||
builder.with_version(generate_schema_version(builder.uuid()));
|
||||
return builder.build(schema_builder::compact_storage::no);
|
||||
}();
|
||||
@@ -490,6 +496,7 @@ schema_ptr size_estimates() {
|
||||
// comment
|
||||
"partitions larger than specified threshold"
|
||||
)));
|
||||
builder.set_gc_grace_seconds(0);
|
||||
builder.with_version(generate_schema_version(builder.uuid()));
|
||||
return builder.build(schema_builder::compact_storage::no);
|
||||
}();
|
||||
@@ -510,6 +517,7 @@ static schema_ptr large_rows() {
|
||||
.with_column("compaction_time", timestamp_type)
|
||||
.set_comment("rows larger than specified threshold")
|
||||
.with_version(generate_schema_version(id))
|
||||
.set_gc_grace_seconds(0)
|
||||
.build();
|
||||
}();
|
||||
return large_rows;
|
||||
@@ -530,6 +538,7 @@ static schema_ptr large_cells() {
|
||||
.with_column("compaction_time", timestamp_type)
|
||||
.set_comment("cells larger than specified threshold")
|
||||
.with_version(generate_schema_version(id))
|
||||
.set_gc_grace_seconds(0)
|
||||
.build();
|
||||
}();
|
||||
return large_cells;
|
||||
@@ -553,6 +562,7 @@ static schema_ptr large_cells() {
|
||||
// comment
|
||||
"Scylla specific information about the local node"
|
||||
)));
|
||||
builder.set_gc_grace_seconds(0);
|
||||
builder.with_version(generate_schema_version(builder.uuid()));
|
||||
return builder.build(schema_builder::compact_storage::no);
|
||||
}();
|
||||
@@ -666,6 +676,7 @@ schema_ptr local() {
|
||||
// comment
|
||||
"information about the local node"
|
||||
)));
|
||||
builder.set_gc_grace_seconds(0);
|
||||
builder.with_version(generate_schema_version(builder.uuid()));
|
||||
return builder.build(schema_builder::compact_storage::no);
|
||||
}();
|
||||
@@ -693,6 +704,7 @@ schema_ptr truncated() {
|
||||
// comment
|
||||
"information about table truncation"
|
||||
)));
|
||||
builder.set_gc_grace_seconds(0);
|
||||
builder.with_version(generate_schema_version(builder.uuid()));
|
||||
return builder.build(schema_builder::compact_storage::no);
|
||||
}();
|
||||
@@ -755,6 +767,7 @@ schema_ptr available_ranges() {
|
||||
// comment
|
||||
"available keyspace/ranges during bootstrap/replace that are ready to be served"
|
||||
)));
|
||||
builder.set_gc_grace_seconds(0);
|
||||
builder.with_version(generate_schema_version(builder.uuid()));
|
||||
return builder.build();
|
||||
}();
|
||||
@@ -777,6 +790,7 @@ schema_ptr views_builds_in_progress() {
|
||||
// comment
|
||||
"views builds current progress"
|
||||
)));
|
||||
builder.set_gc_grace_seconds(0);
|
||||
builder.with_version(generate_schema_version(builder.uuid()));
|
||||
return builder.build();
|
||||
}();
|
||||
@@ -799,6 +813,7 @@ schema_ptr built_views() {
|
||||
// comment
|
||||
"built views"
|
||||
)));
|
||||
builder.set_gc_grace_seconds(0);
|
||||
builder.with_version(generate_schema_version(builder.uuid()));
|
||||
return builder.build();
|
||||
}();
|
||||
@@ -842,6 +857,7 @@ schema_ptr scylla_views_builds_in_progress() {
|
||||
// comment
|
||||
"CDC-specific information that the local node stores"
|
||||
)));
|
||||
builder.set_gc_grace_seconds(0);
|
||||
builder.with_version(generate_schema_version(builder.uuid()));
|
||||
return builder.build(schema_builder::compact_storage::no);
|
||||
}();
|
||||
|
||||
@@ -130,17 +130,26 @@ const column_definition* view_info::view_column(const column_definition& base_de
|
||||
return _schema.get_column_definition(base_def.name());
|
||||
}
|
||||
|
||||
const std::vector<column_id>& view_info::base_non_pk_columns_in_view_pk() const {
|
||||
return _base_non_pk_columns_in_view_pk;
|
||||
void view_info::set_base_info(db::view::base_info_ptr base_info) {
|
||||
_base_info = std::move(base_info);
|
||||
}
|
||||
|
||||
void view_info::initialize_base_dependent_fields(const schema& base) {
|
||||
db::view::base_info_ptr view_info::make_base_dependent_view_info(const schema& base) const {
|
||||
std::vector<column_id> base_non_pk_columns_in_view_pk;
|
||||
for (auto&& view_col : boost::range::join(_schema.partition_key_columns(), _schema.clustering_key_columns())) {
|
||||
auto* base_col = base.get_column_definition(view_col.name());
|
||||
if (base_col && !base_col->is_primary_key()) {
|
||||
_base_non_pk_columns_in_view_pk.push_back(base_col->id);
|
||||
base_non_pk_columns_in_view_pk.push_back(base_col->id);
|
||||
}
|
||||
}
|
||||
return make_lw_shared<db::view::base_dependent_view_info>({
|
||||
.base_schema = base.shared_from_this(),
|
||||
.base_non_pk_columns_in_view_pk = std::move(base_non_pk_columns_in_view_pk)
|
||||
});
|
||||
}
|
||||
|
||||
bool view_info::has_base_non_pk_columns_in_view_pk() const {
|
||||
return !_base_info->base_non_pk_columns_in_view_pk.empty();
|
||||
}
|
||||
|
||||
namespace db {
|
||||
@@ -188,11 +197,11 @@ bool may_be_affected_by(const schema& base, const view_info& view, const dht::de
|
||||
}
|
||||
|
||||
static bool update_requires_read_before_write(const schema& base,
|
||||
const std::vector<view_ptr>& views,
|
||||
const std::vector<view_and_base>& views,
|
||||
const dht::decorated_key& key,
|
||||
const rows_entry& update) {
|
||||
for (auto&& v : views) {
|
||||
view_info& vf = *v->view_info();
|
||||
view_info& vf = *v.view->view_info();
|
||||
if (may_be_affected_by(base, vf, key, update)) {
|
||||
return true;
|
||||
}
|
||||
@@ -239,12 +248,14 @@ class view_updates final {
|
||||
view_ptr _view;
|
||||
const view_info& _view_info;
|
||||
schema_ptr _base;
|
||||
base_info_ptr _base_info;
|
||||
std::unordered_map<partition_key, mutation_partition, partition_key::hashing, partition_key::equality> _updates;
|
||||
public:
|
||||
explicit view_updates(view_ptr view, schema_ptr base)
|
||||
: _view(std::move(view))
|
||||
explicit view_updates(view_and_base vab)
|
||||
: _view(std::move(vab.view))
|
||||
, _view_info(*_view->view_info())
|
||||
, _base(std::move(base))
|
||||
, _base(vab.base->base_schema)
|
||||
, _base_info(vab.base)
|
||||
, _updates(8, partition_key::hashing(*_view), partition_key::equality(*_view)) {
|
||||
}
|
||||
|
||||
@@ -306,7 +317,7 @@ row_marker view_updates::compute_row_marker(const clustering_row& base_row) cons
|
||||
// they share liveness information. It's true especially in the only case currently allowed by CQL,
|
||||
// which assumes there's up to one non-pk column in the view key. It's also true in alternator,
|
||||
// which does not carry TTL information.
|
||||
const auto& col_ids = _view_info.base_non_pk_columns_in_view_pk();
|
||||
const auto& col_ids = _base_info->base_non_pk_columns_in_view_pk;
|
||||
if (!col_ids.empty()) {
|
||||
auto& def = _base->regular_column_at(col_ids[0]);
|
||||
// Note: multi-cell columns can't be part of the primary key.
|
||||
@@ -537,7 +548,7 @@ void view_updates::delete_old_entry(const partition_key& base_key, const cluster
|
||||
|
||||
void view_updates::do_delete_old_entry(const partition_key& base_key, const clustering_row& existing, const clustering_row& update, gc_clock::time_point now) {
|
||||
auto& r = get_view_row(base_key, existing);
|
||||
const auto& col_ids = _view_info.base_non_pk_columns_in_view_pk();
|
||||
const auto& col_ids = _base_info->base_non_pk_columns_in_view_pk;
|
||||
if (!col_ids.empty()) {
|
||||
// We delete the old row using a shadowable row tombstone, making sure that
|
||||
// the tombstone deletes everything in the row (or it might still show up).
|
||||
@@ -678,7 +689,7 @@ void view_updates::generate_update(
|
||||
return;
|
||||
}
|
||||
|
||||
const auto& col_ids = _view_info.base_non_pk_columns_in_view_pk();
|
||||
const auto& col_ids = _base_info->base_non_pk_columns_in_view_pk;
|
||||
if (col_ids.empty()) {
|
||||
// The view key is necessarily the same pre and post update.
|
||||
if (existing && existing->is_live(*_base)) {
|
||||
@@ -932,11 +943,16 @@ future<stop_iteration> view_update_builder::on_results() {
|
||||
|
||||
future<std::vector<frozen_mutation_and_schema>> generate_view_updates(
|
||||
const schema_ptr& base,
|
||||
std::vector<view_ptr>&& views_to_update,
|
||||
std::vector<view_and_base>&& views_to_update,
|
||||
flat_mutation_reader&& updates,
|
||||
flat_mutation_reader_opt&& existings) {
|
||||
auto vs = boost::copy_range<std::vector<view_updates>>(views_to_update | boost::adaptors::transformed([&] (auto&& v) {
|
||||
return view_updates(std::move(v), base);
|
||||
auto vs = boost::copy_range<std::vector<view_updates>>(views_to_update | boost::adaptors::transformed([&] (view_and_base v) {
|
||||
if (base->version() != v.base->base_schema->version()) {
|
||||
on_internal_error(vlogger, format("Schema version used for view updates ({}) does not match the current"
|
||||
" base schema version of the view ({}) for view {}.{} of {}.{}",
|
||||
base->version(), v.base->base_schema->version(), v.view->ks_name(), v.view->cf_name(), base->ks_name(), base->cf_name()));
|
||||
}
|
||||
return view_updates(std::move(v));
|
||||
}));
|
||||
auto builder = std::make_unique<view_update_builder>(base, std::move(vs), std::move(updates), std::move(existings));
|
||||
auto f = builder->build();
|
||||
@@ -946,18 +962,18 @@ future<std::vector<frozen_mutation_and_schema>> generate_view_updates(
|
||||
query::clustering_row_ranges calculate_affected_clustering_ranges(const schema& base,
|
||||
const dht::decorated_key& key,
|
||||
const mutation_partition& mp,
|
||||
const std::vector<view_ptr>& views) {
|
||||
const std::vector<view_and_base>& views) {
|
||||
std::vector<nonwrapping_range<clustering_key_prefix_view>> row_ranges;
|
||||
std::vector<nonwrapping_range<clustering_key_prefix_view>> view_row_ranges;
|
||||
clustering_key_prefix_view::tri_compare cmp(base);
|
||||
if (mp.partition_tombstone() || !mp.row_tombstones().empty()) {
|
||||
for (auto&& v : views) {
|
||||
// FIXME: #2371
|
||||
if (v->view_info()->select_statement().get_restrictions()->has_unrestricted_clustering_columns()) {
|
||||
if (v.view->view_info()->select_statement().get_restrictions()->has_unrestricted_clustering_columns()) {
|
||||
view_row_ranges.push_back(nonwrapping_range<clustering_key_prefix_view>::make_open_ended_both_sides());
|
||||
break;
|
||||
}
|
||||
for (auto&& r : v->view_info()->partition_slice().default_row_ranges()) {
|
||||
for (auto&& r : v.view->view_info()->partition_slice().default_row_ranges()) {
|
||||
view_row_ranges.push_back(r.transform(std::mem_fn(&clustering_key_prefix::view)));
|
||||
}
|
||||
}
|
||||
@@ -1717,7 +1733,7 @@ public:
|
||||
return stop_iteration::yes;
|
||||
}
|
||||
|
||||
_fragments_memory_usage += cr.memory_usage(*_step.base->schema());
|
||||
_fragments_memory_usage += cr.memory_usage(*_step.reader.schema());
|
||||
_fragments.push_back(std::move(cr));
|
||||
if (_fragments_memory_usage > batch_memory_max) {
|
||||
// Although we have not yet completed the batch of base rows that
|
||||
@@ -1737,10 +1753,14 @@ public:
|
||||
_builder._as.check();
|
||||
if (!_fragments.empty()) {
|
||||
_fragments.push_front(partition_start(_step.current_key, tombstone()));
|
||||
auto base_schema = _step.base->schema();
|
||||
auto views = with_base_info_snapshot(_views_to_build);
|
||||
auto reader = make_flat_mutation_reader_from_fragments(_step.reader.schema(), std::move(_fragments));
|
||||
reader.upgrade_schema(base_schema);
|
||||
_step.base->populate_views(
|
||||
_views_to_build,
|
||||
std::move(views),
|
||||
_step.current_token(),
|
||||
make_flat_mutation_reader_from_fragments(_step.base->schema(), std::move(_fragments))).get();
|
||||
std::move(reader)).get();
|
||||
_fragments.clear();
|
||||
_fragments_memory_usage = 0;
|
||||
}
|
||||
@@ -1887,5 +1907,11 @@ future<bool> check_needs_view_update_path(db::system_distributed_keyspace& sys_d
|
||||
});
|
||||
}
|
||||
|
||||
std::vector<db::view::view_and_base> with_base_info_snapshot(std::vector<view_ptr> vs) {
|
||||
return boost::copy_range<std::vector<db::view::view_and_base>>(vs | boost::adaptors::transformed([] (const view_ptr& v) {
|
||||
return db::view::view_and_base{v, v->view_info()->base_info()};
|
||||
}));
|
||||
}
|
||||
|
||||
} // namespace view
|
||||
} // namespace db
|
||||
|
||||
@@ -43,6 +43,27 @@ namespace db {
|
||||
|
||||
namespace view {
|
||||
|
||||
// Part of the view description which depends on the base schema version.
|
||||
//
|
||||
// This structure may change even though the view schema doesn't change, so
|
||||
// it needs to live outside view_ptr.
|
||||
struct base_dependent_view_info {
|
||||
schema_ptr base_schema;
|
||||
|
||||
// Id of a regular base table column included in the view's PK, if any.
|
||||
// Scylla views only allow one such column, alternator can have up to two.
|
||||
std::vector<column_id> base_non_pk_columns_in_view_pk;
|
||||
};
|
||||
|
||||
// Immutable snapshot of view's base-schema-dependent part.
|
||||
using base_info_ptr = lw_shared_ptr<const base_dependent_view_info>;
|
||||
|
||||
// Snapshot of the view schema and its base-schema-dependent part.
|
||||
struct view_and_base {
|
||||
view_ptr view;
|
||||
base_info_ptr base;
|
||||
};
|
||||
|
||||
/**
|
||||
* Whether the view filter considers the specified partition key.
|
||||
*
|
||||
@@ -92,7 +113,7 @@ bool clustering_prefix_matches(const schema& base, const partition_key& key, con
|
||||
|
||||
future<std::vector<frozen_mutation_and_schema>> generate_view_updates(
|
||||
const schema_ptr& base,
|
||||
std::vector<view_ptr>&& views_to_update,
|
||||
std::vector<view_and_base>&& views_to_update,
|
||||
flat_mutation_reader&& updates,
|
||||
flat_mutation_reader_opt&& existings);
|
||||
|
||||
@@ -100,7 +121,7 @@ query::clustering_row_ranges calculate_affected_clustering_ranges(
|
||||
const schema& base,
|
||||
const dht::decorated_key& key,
|
||||
const mutation_partition& mp,
|
||||
const std::vector<view_ptr>& views);
|
||||
const std::vector<view_and_base>& views);
|
||||
|
||||
struct wait_for_all_updates_tag {};
|
||||
using wait_for_all_updates = bool_class<wait_for_all_updates_tag>;
|
||||
@@ -128,6 +149,13 @@ future<> mutate_MV(
|
||||
*/
|
||||
void create_virtual_column(schema_builder& builder, const bytes& name, const data_type& type);
|
||||
|
||||
/**
|
||||
* Converts a collection of view schema snapshots into a collection of
|
||||
* view_and_base objects, which are snapshots of both the view schema
|
||||
* and the base-schema-dependent part of view description.
|
||||
*/
|
||||
std::vector<view_and_base> with_base_info_snapshot(std::vector<view_ptr>);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
15
dist/common/scripts/scylla-housekeeping
vendored
15
dist/common/scripts/scylla-housekeeping
vendored
@@ -61,7 +61,15 @@ def sh_command(*args):
|
||||
return out
|
||||
|
||||
def get_url(path):
|
||||
return urllib.request.urlopen(path).read().decode('utf-8')
|
||||
# If server returns any error, like 403, or 500 urllib.request throws exception, which is not serializable.
|
||||
# When multiprocessing routines fail to serialize it, it throws ambiguous serialization exception
|
||||
# from get_json_from_url.
|
||||
# In order to see legit error we catch it from the inside of process, covert to string and
|
||||
# pass it as part of return value
|
||||
try:
|
||||
return 0, urllib.request.urlopen(path).read().decode('utf-8')
|
||||
except Exception as exc:
|
||||
return 1, str(exc)
|
||||
|
||||
def get_json_from_url(path):
|
||||
pool = mp.Pool(processes=1)
|
||||
@@ -71,13 +79,16 @@ def get_json_from_url(path):
|
||||
# to enforce a wallclock timeout.
|
||||
result = pool.apply_async(get_url, args=(path,))
|
||||
try:
|
||||
retval = result.get(timeout=5)
|
||||
status, retval = result.get(timeout=5)
|
||||
except mp.TimeoutError as err:
|
||||
pool.terminate()
|
||||
pool.join()
|
||||
raise
|
||||
if status == 1:
|
||||
raise RuntimeError(f'Failed to get "{path}" due to the following error: {retval}')
|
||||
return json.loads(retval)
|
||||
|
||||
|
||||
def get_api(path):
|
||||
return get_json_from_url("http://" + api_address + path)
|
||||
|
||||
|
||||
3
dist/common/scripts/scylla_setup
vendored
3
dist/common/scripts/scylla_setup
vendored
@@ -371,6 +371,9 @@ if __name__ == '__main__':
|
||||
if not stat.S_ISBLK(os.stat(dsk).st_mode):
|
||||
print('{} is not block device'.format(dsk))
|
||||
continue
|
||||
if dsk in selected:
|
||||
print(f'{dsk} is already added')
|
||||
continue
|
||||
selected.append(dsk)
|
||||
devices.remove(dsk)
|
||||
disks = ','.join(selected)
|
||||
|
||||
20
dist/common/scripts/scylla_util.py
vendored
20
dist/common/scripts/scylla_util.py
vendored
@@ -182,7 +182,7 @@ class aws_instance:
|
||||
instance_size = self.instance_size()
|
||||
if instance_class in ['c3', 'c4', 'd2', 'i2', 'r3']:
|
||||
return 'ixgbevf'
|
||||
if instance_class in ['a1', 'c5', 'c5d', 'f1', 'g3', 'g4', 'h1', 'i3', 'i3en', 'inf1', 'm5', 'm5a', 'm5ad', 'm5d', 'm5dn', 'm5n', 'm6g', 'p2', 'p3', 'r4', 'r5', 'r5a', 'r5ad', 'r5d', 'r5dn', 'r5n', 't3', 't3a', 'u-6tb1', 'u-9tb1', 'u-12tb1', 'u-18tn1', 'u-24tb1', 'x1', 'x1e', 'z1d']:
|
||||
if instance_class in ['a1', 'c5', 'c5a', 'c5d', 'c5n', 'c6g', 'c6gd', 'f1', 'g3', 'g4', 'h1', 'i3', 'i3en', 'inf1', 'm5', 'm5a', 'm5ad', 'm5d', 'm5dn', 'm5n', 'm6g', 'm6gd', 'p2', 'p3', 'r4', 'r5', 'r5a', 'r5ad', 'r5d', 'r5dn', 'r5n', 't3', 't3a', 'u-6tb1', 'u-9tb1', 'u-12tb1', 'u-18tn1', 'u-24tb1', 'x1', 'x1e', 'z1d']:
|
||||
return 'ena'
|
||||
if instance_class == 'm4':
|
||||
if instance_size == '16xlarge':
|
||||
@@ -329,7 +329,7 @@ class scylla_cpuinfo:
|
||||
|
||||
# When a CLI tool is not installed, use relocatable CLI tool provided by Scylla
|
||||
scylla_env = os.environ.copy()
|
||||
scylla_env['PATH'] = '{}:{}'.format(scylla_env['PATH'], scyllabindir())
|
||||
scylla_env['PATH'] = '{}:{}'.format(scyllabindir(), scylla_env['PATH'])
|
||||
|
||||
def run(cmd, shell=False, silent=False, exception=True):
|
||||
stdout = subprocess.DEVNULL if silent else None
|
||||
@@ -441,6 +441,19 @@ def dist_ver():
|
||||
return platform.dist()[1]
|
||||
|
||||
|
||||
SYSTEM_PARTITION_UUIDS = [
|
||||
'21686148-6449-6e6f-744e-656564454649', # BIOS boot partition
|
||||
'c12a7328-f81f-11d2-ba4b-00a0c93ec93b', # EFI system partition
|
||||
'024dee41-33e7-11d3-9d69-0008c781f39f' # MBR partition scheme
|
||||
]
|
||||
|
||||
def get_partition_uuid(dev):
|
||||
return out(f'lsblk -n -oPARTTYPE {dev}')
|
||||
|
||||
def is_system_partition(dev):
|
||||
uuid = get_partition_uuid(dev)
|
||||
return (uuid in SYSTEM_PARTITION_UUIDS)
|
||||
|
||||
def is_unused_disk(dev):
|
||||
# dev is not in /sys/class/block/, like /dev/nvme[0-9]+
|
||||
if not os.path.isdir('/sys/class/block/{dev}'.format(dev=dev.replace('/dev/', ''))):
|
||||
@@ -448,7 +461,8 @@ def is_unused_disk(dev):
|
||||
try:
|
||||
fd = os.open(dev, os.O_EXCL)
|
||||
os.close(fd)
|
||||
return True
|
||||
# dev is not reserved for system
|
||||
return not is_system_partition(dev)
|
||||
except OSError:
|
||||
return False
|
||||
|
||||
|
||||
1
dist/debian/control.mustache
vendored
1
dist/debian/control.mustache
vendored
@@ -5,6 +5,7 @@ Section: database
|
||||
Priority: optional
|
||||
X-Python3-Version: >= 3.4
|
||||
Standards-Version: 3.9.5
|
||||
Rules-Requires-Root: no
|
||||
|
||||
Package: {{product}}-conf
|
||||
Architecture: any
|
||||
|
||||
1
dist/debian/python3/control.mustache
vendored
1
dist/debian/python3/control.mustache
vendored
@@ -5,6 +5,7 @@ Section: python
|
||||
Priority: optional
|
||||
X-Python3-Version: >= 3.4
|
||||
Standards-Version: 3.9.5
|
||||
Rules-Requires-Root: no
|
||||
|
||||
Package: {{product}}-python3
|
||||
Architecture: amd64
|
||||
|
||||
1
dist/debian/rules.mustache
vendored
1
dist/debian/rules.mustache
vendored
@@ -37,6 +37,7 @@ override_dh_strip:
|
||||
# The binaries (ethtool...patchelf) don't pass dh_strip after going through patchelf. Since they are
|
||||
# already stripped, nothing is lost if we exclude them, so that's what we do.
|
||||
dh_strip -Xlibprotobuf.so.15 -Xld.so -Xethtool -Xgawk -Xgzip -Xhwloc-calc -Xhwloc-distrib -Xifconfig -Xlscpu -Xnetstat -Xpatchelf --dbg-package={{product}}-server-dbg
|
||||
find $(CURDIR)/debian/{{product}}-server-dbg/usr/lib/debug/.build-id/ -name "*.debug" -exec objcopy --decompress-debug-sections {} \;
|
||||
|
||||
override_dh_makeshlibs:
|
||||
|
||||
|
||||
@@ -56,22 +56,22 @@ public:
|
||||
/**
|
||||
* The unrecognized entity.
|
||||
*/
|
||||
::shared_ptr<cql3::column_identifier> entity;
|
||||
cql3::column_identifier entity;
|
||||
|
||||
/**
|
||||
* The entity relation.
|
||||
* The entity relation in a stringified form.
|
||||
*/
|
||||
cql3::relation_ptr relation;
|
||||
sstring relation_str;
|
||||
|
||||
/**
|
||||
* Creates a new <code>UnrecognizedEntityException</code>.
|
||||
* @param entity the unrecognized entity
|
||||
* @param relation the entity relation
|
||||
* @param relation_str the entity relation string
|
||||
*/
|
||||
unrecognized_entity_exception(::shared_ptr<cql3::column_identifier> entity, cql3::relation_ptr relation)
|
||||
: invalid_request_exception(format("Undefined name {} in where clause ('{}')", *entity, relation->to_string()))
|
||||
, entity(entity)
|
||||
, relation(relation)
|
||||
unrecognized_entity_exception(cql3::column_identifier entity, sstring relation_str)
|
||||
: invalid_request_exception(format("Undefined name {} in where clause ('{}')", entity, relation_str))
|
||||
, entity(std::move(entity))
|
||||
, relation_str(std::move(relation_str))
|
||||
{ }
|
||||
};
|
||||
|
||||
|
||||
@@ -487,6 +487,9 @@ public:
|
||||
size_t buffer_size() const {
|
||||
return _impl->buffer_size();
|
||||
}
|
||||
const circular_buffer<mutation_fragment>& buffer() const {
|
||||
return _impl->buffer();
|
||||
}
|
||||
// Detach the internal buffer of the reader.
|
||||
// Roughly equivalent to depleting it by calling pop_mutation_fragment()
|
||||
// until is_buffer_empty() returns true.
|
||||
|
||||
@@ -428,6 +428,7 @@ future<> gossiper::handle_shutdown_msg(inet_address from) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
return seastar::async([this, from] {
|
||||
auto permit = this->lock_endpoint(from).get0();
|
||||
this->mark_as_shutdown(from);
|
||||
});
|
||||
}
|
||||
@@ -632,7 +633,7 @@ void gossiper::remove_endpoint(inet_address endpoint) {
|
||||
// We can not run on_remove callbacks here becasue on_remove in
|
||||
// storage_service might take the gossiper::timer_callback_lock
|
||||
(void)seastar::async([this, endpoint] {
|
||||
_subscribers.for_each([endpoint] (auto& subscriber) {
|
||||
_subscribers.for_each([endpoint] (shared_ptr<i_endpoint_state_change_subscriber> subscriber) {
|
||||
subscriber->on_remove(endpoint);
|
||||
});
|
||||
}).handle_exception([] (auto ep) {
|
||||
@@ -1464,7 +1465,7 @@ void gossiper::real_mark_alive(inet_address addr, endpoint_state& local_state) {
|
||||
logger.info("InetAddress {} is now UP, status = {}", addr, status);
|
||||
}
|
||||
|
||||
_subscribers.for_each([addr, local_state] (auto& subscriber) {
|
||||
_subscribers.for_each([addr, local_state] (shared_ptr<i_endpoint_state_change_subscriber> subscriber) {
|
||||
subscriber->on_alive(addr, local_state);
|
||||
logger.trace("Notified {}", subscriber.get());
|
||||
});
|
||||
@@ -1478,7 +1479,7 @@ void gossiper::mark_dead(inet_address addr, endpoint_state& local_state) {
|
||||
_live_endpoints_just_added.remove(addr);
|
||||
_unreachable_endpoints[addr] = now();
|
||||
logger.info("InetAddress {} is now DOWN, status = {}", addr, get_gossip_status(local_state));
|
||||
_subscribers.for_each([addr, local_state] (auto& subscriber) {
|
||||
_subscribers.for_each([addr, local_state] (shared_ptr<i_endpoint_state_change_subscriber> subscriber) {
|
||||
subscriber->on_dead(addr, local_state);
|
||||
logger.trace("Notified {}", subscriber.get());
|
||||
});
|
||||
@@ -1510,7 +1511,7 @@ void gossiper::handle_major_state_change(inet_address ep, const endpoint_state&
|
||||
|
||||
if (eps_old) {
|
||||
// the node restarted: it is up to the subscriber to take whatever action is necessary
|
||||
_subscribers.for_each([ep, eps_old] (auto& subscriber) {
|
||||
_subscribers.for_each([ep, eps_old] (shared_ptr<i_endpoint_state_change_subscriber> subscriber) {
|
||||
subscriber->on_restart(ep, *eps_old);
|
||||
});
|
||||
}
|
||||
@@ -1525,7 +1526,7 @@ void gossiper::handle_major_state_change(inet_address ep, const endpoint_state&
|
||||
|
||||
auto* eps_new = get_endpoint_state_for_endpoint_ptr(ep);
|
||||
if (eps_new) {
|
||||
_subscribers.for_each([ep, eps_new] (auto& subscriber) {
|
||||
_subscribers.for_each([ep, eps_new] (shared_ptr<i_endpoint_state_change_subscriber> subscriber) {
|
||||
subscriber->on_join(ep, *eps_new);
|
||||
});
|
||||
}
|
||||
@@ -1618,14 +1619,14 @@ void gossiper::apply_new_states(inet_address addr, endpoint_state& local_state,
|
||||
|
||||
// Runs inside seastar::async context
|
||||
void gossiper::do_before_change_notifications(inet_address addr, const endpoint_state& ep_state, const application_state& ap_state, const versioned_value& new_value) {
|
||||
_subscribers.for_each([addr, ep_state, ap_state, new_value] (auto& subscriber) {
|
||||
_subscribers.for_each([addr, ep_state, ap_state, new_value] (shared_ptr<i_endpoint_state_change_subscriber> subscriber) {
|
||||
subscriber->before_change(addr, ep_state, ap_state, new_value);
|
||||
});
|
||||
}
|
||||
|
||||
// Runs inside seastar::async context
|
||||
void gossiper::do_on_change_notifications(inet_address addr, const application_state& state, const versioned_value& value) {
|
||||
_subscribers.for_each([addr, state, value] (auto& subscriber) {
|
||||
_subscribers.for_each([addr, state, value] (shared_ptr<i_endpoint_state_change_subscriber> subscriber) {
|
||||
subscriber->on_change(addr, state, value);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -126,6 +126,7 @@ relocate_python3() {
|
||||
cp "$script" "$relocateddir"
|
||||
cat > "$install"<<EOF
|
||||
#!/usr/bin/env bash
|
||||
export LC_ALL=en_US.UTF-8
|
||||
x="\$(readlink -f "\$0")"
|
||||
b="\$(basename "\$x")"
|
||||
d="\$(dirname "\$x")"
|
||||
|
||||
203
licenses/abseil-license.txt
Normal file
203
licenses/abseil-license.txt
Normal file
@@ -0,0 +1,203 @@
|
||||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
https://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
https://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
@@ -144,10 +144,33 @@ insert_token_range_to_sorted_container_while_unwrapping(
|
||||
|
||||
dht::token_range_vector
|
||||
abstract_replication_strategy::get_ranges(inet_address ep) const {
|
||||
return do_get_ranges(ep, _token_metadata, false);
|
||||
}
|
||||
|
||||
dht::token_range_vector
|
||||
abstract_replication_strategy::get_ranges_in_thread(inet_address ep) const {
|
||||
return do_get_ranges(ep, _token_metadata, true);
|
||||
}
|
||||
|
||||
dht::token_range_vector
|
||||
abstract_replication_strategy::get_ranges(inet_address ep, token_metadata& tm) const {
|
||||
return do_get_ranges(ep, tm, false);
|
||||
}
|
||||
|
||||
dht::token_range_vector
|
||||
abstract_replication_strategy::get_ranges_in_thread(inet_address ep, token_metadata& tm) const {
|
||||
return do_get_ranges(ep, tm, true);
|
||||
}
|
||||
|
||||
dht::token_range_vector
|
||||
abstract_replication_strategy::do_get_ranges(inet_address ep, token_metadata& tm, bool can_yield) const {
|
||||
dht::token_range_vector ret;
|
||||
auto prev_tok = _token_metadata.sorted_tokens().back();
|
||||
for (auto tok : _token_metadata.sorted_tokens()) {
|
||||
for (inet_address a : calculate_natural_endpoints(tok, _token_metadata)) {
|
||||
auto prev_tok = tm.sorted_tokens().back();
|
||||
for (auto tok : tm.sorted_tokens()) {
|
||||
for (inet_address a : calculate_natural_endpoints(tok, tm)) {
|
||||
if (can_yield) {
|
||||
seastar::thread::maybe_yield();
|
||||
}
|
||||
if (a == ep) {
|
||||
insert_token_range_to_sorted_container_while_unwrapping(prev_tok, tok, ret);
|
||||
break;
|
||||
|
||||
@@ -106,6 +106,15 @@ public:
|
||||
// It the analogue of Origin's getAddressRanges().get(endpoint).
|
||||
// This function is not efficient, and not meant for the fast path.
|
||||
dht::token_range_vector get_ranges(inet_address ep) const;
|
||||
dht::token_range_vector get_ranges_in_thread(inet_address ep) const;
|
||||
|
||||
// Use the token_metadata provided by the caller instead of _token_metadata
|
||||
dht::token_range_vector get_ranges(inet_address ep, token_metadata& tm) const;
|
||||
dht::token_range_vector get_ranges_in_thread(inet_address ep, token_metadata& tm) const;
|
||||
private:
|
||||
dht::token_range_vector do_get_ranges(inet_address ep, token_metadata& tm, bool can_yield) const;
|
||||
|
||||
public:
|
||||
// get_primary_ranges() returns the list of "primary ranges" for the given
|
||||
// endpoint. "Primary ranges" are the ranges that the node is responsible
|
||||
// for storing replica primarily, which means this is the first node
|
||||
|
||||
10
lua.cc
10
lua.cc
@@ -264,14 +264,12 @@ static auto visit_lua_raw_value(lua_State* l, int index, Func&& f) {
|
||||
|
||||
template <typename Func>
|
||||
static auto visit_decimal(const big_decimal &v, Func&& f) {
|
||||
boost::multiprecision::cpp_int ten(10);
|
||||
const auto& dividend = v.unscaled_value();
|
||||
auto divisor = boost::multiprecision::pow(ten, v.scale());
|
||||
boost::multiprecision::cpp_rational r = v.as_rational();
|
||||
const boost::multiprecision::cpp_int& dividend = numerator(r);
|
||||
const boost::multiprecision::cpp_int& divisor = denominator(r);
|
||||
if (dividend % divisor == 0) {
|
||||
return f(utils::multiprecision_int(boost::multiprecision::cpp_int(dividend/divisor)));
|
||||
return f(utils::multiprecision_int(dividend/divisor));
|
||||
}
|
||||
boost::multiprecision::cpp_rational r = dividend;
|
||||
r /= divisor;
|
||||
return f(r.convert_to<double>());
|
||||
}
|
||||
|
||||
|
||||
23
main.cc
23
main.cc
@@ -546,9 +546,13 @@ int main(int ac, char** av) {
|
||||
gms::feature_config fcfg = gms::feature_config_from_db_config(*cfg);
|
||||
|
||||
feature_service.start(fcfg).get();
|
||||
auto stop_feature_service = defer_verbose_shutdown("feature service", [&feature_service] {
|
||||
feature_service.stop().get();
|
||||
});
|
||||
// FIXME storage_proxy holds a reference on it and is not yet stopped.
|
||||
// also the proxy leaves range_slice_read_executor-s hanging around
|
||||
// and willing to find out if the cluster_supports_digest_multipartition_reads
|
||||
//
|
||||
//auto stop_feature_service = defer_verbose_shutdown("feature service", [&feature_service] {
|
||||
// feature_service.stop().get();
|
||||
//});
|
||||
|
||||
schema::set_default_partitioner(cfg->partitioner(), cfg->murmur3_partitioner_ignore_msb_bits());
|
||||
auto make_sched_group = [&] (sstring name, unsigned shares) {
|
||||
@@ -943,12 +947,16 @@ int main(int ac, char** av) {
|
||||
mm.init_messaging_service();
|
||||
}).get();
|
||||
supervisor::notify("initializing storage proxy RPC verbs");
|
||||
proxy.invoke_on_all([] (service::storage_proxy& p) {
|
||||
p.init_messaging_service();
|
||||
}).get();
|
||||
proxy.invoke_on_all(&service::storage_proxy::init_messaging_service).get();
|
||||
auto stop_proxy_handlers = defer_verbose_shutdown("storage proxy RPC verbs", [&proxy] {
|
||||
proxy.invoke_on_all(&service::storage_proxy::uninit_messaging_service).get();
|
||||
});
|
||||
|
||||
supervisor::notify("starting streaming service");
|
||||
streaming::stream_session::init_streaming_service(db, sys_dist_ks, view_update_generator).get();
|
||||
auto stop_streaming_service = defer_verbose_shutdown("streaming service", [] {
|
||||
streaming::stream_session::uninit_streaming_service().get();
|
||||
});
|
||||
api::set_server_stream_manager(ctx).get();
|
||||
|
||||
supervisor::notify("starting hinted handoff manager");
|
||||
@@ -981,6 +989,9 @@ int main(int ac, char** av) {
|
||||
rs.stop().get();
|
||||
});
|
||||
repair_init_messaging_service_handler(rs, sys_dist_ks, view_update_generator).get();
|
||||
auto stop_repair_messages = defer_verbose_shutdown("repair message handlers", [] {
|
||||
repair_uninit_messaging_service_handler().get();
|
||||
});
|
||||
supervisor::notify("starting storage service", true);
|
||||
auto& ss = service::get_local_storage_service();
|
||||
ss.init_messaging_service_part().get();
|
||||
|
||||
@@ -718,6 +718,10 @@ void messaging_service::register_stream_mutation_fragments(std::function<future<
|
||||
register_handler(this, messaging_verb::STREAM_MUTATION_FRAGMENTS, std::move(func));
|
||||
}
|
||||
|
||||
future<> messaging_service::unregister_stream_mutation_fragments() {
|
||||
return unregister_handler(messaging_verb::STREAM_MUTATION_FRAGMENTS);
|
||||
}
|
||||
|
||||
template<class SinkType, class SourceType>
|
||||
future<rpc::sink<SinkType>, rpc::source<SourceType>>
|
||||
do_make_sink_source(messaging_verb verb, uint32_t repair_meta_id, shared_ptr<messaging_service::rpc_protocol_client_wrapper> rpc_client, std::unique_ptr<messaging_service::rpc_protocol_wrapper>& rpc) {
|
||||
@@ -749,6 +753,9 @@ rpc::sink<repair_row_on_wire_with_cmd> messaging_service::make_sink_for_repair_g
|
||||
void messaging_service::register_repair_get_row_diff_with_rpc_stream(std::function<future<rpc::sink<repair_row_on_wire_with_cmd>> (const rpc::client_info& cinfo, uint32_t repair_meta_id, rpc::source<repair_hash_with_cmd> source)>&& func) {
|
||||
register_handler(this, messaging_verb::REPAIR_GET_ROW_DIFF_WITH_RPC_STREAM, std::move(func));
|
||||
}
|
||||
future<> messaging_service::unregister_repair_get_row_diff_with_rpc_stream() {
|
||||
return unregister_handler(messaging_verb::REPAIR_GET_ROW_DIFF_WITH_RPC_STREAM);
|
||||
}
|
||||
|
||||
// Wrapper for REPAIR_PUT_ROW_DIFF_WITH_RPC_STREAM
|
||||
future<rpc::sink<repair_row_on_wire_with_cmd>, rpc::source<repair_stream_cmd>>
|
||||
@@ -768,6 +775,9 @@ rpc::sink<repair_stream_cmd> messaging_service::make_sink_for_repair_put_row_dif
|
||||
void messaging_service::register_repair_put_row_diff_with_rpc_stream(std::function<future<rpc::sink<repair_stream_cmd>> (const rpc::client_info& cinfo, uint32_t repair_meta_id, rpc::source<repair_row_on_wire_with_cmd> source)>&& func) {
|
||||
register_handler(this, messaging_verb::REPAIR_PUT_ROW_DIFF_WITH_RPC_STREAM, std::move(func));
|
||||
}
|
||||
future<> messaging_service::unregister_repair_put_row_diff_with_rpc_stream() {
|
||||
return unregister_handler(messaging_verb::REPAIR_PUT_ROW_DIFF_WITH_RPC_STREAM);
|
||||
}
|
||||
|
||||
// Wrapper for REPAIR_GET_FULL_ROW_HASHES_WITH_RPC_STREAM
|
||||
future<rpc::sink<repair_stream_cmd>, rpc::source<repair_hash_with_cmd>>
|
||||
@@ -787,6 +797,9 @@ rpc::sink<repair_hash_with_cmd> messaging_service::make_sink_for_repair_get_full
|
||||
void messaging_service::register_repair_get_full_row_hashes_with_rpc_stream(std::function<future<rpc::sink<repair_hash_with_cmd>> (const rpc::client_info& cinfo, uint32_t repair_meta_id, rpc::source<repair_stream_cmd> source)>&& func) {
|
||||
register_handler(this, messaging_verb::REPAIR_GET_FULL_ROW_HASHES_WITH_RPC_STREAM, std::move(func));
|
||||
}
|
||||
future<> messaging_service::unregister_repair_get_full_row_hashes_with_rpc_stream() {
|
||||
return unregister_handler(messaging_verb::REPAIR_GET_FULL_ROW_HASHES_WITH_RPC_STREAM);
|
||||
}
|
||||
|
||||
// Send a message for verb
|
||||
template <typename MsgIn, typename... MsgOut>
|
||||
@@ -870,6 +883,9 @@ future<streaming::prepare_message> messaging_service::send_prepare_message(msg_a
|
||||
return send_message<streaming::prepare_message>(this, messaging_verb::PREPARE_MESSAGE, id,
|
||||
std::move(msg), plan_id, std::move(description), reason);
|
||||
}
|
||||
future<> messaging_service::unregister_prepare_message() {
|
||||
return unregister_handler(messaging_verb::PREPARE_MESSAGE);
|
||||
}
|
||||
|
||||
// PREPARE_DONE_MESSAGE
|
||||
void messaging_service::register_prepare_done_message(std::function<future<> (const rpc::client_info& cinfo, UUID plan_id, unsigned dst_cpu_id)>&& func) {
|
||||
@@ -879,6 +895,9 @@ future<> messaging_service::send_prepare_done_message(msg_addr id, UUID plan_id,
|
||||
return send_message<void>(this, messaging_verb::PREPARE_DONE_MESSAGE, id,
|
||||
plan_id, dst_cpu_id);
|
||||
}
|
||||
future<> messaging_service::unregister_prepare_done_message() {
|
||||
return unregister_handler(messaging_verb::PREPARE_DONE_MESSAGE);
|
||||
}
|
||||
|
||||
// STREAM_MUTATION
|
||||
void messaging_service::register_stream_mutation(std::function<future<> (const rpc::client_info& cinfo, UUID plan_id, frozen_mutation fm, unsigned dst_cpu_id, rpc::optional<bool> fragmented, rpc::optional<streaming::stream_reason> reason)>&& func) {
|
||||
@@ -903,6 +922,9 @@ future<> messaging_service::send_stream_mutation_done(msg_addr id, UUID plan_id,
|
||||
return send_message<void>(this, messaging_verb::STREAM_MUTATION_DONE, id,
|
||||
plan_id, std::move(ranges), cf_id, dst_cpu_id);
|
||||
}
|
||||
future<> messaging_service::unregister_stream_mutation_done() {
|
||||
return unregister_handler(messaging_verb::STREAM_MUTATION_DONE);
|
||||
}
|
||||
|
||||
// COMPLETE_MESSAGE
|
||||
void messaging_service::register_complete_message(std::function<future<> (const rpc::client_info& cinfo, UUID plan_id, unsigned dst_cpu_id, rpc::optional<bool> failed)>&& func) {
|
||||
@@ -912,6 +934,9 @@ future<> messaging_service::send_complete_message(msg_addr id, UUID plan_id, uns
|
||||
return send_message<void>(this, messaging_verb::COMPLETE_MESSAGE, id,
|
||||
plan_id, dst_cpu_id, failed);
|
||||
}
|
||||
future<> messaging_service::unregister_complete_message() {
|
||||
return unregister_handler(messaging_verb::COMPLETE_MESSAGE);
|
||||
}
|
||||
|
||||
void messaging_service::register_gossip_echo(std::function<future<> ()>&& func) {
|
||||
register_handler(this, messaging_verb::GOSSIP_ECHO, std::move(func));
|
||||
@@ -1126,14 +1151,14 @@ future<partition_checksum> messaging_service::send_repair_checksum_range(
|
||||
}
|
||||
|
||||
// Wrapper for REPAIR_GET_FULL_ROW_HASHES
|
||||
void messaging_service::register_repair_get_full_row_hashes(std::function<future<std::unordered_set<repair_hash>> (const rpc::client_info& cinfo, uint32_t repair_meta_id)>&& func) {
|
||||
void messaging_service::register_repair_get_full_row_hashes(std::function<future<repair_hash_set> (const rpc::client_info& cinfo, uint32_t repair_meta_id)>&& func) {
|
||||
register_handler(this, messaging_verb::REPAIR_GET_FULL_ROW_HASHES, std::move(func));
|
||||
}
|
||||
future<> messaging_service::unregister_repair_get_full_row_hashes() {
|
||||
return unregister_handler(messaging_verb::REPAIR_GET_FULL_ROW_HASHES);
|
||||
}
|
||||
future<std::unordered_set<repair_hash>> messaging_service::send_repair_get_full_row_hashes(msg_addr id, uint32_t repair_meta_id) {
|
||||
return send_message<future<std::unordered_set<repair_hash>>>(this, messaging_verb::REPAIR_GET_FULL_ROW_HASHES, std::move(id), repair_meta_id);
|
||||
future<repair_hash_set> messaging_service::send_repair_get_full_row_hashes(msg_addr id, uint32_t repair_meta_id) {
|
||||
return send_message<future<repair_hash_set>>(this, messaging_verb::REPAIR_GET_FULL_ROW_HASHES, std::move(id), repair_meta_id);
|
||||
}
|
||||
|
||||
// Wrapper for REPAIR_GET_COMBINED_ROW_HASH
|
||||
@@ -1158,13 +1183,13 @@ future<get_sync_boundary_response> messaging_service::send_repair_get_sync_bound
|
||||
}
|
||||
|
||||
// Wrapper for REPAIR_GET_ROW_DIFF
|
||||
void messaging_service::register_repair_get_row_diff(std::function<future<repair_rows_on_wire> (const rpc::client_info& cinfo, uint32_t repair_meta_id, std::unordered_set<repair_hash> set_diff, bool needs_all_rows)>&& func) {
|
||||
void messaging_service::register_repair_get_row_diff(std::function<future<repair_rows_on_wire> (const rpc::client_info& cinfo, uint32_t repair_meta_id, repair_hash_set set_diff, bool needs_all_rows)>&& func) {
|
||||
register_handler(this, messaging_verb::REPAIR_GET_ROW_DIFF, std::move(func));
|
||||
}
|
||||
future<> messaging_service::unregister_repair_get_row_diff() {
|
||||
return unregister_handler(messaging_verb::REPAIR_GET_ROW_DIFF);
|
||||
}
|
||||
future<repair_rows_on_wire> messaging_service::send_repair_get_row_diff(msg_addr id, uint32_t repair_meta_id, std::unordered_set<repair_hash> set_diff, bool needs_all_rows) {
|
||||
future<repair_rows_on_wire> messaging_service::send_repair_get_row_diff(msg_addr id, uint32_t repair_meta_id, repair_hash_set set_diff, bool needs_all_rows) {
|
||||
return send_message<future<repair_rows_on_wire>>(this, messaging_verb::REPAIR_GET_ROW_DIFF, std::move(id), repair_meta_id, std::move(set_diff), needs_all_rows);
|
||||
}
|
||||
|
||||
|
||||
@@ -275,10 +275,12 @@ public:
|
||||
streaming::prepare_message msg, UUID plan_id, sstring description, rpc::optional<streaming::stream_reason> reason)>&& func);
|
||||
future<streaming::prepare_message> send_prepare_message(msg_addr id, streaming::prepare_message msg, UUID plan_id,
|
||||
sstring description, streaming::stream_reason);
|
||||
future<> unregister_prepare_message();
|
||||
|
||||
// Wrapper for PREPARE_DONE_MESSAGE verb
|
||||
void register_prepare_done_message(std::function<future<> (const rpc::client_info& cinfo, UUID plan_id, unsigned dst_cpu_id)>&& func);
|
||||
future<> send_prepare_done_message(msg_addr id, UUID plan_id, unsigned dst_cpu_id);
|
||||
future<> unregister_prepare_done_message();
|
||||
|
||||
// Wrapper for STREAM_MUTATION verb
|
||||
void register_stream_mutation(std::function<future<> (const rpc::client_info& cinfo, UUID plan_id, frozen_mutation fm, unsigned dst_cpu_id, rpc::optional<bool>, rpc::optional<streaming::stream_reason>)>&& func);
|
||||
@@ -287,6 +289,7 @@ public:
|
||||
// Wrapper for STREAM_MUTATION_FRAGMENTS
|
||||
// The receiver of STREAM_MUTATION_FRAGMENTS sends status code to the sender to notify any error on the receiver side. The status code is of type int32_t. 0 means successful, -1 means error, other status code value are reserved for future use.
|
||||
void register_stream_mutation_fragments(std::function<future<rpc::sink<int32_t>> (const rpc::client_info& cinfo, UUID plan_id, UUID schema_id, UUID cf_id, uint64_t estimated_partitions, rpc::optional<streaming::stream_reason> reason_opt, rpc::source<frozen_mutation_fragment, rpc::optional<streaming::stream_mutation_fragments_cmd>> source)>&& func);
|
||||
future<> unregister_stream_mutation_fragments();
|
||||
rpc::sink<int32_t> make_sink_for_stream_mutation_fragments(rpc::source<frozen_mutation_fragment, rpc::optional<streaming::stream_mutation_fragments_cmd>>& source);
|
||||
future<rpc::sink<frozen_mutation_fragment, streaming::stream_mutation_fragments_cmd>, rpc::source<int32_t>> make_sink_and_source_for_stream_mutation_fragments(utils::UUID schema_id, utils::UUID plan_id, utils::UUID cf_id, uint64_t estimated_partitions, streaming::stream_reason reason, msg_addr id);
|
||||
|
||||
@@ -294,22 +297,27 @@ public:
|
||||
future<rpc::sink<repair_hash_with_cmd>, rpc::source<repair_row_on_wire_with_cmd>> make_sink_and_source_for_repair_get_row_diff_with_rpc_stream(uint32_t repair_meta_id, msg_addr id);
|
||||
rpc::sink<repair_row_on_wire_with_cmd> make_sink_for_repair_get_row_diff_with_rpc_stream(rpc::source<repair_hash_with_cmd>& source);
|
||||
void register_repair_get_row_diff_with_rpc_stream(std::function<future<rpc::sink<repair_row_on_wire_with_cmd>> (const rpc::client_info& cinfo, uint32_t repair_meta_id, rpc::source<repair_hash_with_cmd> source)>&& func);
|
||||
future<> unregister_repair_get_row_diff_with_rpc_stream();
|
||||
|
||||
// Wrapper for REPAIR_PUT_ROW_DIFF_WITH_RPC_STREAM
|
||||
future<rpc::sink<repair_row_on_wire_with_cmd>, rpc::source<repair_stream_cmd>> make_sink_and_source_for_repair_put_row_diff_with_rpc_stream(uint32_t repair_meta_id, msg_addr id);
|
||||
rpc::sink<repair_stream_cmd> make_sink_for_repair_put_row_diff_with_rpc_stream(rpc::source<repair_row_on_wire_with_cmd>& source);
|
||||
void register_repair_put_row_diff_with_rpc_stream(std::function<future<rpc::sink<repair_stream_cmd>> (const rpc::client_info& cinfo, uint32_t repair_meta_id, rpc::source<repair_row_on_wire_with_cmd> source)>&& func);
|
||||
future<> unregister_repair_put_row_diff_with_rpc_stream();
|
||||
|
||||
// Wrapper for REPAIR_GET_FULL_ROW_HASHES_WITH_RPC_STREAM
|
||||
future<rpc::sink<repair_stream_cmd>, rpc::source<repair_hash_with_cmd>> make_sink_and_source_for_repair_get_full_row_hashes_with_rpc_stream(uint32_t repair_meta_id, msg_addr id);
|
||||
rpc::sink<repair_hash_with_cmd> make_sink_for_repair_get_full_row_hashes_with_rpc_stream(rpc::source<repair_stream_cmd>& source);
|
||||
void register_repair_get_full_row_hashes_with_rpc_stream(std::function<future<rpc::sink<repair_hash_with_cmd>> (const rpc::client_info& cinfo, uint32_t repair_meta_id, rpc::source<repair_stream_cmd> source)>&& func);
|
||||
future<> unregister_repair_get_full_row_hashes_with_rpc_stream();
|
||||
|
||||
void register_stream_mutation_done(std::function<future<> (const rpc::client_info& cinfo, UUID plan_id, dht::token_range_vector ranges, UUID cf_id, unsigned dst_cpu_id)>&& func);
|
||||
future<> send_stream_mutation_done(msg_addr id, UUID plan_id, dht::token_range_vector ranges, UUID cf_id, unsigned dst_cpu_id);
|
||||
future<> unregister_stream_mutation_done();
|
||||
|
||||
void register_complete_message(std::function<future<> (const rpc::client_info& cinfo, UUID plan_id, unsigned dst_cpu_id, rpc::optional<bool> failed)>&& func);
|
||||
future<> send_complete_message(msg_addr id, UUID plan_id, unsigned dst_cpu_id, bool failed = false);
|
||||
future<> unregister_complete_message();
|
||||
|
||||
// Wrapper for REPAIR_CHECKSUM_RANGE verb
|
||||
void register_repair_checksum_range(std::function<future<partition_checksum> (sstring keyspace, sstring cf, dht::token_range range, rpc::optional<repair_checksum> hash_version)>&& func);
|
||||
@@ -317,9 +325,9 @@ public:
|
||||
future<partition_checksum> send_repair_checksum_range(msg_addr id, sstring keyspace, sstring cf, dht::token_range range, repair_checksum hash_version);
|
||||
|
||||
// Wrapper for REPAIR_GET_FULL_ROW_HASHES
|
||||
void register_repair_get_full_row_hashes(std::function<future<std::unordered_set<repair_hash>> (const rpc::client_info& cinfo, uint32_t repair_meta_id)>&& func);
|
||||
void register_repair_get_full_row_hashes(std::function<future<repair_hash_set> (const rpc::client_info& cinfo, uint32_t repair_meta_id)>&& func);
|
||||
future<> unregister_repair_get_full_row_hashes();
|
||||
future<std::unordered_set<repair_hash>> send_repair_get_full_row_hashes(msg_addr id, uint32_t repair_meta_id);
|
||||
future<repair_hash_set> send_repair_get_full_row_hashes(msg_addr id, uint32_t repair_meta_id);
|
||||
|
||||
// Wrapper for REPAIR_GET_COMBINED_ROW_HASH
|
||||
void register_repair_get_combined_row_hash(std::function<future<get_combined_row_hash_response> (const rpc::client_info& cinfo, uint32_t repair_meta_id, std::optional<repair_sync_boundary> common_sync_boundary)>&& func);
|
||||
@@ -332,9 +340,9 @@ public:
|
||||
future<get_sync_boundary_response> send_repair_get_sync_boundary(msg_addr id, uint32_t repair_meta_id, std::optional<repair_sync_boundary> skipped_sync_boundary);
|
||||
|
||||
// Wrapper for REPAIR_GET_ROW_DIFF
|
||||
void register_repair_get_row_diff(std::function<future<repair_rows_on_wire> (const rpc::client_info& cinfo, uint32_t repair_meta_id, std::unordered_set<repair_hash> set_diff, bool needs_all_rows)>&& func);
|
||||
void register_repair_get_row_diff(std::function<future<repair_rows_on_wire> (const rpc::client_info& cinfo, uint32_t repair_meta_id, repair_hash_set set_diff, bool needs_all_rows)>&& func);
|
||||
future<> unregister_repair_get_row_diff();
|
||||
future<repair_rows_on_wire> send_repair_get_row_diff(msg_addr id, uint32_t repair_meta_id, std::unordered_set<repair_hash> set_diff, bool needs_all_rows);
|
||||
future<repair_rows_on_wire> send_repair_get_row_diff(msg_addr id, uint32_t repair_meta_id, repair_hash_set set_diff, bool needs_all_rows);
|
||||
|
||||
// Wrapper for REPAIR_PUT_ROW_DIFF
|
||||
void register_repair_put_row_diff(std::function<future<> (const rpc::client_info& cinfo, uint32_t repair_meta_id, repair_rows_on_wire row_diff)>&& func);
|
||||
|
||||
@@ -195,6 +195,7 @@ class read_context : public reader_lifecycle_policy {
|
||||
|
||||
// One for each shard. Index is shard id.
|
||||
std::vector<reader_meta> _readers;
|
||||
std::vector<reader_concurrency_semaphore*> _semaphores;
|
||||
|
||||
gate _dismantling_gate;
|
||||
|
||||
@@ -211,7 +212,8 @@ public:
|
||||
, _schema(std::move(s))
|
||||
, _cmd(cmd)
|
||||
, _ranges(ranges)
|
||||
, _trace_state(std::move(trace_state)) {
|
||||
, _trace_state(std::move(trace_state))
|
||||
, _semaphores(smp::count, nullptr) {
|
||||
_readers.resize(smp::count);
|
||||
}
|
||||
|
||||
@@ -236,7 +238,12 @@ public:
|
||||
virtual void destroy_reader(shard_id shard, future<stopped_reader> reader_fut) noexcept override;
|
||||
|
||||
virtual reader_concurrency_semaphore& semaphore() override {
|
||||
return _readers[engine().cpu_id()].rparts->semaphore;
|
||||
const auto shard = engine().cpu_id();
|
||||
if (!_semaphores[shard]) {
|
||||
auto& table = _db.local().find_column_family(_schema);
|
||||
_semaphores[shard] = &table.read_concurrency_semaphore();
|
||||
}
|
||||
return *_semaphores[shard];
|
||||
}
|
||||
|
||||
future<> lookup_readers();
|
||||
|
||||
@@ -1721,7 +1721,7 @@ void row::apply_monotonically(const schema& s, column_kind kind, row&& other) {
|
||||
// we erase the live cells according to the shadowable_tombstone rules.
|
||||
static bool dead_marker_shadows_row(const schema& s, column_kind kind, const row_marker& marker) {
|
||||
return s.is_view()
|
||||
&& !s.view_info()->base_non_pk_columns_in_view_pk().empty()
|
||||
&& s.view_info()->has_base_non_pk_columns_in_view_pk()
|
||||
&& !marker.is_live()
|
||||
&& kind == column_kind::regular_column; // not applicable to static rows
|
||||
}
|
||||
@@ -2505,7 +2505,8 @@ mutation_partition::fully_discontinuous(const schema& s, const position_range& r
|
||||
future<mutation_opt> counter_write_query(schema_ptr s, const mutation_source& source,
|
||||
const dht::decorated_key& dk,
|
||||
const query::partition_slice& slice,
|
||||
tracing::trace_state_ptr trace_ptr)
|
||||
tracing::trace_state_ptr trace_ptr,
|
||||
db::timeout_clock::time_point timeout)
|
||||
{
|
||||
struct range_and_reader {
|
||||
dht::partition_range range;
|
||||
@@ -2530,7 +2531,7 @@ future<mutation_opt> counter_write_query(schema_ptr s, const mutation_source& so
|
||||
auto cwqrb = counter_write_query_result_builder(*s);
|
||||
auto cfq = make_stable_flattened_mutations_consumer<compact_for_query<emit_only_live_rows::yes, counter_write_query_result_builder>>(
|
||||
*s, gc_clock::now(), slice, query::max_rows, query::max_rows, std::move(cwqrb));
|
||||
auto f = r_a_r->reader.consume(std::move(cfq), db::no_timeout);
|
||||
auto f = r_a_r->reader.consume(std::move(cfq), timeout);
|
||||
return f.finally([r_a_r = std::move(r_a_r)] { });
|
||||
}
|
||||
|
||||
|
||||
@@ -113,9 +113,6 @@ class reconcilable_result_builder {
|
||||
const schema& _schema;
|
||||
const query::partition_slice& _slice;
|
||||
|
||||
utils::chunked_vector<partition> _result;
|
||||
uint32_t _live_rows{};
|
||||
|
||||
bool _return_static_content_on_partition_with_no_rows{};
|
||||
bool _static_row_is_alive{};
|
||||
uint32_t _total_live_rows = 0;
|
||||
@@ -123,6 +120,10 @@ class reconcilable_result_builder {
|
||||
stop_iteration _stop;
|
||||
bool _short_read_allowed;
|
||||
std::optional<streamed_mutation_freezer> _mutation_consumer;
|
||||
|
||||
uint32_t _live_rows{};
|
||||
// make this the last member so it is destroyed first. #7240
|
||||
utils::chunked_vector<partition> _result;
|
||||
public:
|
||||
reconcilable_result_builder(const schema& s, const query::partition_slice& slice,
|
||||
query::result_memory_accounter&& accounter)
|
||||
@@ -206,5 +207,6 @@ public:
|
||||
future<mutation_opt> counter_write_query(schema_ptr, const mutation_source&,
|
||||
const dht::decorated_key& dk,
|
||||
const query::partition_slice& slice,
|
||||
tracing::trace_state_ptr trace_ptr);
|
||||
tracing::trace_state_ptr trace_ptr,
|
||||
db::timeout_clock::time_point timeout);
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -372,6 +372,64 @@ flat_mutation_reader make_foreign_reader(schema_ptr schema,
|
||||
foreign_ptr<std::unique_ptr<flat_mutation_reader>> reader,
|
||||
streamed_mutation::forwarding fwd_sm = streamed_mutation::forwarding::no);
|
||||
|
||||
/// Make an auto-paused evictable reader.
|
||||
///
|
||||
/// The reader is paused after each use, that is after each call to any of its
|
||||
/// members that cause actual reading to be done (`fill_buffer()` and
|
||||
/// `fast_forward_to()`). When paused, the reader is made evictable, that it is
|
||||
/// it is registered with reader concurrency semaphore as an inactive read.
|
||||
/// The reader is resumed automatically on the next use. If it was evicted, it
|
||||
/// will be recreated at the position it left off reading. This is all
|
||||
/// transparent to its user.
|
||||
/// Parameters passed by reference have to be kept alive while the reader is
|
||||
/// alive.
|
||||
flat_mutation_reader make_auto_paused_evictable_reader(
|
||||
mutation_source ms,
|
||||
schema_ptr schema,
|
||||
reader_concurrency_semaphore& semaphore,
|
||||
const dht::partition_range& pr,
|
||||
const query::partition_slice& ps,
|
||||
const io_priority_class& pc,
|
||||
tracing::trace_state_ptr trace_state,
|
||||
mutation_reader::forwarding fwd_mr);
|
||||
|
||||
class evictable_reader;
|
||||
|
||||
class evictable_reader_handle {
|
||||
friend std::pair<flat_mutation_reader, evictable_reader_handle> make_manually_paused_evictable_reader(mutation_source, schema_ptr, reader_concurrency_semaphore&,
|
||||
const dht::partition_range&, const query::partition_slice&, const io_priority_class&, tracing::trace_state_ptr, mutation_reader::forwarding);
|
||||
|
||||
private:
|
||||
evictable_reader* _r;
|
||||
|
||||
private:
|
||||
explicit evictable_reader_handle(evictable_reader& r);
|
||||
|
||||
public:
|
||||
void pause();
|
||||
};
|
||||
|
||||
/// Make a manually-paused evictable reader.
|
||||
///
|
||||
/// The reader can be paused via the evictable reader handle when desired. The
|
||||
/// intended usage is subsequent reads done in bursts, after which the reader is
|
||||
/// not used for some time. When paused, the reader is made evictable, that is,
|
||||
/// it is registered with reader concurrency semaphore as an inactive read.
|
||||
/// The reader is resumed automatically on the next use. If it was evicted, it
|
||||
/// will be recreated at the position it left off reading. This is all
|
||||
/// transparent to its user.
|
||||
/// Parameters passed by reference have to be kept alive while the reader is
|
||||
/// alive.
|
||||
std::pair<flat_mutation_reader, evictable_reader_handle> make_manually_paused_evictable_reader(
|
||||
mutation_source ms,
|
||||
schema_ptr schema,
|
||||
reader_concurrency_semaphore& semaphore,
|
||||
const dht::partition_range& pr,
|
||||
const query::partition_slice& ps,
|
||||
const io_priority_class& pc,
|
||||
tracing::trace_state_ptr trace_state,
|
||||
mutation_reader::forwarding fwd_mr);
|
||||
|
||||
/// Reader lifecycle policy for the mulitshard combining reader.
|
||||
///
|
||||
/// This policy is expected to make sure any additional resource the readers
|
||||
|
||||
@@ -163,6 +163,11 @@ public:
|
||||
return {partition_region::clustered, bound_weight::before_all_prefixed, &ck};
|
||||
}
|
||||
|
||||
// Returns a view to before_key(pos._ck) if pos.is_clustering_row() else returns pos as-is.
|
||||
static position_in_partition_view before_key(position_in_partition_view pos) {
|
||||
return {partition_region::clustered, pos._bound_weight == bound_weight::equal ? bound_weight::before_all_prefixed : pos._bound_weight, pos._ck};
|
||||
}
|
||||
|
||||
partition_region region() const { return _type; }
|
||||
bound_weight get_bound_weight() const { return _bound_weight; }
|
||||
bool is_partition_start() const { return _type == partition_region::partition_start; }
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
|
||||
|
||||
reader_permit::impl::impl(reader_concurrency_semaphore& semaphore, reader_resources base_cost) : semaphore(semaphore), base_cost(base_cost) {
|
||||
semaphore.consume(base_cost);
|
||||
}
|
||||
|
||||
reader_permit::impl::~impl() {
|
||||
@@ -88,7 +89,6 @@ void reader_concurrency_semaphore::signal(const resources& r) noexcept {
|
||||
_resources += r;
|
||||
while (!_wait_list.empty() && has_available_units(_wait_list.front().res)) {
|
||||
auto& x = _wait_list.front();
|
||||
_resources -= x.res;
|
||||
try {
|
||||
x.pr.set_value(reader_permit(*this, x.res));
|
||||
} catch (...) {
|
||||
@@ -160,7 +160,6 @@ future<reader_permit> reader_concurrency_semaphore::wait_admission(size_t memory
|
||||
--_inactive_read_stats.population;
|
||||
}
|
||||
if (may_proceed(r)) {
|
||||
_resources -= r;
|
||||
return make_ready_future<reader_permit>(reader_permit(*this, r));
|
||||
}
|
||||
promise<reader_permit> pr;
|
||||
@@ -170,7 +169,6 @@ future<reader_permit> reader_concurrency_semaphore::wait_admission(size_t memory
|
||||
}
|
||||
|
||||
reader_permit reader_concurrency_semaphore::consume_resources(resources r) {
|
||||
_resources -= r;
|
||||
return reader_permit(*this, r);
|
||||
}
|
||||
|
||||
|
||||
@@ -128,6 +128,10 @@ private:
|
||||
return has_available_units(r) && _wait_list.empty();
|
||||
}
|
||||
|
||||
void consume(resources r) {
|
||||
_resources -= r;
|
||||
}
|
||||
|
||||
void consume_memory(size_t memory) {
|
||||
_resources.memory -= memory;
|
||||
}
|
||||
|
||||
@@ -23,6 +23,7 @@
|
||||
|
||||
#include <unordered_map>
|
||||
#include <exception>
|
||||
#include <absl/container/btree_set.h>
|
||||
|
||||
#include <seastar/core/sstring.hh>
|
||||
#include <seastar/core/sharded.hh>
|
||||
@@ -334,6 +335,8 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
using repair_hash_set = absl::btree_set<repair_hash>;
|
||||
|
||||
// Return value of the REPAIR_GET_SYNC_BOUNDARY RPC verb
|
||||
struct get_sync_boundary_response {
|
||||
std::optional<repair_sync_boundary> boundary;
|
||||
|
||||
@@ -47,6 +47,7 @@
|
||||
#include "gms/gossiper.hh"
|
||||
#include "repair/row_level.hh"
|
||||
#include "mutation_source_metadata.hh"
|
||||
#include "utils/stall_free.hh"
|
||||
|
||||
extern logging::logger rlogger;
|
||||
|
||||
@@ -373,6 +374,7 @@ private:
|
||||
std::optional<utils::phased_barrier::operation> _local_read_op;
|
||||
// Local reader or multishard reader to read the range
|
||||
flat_mutation_reader _reader;
|
||||
std::optional<evictable_reader_handle> _reader_handle;
|
||||
// Current partition read from disk
|
||||
lw_shared_ptr<const decorated_key_with_hash> _current_dk;
|
||||
|
||||
@@ -392,32 +394,49 @@ public:
|
||||
, _sharder(remote_partitioner, range, remote_shard)
|
||||
, _seed(seed)
|
||||
, _local_read_op(local_reader ? std::optional(cf.read_in_progress()) : std::nullopt)
|
||||
, _reader(make_reader(db, cf, local_reader)) {
|
||||
}
|
||||
|
||||
private:
|
||||
flat_mutation_reader
|
||||
make_reader(seastar::sharded<database>& db,
|
||||
column_family& cf,
|
||||
is_local_reader local_reader) {
|
||||
, _reader(nullptr) {
|
||||
if (local_reader) {
|
||||
return cf.make_streaming_reader(_schema, _range);
|
||||
auto ms = mutation_source([&cf] (
|
||||
schema_ptr s,
|
||||
reader_permit,
|
||||
const dht::partition_range& pr,
|
||||
const query::partition_slice& ps,
|
||||
const io_priority_class& pc,
|
||||
tracing::trace_state_ptr,
|
||||
streamed_mutation::forwarding,
|
||||
mutation_reader::forwarding fwd_mr) {
|
||||
return cf.make_streaming_reader(std::move(s), pr, ps, fwd_mr);
|
||||
});
|
||||
std::tie(_reader, _reader_handle) = make_manually_paused_evictable_reader(
|
||||
std::move(ms),
|
||||
_schema,
|
||||
cf.streaming_read_concurrency_semaphore(),
|
||||
_range,
|
||||
_schema->full_slice(),
|
||||
service::get_local_streaming_read_priority(),
|
||||
{},
|
||||
mutation_reader::forwarding::no);
|
||||
} else {
|
||||
_reader = make_multishard_streaming_reader(db, _schema, [this] {
|
||||
auto shard_range = _sharder.next();
|
||||
if (shard_range) {
|
||||
return std::optional<dht::partition_range>(dht::to_partition_range(*shard_range));
|
||||
}
|
||||
return std::optional<dht::partition_range>();
|
||||
});
|
||||
}
|
||||
return make_multishard_streaming_reader(db, _schema, [this] {
|
||||
auto shard_range = _sharder.next();
|
||||
if (shard_range) {
|
||||
return std::optional<dht::partition_range>(dht::to_partition_range(*shard_range));
|
||||
}
|
||||
return std::optional<dht::partition_range>();
|
||||
});
|
||||
}
|
||||
|
||||
public:
|
||||
future<mutation_fragment_opt>
|
||||
read_mutation_fragment() {
|
||||
return _reader(db::no_timeout);
|
||||
}
|
||||
|
||||
void on_end_of_stream() {
|
||||
_reader = make_empty_flat_reader(_schema);
|
||||
_reader_handle.reset();
|
||||
}
|
||||
|
||||
lw_shared_ptr<const decorated_key_with_hash>& get_current_dk() {
|
||||
return _current_dk;
|
||||
}
|
||||
@@ -436,6 +455,11 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
void pause() {
|
||||
if (_reader_handle) {
|
||||
_reader_handle->pause();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class repair_writer {
|
||||
@@ -644,7 +668,7 @@ private:
|
||||
// Tracks current sync boundary
|
||||
std::optional<repair_sync_boundary> _current_sync_boundary;
|
||||
// Contains the hashes of rows in the _working_row_buffor for all peer nodes
|
||||
std::vector<std::unordered_set<repair_hash>> _peer_row_hash_sets;
|
||||
std::vector<repair_hash_set> _peer_row_hash_sets;
|
||||
// Gate used to make sure pending operation of meta data is done
|
||||
seastar::gate _gate;
|
||||
sink_source_for_get_full_row_hashes _sink_source_for_get_full_row_hashes;
|
||||
@@ -733,11 +757,12 @@ public:
|
||||
public:
|
||||
future<> stop() {
|
||||
auto gate_future = _gate.close();
|
||||
auto writer_future = _repair_writer.wait_for_writer_done();
|
||||
auto f1 = _sink_source_for_get_full_row_hashes.close();
|
||||
auto f2 = _sink_source_for_get_row_diff.close();
|
||||
auto f3 = _sink_source_for_put_row_diff.close();
|
||||
return when_all_succeed(std::move(gate_future), std::move(writer_future), std::move(f1), std::move(f2), std::move(f3));
|
||||
return when_all_succeed(std::move(gate_future), std::move(f1), std::move(f2), std::move(f3)).finally([this] {
|
||||
return _repair_writer.wait_for_writer_done();
|
||||
});
|
||||
}
|
||||
|
||||
static std::unordered_map<node_repair_meta_id, lw_shared_ptr<repair_meta>>& repair_meta_map() {
|
||||
@@ -865,9 +890,9 @@ public:
|
||||
}
|
||||
|
||||
// Must run inside a seastar thread
|
||||
static std::unordered_set<repair_hash>
|
||||
get_set_diff(const std::unordered_set<repair_hash>& x, const std::unordered_set<repair_hash>& y) {
|
||||
std::unordered_set<repair_hash> set_diff;
|
||||
static repair_hash_set
|
||||
get_set_diff(const repair_hash_set& x, const repair_hash_set& y) {
|
||||
repair_hash_set set_diff;
|
||||
// Note std::set_difference needs x and y are sorted.
|
||||
std::copy_if(x.begin(), x.end(), std::inserter(set_diff, set_diff.end()),
|
||||
[&y] (auto& item) { thread::maybe_yield(); return y.find(item) == y.end(); });
|
||||
@@ -885,14 +910,14 @@ public:
|
||||
|
||||
}
|
||||
|
||||
std::unordered_set<repair_hash>& peer_row_hash_sets(unsigned node_idx) {
|
||||
repair_hash_set& peer_row_hash_sets(unsigned node_idx) {
|
||||
return _peer_row_hash_sets[node_idx];
|
||||
}
|
||||
|
||||
// Get a list of row hashes in _working_row_buf
|
||||
future<std::unordered_set<repair_hash>>
|
||||
future<repair_hash_set>
|
||||
working_row_hashes() {
|
||||
return do_with(std::unordered_set<repair_hash>(), [this] (std::unordered_set<repair_hash>& hashes) {
|
||||
return do_with(repair_hash_set(), [this] (repair_hash_set& hashes) {
|
||||
return do_for_each(_working_row_buf, [&hashes] (repair_row& r) {
|
||||
hashes.emplace(r.hash());
|
||||
}).then([&hashes] {
|
||||
@@ -1018,11 +1043,7 @@ private:
|
||||
return repair_hash(h.finalize_uint64());
|
||||
}
|
||||
|
||||
stop_iteration handle_mutation_fragment(mutation_fragment_opt mfopt, size_t& cur_size, size_t& new_rows_size, std::list<repair_row>& cur_rows) {
|
||||
if (!mfopt) {
|
||||
return stop_iteration::yes;
|
||||
}
|
||||
mutation_fragment& mf = *mfopt;
|
||||
stop_iteration handle_mutation_fragment(mutation_fragment& mf, size_t& cur_size, size_t& new_rows_size, std::list<repair_row>& cur_rows) {
|
||||
if (mf.is_partition_start()) {
|
||||
auto& start = mf.as_partition_start();
|
||||
_repair_reader.set_current_dk(start.key());
|
||||
@@ -1057,32 +1078,49 @@ private:
|
||||
}
|
||||
_gate.check();
|
||||
return _repair_reader.read_mutation_fragment().then([this, &cur_size, &new_rows_size, &cur_rows] (mutation_fragment_opt mfopt) mutable {
|
||||
return handle_mutation_fragment(std::move(mfopt), cur_size, new_rows_size, cur_rows);
|
||||
if (!mfopt) {
|
||||
_repair_reader.on_end_of_stream();
|
||||
return stop_iteration::yes;
|
||||
}
|
||||
return handle_mutation_fragment(*mfopt, cur_size, new_rows_size, cur_rows);
|
||||
});
|
||||
}).then([&cur_rows, &new_rows_size] () mutable {
|
||||
}).then_wrapped([this, &cur_rows, &new_rows_size] (future<> fut) mutable {
|
||||
if (fut.failed()) {
|
||||
_repair_reader.on_end_of_stream();
|
||||
return make_exception_future<std::list<repair_row>, size_t>(fut.get_exception());
|
||||
}
|
||||
_repair_reader.pause();
|
||||
return make_ready_future<std::list<repair_row>, size_t>(std::move(cur_rows), new_rows_size);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
future<> clear_row_buf() {
|
||||
return utils::clear_gently(_row_buf);
|
||||
}
|
||||
|
||||
future<> clear_working_row_buf() {
|
||||
return utils::clear_gently(_working_row_buf).then([this] {
|
||||
_working_row_buf_combined_hash.clear();
|
||||
});
|
||||
}
|
||||
|
||||
// Read rows from disk until _max_row_buf_size of rows are filled into _row_buf.
|
||||
// Calculate the combined checksum of the rows
|
||||
// Calculate the total size of the rows in _row_buf
|
||||
future<get_sync_boundary_response>
|
||||
get_sync_boundary(std::optional<repair_sync_boundary> skipped_sync_boundary) {
|
||||
auto f = make_ready_future<>();
|
||||
if (skipped_sync_boundary) {
|
||||
_current_sync_boundary = skipped_sync_boundary;
|
||||
_row_buf.clear();
|
||||
_working_row_buf.clear();
|
||||
_working_row_buf_combined_hash.clear();
|
||||
} else {
|
||||
_working_row_buf.clear();
|
||||
_working_row_buf_combined_hash.clear();
|
||||
f = clear_row_buf();
|
||||
}
|
||||
// Here is the place we update _last_sync_boundary
|
||||
rlogger.trace("SET _last_sync_boundary from {} to {}", _last_sync_boundary, _current_sync_boundary);
|
||||
_last_sync_boundary = _current_sync_boundary;
|
||||
return row_buf_size().then([this, sb = std::move(skipped_sync_boundary)] (size_t cur_size) {
|
||||
return f.then([this, sb = std::move(skipped_sync_boundary)] () mutable {
|
||||
return clear_working_row_buf().then([this, sb = sb] () mutable {
|
||||
return row_buf_size().then([this, sb = std::move(sb)] (size_t cur_size) {
|
||||
return read_rows_from_disk(cur_size).then([this, sb = std::move(sb)] (std::list<repair_row> new_rows, size_t new_rows_size) mutable {
|
||||
size_t new_rows_nr = new_rows.size();
|
||||
_row_buf.splice(_row_buf.end(), new_rows);
|
||||
@@ -1099,6 +1137,8 @@ private:
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
future<> move_row_buf_to_working_row_buf() {
|
||||
@@ -1174,9 +1214,9 @@ private:
|
||||
}
|
||||
|
||||
future<std::list<repair_row>>
|
||||
copy_rows_from_working_row_buf_within_set_diff(std::unordered_set<repair_hash> set_diff) {
|
||||
copy_rows_from_working_row_buf_within_set_diff(repair_hash_set set_diff) {
|
||||
return do_with(std::list<repair_row>(), std::move(set_diff),
|
||||
[this] (std::list<repair_row>& rows, std::unordered_set<repair_hash>& set_diff) {
|
||||
[this] (std::list<repair_row>& rows, repair_hash_set& set_diff) {
|
||||
return do_for_each(_working_row_buf, [this, &set_diff, &rows] (const repair_row& r) {
|
||||
if (set_diff.count(r.hash()) > 0) {
|
||||
rows.push_back(r);
|
||||
@@ -1191,7 +1231,7 @@ private:
|
||||
// Give a set of row hashes, return the corresponding rows
|
||||
// If needs_all_rows is set, return all the rows in _working_row_buf, ignore the set_diff
|
||||
future<std::list<repair_row>>
|
||||
get_row_diff(std::unordered_set<repair_hash> set_diff, needs_all_rows_t needs_all_rows = needs_all_rows_t::no) {
|
||||
get_row_diff(repair_hash_set set_diff, needs_all_rows_t needs_all_rows = needs_all_rows_t::no) {
|
||||
if (needs_all_rows) {
|
||||
if (!_repair_master || _nr_peer_nodes == 1) {
|
||||
return make_ready_future<std::list<repair_row>>(std::move(_working_row_buf));
|
||||
@@ -1202,19 +1242,28 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
future<> do_apply_rows(std::list<repair_row>& row_diff, unsigned node_idx, update_working_row_buf update_buf) {
|
||||
return with_semaphore(_repair_writer.sem(), 1, [this, node_idx, update_buf, &row_diff] {
|
||||
_repair_writer.create_writer(_db, node_idx);
|
||||
return do_for_each(row_diff, [this, node_idx, update_buf] (repair_row& r) {
|
||||
if (update_buf) {
|
||||
_working_row_buf_combined_hash.add(r.hash());
|
||||
}
|
||||
// The repair_row here is supposed to have
|
||||
// mutation_fragment attached because we have stored it in
|
||||
// to_repair_rows_list above where the repair_row is created.
|
||||
mutation_fragment mf = std::move(r.get_mutation_fragment());
|
||||
auto dk_with_hash = r.get_dk_with_hash();
|
||||
return _repair_writer.do_write(node_idx, std::move(dk_with_hash), std::move(mf));
|
||||
future<> do_apply_rows(std::list<repair_row>&& row_diff, unsigned node_idx, update_working_row_buf update_buf) {
|
||||
return do_with(std::move(row_diff), [this, node_idx, update_buf] (std::list<repair_row>& row_diff) {
|
||||
return with_semaphore(_repair_writer.sem(), 1, [this, node_idx, update_buf, &row_diff] {
|
||||
_repair_writer.create_writer(_db, node_idx);
|
||||
return repeat([this, node_idx, update_buf, &row_diff] () mutable {
|
||||
if (row_diff.empty()) {
|
||||
return make_ready_future<stop_iteration>(stop_iteration::yes);
|
||||
}
|
||||
repair_row& r = row_diff.front();
|
||||
if (update_buf) {
|
||||
_working_row_buf_combined_hash.add(r.hash());
|
||||
}
|
||||
// The repair_row here is supposed to have
|
||||
// mutation_fragment attached because we have stored it in
|
||||
// to_repair_rows_list above where the repair_row is created.
|
||||
mutation_fragment mf = std::move(r.get_mutation_fragment());
|
||||
auto dk_with_hash = r.get_dk_with_hash();
|
||||
return _repair_writer.do_write(node_idx, std::move(dk_with_hash), std::move(mf)).then([&row_diff] {
|
||||
row_diff.pop_front();
|
||||
return make_ready_future<stop_iteration>(stop_iteration::no);
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
@@ -1232,19 +1281,17 @@ private:
|
||||
stats().rx_row_nr += row_diff.size();
|
||||
stats().rx_row_nr_peer[from] += row_diff.size();
|
||||
if (update_buf) {
|
||||
std::list<repair_row> tmp;
|
||||
tmp.swap(_working_row_buf);
|
||||
// Both row_diff and _working_row_buf and are ordered, merging
|
||||
// two sored list to make sure the combination of row_diff
|
||||
// and _working_row_buf are ordered.
|
||||
std::merge(tmp.begin(), tmp.end(), row_diff.begin(), row_diff.end(), std::back_inserter(_working_row_buf),
|
||||
[this] (const repair_row& x, const repair_row& y) { thread::maybe_yield(); return _cmp(x.boundary(), y.boundary()) < 0; });
|
||||
utils::merge_to_gently(_working_row_buf, row_diff,
|
||||
[this] (const repair_row& x, const repair_row& y) { return _cmp(x.boundary(), y.boundary()) < 0; });
|
||||
}
|
||||
if (update_hash_set) {
|
||||
_peer_row_hash_sets[node_idx] = boost::copy_range<std::unordered_set<repair_hash>>(row_diff |
|
||||
_peer_row_hash_sets[node_idx] = boost::copy_range<repair_hash_set>(row_diff |
|
||||
boost::adaptors::transformed([] (repair_row& r) { thread::maybe_yield(); return r.hash(); }));
|
||||
}
|
||||
do_apply_rows(row_diff, node_idx, update_buf).get();
|
||||
do_apply_rows(std::move(row_diff), node_idx, update_buf).get();
|
||||
}
|
||||
|
||||
future<>
|
||||
@@ -1252,11 +1299,9 @@ private:
|
||||
if (rows.empty()) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
return to_repair_rows_list(rows).then([this] (std::list<repair_row> row_diff) {
|
||||
return do_with(std::move(row_diff), [this] (std::list<repair_row>& row_diff) {
|
||||
unsigned node_idx = 0;
|
||||
return do_apply_rows(row_diff, node_idx, update_working_row_buf::no);
|
||||
});
|
||||
return to_repair_rows_list(std::move(rows)).then([this] (std::list<repair_row> row_diff) {
|
||||
unsigned node_idx = 0;
|
||||
return do_apply_rows(std::move(row_diff), node_idx, update_working_row_buf::no);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1335,13 +1380,13 @@ private:
|
||||
public:
|
||||
// RPC API
|
||||
// Return the hashes of the rows in _working_row_buf
|
||||
future<std::unordered_set<repair_hash>>
|
||||
future<repair_hash_set>
|
||||
get_full_row_hashes(gms::inet_address remote_node) {
|
||||
if (remote_node == _myip) {
|
||||
return get_full_row_hashes_handler();
|
||||
}
|
||||
return netw::get_local_messaging_service().send_repair_get_full_row_hashes(msg_addr(remote_node),
|
||||
_repair_meta_id).then([this, remote_node] (std::unordered_set<repair_hash> hashes) {
|
||||
_repair_meta_id).then([this, remote_node] (repair_hash_set hashes) {
|
||||
rlogger.debug("Got full hashes from peer={}, nr_hashes={}", remote_node, hashes.size());
|
||||
_metrics.rx_hashes_nr += hashes.size();
|
||||
stats().rx_hashes_nr += hashes.size();
|
||||
@@ -1352,7 +1397,7 @@ public:
|
||||
|
||||
private:
|
||||
future<> get_full_row_hashes_source_op(
|
||||
lw_shared_ptr<std::unordered_set<repair_hash>> current_hashes,
|
||||
lw_shared_ptr<repair_hash_set> current_hashes,
|
||||
gms::inet_address remote_node,
|
||||
unsigned node_idx,
|
||||
rpc::source<repair_hash_with_cmd>& source) {
|
||||
@@ -1390,12 +1435,12 @@ private:
|
||||
}
|
||||
|
||||
public:
|
||||
future<std::unordered_set<repair_hash>>
|
||||
future<repair_hash_set>
|
||||
get_full_row_hashes_with_rpc_stream(gms::inet_address remote_node, unsigned node_idx) {
|
||||
if (remote_node == _myip) {
|
||||
return get_full_row_hashes_handler();
|
||||
}
|
||||
auto current_hashes = make_lw_shared<std::unordered_set<repair_hash>>();
|
||||
auto current_hashes = make_lw_shared<repair_hash_set>();
|
||||
return _sink_source_for_get_full_row_hashes.get_sink_source(remote_node, node_idx).then(
|
||||
[this, current_hashes, remote_node, node_idx]
|
||||
(rpc::sink<repair_stream_cmd>& sink, rpc::source<repair_hash_with_cmd>& source) mutable {
|
||||
@@ -1410,7 +1455,7 @@ public:
|
||||
}
|
||||
|
||||
// RPC handler
|
||||
future<std::unordered_set<repair_hash>>
|
||||
future<repair_hash_set>
|
||||
get_full_row_hashes_handler() {
|
||||
return with_gate(_gate, [this] {
|
||||
return working_row_hashes();
|
||||
@@ -1543,7 +1588,7 @@ public:
|
||||
// RPC API
|
||||
// Return rows in the _working_row_buf with hash within the given sef_diff
|
||||
// Must run inside a seastar thread
|
||||
void get_row_diff(std::unordered_set<repair_hash> set_diff, needs_all_rows_t needs_all_rows, gms::inet_address remote_node, unsigned node_idx) {
|
||||
void get_row_diff(repair_hash_set set_diff, needs_all_rows_t needs_all_rows, gms::inet_address remote_node, unsigned node_idx) {
|
||||
if (needs_all_rows || !set_diff.empty()) {
|
||||
if (remote_node == _myip) {
|
||||
return;
|
||||
@@ -1612,11 +1657,11 @@ private:
|
||||
}
|
||||
|
||||
future<> get_row_diff_sink_op(
|
||||
std::unordered_set<repair_hash> set_diff,
|
||||
repair_hash_set set_diff,
|
||||
needs_all_rows_t needs_all_rows,
|
||||
rpc::sink<repair_hash_with_cmd>& sink,
|
||||
gms::inet_address remote_node) {
|
||||
return do_with(std::move(set_diff), [needs_all_rows, remote_node, &sink] (std::unordered_set<repair_hash>& set_diff) mutable {
|
||||
return do_with(std::move(set_diff), [needs_all_rows, remote_node, &sink] (repair_hash_set& set_diff) mutable {
|
||||
if (inject_rpc_stream_error) {
|
||||
return make_exception_future<>(std::runtime_error("get_row_diff: Inject sender error in sink loop"));
|
||||
}
|
||||
@@ -1643,7 +1688,7 @@ private:
|
||||
public:
|
||||
// Must run inside a seastar thread
|
||||
void get_row_diff_with_rpc_stream(
|
||||
std::unordered_set<repair_hash> set_diff,
|
||||
repair_hash_set set_diff,
|
||||
needs_all_rows_t needs_all_rows,
|
||||
update_peer_row_hash_sets update_hash_set,
|
||||
gms::inet_address remote_node,
|
||||
@@ -1669,7 +1714,7 @@ public:
|
||||
}
|
||||
|
||||
// RPC handler
|
||||
future<repair_rows_on_wire> get_row_diff_handler(std::unordered_set<repair_hash> set_diff, needs_all_rows_t needs_all_rows) {
|
||||
future<repair_rows_on_wire> get_row_diff_handler(repair_hash_set set_diff, needs_all_rows_t needs_all_rows) {
|
||||
return with_gate(_gate, [this, set_diff = std::move(set_diff), needs_all_rows] () mutable {
|
||||
return get_row_diff(std::move(set_diff), needs_all_rows).then([this] (std::list<repair_row> row_diff) {
|
||||
return to_repair_rows_on_wire(std::move(row_diff));
|
||||
@@ -1679,15 +1724,16 @@ public:
|
||||
|
||||
// RPC API
|
||||
// Send rows in the _working_row_buf with hash within the given sef_diff
|
||||
future<> put_row_diff(std::unordered_set<repair_hash> set_diff, needs_all_rows_t needs_all_rows, gms::inet_address remote_node) {
|
||||
future<> put_row_diff(repair_hash_set set_diff, needs_all_rows_t needs_all_rows, gms::inet_address remote_node) {
|
||||
if (!set_diff.empty()) {
|
||||
if (remote_node == _myip) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
auto sz = set_diff.size();
|
||||
size_t sz = set_diff.size();
|
||||
return get_row_diff(std::move(set_diff), needs_all_rows).then([this, remote_node, sz] (std::list<repair_row> row_diff) {
|
||||
if (row_diff.size() != sz) {
|
||||
throw std::runtime_error("row_diff.size() != set_diff.size()");
|
||||
rlogger.warn("Hash conflict detected, keyspace={}, table={}, range={}, row_diff.size={}, set_diff.size={}. It is recommended to compact the table and rerun repair for the range.",
|
||||
_schema->ks_name(), _schema->cf_name(), _range, row_diff.size(), sz);
|
||||
}
|
||||
return do_with(std::move(row_diff), [this, remote_node] (std::list<repair_row>& row_diff) {
|
||||
return get_repair_rows_size(row_diff).then([this, remote_node, &row_diff] (size_t row_bytes) mutable {
|
||||
@@ -1754,17 +1800,18 @@ private:
|
||||
|
||||
public:
|
||||
future<> put_row_diff_with_rpc_stream(
|
||||
std::unordered_set<repair_hash> set_diff,
|
||||
repair_hash_set set_diff,
|
||||
needs_all_rows_t needs_all_rows,
|
||||
gms::inet_address remote_node, unsigned node_idx) {
|
||||
if (!set_diff.empty()) {
|
||||
if (remote_node == _myip) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
auto sz = set_diff.size();
|
||||
size_t sz = set_diff.size();
|
||||
return get_row_diff(std::move(set_diff), needs_all_rows).then([this, remote_node, node_idx, sz] (std::list<repair_row> row_diff) {
|
||||
if (row_diff.size() != sz) {
|
||||
throw std::runtime_error("row_diff.size() != set_diff.size()");
|
||||
rlogger.warn("Hash conflict detected, keyspace={}, table={}, range={}, row_diff.size={}, set_diff.size={}. It is recommended to compact the table and rerun repair for the range.",
|
||||
_schema->ks_name(), _schema->cf_name(), _range, row_diff.size(), sz);
|
||||
}
|
||||
return do_with(std::move(row_diff), [this, remote_node, node_idx] (std::list<repair_row>& row_diff) {
|
||||
return get_repair_rows_size(row_diff).then([this, remote_node, node_idx, &row_diff] (size_t row_bytes) mutable {
|
||||
@@ -1803,7 +1850,7 @@ static future<stop_iteration> repair_get_row_diff_with_rpc_stream_process_op(
|
||||
rpc::sink<repair_row_on_wire_with_cmd> sink,
|
||||
rpc::source<repair_hash_with_cmd> source,
|
||||
bool &error,
|
||||
std::unordered_set<repair_hash>& current_set_diff,
|
||||
repair_hash_set& current_set_diff,
|
||||
std::optional<std::tuple<repair_hash_with_cmd>> hash_cmd_opt) {
|
||||
repair_hash_with_cmd hash_cmd = std::get<0>(hash_cmd_opt.value());
|
||||
rlogger.trace("Got repair_hash_with_cmd from peer={}, hash={}, cmd={}", from, hash_cmd.hash, int(hash_cmd.cmd));
|
||||
@@ -1816,7 +1863,7 @@ static future<stop_iteration> repair_get_row_diff_with_rpc_stream_process_op(
|
||||
}
|
||||
bool needs_all_rows = hash_cmd.cmd == repair_stream_cmd::needs_all_rows;
|
||||
_metrics.rx_hashes_nr += current_set_diff.size();
|
||||
auto fp = make_foreign(std::make_unique<std::unordered_set<repair_hash>>(std::move(current_set_diff)));
|
||||
auto fp = make_foreign(std::make_unique<repair_hash_set>(std::move(current_set_diff)));
|
||||
return smp::submit_to(src_cpu_id % smp::count, [from, repair_meta_id, needs_all_rows, fp = std::move(fp)] {
|
||||
auto rm = repair_meta::get_repair_meta(from, repair_meta_id);
|
||||
if (fp.get_owner_shard() == engine().cpu_id()) {
|
||||
@@ -1894,12 +1941,12 @@ static future<stop_iteration> repair_get_full_row_hashes_with_rpc_stream_process
|
||||
if (status == repair_stream_cmd::get_full_row_hashes) {
|
||||
return smp::submit_to(src_cpu_id % smp::count, [from, repair_meta_id] {
|
||||
auto rm = repair_meta::get_repair_meta(from, repair_meta_id);
|
||||
return rm->get_full_row_hashes_handler().then([] (std::unordered_set<repair_hash> hashes) {
|
||||
return rm->get_full_row_hashes_handler().then([] (repair_hash_set hashes) {
|
||||
_metrics.tx_hashes_nr += hashes.size();
|
||||
return hashes;
|
||||
});
|
||||
}).then([sink] (std::unordered_set<repair_hash> hashes) mutable {
|
||||
return do_with(std::move(hashes), [sink] (std::unordered_set<repair_hash>& hashes) mutable {
|
||||
}).then([sink] (repair_hash_set hashes) mutable {
|
||||
return do_with(std::move(hashes), [sink] (repair_hash_set& hashes) mutable {
|
||||
return do_for_each(hashes, [sink] (const repair_hash& hash) mutable {
|
||||
return sink(repair_hash_with_cmd{repair_stream_cmd::hash_data, hash});
|
||||
}).then([sink] () mutable {
|
||||
@@ -1922,7 +1969,7 @@ static future<> repair_get_row_diff_with_rpc_stream_handler(
|
||||
uint32_t repair_meta_id,
|
||||
rpc::sink<repair_row_on_wire_with_cmd> sink,
|
||||
rpc::source<repair_hash_with_cmd> source) {
|
||||
return do_with(false, std::unordered_set<repair_hash>(), [from, src_cpu_id, repair_meta_id, sink, source] (bool& error, std::unordered_set<repair_hash>& current_set_diff) mutable {
|
||||
return do_with(false, repair_hash_set(), [from, src_cpu_id, repair_meta_id, sink, source] (bool& error, repair_hash_set& current_set_diff) mutable {
|
||||
return repeat([from, src_cpu_id, repair_meta_id, sink, source, &error, ¤t_set_diff] () mutable {
|
||||
return source().then([from, src_cpu_id, repair_meta_id, sink, source, &error, ¤t_set_diff] (std::optional<std::tuple<repair_hash_with_cmd>> hash_cmd_opt) mutable {
|
||||
if (hash_cmd_opt) {
|
||||
@@ -2065,7 +2112,7 @@ future<> repair_init_messaging_service_handler(repair_service& rs, distributed<d
|
||||
auto from = cinfo.retrieve_auxiliary<gms::inet_address>("baddr");
|
||||
return smp::submit_to(src_cpu_id % smp::count, [from, repair_meta_id] {
|
||||
auto rm = repair_meta::get_repair_meta(from, repair_meta_id);
|
||||
return rm->get_full_row_hashes_handler().then([] (std::unordered_set<repair_hash> hashes) {
|
||||
return rm->get_full_row_hashes_handler().then([] (repair_hash_set hashes) {
|
||||
_metrics.tx_hashes_nr += hashes.size();
|
||||
return hashes;
|
||||
});
|
||||
@@ -2093,11 +2140,11 @@ future<> repair_init_messaging_service_handler(repair_service& rs, distributed<d
|
||||
});
|
||||
});
|
||||
ms.register_repair_get_row_diff([] (const rpc::client_info& cinfo, uint32_t repair_meta_id,
|
||||
std::unordered_set<repair_hash> set_diff, bool needs_all_rows) {
|
||||
repair_hash_set set_diff, bool needs_all_rows) {
|
||||
auto src_cpu_id = cinfo.retrieve_auxiliary<uint32_t>("src_cpu_id");
|
||||
auto from = cinfo.retrieve_auxiliary<gms::inet_address>("baddr");
|
||||
_metrics.rx_hashes_nr += set_diff.size();
|
||||
auto fp = make_foreign(std::make_unique<std::unordered_set<repair_hash>>(std::move(set_diff)));
|
||||
auto fp = make_foreign(std::make_unique<repair_hash_set>(std::move(set_diff)));
|
||||
return smp::submit_to(src_cpu_id % smp::count, [from, repair_meta_id, fp = std::move(fp), needs_all_rows] () mutable {
|
||||
auto rm = repair_meta::get_repair_meta(from, repair_meta_id);
|
||||
if (fp.get_owner_shard() == engine().cpu_id()) {
|
||||
@@ -2165,6 +2212,25 @@ future<> repair_init_messaging_service_handler(repair_service& rs, distributed<d
|
||||
});
|
||||
}
|
||||
|
||||
future<> repair_uninit_messaging_service_handler() {
|
||||
return netw::get_messaging_service().invoke_on_all([] (auto& ms) {
|
||||
return when_all_succeed(
|
||||
ms.unregister_repair_get_row_diff_with_rpc_stream(),
|
||||
ms.unregister_repair_put_row_diff_with_rpc_stream(),
|
||||
ms.unregister_repair_get_full_row_hashes_with_rpc_stream(),
|
||||
ms.unregister_repair_get_full_row_hashes(),
|
||||
ms.unregister_repair_get_combined_row_hash(),
|
||||
ms.unregister_repair_get_sync_boundary(),
|
||||
ms.unregister_repair_get_row_diff(),
|
||||
ms.unregister_repair_put_row_diff(),
|
||||
ms.unregister_repair_row_level_start(),
|
||||
ms.unregister_repair_row_level_stop(),
|
||||
ms.unregister_repair_get_estimated_partitions(),
|
||||
ms.unregister_repair_set_estimated_partitions(),
|
||||
ms.unregister_repair_get_diff_algorithms()).discard_result();
|
||||
});
|
||||
}
|
||||
|
||||
class row_level_repair {
|
||||
repair_info& _ri;
|
||||
sstring _cf_name;
|
||||
@@ -2394,7 +2460,7 @@ private:
|
||||
// sequentially because the rows from repair follower 1 to
|
||||
// repair master might reduce the amount of missing data
|
||||
// between repair master and repair follower 2.
|
||||
std::unordered_set<repair_hash> set_diff = repair_meta::get_set_diff(master.peer_row_hash_sets(node_idx), master.working_row_hashes().get0());
|
||||
repair_hash_set set_diff = repair_meta::get_set_diff(master.peer_row_hash_sets(node_idx), master.working_row_hashes().get0());
|
||||
// Request missing sets from peer node
|
||||
rlogger.debug("Before get_row_diff to node {}, local={}, peer={}, set_diff={}",
|
||||
node, master.working_row_hashes().get0().size(), master.peer_row_hash_sets(node_idx).size(), set_diff.size());
|
||||
@@ -2417,9 +2483,9 @@ private:
|
||||
// So we can figure out which rows peer node are missing and send the missing rows to them
|
||||
check_in_shutdown();
|
||||
_ri.check_in_abort();
|
||||
std::unordered_set<repair_hash> local_row_hash_sets = master.working_row_hashes().get0();
|
||||
repair_hash_set local_row_hash_sets = master.working_row_hashes().get0();
|
||||
auto sz = _all_live_peer_nodes.size();
|
||||
std::vector<std::unordered_set<repair_hash>> set_diffs(sz);
|
||||
std::vector<repair_hash_set> set_diffs(sz);
|
||||
for (size_t idx : boost::irange(size_t(0), sz)) {
|
||||
set_diffs[idx] = repair_meta::get_set_diff(local_row_hash_sets, master.peer_row_hash_sets(idx));
|
||||
}
|
||||
|
||||
@@ -45,6 +45,7 @@ private:
|
||||
};
|
||||
|
||||
future<> repair_init_messaging_service_handler(repair_service& rs, distributed<db::system_distributed_keyspace>& sys_dist_ks, distributed<db::view::view_update_generator>& view_update_generator);
|
||||
future<> repair_uninit_messaging_service_handler();
|
||||
|
||||
class repair_info;
|
||||
|
||||
|
||||
@@ -42,6 +42,8 @@
|
||||
|
||||
constexpr int32_t schema::NAME_LENGTH;
|
||||
|
||||
extern logging::logger dblog;
|
||||
|
||||
sstring to_sstring(column_kind k) {
|
||||
switch (k) {
|
||||
case column_kind::partition_key: return "PARTITION_KEY";
|
||||
@@ -575,11 +577,15 @@ schema::get_column_definition(const bytes& name) const {
|
||||
|
||||
const column_definition&
|
||||
schema::column_at(column_kind kind, column_id id) const {
|
||||
return _raw._columns.at(column_offset(kind) + id);
|
||||
return column_at(static_cast<ordinal_column_id>(column_offset(kind) + id));
|
||||
}
|
||||
|
||||
const column_definition&
|
||||
schema::column_at(ordinal_column_id ordinal_id) const {
|
||||
if (size_t(ordinal_id) >= _raw._columns.size()) {
|
||||
on_internal_error(dblog, format("{}.{}@{}: column id {:d} >= {:d}",
|
||||
ks_name(), cf_name(), version(), size_t(ordinal_id), _raw._columns.size()));
|
||||
}
|
||||
return _raw._columns.at(static_cast<column_count_type>(ordinal_id));
|
||||
}
|
||||
|
||||
|
||||
@@ -79,7 +79,8 @@ executables = ['build/{}/scylla'.format(args.mode),
|
||||
'/usr/sbin/ethtool',
|
||||
'/usr/bin/netstat',
|
||||
'/usr/bin/hwloc-distrib',
|
||||
'/usr/bin/hwloc-calc']
|
||||
'/usr/bin/hwloc-calc',
|
||||
'/usr/bin/lsblk']
|
||||
|
||||
output = args.dest
|
||||
|
||||
|
||||
@@ -596,7 +596,7 @@ def current_shard():
|
||||
|
||||
|
||||
def find_db(shard=None):
|
||||
if not shard:
|
||||
if shard is None:
|
||||
shard = current_shard()
|
||||
return gdb.parse_and_eval('::debug::db')['_instances']['_M_impl']['_M_start'][shard]['service']['_p']
|
||||
|
||||
|
||||
@@ -63,6 +63,17 @@ MemoryHigh=1200M
|
||||
MemoryMax=1400M
|
||||
MemoryLimit=1400M
|
||||
EOS
|
||||
|
||||
# On CentOS7, systemd does not support percentage-based parameter.
|
||||
# To apply memory parameter on CentOS7, we need to override the parameter
|
||||
# in bytes, instead of percentage.
|
||||
elif [ "$RHEL" -a "$VERSION_ID" = "7" ]; then
|
||||
MEMORY_LIMIT=$((MEMTOTAL_BYTES / 100 * 5))
|
||||
mkdir -p /etc/systemd/system/scylla-helper.slice.d/
|
||||
cat << EOS > /etc/systemd/system/scylla-helper.slice.d/memory.conf
|
||||
[Slice]
|
||||
MemoryLimit=$MEMORY_LIMIT
|
||||
EOS
|
||||
fi
|
||||
|
||||
systemctl --system daemon-reload >/dev/null || true
|
||||
|
||||
2
seastar
2
seastar
Submodule seastar updated: 0dc0fec831...748428930a
@@ -25,6 +25,7 @@
|
||||
#include <seastar/util/bool_class.hh>
|
||||
#include <boost/range/algorithm/for_each.hpp>
|
||||
#include "utils/small_vector.hh"
|
||||
#include <absl/container/btree_set.h>
|
||||
|
||||
namespace ser {
|
||||
|
||||
@@ -81,6 +82,17 @@ static inline void serialize_array(Output& out, const Container& v) {
|
||||
template<typename Container>
|
||||
struct container_traits;
|
||||
|
||||
template<typename T>
|
||||
struct container_traits<absl::btree_set<T>> {
|
||||
struct back_emplacer {
|
||||
absl::btree_set<T>& c;
|
||||
back_emplacer(absl::btree_set<T>& c_) : c(c_) {}
|
||||
void operator()(T&& v) {
|
||||
c.emplace(std::move(v));
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct container_traits<std::unordered_set<T>> {
|
||||
struct back_emplacer {
|
||||
@@ -253,6 +265,27 @@ struct serializer<std::list<T>> {
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct serializer<absl::btree_set<T>> {
|
||||
template<typename Input>
|
||||
static absl::btree_set<T> read(Input& in) {
|
||||
auto sz = deserialize(in, boost::type<uint32_t>());
|
||||
absl::btree_set<T> v;
|
||||
deserialize_array_helper<false, T>::doit(in, v, sz);
|
||||
return v;
|
||||
}
|
||||
template<typename Output>
|
||||
static void write(Output& out, const absl::btree_set<T>& v) {
|
||||
safe_serialize_as_uint32(out, v.size());
|
||||
serialize_array_helper<false, T>::doit(out, v);
|
||||
}
|
||||
template<typename Input>
|
||||
static void skip(Input& in) {
|
||||
auto sz = deserialize(in, boost::type<uint32_t>());
|
||||
skip_array<T>(in, sz);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct serializer<std::unordered_set<T>> {
|
||||
template<typename Input>
|
||||
|
||||
@@ -92,7 +92,7 @@ void migration_manager::init_messaging_service()
|
||||
//FIXME: future discarded.
|
||||
(void)with_gate(_background_tasks, [this] {
|
||||
mlogger.debug("features changed, recalculating schema version");
|
||||
return update_schema_version_and_announce(get_storage_proxy(), _feat.cluster_schema_features());
|
||||
return db::schema_tables::recalculate_schema_version(get_storage_proxy(), _feat);
|
||||
});
|
||||
};
|
||||
|
||||
@@ -277,9 +277,9 @@ future<> migration_manager::maybe_schedule_schema_pull(const utils::UUID& their_
|
||||
}).finally([me = shared_from_this()] {});
|
||||
}
|
||||
|
||||
future<> migration_manager::submit_migration_task(const gms::inet_address& endpoint)
|
||||
future<> migration_manager::submit_migration_task(const gms::inet_address& endpoint, bool can_ignore_down_node)
|
||||
{
|
||||
return service::migration_task::run_may_throw(endpoint);
|
||||
return service::migration_task::run_may_throw(endpoint, can_ignore_down_node);
|
||||
}
|
||||
|
||||
future<> migration_manager::do_merge_schema_from(netw::messaging_service::msg_addr id)
|
||||
@@ -1132,7 +1132,8 @@ future<> migration_manager::sync_schema(const database& db, const std::vector<gm
|
||||
}).then([this, &schema_map] {
|
||||
return parallel_for_each(schema_map, [this] (auto& x) {
|
||||
mlogger.debug("Pulling schema {} from {}", x.first, x.second.front());
|
||||
return submit_migration_task(x.second.front());
|
||||
bool can_ignore_down_node = false;
|
||||
return submit_migration_task(x.second.front(), can_ignore_down_node);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -82,7 +82,7 @@ public:
|
||||
|
||||
future<> maybe_schedule_schema_pull(const utils::UUID& their_version, const gms::inet_address& endpoint);
|
||||
|
||||
future<> submit_migration_task(const gms::inet_address& endpoint);
|
||||
future<> submit_migration_task(const gms::inet_address& endpoint, bool can_ignore_down_node = true);
|
||||
|
||||
// Makes sure that this node knows about all schema changes known by "nodes" that were made prior to this call.
|
||||
future<> sync_schema(const database& db, const std::vector<gms::inet_address>& nodes);
|
||||
|
||||
@@ -51,11 +51,12 @@ namespace service {
|
||||
|
||||
static logging::logger mlogger("migration_task");
|
||||
|
||||
future<> migration_task::run_may_throw(const gms::inet_address& endpoint)
|
||||
future<> migration_task::run_may_throw(const gms::inet_address& endpoint, bool can_ignore_down_node)
|
||||
{
|
||||
if (!gms::get_local_gossiper().is_alive(endpoint)) {
|
||||
mlogger.warn("Can't send migration request: node {} is down.", endpoint);
|
||||
return make_ready_future<>();
|
||||
auto msg = format("Can't send migration request: node {} is down.", endpoint);
|
||||
mlogger.warn("{}", msg);
|
||||
return can_ignore_down_node ? make_ready_future<>() : make_exception_future<>(std::runtime_error(msg));
|
||||
}
|
||||
netw::messaging_service::msg_addr id{endpoint, 0};
|
||||
return service::get_local_migration_manager().merge_schema_from(id).handle_exception([](std::exception_ptr e) {
|
||||
|
||||
@@ -47,7 +47,7 @@ namespace service {
|
||||
|
||||
class migration_task {
|
||||
public:
|
||||
static future<> run_may_throw(const gms::inet_address& endpoint);
|
||||
static future<> run_may_throw(const gms::inet_address& endpoint, bool can_ignore_down_node);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -334,6 +334,7 @@ public:
|
||||
if (_handler) {
|
||||
_handler->prune(_proposal->ballot);
|
||||
}
|
||||
_proposal.release();
|
||||
}
|
||||
};
|
||||
|
||||
@@ -1661,16 +1662,22 @@ void storage_proxy_stats::stats::register_stats() {
|
||||
});
|
||||
}
|
||||
|
||||
inline uint64_t& storage_proxy_stats::split_stats::get_ep_stat(gms::inet_address ep) {
|
||||
inline uint64_t& storage_proxy_stats::split_stats::get_ep_stat(gms::inet_address ep) noexcept {
|
||||
if (fbu::is_me(ep)) {
|
||||
return _local.val;
|
||||
}
|
||||
|
||||
sstring dc = get_dc(ep);
|
||||
if (_auto_register_metrics) {
|
||||
register_metrics_for(ep);
|
||||
try {
|
||||
sstring dc = get_dc(ep);
|
||||
if (_auto_register_metrics) {
|
||||
register_metrics_for(ep);
|
||||
}
|
||||
return _dc_stats[dc].val;
|
||||
} catch (...) {
|
||||
static thread_local uint64_t dummy_stat;
|
||||
slogger.error("Failed to obtain stats ({}), fall-back to dummy", std::current_exception());
|
||||
return dummy_stat;
|
||||
}
|
||||
return _dc_stats[dc].val;
|
||||
}
|
||||
|
||||
void storage_proxy_stats::split_stats::register_metrics_local() {
|
||||
@@ -5021,14 +5028,15 @@ void storage_proxy::init_messaging_service() {
|
||||
}
|
||||
|
||||
pruning++;
|
||||
auto d = defer([] { pruning--; });
|
||||
return get_schema_for_read(schema_id, src_addr).then([this, key = std::move(key), ballot,
|
||||
timeout, tr_state = std::move(tr_state), src_ip] (schema_ptr schema) mutable {
|
||||
timeout, tr_state = std::move(tr_state), src_ip, d = std::move(d)] (schema_ptr schema) mutable {
|
||||
dht::token token = dht::get_token(*schema, key);
|
||||
unsigned shard = dht::shard_of(*schema, token);
|
||||
bool local = shard == engine().cpu_id();
|
||||
get_stats().replica_cross_shard_ops += !local;
|
||||
return smp::submit_to(shard, _write_smp_service_group, [gs = global_schema_ptr(schema), gt = tracing::global_trace_state_ptr(std::move(tr_state)),
|
||||
local, key = std::move(key), ballot, timeout, src_ip, d = defer([] { pruning--; })] () {
|
||||
local, key = std::move(key), ballot, timeout, src_ip, d = std::move(d)] () {
|
||||
tracing::trace_state_ptr tr_state = gt;
|
||||
return paxos::paxos_state::prune(gs, key, ballot, *timeout, tr_state).then([src_ip, tr_state] () {
|
||||
tracing::trace(tr_state, "paxos_prune: handling is done, sending a response to /{}", src_ip);
|
||||
@@ -5042,18 +5050,22 @@ void storage_proxy::init_messaging_service() {
|
||||
future<> storage_proxy::uninit_messaging_service() {
|
||||
auto& ms = netw::get_local_messaging_service();
|
||||
return when_all_succeed(
|
||||
ms.unregister_counter_mutation(),
|
||||
ms.unregister_mutation(),
|
||||
ms.unregister_hint_mutation(),
|
||||
ms.unregister_mutation_done(),
|
||||
ms.unregister_mutation_failed(),
|
||||
ms.unregister_read_data(),
|
||||
ms.unregister_read_mutation_data(),
|
||||
ms.unregister_read_digest(),
|
||||
ms.unregister_truncate(),
|
||||
ms.unregister_get_schema_version(),
|
||||
ms.unregister_paxos_prepare(),
|
||||
ms.unregister_paxos_accept(),
|
||||
ms.unregister_paxos_learn(),
|
||||
ms.unregister_paxos_prune()
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
future<rpc::tuple<foreign_ptr<lw_shared_ptr<reconcilable_result>>, cache_temperature>>
|
||||
@@ -5146,8 +5158,7 @@ future<> storage_proxy::drain_on_shutdown() {
|
||||
|
||||
future<>
|
||||
storage_proxy::stop() {
|
||||
// FIXME: hints manager should be stopped here but it seems like this function is never called
|
||||
return uninit_messaging_service();
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -298,7 +298,6 @@ private:
|
||||
cdc::cdc_service* _cdc = nullptr;
|
||||
cdc_stats _cdc_stats;
|
||||
private:
|
||||
future<> uninit_messaging_service();
|
||||
future<coordinator_query_result> query_singular(lw_shared_ptr<query::read_command> cmd,
|
||||
dht::partition_range_vector&& partition_ranges,
|
||||
db::consistency_level cl,
|
||||
@@ -452,6 +451,7 @@ public:
|
||||
return next;
|
||||
}
|
||||
void init_messaging_service();
|
||||
future<> uninit_messaging_service();
|
||||
|
||||
// Applies mutation on this node.
|
||||
// Resolves with timed_out_error when timeout is reached.
|
||||
|
||||
@@ -74,7 +74,7 @@ public:
|
||||
*
|
||||
* @return a reference to the requested counter
|
||||
*/
|
||||
uint64_t& get_ep_stat(gms::inet_address ep);
|
||||
uint64_t& get_ep_stat(gms::inet_address ep) noexcept;
|
||||
};
|
||||
|
||||
struct write_stats {
|
||||
|
||||
@@ -2175,7 +2175,8 @@ storage_service::get_snapshot_details() {
|
||||
}
|
||||
|
||||
future<int64_t> storage_service::true_snapshots_size() {
|
||||
return _db.map_reduce(adder<int64_t>(), [] (database& db) {
|
||||
return run_snapshot_list_operation([] {
|
||||
return get_local_storage_service()._db.map_reduce(adder<int64_t>(), [] (database& db) {
|
||||
return do_with(int64_t(0), [&db] (auto& local_total) {
|
||||
return parallel_for_each(db.get_column_families(), [&local_total] (auto& cf_pair) {
|
||||
return cf_pair.second->get_snapshot_details().then([&local_total] (auto map) {
|
||||
@@ -2189,6 +2190,7 @@ future<int64_t> storage_service::true_snapshots_size() {
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
static std::atomic<bool> isolated = { false };
|
||||
|
||||
@@ -66,7 +66,6 @@
|
||||
#include "db_clock.hh"
|
||||
#include "mutation_compactor.hh"
|
||||
#include "leveled_manifest.hh"
|
||||
#include "utils/observable.hh"
|
||||
#include "dht/token.hh"
|
||||
|
||||
namespace sstables {
|
||||
@@ -284,68 +283,73 @@ public:
|
||||
// When compaction finishes, all the temporary sstables generated here will be deleted and removed
|
||||
// from table's sstable set.
|
||||
class garbage_collected_sstable_writer {
|
||||
compaction* _c = nullptr;
|
||||
std::vector<shared_sstable> _temp_sealed_gc_sstables;
|
||||
std::deque<compaction_write_monitor> _active_write_monitors = {};
|
||||
shared_sstable _sst;
|
||||
std::optional<sstable_writer> _writer;
|
||||
std::optional<utils::observer<>> _on_new_sstable_sealed_observer;
|
||||
utils::UUID _run_identifier = utils::make_random_uuid();
|
||||
bool _consuming_new_partition {};
|
||||
private:
|
||||
void setup_on_new_sstable_sealed_handler();
|
||||
void maybe_create_new_sstable_writer();
|
||||
void finish_sstable_writer();
|
||||
void on_end_of_stream();
|
||||
public:
|
||||
garbage_collected_sstable_writer() = default;
|
||||
explicit garbage_collected_sstable_writer(compaction& c) : _c(&c) {
|
||||
setup_on_new_sstable_sealed_handler();
|
||||
}
|
||||
// Data for GC writer is stored separately to allow compaction class to communicate directly
|
||||
// with garbage_collected_sstable_writer which is moved into mutation_compaction, making it
|
||||
// unreachable after the compaction process has started.
|
||||
class data {
|
||||
compaction* _c = nullptr;
|
||||
// Garbage collected sstables that are sealed but were not added to SSTable set yet.
|
||||
std::vector<shared_sstable> _unused_garbage_collected_sstables;
|
||||
// Garbage collected sstables that were added to SSTable set and should be eventually removed from it.
|
||||
std::vector<shared_sstable> _used_garbage_collected_sstables;
|
||||
std::deque<compaction_write_monitor> _active_write_monitors = {};
|
||||
shared_sstable _sst;
|
||||
std::optional<sstable_writer> _writer;
|
||||
utils::UUID _run_identifier = utils::make_random_uuid();
|
||||
public:
|
||||
explicit data(compaction& c) : _c(&c) {
|
||||
}
|
||||
|
||||
data& operator=(const data&) = delete;
|
||||
data(const data&) = delete;
|
||||
|
||||
void maybe_create_new_sstable_writer();
|
||||
void finish_sstable_writer();
|
||||
|
||||
// Retrieves all unused garbage collected sstables that will be subsequently added
|
||||
// to the SSTable set, and mark them as used.
|
||||
std::vector<shared_sstable> consume_unused_garbage_collected_sstables() {
|
||||
auto unused = std::exchange(_unused_garbage_collected_sstables, {});
|
||||
_used_garbage_collected_sstables.insert(_used_garbage_collected_sstables.end(), unused.begin(), unused.end());
|
||||
return unused;
|
||||
}
|
||||
|
||||
const std::vector<shared_sstable>& used_garbage_collected_sstables() const {
|
||||
return _used_garbage_collected_sstables;
|
||||
}
|
||||
|
||||
friend class garbage_collected_sstable_writer;
|
||||
};
|
||||
private:
|
||||
garbage_collected_sstable_writer::data* _data = nullptr;
|
||||
public:
|
||||
explicit garbage_collected_sstable_writer() = default;
|
||||
explicit garbage_collected_sstable_writer(garbage_collected_sstable_writer::data& data) : _data(&data) {}
|
||||
|
||||
garbage_collected_sstable_writer& operator=(const garbage_collected_sstable_writer&) = delete;
|
||||
garbage_collected_sstable_writer(const garbage_collected_sstable_writer&) = delete;
|
||||
|
||||
garbage_collected_sstable_writer(garbage_collected_sstable_writer&& other)
|
||||
: _c(other._c)
|
||||
, _temp_sealed_gc_sstables(std::move(other._temp_sealed_gc_sstables))
|
||||
, _active_write_monitors(std::move(other._active_write_monitors))
|
||||
, _sst(std::move(other._sst))
|
||||
, _writer(std::move(other._writer))
|
||||
, _run_identifier(other._run_identifier)
|
||||
, _consuming_new_partition(other._consuming_new_partition) {
|
||||
other._on_new_sstable_sealed_observer->disconnect();
|
||||
setup_on_new_sstable_sealed_handler();
|
||||
}
|
||||
|
||||
garbage_collected_sstable_writer& operator=(garbage_collected_sstable_writer&& other) {
|
||||
if (this != &other) {
|
||||
this->~garbage_collected_sstable_writer();
|
||||
new (this) garbage_collected_sstable_writer(std::move(other));
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
garbage_collected_sstable_writer(garbage_collected_sstable_writer&& other) = default;
|
||||
garbage_collected_sstable_writer& operator=(garbage_collected_sstable_writer&& other) = default;
|
||||
|
||||
void consume_new_partition(const dht::decorated_key& dk) {
|
||||
maybe_create_new_sstable_writer();
|
||||
_writer->consume_new_partition(dk);
|
||||
_consuming_new_partition = true;
|
||||
_data->maybe_create_new_sstable_writer();
|
||||
_data->_writer->consume_new_partition(dk);
|
||||
}
|
||||
|
||||
void consume(tombstone t) { _writer->consume(t); }
|
||||
stop_iteration consume(static_row&& sr, tombstone, bool) { return _writer->consume(std::move(sr)); }
|
||||
stop_iteration consume(clustering_row&& cr, row_tombstone, bool) { return _writer->consume(std::move(cr)); }
|
||||
stop_iteration consume(range_tombstone&& rt) { return _writer->consume(std::move(rt)); }
|
||||
void consume(tombstone t) { _data->_writer->consume(t); }
|
||||
stop_iteration consume(static_row&& sr, tombstone, bool) { return _data->_writer->consume(std::move(sr)); }
|
||||
stop_iteration consume(clustering_row&& cr, row_tombstone, bool) { return _data->_writer->consume(std::move(cr)); }
|
||||
stop_iteration consume(range_tombstone&& rt) { return _data->_writer->consume(std::move(rt)); }
|
||||
|
||||
stop_iteration consume_end_of_partition() {
|
||||
_writer->consume_end_of_partition();
|
||||
_consuming_new_partition = false;
|
||||
_data->_writer->consume_end_of_partition();
|
||||
return stop_iteration::no;
|
||||
}
|
||||
|
||||
void consume_end_of_stream() {
|
||||
finish_sstable_writer();
|
||||
on_end_of_stream();
|
||||
_data->finish_sstable_writer();
|
||||
}
|
||||
};
|
||||
|
||||
@@ -380,6 +384,7 @@ public:
|
||||
class compaction {
|
||||
protected:
|
||||
column_family& _cf;
|
||||
creator_fn _sstable_creator;
|
||||
schema_ptr _schema;
|
||||
std::vector<shared_sstable> _sstables;
|
||||
// Unused sstables are tracked because if compaction is interrupted we can only delete them.
|
||||
@@ -393,15 +398,17 @@ protected:
|
||||
std::vector<unsigned long> _ancestors;
|
||||
db::replay_position _rp;
|
||||
encoding_stats_collector _stats_collector;
|
||||
utils::observable<> _on_new_sstable_sealed;
|
||||
bool _contains_multi_fragment_runs = false;
|
||||
garbage_collected_sstable_writer::data _gc_sstable_writer_data;
|
||||
protected:
|
||||
compaction(column_family& cf, std::vector<shared_sstable> sstables, uint64_t max_sstable_size, uint32_t sstable_level)
|
||||
compaction(column_family& cf, creator_fn creator, std::vector<shared_sstable> sstables, uint64_t max_sstable_size, uint32_t sstable_level)
|
||||
: _cf(cf)
|
||||
, _sstable_creator(std::move(creator))
|
||||
, _schema(cf.schema())
|
||||
, _sstables(std::move(sstables))
|
||||
, _max_sstable_size(max_sstable_size)
|
||||
, _sstable_level(sstable_level)
|
||||
, _gc_sstable_writer_data(*this)
|
||||
{
|
||||
_info->cf = &cf;
|
||||
for (auto& sst : _sstables) {
|
||||
@@ -434,10 +441,6 @@ protected:
|
||||
writer = std::nullopt;
|
||||
sst->open_data().get0();
|
||||
_info->end_size += sst->bytes_on_disk();
|
||||
// Notify GC'ed-data sstable writer's handler that an output sstable has just been sealed.
|
||||
// The handler is responsible for making sure that deleting an input sstable will not
|
||||
// result in resurrection on failure.
|
||||
_on_new_sstable_sealed();
|
||||
}
|
||||
|
||||
api::timestamp_type maximum_timestamp() const {
|
||||
@@ -447,10 +450,6 @@ protected:
|
||||
return (*m)->get_stats_metadata().max_timestamp;
|
||||
}
|
||||
|
||||
utils::observer<> add_on_new_sstable_sealed_handler(std::function<void (void)> handler) noexcept {
|
||||
return _on_new_sstable_sealed.observe(std::move(handler));
|
||||
}
|
||||
|
||||
encoding_stats get_encoding_stats() const {
|
||||
return _stats_collector.get();
|
||||
}
|
||||
@@ -562,10 +561,9 @@ private:
|
||||
};
|
||||
}
|
||||
|
||||
virtual shared_sstable create_new_sstable() const = 0;
|
||||
|
||||
// select a sstable writer based on decorated key.
|
||||
virtual sstable_writer* select_sstable_writer(const dht::decorated_key& dk) = 0;
|
||||
|
||||
// stop current writer
|
||||
virtual void stop_sstable_writer() = 0;
|
||||
// finish all writers.
|
||||
@@ -588,20 +586,9 @@ private:
|
||||
sst->mark_for_deletion();
|
||||
}
|
||||
}
|
||||
|
||||
void setup_garbage_collected_sstable(shared_sstable sst) {
|
||||
// Add new sstable to table's set because expired tombstone should be available if compaction is abruptly stopped.
|
||||
_cf.add_sstable_and_update_cache(std::move(sst)).get();
|
||||
}
|
||||
|
||||
void eventually_delete_garbage_collected_sstable(shared_sstable sst) {
|
||||
// Add sstable to compaction's input list for it to be eventually removed from table's set.
|
||||
sst->mark_for_deletion();
|
||||
_sstables.push_back(std::move(sst));
|
||||
}
|
||||
public:
|
||||
garbage_collected_sstable_writer make_garbage_collected_sstable_writer() {
|
||||
return garbage_collected_sstable_writer(*this);
|
||||
return garbage_collected_sstable_writer(_gc_sstable_writer_data);
|
||||
}
|
||||
|
||||
bool contains_multi_fragment_runs() const {
|
||||
@@ -616,6 +603,7 @@ public:
|
||||
|
||||
friend class compacting_sstable_writer;
|
||||
friend class garbage_collected_sstable_writer;
|
||||
friend class garbage_collected_sstable_writer::data;
|
||||
};
|
||||
|
||||
void compacting_sstable_writer::consume_new_partition(const dht::decorated_key& dk) {
|
||||
@@ -642,22 +630,9 @@ void compacting_sstable_writer::consume_end_of_stream() {
|
||||
_c.finish_sstable_writer();
|
||||
}
|
||||
|
||||
void garbage_collected_sstable_writer::setup_on_new_sstable_sealed_handler() {
|
||||
_on_new_sstable_sealed_observer = _c->add_on_new_sstable_sealed_handler([this] {
|
||||
// NOTE: This handler is called, BEFORE an input sstable is possibly deleted
|
||||
// *AND* AFTER a new output sstable is sealed, to flush a garbage collected
|
||||
// sstable being currently written.
|
||||
// That way, data is resurrection is prevented by making sure that the
|
||||
// GC'able data is still reachable in a temporary sstable.
|
||||
assert(!_consuming_new_partition);
|
||||
// Wait for current gc'ed-only-sstable to be flushed and added to table's set.
|
||||
this->finish_sstable_writer();
|
||||
});
|
||||
}
|
||||
|
||||
void garbage_collected_sstable_writer::maybe_create_new_sstable_writer() {
|
||||
void garbage_collected_sstable_writer::data::maybe_create_new_sstable_writer() {
|
||||
if (!_writer) {
|
||||
_sst = _c->create_new_sstable();
|
||||
_sst = _c->_sstable_creator(this_shard_id());
|
||||
|
||||
auto&& priority = service::get_local_compaction_priority();
|
||||
_active_write_monitors.emplace_back(_sst, _c->_cf, _c->maximum_timestamp(), _c->_sstable_level);
|
||||
@@ -668,25 +643,16 @@ void garbage_collected_sstable_writer::maybe_create_new_sstable_writer() {
|
||||
}
|
||||
}
|
||||
|
||||
void garbage_collected_sstable_writer::finish_sstable_writer() {
|
||||
void garbage_collected_sstable_writer::data::finish_sstable_writer() {
|
||||
if (_writer) {
|
||||
_writer->consume_end_of_stream();
|
||||
_writer = std::nullopt;
|
||||
_sst->open_data().get0();
|
||||
_c->setup_garbage_collected_sstable(_sst);
|
||||
_temp_sealed_gc_sstables.push_back(std::move(_sst));
|
||||
}
|
||||
}
|
||||
|
||||
void garbage_collected_sstable_writer::on_end_of_stream() {
|
||||
for (auto&& sst : _temp_sealed_gc_sstables) {
|
||||
clogger.debug("Asking for deletion of temporary tombstone-only sstable {}", sst->get_filename());
|
||||
_c->eventually_delete_garbage_collected_sstable(std::move(sst));
|
||||
_unused_garbage_collected_sstables.push_back(std::move(_sst));
|
||||
}
|
||||
}
|
||||
|
||||
class regular_compaction : public compaction {
|
||||
std::function<shared_sstable()> _creator;
|
||||
replacer_fn _replacer;
|
||||
std::unordered_set<shared_sstable> _compacting_for_max_purgeable_func;
|
||||
// store a clone of sstable set for column family, which needs to be alive for incremental selector.
|
||||
@@ -701,10 +667,9 @@ class regular_compaction : public compaction {
|
||||
std::deque<compaction_write_monitor> _active_write_monitors = {};
|
||||
utils::UUID _run_identifier;
|
||||
public:
|
||||
regular_compaction(column_family& cf, compaction_descriptor descriptor, std::function<shared_sstable()> creator, replacer_fn replacer)
|
||||
: compaction(cf, std::move(descriptor.sstables), descriptor.max_sstable_bytes, descriptor.level)
|
||||
, _creator(std::move(creator))
|
||||
, _replacer(std::move(replacer))
|
||||
regular_compaction(column_family& cf, compaction_descriptor descriptor)
|
||||
: compaction(cf, std::move(descriptor.creator), std::move(descriptor.sstables), descriptor.max_sstable_bytes, descriptor.level)
|
||||
, _replacer(std::move(descriptor.replacer))
|
||||
, _compacting_for_max_purgeable_func(std::unordered_set<shared_sstable>(_sstables.begin(), _sstables.end()))
|
||||
, _set(cf.get_sstable_set())
|
||||
, _selector(_set.make_incremental_selector())
|
||||
@@ -755,13 +720,9 @@ public:
|
||||
};
|
||||
}
|
||||
|
||||
virtual shared_sstable create_new_sstable() const override {
|
||||
return _creator();
|
||||
}
|
||||
|
||||
virtual sstable_writer* select_sstable_writer(const dht::decorated_key& dk) override {
|
||||
if (!_writer) {
|
||||
_sst = _creator();
|
||||
_sst = _sstable_creator(0);
|
||||
setup_new_sstable(_sst);
|
||||
|
||||
_active_write_monitors.emplace_back(_sst, _cf, maximum_timestamp(), _sstable_level);
|
||||
@@ -834,6 +795,15 @@ private:
|
||||
// Fully expired sstable is not actually compacted, therefore it's not present in the compacting set.
|
||||
_compacting->erase(sst);
|
||||
});
|
||||
// Make sure SSTable created by garbage collected writer is made available
|
||||
// before exhausted SSTable is released, so as to prevent data resurrection.
|
||||
_gc_sstable_writer_data.finish_sstable_writer();
|
||||
// Added Garbage collected SSTables to list of unused SSTables that will be added
|
||||
// to SSTable set. GC SSTables should be added before compaction completes because
|
||||
// a failure could result in data resurrection if data is not made available.
|
||||
auto unused_gc_sstables = _gc_sstable_writer_data.consume_unused_garbage_collected_sstables();
|
||||
_new_unused_sstables.insert(_new_unused_sstables.end(), unused_gc_sstables.begin(), unused_gc_sstables.end());
|
||||
|
||||
auto exhausted_ssts = std::vector<shared_sstable>(exhausted, _sstables.end());
|
||||
_replacer(get_compaction_completion_desc(exhausted_ssts, std::move(_new_unused_sstables)));
|
||||
_sstables.erase(exhausted, _sstables.end());
|
||||
@@ -842,11 +812,16 @@ private:
|
||||
}
|
||||
|
||||
void replace_remaining_exhausted_sstables() {
|
||||
if (!_sstables.empty()) {
|
||||
std::vector<shared_sstable> sstables_compacted;
|
||||
std::move(_sstables.begin(), _sstables.end(), std::back_inserter(sstables_compacted));
|
||||
_replacer(get_compaction_completion_desc(std::move(sstables_compacted), std::move(_new_unused_sstables)));
|
||||
}
|
||||
if (!_sstables.empty() || !_gc_sstable_writer_data.used_garbage_collected_sstables().empty()) {
|
||||
std::vector<shared_sstable> old_sstables;
|
||||
std::move(_sstables.begin(), _sstables.end(), std::back_inserter(old_sstables));
|
||||
|
||||
// Remove Garbage Collected SSTables from the SSTable set if any was previously added.
|
||||
auto& used_garbage_collected_sstables = _gc_sstable_writer_data.used_garbage_collected_sstables();
|
||||
old_sstables.insert(old_sstables.end(), used_garbage_collected_sstables.begin(), used_garbage_collected_sstables.end());
|
||||
|
||||
_replacer(get_compaction_completion_desc(std::move(old_sstables), std::move(_new_unused_sstables)));
|
||||
}
|
||||
}
|
||||
|
||||
void do_pending_replacements() {
|
||||
@@ -909,8 +884,8 @@ protected:
|
||||
return compaction_completion_desc{std::move(input_sstables), std::move(output_sstables), std::move(ranges_for_for_invalidation)};
|
||||
}
|
||||
public:
|
||||
cleanup_compaction(column_family& cf, compaction_descriptor descriptor, std::function<shared_sstable()> creator, replacer_fn replacer)
|
||||
: regular_compaction(cf, std::move(descriptor), std::move(creator), std::move(replacer))
|
||||
cleanup_compaction(column_family& cf, compaction_descriptor descriptor)
|
||||
: regular_compaction(cf, std::move(descriptor))
|
||||
, _owned_ranges(service::get_local_storage_service().get_local_ranges(_schema->ks_name()))
|
||||
{
|
||||
_info->type = compaction_type::Cleanup;
|
||||
@@ -1114,9 +1089,8 @@ private:
|
||||
compaction_options::scrub _options;
|
||||
|
||||
public:
|
||||
scrub_compaction(column_family& cf, compaction_descriptor descriptor, compaction_options::scrub options, std::function<shared_sstable()> creator,
|
||||
replacer_fn replacer)
|
||||
: regular_compaction(cf, std::move(descriptor), std::move(creator), std::move(replacer))
|
||||
scrub_compaction(column_family& cf, compaction_descriptor descriptor, compaction_options::scrub options)
|
||||
: regular_compaction(cf, std::move(descriptor))
|
||||
, _options(options) {
|
||||
_info->type = compaction_type::Scrub;
|
||||
}
|
||||
@@ -1143,7 +1117,6 @@ flat_mutation_reader make_scrubbing_reader(flat_mutation_reader rd, bool skip_co
|
||||
class resharding_compaction final : public compaction {
|
||||
std::vector<std::pair<shared_sstable, std::optional<sstable_writer>>> _output_sstables;
|
||||
shard_id _shard; // shard of current sstable writer
|
||||
std::function<shared_sstable(shard_id)> _sstable_creator;
|
||||
compaction_backlog_tracker _resharding_backlog_tracker;
|
||||
|
||||
// Partition count estimation for a shard S:
|
||||
@@ -1168,11 +1141,9 @@ private:
|
||||
return ceil(double(_estimation_per_shard[s].estimated_partitions) / estimated_sstables);
|
||||
}
|
||||
public:
|
||||
resharding_compaction(std::vector<shared_sstable> sstables, column_family& cf, std::function<shared_sstable(shard_id)> creator,
|
||||
uint64_t max_sstable_size, uint32_t sstable_level)
|
||||
: compaction(cf, std::move(sstables), max_sstable_size, sstable_level)
|
||||
resharding_compaction(column_family& cf, sstables::compaction_descriptor descriptor)
|
||||
: compaction(cf, std::move(descriptor.creator), std::move(descriptor.sstables), descriptor.max_sstable_bytes, descriptor.level)
|
||||
, _output_sstables(smp::count)
|
||||
, _sstable_creator(std::move(creator))
|
||||
, _resharding_backlog_tracker(std::make_unique<resharding_backlog_tracker>())
|
||||
, _estimation_per_shard(smp::count)
|
||||
, _run_identifiers(smp::count)
|
||||
@@ -1224,10 +1195,6 @@ public:
|
||||
|
||||
void backlog_tracker_adjust_charges() override { }
|
||||
|
||||
shared_sstable create_new_sstable() const override {
|
||||
return _sstable_creator(_shard);
|
||||
}
|
||||
|
||||
sstable_writer* select_sstable_writer(const dht::decorated_key& dk) override {
|
||||
_shard = dht::shard_of(*_schema, dk.token());
|
||||
auto& sst = _output_sstables[_shard].first;
|
||||
@@ -1298,38 +1265,35 @@ compaction_type compaction_options::type() const {
|
||||
return index_to_type[_options.index()];
|
||||
}
|
||||
|
||||
static std::unique_ptr<compaction> make_compaction(column_family& cf, sstables::compaction_descriptor descriptor,
|
||||
std::function<shared_sstable()> creator, replacer_fn replacer) {
|
||||
static std::unique_ptr<compaction> make_compaction(column_family& cf, sstables::compaction_descriptor descriptor) {
|
||||
struct {
|
||||
column_family& cf;
|
||||
sstables::compaction_descriptor&& descriptor;
|
||||
std::function<shared_sstable()>&& creator;
|
||||
replacer_fn&& replacer;
|
||||
|
||||
std::unique_ptr<compaction> operator()(compaction_options::regular) {
|
||||
return std::make_unique<regular_compaction>(cf, std::move(descriptor), std::move(creator), std::move(replacer));
|
||||
return std::make_unique<regular_compaction>(cf, std::move(descriptor));
|
||||
}
|
||||
std::unique_ptr<compaction> operator()(compaction_options::cleanup) {
|
||||
return std::make_unique<cleanup_compaction>(cf, std::move(descriptor), std::move(creator), std::move(replacer));
|
||||
return std::make_unique<cleanup_compaction>(cf, std::move(descriptor));
|
||||
}
|
||||
std::unique_ptr<compaction> operator()(compaction_options::upgrade) {
|
||||
return std::make_unique<cleanup_compaction>(cf, std::move(descriptor), std::move(creator), std::move(replacer));
|
||||
return std::make_unique<cleanup_compaction>(cf, std::move(descriptor));
|
||||
}
|
||||
std::unique_ptr<compaction> operator()(compaction_options::scrub scrub_options) {
|
||||
return std::make_unique<scrub_compaction>(cf, std::move(descriptor), scrub_options, std::move(creator), std::move(replacer));
|
||||
return std::make_unique<scrub_compaction>(cf, std::move(descriptor), scrub_options);
|
||||
}
|
||||
} visitor_factory{cf, std::move(descriptor), std::move(creator), std::move(replacer)};
|
||||
} visitor_factory{cf, std::move(descriptor)};
|
||||
|
||||
return descriptor.options.visit(visitor_factory);
|
||||
}
|
||||
|
||||
future<compaction_info>
|
||||
compact_sstables(sstables::compaction_descriptor descriptor, column_family& cf, std::function<shared_sstable()> creator, replacer_fn replacer) {
|
||||
compact_sstables(sstables::compaction_descriptor descriptor, column_family& cf) {
|
||||
if (descriptor.sstables.empty()) {
|
||||
throw std::runtime_error(format("Called {} compaction with empty set on behalf of {}.{}", compaction_name(descriptor.options.type()),
|
||||
cf.schema()->ks_name(), cf.schema()->cf_name()));
|
||||
}
|
||||
auto c = make_compaction(cf, std::move(descriptor), std::move(creator), std::move(replacer));
|
||||
auto c = make_compaction(cf, std::move(descriptor));
|
||||
if (c->contains_multi_fragment_runs()) {
|
||||
auto gc_writer = c->make_garbage_collected_sstable_writer();
|
||||
return compaction::run(std::move(c), std::move(gc_writer));
|
||||
@@ -1343,7 +1307,10 @@ reshard_sstables(std::vector<shared_sstable> sstables, column_family& cf, std::f
|
||||
if (sstables.empty()) {
|
||||
throw std::runtime_error(format("Called resharding with empty set on behalf of {}.{}", cf.schema()->ks_name(), cf.schema()->cf_name()));
|
||||
}
|
||||
auto c = std::make_unique<resharding_compaction>(std::move(sstables), cf, std::move(creator), max_sstable_size, sstable_level);
|
||||
sstables::compaction_descriptor descriptor(std::move(sstables), sstable_level, max_sstable_size);
|
||||
descriptor.creator = std::move(creator);
|
||||
|
||||
auto c = std::make_unique<resharding_compaction>(cf, std::move(descriptor));
|
||||
return compaction::run(std::move(c)).then([] (auto ret) {
|
||||
return std::move(ret.new_sstables);
|
||||
});
|
||||
|
||||
@@ -84,6 +84,20 @@ namespace sstables {
|
||||
compaction_type type() const;
|
||||
};
|
||||
|
||||
struct compaction_completion_desc {
|
||||
// Old, existing SSTables that should be deleted and removed from the SSTable set.
|
||||
std::vector<shared_sstable> old_sstables;
|
||||
// New, fresh SSTables that should be added to SSTable set, replacing the old ones.
|
||||
std::vector<shared_sstable> new_sstables;
|
||||
// Set of compacted partition ranges that should be invalidated in the cache.
|
||||
dht::partition_range_vector ranges_for_cache_invalidation;
|
||||
};
|
||||
|
||||
// creates a new SSTable for a given shard
|
||||
using creator_fn = std::function<shared_sstable(shard_id shard)>;
|
||||
// Replaces old sstable(s) by new one(s) which contain all non-expired data.
|
||||
using replacer_fn = std::function<void(compaction_completion_desc)>;
|
||||
|
||||
struct compaction_descriptor {
|
||||
// List of sstables to be compacted.
|
||||
std::vector<sstables::shared_sstable> sstables;
|
||||
@@ -101,6 +115,9 @@ namespace sstables {
|
||||
// This also selects the kind of compaction to do.
|
||||
compaction_options options = compaction_options::make_regular();
|
||||
|
||||
creator_fn creator;
|
||||
replacer_fn replacer;
|
||||
|
||||
compaction_descriptor() = default;
|
||||
|
||||
static constexpr int default_level = 0;
|
||||
@@ -190,16 +207,6 @@ namespace sstables {
|
||||
}
|
||||
};
|
||||
|
||||
struct compaction_completion_desc {
|
||||
std::vector<shared_sstable> input_sstables;
|
||||
std::vector<shared_sstable> output_sstables;
|
||||
// Set of compacted partition ranges that should be invalidated in the cache.
|
||||
dht::partition_range_vector ranges_for_cache_invalidation;
|
||||
};
|
||||
|
||||
// Replaces old sstable(s) by new one(s) which contain all non-expired data.
|
||||
using replacer_fn = std::function<void(compaction_completion_desc)>;
|
||||
|
||||
// Compact a list of N sstables into M sstables.
|
||||
// Returns info about the finished compaction, which includes vector to new sstables.
|
||||
//
|
||||
@@ -212,8 +219,7 @@ namespace sstables {
|
||||
// If descriptor.cleanup is true, mutation that doesn't belong to current node will be
|
||||
// cleaned up, log messages will inform the user that compact_sstables runs for
|
||||
// cleaning operation, and compaction history will not be updated.
|
||||
future<compaction_info> compact_sstables(sstables::compaction_descriptor descriptor, column_family& cf,
|
||||
std::function<shared_sstable()> creator, replacer_fn replacer);
|
||||
future<compaction_info> compact_sstables(sstables::compaction_descriptor descriptor, column_family& cf);
|
||||
|
||||
// Compacts a set of N shared sstables into M sstables. For every shard involved,
|
||||
// i.e. which owns any of the sstables, a new unshared sstable is created.
|
||||
|
||||
@@ -92,6 +92,9 @@ public:
|
||||
void transfer_ongoing_charges(compaction_backlog_tracker& new_bt, bool move_read_charges = true);
|
||||
void revert_charges(sstables::shared_sstable sst);
|
||||
private:
|
||||
// Returns true if this SSTable can be added or removed from the tracker.
|
||||
bool sstable_belongs_to_tracker(const sstables::shared_sstable& sst);
|
||||
|
||||
void disable() {
|
||||
_disabled = true;
|
||||
_ongoing_writes = {};
|
||||
|
||||
@@ -218,7 +218,7 @@ std::vector<sstables::shared_sstable> compaction_manager::get_candidates(const c
|
||||
auto& cs = cf.get_compaction_strategy();
|
||||
|
||||
// Filter out sstables that are being compacted.
|
||||
for (auto& sst : cf.candidates_for_compaction()) {
|
||||
for (auto& sst : cf.non_staging_sstables()) {
|
||||
if (_compacting_sstables.count(sst)) {
|
||||
continue;
|
||||
}
|
||||
@@ -646,8 +646,8 @@ future<> compaction_manager::rewrite_sstables(column_family* cf, sstables::compa
|
||||
return task->compaction_done.get_future().then([task] {});
|
||||
}
|
||||
|
||||
static bool needs_cleanup(const sstables::shared_sstable& sst,
|
||||
const dht::token_range_vector& owned_ranges,
|
||||
bool needs_cleanup(const sstables::shared_sstable& sst,
|
||||
const dht::token_range_vector& sorted_owned_ranges,
|
||||
schema_ptr s) {
|
||||
auto first = sst->get_first_partition_key();
|
||||
auto last = sst->get_last_partition_key();
|
||||
@@ -655,29 +655,40 @@ static bool needs_cleanup(const sstables::shared_sstable& sst,
|
||||
auto last_token = dht::get_token(*s, last);
|
||||
dht::token_range sst_token_range = dht::token_range::make(first_token, last_token);
|
||||
|
||||
auto r = std::lower_bound(sorted_owned_ranges.begin(), sorted_owned_ranges.end(), first_token,
|
||||
[] (const range<dht::token>& a, const dht::token& b) {
|
||||
// check that range a is before token b.
|
||||
return a.after(b, dht::token_comparator());
|
||||
});
|
||||
|
||||
// return true iff sst partition range isn't fully contained in any of the owned ranges.
|
||||
for (auto& r : owned_ranges) {
|
||||
if (r.contains(sst_token_range, dht::token_comparator())) {
|
||||
if (r != sorted_owned_ranges.end()) {
|
||||
if (r->contains(sst_token_range, dht::token_comparator())) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
future<> compaction_manager::perform_cleanup(column_family* cf) {
|
||||
future<> compaction_manager::perform_cleanup(database& db, column_family* cf) {
|
||||
if (check_for_cleanup(cf)) {
|
||||
throw std::runtime_error(format("cleanup request failed: there is an ongoing cleanup on {}.{}",
|
||||
cf->schema()->ks_name(), cf->schema()->cf_name()));
|
||||
}
|
||||
return rewrite_sstables(cf, sstables::compaction_options::make_cleanup(), [this] (const table& table) {
|
||||
auto schema = table.schema();
|
||||
auto owned_ranges = service::get_local_storage_service().get_local_ranges(schema->ks_name());
|
||||
return seastar::async([this, cf, &db] {
|
||||
auto schema = cf->schema();
|
||||
auto& rs = db.find_keyspace(schema->ks_name()).get_replication_strategy();
|
||||
auto sorted_owned_ranges = rs.get_ranges_in_thread(utils::fb_utilities::get_broadcast_address());
|
||||
auto sstables = std::vector<sstables::shared_sstable>{};
|
||||
const auto candidates = table.candidates_for_compaction();
|
||||
std::copy_if(candidates.begin(), candidates.end(), std::back_inserter(sstables), [&owned_ranges, schema] (const sstables::shared_sstable& sst) {
|
||||
return owned_ranges.empty() || needs_cleanup(sst, owned_ranges, schema);
|
||||
const auto candidates = get_candidates(*cf);
|
||||
std::copy_if(candidates.begin(), candidates.end(), std::back_inserter(sstables), [&sorted_owned_ranges, schema] (const sstables::shared_sstable& sst) {
|
||||
seastar::thread::maybe_yield();
|
||||
return sorted_owned_ranges.empty() || needs_cleanup(sst, sorted_owned_ranges, schema);
|
||||
});
|
||||
return sstables;
|
||||
}).then([this, cf] (std::vector<sstables::shared_sstable> sstables) {
|
||||
return rewrite_sstables(cf, sstables::compaction_options::make_cleanup(),
|
||||
[sstables = std::move(sstables)] (const table&) { return sstables; });
|
||||
});
|
||||
}
|
||||
|
||||
@@ -692,7 +703,7 @@ future<> compaction_manager::perform_sstable_upgrade(column_family* cf, bool exc
|
||||
return cf->run_with_compaction_disabled([this, cf, &tables, exclude_current_version] {
|
||||
auto last_version = cf->get_sstables_manager().get_highest_supported_format();
|
||||
|
||||
for (auto& sst : cf->candidates_for_compaction()) {
|
||||
for (auto& sst : get_candidates(*cf)) {
|
||||
// if we are a "normal" upgrade, we only care about
|
||||
// tables with older versions, but potentially
|
||||
// we are to actually rewrite everything. (-a)
|
||||
@@ -708,8 +719,8 @@ future<> compaction_manager::perform_sstable_upgrade(column_family* cf, bool exc
|
||||
// Note that we potentially could be doing multiple
|
||||
// upgrades here in parallel, but that is really the users
|
||||
// problem.
|
||||
return rewrite_sstables(cf, sstables::compaction_options::make_upgrade(), [&](auto&) {
|
||||
return tables;
|
||||
return rewrite_sstables(cf, sstables::compaction_options::make_upgrade(), [&](auto&) mutable {
|
||||
return std::exchange(tables, {});
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -717,8 +728,8 @@ future<> compaction_manager::perform_sstable_upgrade(column_family* cf, bool exc
|
||||
|
||||
// Submit a column family to be scrubbed and wait for its termination.
|
||||
future<> compaction_manager::perform_sstable_scrub(column_family* cf, bool skip_corrupted) {
|
||||
return rewrite_sstables(cf, sstables::compaction_options::make_scrub(skip_corrupted), [] (const table& cf) {
|
||||
return cf.candidates_for_compaction();
|
||||
return rewrite_sstables(cf, sstables::compaction_options::make_scrub(skip_corrupted), [this] (const table& cf) {
|
||||
return get_candidates(cf);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -795,7 +806,7 @@ double compaction_backlog_tracker::backlog() const {
|
||||
}
|
||||
|
||||
void compaction_backlog_tracker::add_sstable(sstables::shared_sstable sst) {
|
||||
if (_disabled) {
|
||||
if (_disabled || !sstable_belongs_to_tracker(sst)) {
|
||||
return;
|
||||
}
|
||||
_ongoing_writes.erase(sst);
|
||||
@@ -808,7 +819,7 @@ void compaction_backlog_tracker::add_sstable(sstables::shared_sstable sst) {
|
||||
}
|
||||
|
||||
void compaction_backlog_tracker::remove_sstable(sstables::shared_sstable sst) {
|
||||
if (_disabled) {
|
||||
if (_disabled || !sstable_belongs_to_tracker(sst)) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -821,6 +832,10 @@ void compaction_backlog_tracker::remove_sstable(sstables::shared_sstable sst) {
|
||||
}
|
||||
}
|
||||
|
||||
bool compaction_backlog_tracker::sstable_belongs_to_tracker(const sstables::shared_sstable& sst) {
|
||||
return !sst->requires_view_building();
|
||||
}
|
||||
|
||||
void compaction_backlog_tracker::register_partially_written_sstable(sstables::shared_sstable sst, backlog_write_progress_manager& wp) {
|
||||
if (_disabled) {
|
||||
return;
|
||||
|
||||
@@ -175,7 +175,7 @@ public:
|
||||
// Cleanup is about discarding keys that are no longer relevant for a
|
||||
// given sstable, e.g. after node loses part of its token range because
|
||||
// of a newly added node.
|
||||
future<> perform_cleanup(column_family* cf);
|
||||
future<> perform_cleanup(database& db, column_family* cf);
|
||||
|
||||
// Submit a column family to be upgraded and wait for its termination.
|
||||
future<> perform_sstable_upgrade(column_family* cf, bool exclude_current_version);
|
||||
@@ -243,3 +243,5 @@ public:
|
||||
friend class compaction_weight_registration;
|
||||
};
|
||||
|
||||
bool needs_cleanup(const sstables::shared_sstable& sst, const dht::token_range_vector& owned_ranges, schema_ptr s);
|
||||
|
||||
|
||||
@@ -792,7 +792,11 @@ uint64_t time_window_compaction_strategy::adjust_partition_estimate(const mutati
|
||||
}
|
||||
const auto min_window = get_window_for(_options, *ms_meta.min_timestamp);
|
||||
const auto max_window = get_window_for(_options, *ms_meta.max_timestamp);
|
||||
return partition_estimate / (max_window - min_window + 1);
|
||||
const auto window_size = get_window_size(_options);
|
||||
|
||||
auto estimated_window_count = (max_window + (window_size - 1) - min_window) / window_size;
|
||||
|
||||
return partition_estimate / std::max(1UL, uint64_t(estimated_window_count));
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
@@ -401,9 +401,16 @@ private:
|
||||
auto indexes = std::move(entries_reader->_consumer.indexes);
|
||||
return entries_reader->_context.close().then([indexes = std::move(indexes), ex = std::move(ex)] () mutable {
|
||||
if (ex) {
|
||||
std::rethrow_exception(std::move(ex));
|
||||
return do_with(std::move(indexes), [ex = std::move(ex)] (index_list& indexes) mutable {
|
||||
return parallel_for_each(indexes, [] (index_entry& ie) mutable {
|
||||
return ie.close_pi_stream();
|
||||
}).then_wrapped([ex = std::move(ex)] (future<>&& fut) mutable {
|
||||
fut.ignore_ready_future();
|
||||
return make_exception_future<index_list>(std::move(ex));
|
||||
});
|
||||
});
|
||||
}
|
||||
return std::move(indexes);
|
||||
return make_ready_future<index_list>(std::move(indexes));
|
||||
});
|
||||
|
||||
});
|
||||
|
||||
@@ -67,9 +67,13 @@ data_consume_rows<data_consume_rows_context_m>(const schema& s, shared_sstable,
|
||||
static
|
||||
position_in_partition_view get_slice_upper_bound(const schema& s, const query::partition_slice& slice, dht::ring_position_view key) {
|
||||
const auto& ranges = slice.row_ranges(s, *key.key());
|
||||
return ranges.empty()
|
||||
? position_in_partition_view::for_static_row()
|
||||
: position_in_partition_view::for_range_end(ranges.back());
|
||||
if (ranges.empty()) {
|
||||
return position_in_partition_view::for_static_row();
|
||||
}
|
||||
if (slice.options.contains(query::partition_slice::option::reversed)) {
|
||||
return position_in_partition_view::for_range_end(ranges.front());
|
||||
}
|
||||
return position_in_partition_view::for_range_end(ranges.back());
|
||||
}
|
||||
|
||||
GCC6_CONCEPT(
|
||||
|
||||
@@ -305,6 +305,11 @@ public:
|
||||
get_window_for(const time_window_compaction_strategy_options& options, api::timestamp_type ts) {
|
||||
return get_window_lower_bound(options.sstable_window_size, to_timestamp_type(options.timestamp_resolution, ts));
|
||||
}
|
||||
|
||||
static api::timestamp_type
|
||||
get_window_size(const time_window_compaction_strategy_options& options) {
|
||||
return timestamp_type(std::chrono::duration_cast<std::chrono::microseconds>(options.get_sstable_window_size()).count());
|
||||
}
|
||||
private:
|
||||
void update_estimated_compaction_by_tasks(std::map<timestamp_type, std::vector<shared_sstable>>& tasks, int min_threshold) {
|
||||
int64_t n = 0;
|
||||
|
||||
@@ -319,6 +319,15 @@ void stream_session::init_messaging_service_handler() {
|
||||
});
|
||||
}
|
||||
|
||||
future<> stream_session::uninit_messaging_service_handler() {
|
||||
return when_all_succeed(
|
||||
ms().unregister_prepare_message(),
|
||||
ms().unregister_prepare_done_message(),
|
||||
ms().unregister_stream_mutation_fragments(),
|
||||
ms().unregister_stream_mutation_done(),
|
||||
ms().unregister_complete_message()).discard_result();
|
||||
}
|
||||
|
||||
distributed<database>* stream_session::_db;
|
||||
distributed<db::system_distributed_keyspace>* stream_session::_sys_dist_ks;
|
||||
distributed<db::view::view_update_generator>* stream_session::_view_update_generator;
|
||||
@@ -342,9 +351,13 @@ future<> stream_session::init_streaming_service(distributed<database>& db, distr
|
||||
// });
|
||||
return get_stream_manager().start().then([] {
|
||||
gms::get_local_gossiper().register_(get_local_stream_manager().shared_from_this());
|
||||
return _db->invoke_on_all([] (auto& db) {
|
||||
init_messaging_service_handler();
|
||||
});
|
||||
return smp::invoke_on_all([] { init_messaging_service_handler(); });
|
||||
});
|
||||
}
|
||||
|
||||
future<> stream_session::uninit_streaming_service() {
|
||||
return smp::invoke_on_all([] {
|
||||
return uninit_messaging_service_handler();
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -142,6 +142,7 @@ private:
|
||||
using token = dht::token;
|
||||
using ring_position = dht::ring_position;
|
||||
static void init_messaging_service_handler();
|
||||
static future<> uninit_messaging_service_handler();
|
||||
static distributed<database>* _db;
|
||||
static distributed<db::system_distributed_keyspace>* _sys_dist_ks;
|
||||
static distributed<db::view::view_update_generator>* _view_update_generator;
|
||||
@@ -152,6 +153,7 @@ public:
|
||||
static database& get_local_db() { return _db->local(); }
|
||||
static distributed<database>& get_db() { return *_db; };
|
||||
static future<> init_streaming_service(distributed<database>& db, distributed<db::system_distributed_keyspace>& sys_dist_ks, distributed<db::view::view_update_generator>& view_update_generator);
|
||||
static future<> uninit_streaming_service();
|
||||
public:
|
||||
/**
|
||||
* Streaming endpoint.
|
||||
|
||||
@@ -44,6 +44,7 @@
|
||||
#include "streaming/stream_reason.hh"
|
||||
#include "streaming/stream_mutation_fragments_cmd.hh"
|
||||
#include "mutation_reader.hh"
|
||||
#include "flat_mutation_reader.hh"
|
||||
#include "frozen_mutation.hh"
|
||||
#include "mutation.hh"
|
||||
#include "message/messaging_service.hh"
|
||||
@@ -203,15 +204,27 @@ future<> send_mutation_fragments(lw_shared_ptr<send_info> si) {
|
||||
}();
|
||||
|
||||
auto sink_op = [sink, si, got_error_from_peer] () mutable -> future<> {
|
||||
return do_with(std::move(sink), [si, got_error_from_peer] (rpc::sink<frozen_mutation_fragment, stream_mutation_fragments_cmd>& sink) {
|
||||
return repeat([&sink, si, got_error_from_peer] () mutable {
|
||||
return si->reader(db::no_timeout).then([&sink, si, s = si->reader.schema(), got_error_from_peer] (mutation_fragment_opt mf) mutable {
|
||||
if (mf && !(*got_error_from_peer)) {
|
||||
mutation_fragment_stream_validator validator(*(si->reader.schema()));
|
||||
return do_with(std::move(sink), std::move(validator), [si, got_error_from_peer] (rpc::sink<frozen_mutation_fragment, stream_mutation_fragments_cmd>& sink, mutation_fragment_stream_validator& validator) {
|
||||
return repeat([&sink, &validator, si, got_error_from_peer] () mutable {
|
||||
return si->reader(db::no_timeout).then([&sink, &validator, si, s = si->reader.schema(), got_error_from_peer] (mutation_fragment_opt mf) mutable {
|
||||
if (*got_error_from_peer) {
|
||||
return make_exception_future<stop_iteration>(std::runtime_error("Got status error code from peer"));
|
||||
}
|
||||
if (mf) {
|
||||
if (!validator(mf->mutation_fragment_kind())) {
|
||||
return make_exception_future<stop_iteration>(std::runtime_error(format("Stream reader mutation_fragment validator failed, previous={}, current={}",
|
||||
validator.previous_mutation_fragment_kind(), mf->mutation_fragment_kind())));
|
||||
}
|
||||
frozen_mutation_fragment fmf = freeze(*s, *mf);
|
||||
auto size = fmf.representation().size();
|
||||
streaming::get_local_stream_manager().update_progress(si->plan_id, si->id.addr, streaming::progress_info::direction::OUT, size);
|
||||
return sink(fmf, stream_mutation_fragments_cmd::mutation_fragment_data).then([] { return stop_iteration::no; });
|
||||
} else {
|
||||
if (!validator.on_end_of_stream()) {
|
||||
return make_exception_future<stop_iteration>(std::runtime_error(format("Stream reader mutation_fragment validator failed on end_of_stream, previous={}, current=end_of_stream",
|
||||
validator.previous_mutation_fragment_kind())));
|
||||
}
|
||||
return make_ready_future<stop_iteration>(stop_iteration::yes);
|
||||
}
|
||||
});
|
||||
|
||||
40
table.cc
40
table.cc
@@ -1193,7 +1193,7 @@ table::on_compaction_completion(sstables::compaction_completion_desc& desc) {
|
||||
// unbounded time, because all shards must agree on the deletion).
|
||||
|
||||
// make sure all old sstables belong *ONLY* to current shard before we proceed to their deletion.
|
||||
for (auto& sst : desc.input_sstables) {
|
||||
for (auto& sst : desc.old_sstables) {
|
||||
auto shards = sst->get_shards_for_this_sstable();
|
||||
if (shards.size() > 1) {
|
||||
throw std::runtime_error(format("A regular compaction for {}.{} INCORRECTLY used shared sstable {}. Only resharding work with those!",
|
||||
@@ -1207,11 +1207,11 @@ table::on_compaction_completion(sstables::compaction_completion_desc& desc) {
|
||||
|
||||
auto new_compacted_but_not_deleted = _sstables_compacted_but_not_deleted;
|
||||
// rebuilding _sstables_compacted_but_not_deleted first to make the entire rebuild operation exception safe.
|
||||
new_compacted_but_not_deleted.insert(new_compacted_but_not_deleted.end(), desc.input_sstables.begin(), desc.input_sstables.end());
|
||||
new_compacted_but_not_deleted.insert(new_compacted_but_not_deleted.end(), desc.old_sstables.begin(), desc.old_sstables.end());
|
||||
|
||||
_cache.invalidate([this, &desc] () noexcept {
|
||||
// FIXME: this is not really noexcept, but we need to provide strong exception guarantees.
|
||||
rebuild_sstable_list(desc.output_sstables, desc.input_sstables);
|
||||
rebuild_sstable_list(desc.new_sstables, desc.old_sstables);
|
||||
}, std::move(desc.ranges_for_cache_invalidation)).get();
|
||||
|
||||
// refresh underlying data source in row cache to prevent it from holding reference
|
||||
@@ -1222,7 +1222,7 @@ table::on_compaction_completion(sstables::compaction_completion_desc& desc) {
|
||||
|
||||
rebuild_statistics();
|
||||
|
||||
auto f = seastar::with_gate(_sstable_deletion_gate, [this, sstables_to_remove = desc.input_sstables] {
|
||||
auto f = seastar::with_gate(_sstable_deletion_gate, [this, sstables_to_remove = desc.old_sstables] {
|
||||
return with_semaphore(_sstable_deletion_sem, 1, [sstables_to_remove = std::move(sstables_to_remove)] {
|
||||
return sstables::delete_atomically(std::move(sstables_to_remove));
|
||||
});
|
||||
@@ -1240,7 +1240,7 @@ table::on_compaction_completion(sstables::compaction_completion_desc& desc) {
|
||||
// or they could stay forever in the set, resulting in deleted files remaining
|
||||
// opened and disk space not being released until shutdown.
|
||||
std::unordered_set<sstables::shared_sstable> s(
|
||||
desc.input_sstables.begin(), desc.input_sstables.end());
|
||||
desc.old_sstables.begin(), desc.old_sstables.end());
|
||||
auto e = boost::range::remove_if(_sstables_compacted_but_not_deleted, [&] (sstables::shared_sstable sst) -> bool {
|
||||
return s.count(sst);
|
||||
});
|
||||
@@ -1294,21 +1294,21 @@ table::compact_sstables(sstables::compaction_descriptor descriptor) {
|
||||
}
|
||||
|
||||
return with_lock(_sstables_lock.for_read(), [this, descriptor = std::move(descriptor)] () mutable {
|
||||
auto create_sstable = [this] {
|
||||
descriptor.creator = [this] (shard_id dummy) {
|
||||
auto sst = make_sstable();
|
||||
sst->set_unshared();
|
||||
return sst;
|
||||
};
|
||||
auto replace_sstables = [this, release_exhausted = descriptor.release_exhausted] (sstables::compaction_completion_desc desc) {
|
||||
_compaction_strategy.notify_completion(desc.input_sstables, desc.output_sstables);
|
||||
_compaction_manager.propagate_replacement(this, desc.input_sstables, desc.output_sstables);
|
||||
descriptor.replacer = [this, release_exhausted = descriptor.release_exhausted] (sstables::compaction_completion_desc desc) {
|
||||
_compaction_strategy.notify_completion(desc.old_sstables, desc.new_sstables);
|
||||
_compaction_manager.propagate_replacement(this, desc.old_sstables, desc.new_sstables);
|
||||
this->on_compaction_completion(desc);
|
||||
if (release_exhausted) {
|
||||
release_exhausted(desc.input_sstables);
|
||||
release_exhausted(desc.old_sstables);
|
||||
}
|
||||
};
|
||||
|
||||
return sstables::compact_sstables(std::move(descriptor), *this, create_sstable, replace_sstables);
|
||||
return sstables::compact_sstables(std::move(descriptor), *this);
|
||||
}).then([this] (auto info) {
|
||||
if (info.type != sstables::compaction_type::Compaction) {
|
||||
return make_ready_future<>();
|
||||
@@ -1437,7 +1437,7 @@ future<std::unordered_set<sstring>> table::get_sstables_by_partition_key(const s
|
||||
[this] (std::unordered_set<sstring>& filenames, lw_shared_ptr<sstables::sstable_set::incremental_selector>& sel, partition_key& pk) {
|
||||
return do_with(dht::decorated_key(dht::decorate_key(*_schema, pk)),
|
||||
[this, &filenames, &sel, &pk](dht::decorated_key& dk) mutable {
|
||||
auto sst = sel->select(dk).sstables;
|
||||
const auto& sst = sel->select(dk).sstables;
|
||||
auto hk = sstables::sstable::make_hashed_key(*_schema, dk.key());
|
||||
|
||||
return do_for_each(sst, [this, &filenames, &dk, hk = std::move(hk)] (std::vector<sstables::shared_sstable>::const_iterator::reference s) mutable {
|
||||
@@ -1466,7 +1466,7 @@ std::vector<sstables::shared_sstable> table::select_sstables(const dht::partitio
|
||||
return _sstables->select(range);
|
||||
}
|
||||
|
||||
std::vector<sstables::shared_sstable> table::candidates_for_compaction() const {
|
||||
std::vector<sstables::shared_sstable> table::non_staging_sstables() const {
|
||||
return boost::copy_range<std::vector<sstables::shared_sstable>>(*get_sstables()
|
||||
| boost::adaptors::filtered([this] (auto& sst) {
|
||||
return !_sstables_need_rewrite.count(sst->generation()) && !_sstables_staging.count(sst->generation());
|
||||
@@ -2034,6 +2034,11 @@ void table::set_schema(schema_ptr s) {
|
||||
}
|
||||
_schema = std::move(s);
|
||||
|
||||
for (auto&& v : _views) {
|
||||
v->view_info()->set_base_info(
|
||||
v->view_info()->make_base_dependent_view_info(*_schema));
|
||||
}
|
||||
|
||||
set_compaction_strategy(_schema->compaction_strategy());
|
||||
trigger_compaction();
|
||||
}
|
||||
@@ -2045,7 +2050,8 @@ static std::vector<view_ptr>::iterator find_view(std::vector<view_ptr>& views, c
|
||||
}
|
||||
|
||||
void table::add_or_update_view(view_ptr v) {
|
||||
v->view_info()->initialize_base_dependent_fields(*schema());
|
||||
v->view_info()->set_base_info(
|
||||
v->view_info()->make_base_dependent_view_info(*_schema));
|
||||
auto existing = find_view(_views, v);
|
||||
if (existing != _views.end()) {
|
||||
*existing = std::move(v);
|
||||
@@ -2098,7 +2104,7 @@ static size_t memory_usage_of(const std::vector<frozen_mutation_and_schema>& ms)
|
||||
* @return a future resolving to the mutations to apply to the views, which can be empty.
|
||||
*/
|
||||
future<> table::generate_and_propagate_view_updates(const schema_ptr& base,
|
||||
std::vector<view_ptr>&& views,
|
||||
std::vector<db::view::view_and_base>&& views,
|
||||
mutation&& m,
|
||||
flat_mutation_reader_opt existings) const {
|
||||
auto base_token = m.token();
|
||||
@@ -2206,7 +2212,7 @@ table::local_base_lock(
|
||||
* @return a future that resolves when the updates have been acknowledged by the view replicas
|
||||
*/
|
||||
future<> table::populate_views(
|
||||
std::vector<view_ptr> views,
|
||||
std::vector<db::view::view_and_base> views,
|
||||
dht::token base_token,
|
||||
flat_mutation_reader&& reader) {
|
||||
auto& schema = reader.schema();
|
||||
@@ -2527,7 +2533,7 @@ future<row_locker::lock_holder> table::do_push_view_replica_updates(const schema
|
||||
}
|
||||
auto& base = schema();
|
||||
m.upgrade(base);
|
||||
auto views = affected_views(base, m);
|
||||
auto views = db::view::with_base_info_snapshot(affected_views(base, m));
|
||||
if (views.empty()) {
|
||||
return make_ready_future<row_locker::lock_holder>();
|
||||
}
|
||||
|
||||
@@ -28,8 +28,8 @@ fi
|
||||
SCYLLA_IP=127.1.$(($$ >> 8 & 255)).$(($$ & 255))
|
||||
echo "Running Scylla on $SCYLLA_IP"
|
||||
|
||||
tmp_dir=/tmp/alternator-test-$$
|
||||
mkdir $tmp_dir
|
||||
tmp_dir="$(readlink -e ${TMPDIR-/tmp})"/alternator-test-$$
|
||||
mkdir "$tmp_dir"
|
||||
|
||||
# We run the cleanup() function on exit for any reason - successful finish
|
||||
# of the script, an error (since we have "set -e"), or a signal.
|
||||
|
||||
@@ -1351,3 +1351,37 @@ def test_condition_expression_with_forbidden_rmw(scylla_only, dynamodb, test_tab
|
||||
assert test_table_s.get_item(Key={'p': s}, ConsistentRead=True)['Item'] == {'p': s, 'regular': 'write'}
|
||||
test_table_s.update_item(Key={'p': s}, AttributeUpdates={'write': {'Value': 'regular', 'Action': 'PUT'}})
|
||||
assert test_table_s.get_item(Key={'p': s}, ConsistentRead=True)['Item'] == {'p': s, 'regular': 'write', 'write': 'regular'}
|
||||
|
||||
# Reproducer for issue #6573: binary strings should be ordered as unsigned
|
||||
# bytes, i.e., byte 128 comes after 127, not before as with signed bytes.
|
||||
# Test the five ordering operators: <, <=, >, >=, between
|
||||
def test_condition_expression_unsigned_bytes(test_table_s):
|
||||
p = random_string()
|
||||
test_table_s.put_item(Item={'p': p, 'b': bytearray([127])})
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression='b < :oldval',
|
||||
ExpressionAttributeValues={':newval': 1, ':oldval': bytearray([128])})
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 1
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression='b <= :oldval',
|
||||
ExpressionAttributeValues={':newval': 2, ':oldval': bytearray([128])})
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 2
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression='b between :oldval1 and :oldval2',
|
||||
ExpressionAttributeValues={':newval': 3, ':oldval1': bytearray([126]), ':oldval2': bytearray([128])})
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 3
|
||||
|
||||
test_table_s.put_item(Item={'p': p, 'b': bytearray([128])})
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression='b > :oldval',
|
||||
ExpressionAttributeValues={':newval': 4, ':oldval': bytearray([127])})
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 4
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression='b >= :oldval',
|
||||
ExpressionAttributeValues={':newval': 5, ':oldval': bytearray([127])})
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 5
|
||||
|
||||
@@ -1077,3 +1077,42 @@ def test_put_item_expected(test_table_s):
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item'] == {'p': p, 'a': 2}
|
||||
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
|
||||
test_table_s.put_item(Item={'p': p, 'a': 3}, Expected={'a': {'Value': 1}})
|
||||
|
||||
# Reproducer for issue #6573: binary strings should be ordered as unsigned
|
||||
# bytes, i.e., byte 128 comes after 127, not before as with signed bytes.
|
||||
# Test the five ordering operators: LT, LE, GT, GE, BETWEEN
|
||||
def test_update_expected_unsigned_bytes(test_table_s):
|
||||
p = random_string()
|
||||
test_table_s.put_item(Item={'p': p, 'b': bytearray([127])})
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
AttributeUpdates={'z': {'Value': 1, 'Action': 'PUT'}},
|
||||
Expected={'b': {'ComparisonOperator': 'LT',
|
||||
'AttributeValueList': [bytearray([128])]}}
|
||||
)
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 1
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
AttributeUpdates={'z': {'Value': 2, 'Action': 'PUT'}},
|
||||
Expected={'b': {'ComparisonOperator': 'LE',
|
||||
'AttributeValueList': [bytearray([128])]}}
|
||||
)
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 2
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
AttributeUpdates={'z': {'Value': 3, 'Action': 'PUT'}},
|
||||
Expected={'b': {'ComparisonOperator': 'BETWEEN',
|
||||
'AttributeValueList': [bytearray([126]), bytearray([128])]}}
|
||||
)
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 3
|
||||
|
||||
test_table_s.put_item(Item={'p': p, 'b': bytearray([128])})
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
AttributeUpdates={'z': {'Value': 4, 'Action': 'PUT'}},
|
||||
Expected={'b': {'ComparisonOperator': 'GT',
|
||||
'AttributeValueList': [bytearray([127])]}}
|
||||
)
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 4
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
AttributeUpdates={'z': {'Value': 5, 'Action': 'PUT'}},
|
||||
Expected={'b': {'ComparisonOperator': 'GE',
|
||||
'AttributeValueList': [bytearray([127])]}}
|
||||
)
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 5
|
||||
|
||||
@@ -520,6 +520,42 @@ def test_key_condition_expression_and_conditions(test_table_sn_with_sorted_parti
|
||||
'ComparisonOperator': 'GT'}}
|
||||
)
|
||||
|
||||
# Demonstrate that issue #6573 was not a bug for KeyConditionExpression:
|
||||
# binary strings are ordered as unsigned bytes, i.e., byte 128 comes after
|
||||
# 127, not as signed bytes.
|
||||
# Test the five ordering operators: <, <=, >, >=, between
|
||||
def test_key_condition_expression_unsigned_bytes(test_table_sb):
|
||||
p = random_string()
|
||||
items = [{'p': p, 'c': bytearray([i])} for i in range(126,129)]
|
||||
with test_table_sb.batch_writer() as batch:
|
||||
for item in items:
|
||||
batch.put_item(item)
|
||||
got_items = full_query(test_table_sb,
|
||||
KeyConditionExpression='p=:p AND c<:c',
|
||||
ExpressionAttributeValues={':p': p, ':c': bytearray([127])})
|
||||
expected_items = [item for item in items if item['c'] < bytearray([127])]
|
||||
assert(got_items == expected_items)
|
||||
got_items = full_query(test_table_sb,
|
||||
KeyConditionExpression='p=:p AND c<=:c',
|
||||
ExpressionAttributeValues={':p': p, ':c': bytearray([127])})
|
||||
expected_items = [item for item in items if item['c'] <= bytearray([127])]
|
||||
assert(got_items == expected_items)
|
||||
got_items = full_query(test_table_sb,
|
||||
KeyConditionExpression='p=:p AND c>:c',
|
||||
ExpressionAttributeValues={':p': p, ':c': bytearray([127])})
|
||||
expected_items = [item for item in items if item['c'] > bytearray([127])]
|
||||
assert(got_items == expected_items)
|
||||
got_items = full_query(test_table_sb,
|
||||
KeyConditionExpression='p=:p AND c>=:c',
|
||||
ExpressionAttributeValues={':p': p, ':c': bytearray([127])})
|
||||
expected_items = [item for item in items if item['c'] >= bytearray([127])]
|
||||
assert(got_items == expected_items)
|
||||
got_items = full_query(test_table_sb,
|
||||
KeyConditionExpression='p=:p AND c BETWEEN :c1 AND :c2',
|
||||
ExpressionAttributeValues={':p': p, ':c1': bytearray([127]), ':c2': bytearray([128])})
|
||||
expected_items = [item for item in items if item['c'] >= bytearray([127]) and item['c'] <= bytearray([128])]
|
||||
assert(got_items == expected_items)
|
||||
|
||||
# The following is an older test we had, which test one arbitrary use case
|
||||
# for KeyConditionExpression. It uses filled_test_table (the one we also
|
||||
# use in test_scan.py) instead of the fixtures defined in this file.
|
||||
|
||||
@@ -132,6 +132,13 @@ BOOST_AUTO_TEST_CASE(test_big_decimal_div) {
|
||||
test_div("-0.25", 10, "-0.02");
|
||||
test_div("-0.26", 10, "-0.03");
|
||||
test_div("-10E10", 3, "-3E10");
|
||||
|
||||
// Document a small oddity, 1e1 has -1 decimal places, so dividing
|
||||
// it by 2 produces 0. This is not the behavior in cassandra, but
|
||||
// scylla doesn't expose arithmetic operations, so this doesn't
|
||||
// seem to be visible from CQL.
|
||||
test_div("10", 2, "5");
|
||||
test_div("1e1", 2, "0e1");
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(test_big_decimal_assignadd) {
|
||||
|
||||
@@ -142,6 +142,19 @@ SEASTAR_TEST_CASE(test_decimal_to_bigint) {
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_decimal_to_float) {
|
||||
return do_with_cql_env_thread([&](auto& e) {
|
||||
e.execute_cql("CREATE TABLE test (key text primary key, value decimal)").get();
|
||||
e.execute_cql("INSERT INTO test (key, value) VALUES ('k1', 10)").get();
|
||||
e.execute_cql("INSERT INTO test (key, value) VALUES ('k2', 1e1)").get();
|
||||
auto v = e.execute_cql("SELECT key, CAST(value as float) from test").get0();
|
||||
assert_that(v).is_rows().with_rows_ignore_order({
|
||||
{{serialized("k1")}, {serialized(float(10))}},
|
||||
{{serialized("k2")}, {serialized(float(10))}},
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_varint_to_bigint) {
|
||||
return do_with_cql_env_thread([&](auto& e) {
|
||||
e.execute_cql("CREATE TABLE test (key text primary key, value varint)").get();
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
*/
|
||||
|
||||
|
||||
#include <boost/algorithm/string/join.hpp>
|
||||
#include <boost/range/irange.hpp>
|
||||
#include <boost/range/adaptors.hpp>
|
||||
#include <boost/range/algorithm.hpp>
|
||||
@@ -1625,13 +1626,13 @@ SEASTAR_TEST_CASE(test_table_compression) {
|
||||
BOOST_REQUIRE(e.local_db().find_schema("ks", "tb5")->get_compressor_params().get_compressor() == nullptr);
|
||||
|
||||
BOOST_REQUIRE_THROW(e.execute_cql(
|
||||
"create table tb2 (foo text PRIMARY KEY, bar text) with compression = { 'sstable_compression' : 'LossyCompressor' };"),
|
||||
"create table tb2 (foo text PRIMARY KEY, bar text) with compression = { 'sstable_compression' : 'LossyCompressor' };").get(),
|
||||
std::exception);
|
||||
BOOST_REQUIRE_THROW(e.execute_cql(
|
||||
"create table tb2 (foo text PRIMARY KEY, bar text) with compression = { 'sstable_compression' : 'LZ4Compressor', 'chunk_length_kb' : -1 };"),
|
||||
"create table tb2 (foo text PRIMARY KEY, bar text) with compression = { 'sstable_compression' : 'LZ4Compressor', 'chunk_length_kb' : -1 };").get(),
|
||||
std::exception);
|
||||
BOOST_REQUIRE_THROW(e.execute_cql(
|
||||
"create table tb2 (foo text PRIMARY KEY, bar text) with compression = { 'sstable_compression' : 'LZ4Compressor', 'chunk_length_kb' : 3 };"),
|
||||
"create table tb2 (foo text PRIMARY KEY, bar text) with compression = { 'sstable_compression' : 'LZ4Compressor', 'chunk_length_kb' : 3 };").get(),
|
||||
std::exception);
|
||||
|
||||
e.execute_cql("create table tb2 (foo text PRIMARY KEY, bar text) with compression = { 'sstable_compression' : 'LZ4Compressor', 'chunk_length_kb' : 2 };").get();
|
||||
@@ -2717,6 +2718,85 @@ SEASTAR_TEST_CASE(test_reversed_slice_with_empty_range_before_all_rows) {
|
||||
});
|
||||
}
|
||||
|
||||
// Test that the sstable layer correctly handles reversed slices, in particular
|
||||
// slices that read many clustering ranges, such that there is a large enough gap
|
||||
// between the ranges for the reader to attempt to use the promoted index for
|
||||
// skipping between them.
|
||||
// For this reason, the test writes a large partition (10MB), then issues a
|
||||
// reverse query which reads 4 singular clustering ranges from it. The ranges are
|
||||
// constructed such that there is many clustering rows between them: roughly 20%
|
||||
// which is ~2MB.
|
||||
// See #6171
|
||||
SEASTAR_TEST_CASE(test_reversed_slice_with_many_clustering_ranges) {
|
||||
cql_test_config cfg;
|
||||
cfg.db_config->max_memory_for_unlimited_query(std::numeric_limits<uint64_t>::max());
|
||||
return do_with_cql_env_thread([] (cql_test_env& e) {
|
||||
e.execute_cql("CREATE TABLE test (pk int, ck int, v text, PRIMARY KEY (pk, ck));").get();
|
||||
auto id = e.prepare("INSERT INTO test (pk, ck, v) VALUES (?, ?, ?);").get0();
|
||||
|
||||
const int pk = 0;
|
||||
const auto raw_pk = int32_type->decompose(data_value(pk));
|
||||
const auto cql3_pk = cql3::raw_value::make_value(raw_pk);
|
||||
|
||||
const auto value = sstring(1024, 'a');
|
||||
const auto raw_value = utf8_type->decompose(data_value(value));
|
||||
const auto cql3_value = cql3::raw_value::make_value(raw_value);
|
||||
|
||||
const int num_rows = 10 * 1024;
|
||||
|
||||
for (int i = 0; i != num_rows; ++i) {
|
||||
const auto cql3_ck = cql3::raw_value::make_value(int32_type->decompose(data_value(i)));
|
||||
e.execute_prepared(id, {cql3_pk, cql3_ck, cql3_value}).get();
|
||||
}
|
||||
|
||||
e.db().invoke_on_all([] (database& db) {
|
||||
return db.flush_all_memtables();
|
||||
}).get();
|
||||
|
||||
const std::vector<int> selected_cks{
|
||||
2 * (num_rows / 10),
|
||||
4 * (num_rows / 10),
|
||||
6 * (num_rows / 10),
|
||||
8 * (num_rows / 10)};
|
||||
|
||||
const auto make_expected_row = [&] (int ck) -> std::vector<bytes_opt> {
|
||||
return {raw_pk, int32_type->decompose(ck), raw_value};
|
||||
};
|
||||
|
||||
// Many singular ranges - to check that the right range is used for
|
||||
// determining the disk read-range upper bound.
|
||||
{
|
||||
const auto select_query = format(
|
||||
"SELECT * FROM test WHERE pk = {} and ck IN ({}) ORDER BY ck DESC BYPASS CACHE;",
|
||||
pk,
|
||||
boost::algorithm::join(selected_cks | boost::adaptors::transformed([] (int ck) { return format("{}", ck); }), ", "));
|
||||
assert_that(e.execute_cql(select_query).get0())
|
||||
.is_rows()
|
||||
.with_rows(boost::copy_range<std::vector<std::vector<bytes_opt>>>(
|
||||
selected_cks
|
||||
| boost::adaptors::reversed
|
||||
| boost::adaptors::transformed(make_expected_row)));
|
||||
}
|
||||
|
||||
// A single wide range - to check that the right range bound is used for
|
||||
// determining the disk read-range upper bound.
|
||||
{
|
||||
const auto select_query = format(
|
||||
"SELECT * FROM test WHERE pk = {} and ck >= {} and ck <= {} ORDER BY ck DESC BYPASS CACHE;",
|
||||
pk,
|
||||
selected_cks[0],
|
||||
selected_cks[1]);
|
||||
|
||||
assert_that(e.execute_cql(select_query).get0())
|
||||
.is_rows()
|
||||
.with_rows(boost::copy_range<std::vector<std::vector<bytes_opt>>>(
|
||||
boost::irange(selected_cks[0], selected_cks[1] + 1)
|
||||
| boost::adaptors::reversed
|
||||
| boost::adaptors::transformed(make_expected_row)));
|
||||
}
|
||||
}, std::move(cfg));
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_query_with_range_tombstones) {
|
||||
return do_with_cql_env([] (cql_test_env& e) {
|
||||
return seastar::async([&e] {
|
||||
@@ -3363,10 +3443,13 @@ SEASTAR_TEST_CASE(test_select_with_mixed_order_table) {
|
||||
}
|
||||
|
||||
uint64_t
|
||||
run_and_examine_cache_stat_change(cql_test_env& e, uint64_t cache_tracker::stats::*metric, std::function<void (cql_test_env& e)> func) {
|
||||
run_and_examine_cache_read_stats_change(cql_test_env& e, std::string_view cf_name, std::function<void (cql_test_env& e)> func) {
|
||||
auto read_stat = [&] {
|
||||
auto local_read_metric = [metric] (database& db) { return db.row_cache_tracker().get_stats().*metric; };
|
||||
return e.db().map_reduce0(local_read_metric, uint64_t(0), std::plus<uint64_t>()).get0();
|
||||
return e.db().map_reduce0([&cf_name] (const database& db) {
|
||||
auto& t = db.find_column_family("ks", sstring(cf_name));
|
||||
auto& stats = t.get_row_cache().stats();
|
||||
return stats.reads_with_misses.count() + stats.reads_with_no_misses.count();
|
||||
}, uint64_t(0), std::plus<uint64_t>()).get0();
|
||||
};
|
||||
auto before = read_stat();
|
||||
func(e);
|
||||
@@ -3377,11 +3460,11 @@ run_and_examine_cache_stat_change(cql_test_env& e, uint64_t cache_tracker::stats
|
||||
SEASTAR_TEST_CASE(test_cache_bypass) {
|
||||
return do_with_cql_env_thread([] (cql_test_env& e) {
|
||||
e.execute_cql("CREATE TABLE t (k int PRIMARY KEY)").get();
|
||||
auto with_cache = run_and_examine_cache_stat_change(e, &cache_tracker::stats::reads, [] (cql_test_env& e) {
|
||||
auto with_cache = run_and_examine_cache_read_stats_change(e, "t", [] (cql_test_env& e) {
|
||||
e.execute_cql("SELECT * FROM t").get();
|
||||
});
|
||||
BOOST_REQUIRE(with_cache >= smp::count); // scan may make multiple passes per shard
|
||||
auto without_cache = run_and_examine_cache_stat_change(e, &cache_tracker::stats::reads, [] (cql_test_env& e) {
|
||||
auto without_cache = run_and_examine_cache_read_stats_change(e, "t", [] (cql_test_env& e) {
|
||||
e.execute_cql("SELECT * FROM t BYPASS CACHE").get();
|
||||
});
|
||||
BOOST_REQUIRE_EQUAL(without_cache, 0);
|
||||
@@ -4414,3 +4497,41 @@ SEASTAR_TEST_CASE(test_range_deletions_for_specific_column) {
|
||||
exceptions::invalid_request_exception);
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_alter_table_default_ttl_reset) {
|
||||
return do_with_cql_env([](cql_test_env& e) {
|
||||
return seastar::async([&e] {
|
||||
e.execute_cql("CREATE TABLE tbl (a int, b int, PRIMARY KEY (a)) WITH default_time_to_live=10").get();
|
||||
BOOST_REQUIRE(e.local_db().find_schema("ks", "tbl")->default_time_to_live().count() == 10);
|
||||
e.execute_cql("ALTER TABLE tbl WITH gc_grace_seconds=0").get();
|
||||
BOOST_REQUIRE(e.local_db().find_schema("ks", "tbl")->default_time_to_live().count() == 10);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_impossible_where) {
|
||||
return do_with_cql_env_thread([] (cql_test_env& e) {
|
||||
cquery_nofail(e, "CREATE TABLE t(p int PRIMARY KEY, r int)");
|
||||
cquery_nofail(e, "INSERT INTO t(p,r) VALUES (0, 0)");
|
||||
cquery_nofail(e, "INSERT INTO t(p,r) VALUES (1, 10)");
|
||||
cquery_nofail(e, "INSERT INTO t(p,r) VALUES (2, 20)");
|
||||
require_rows(e, "SELECT * FROM t WHERE r>10 AND r<10 ALLOW FILTERING", {});
|
||||
require_rows(e, "SELECT * FROM t WHERE r>=10 AND r<=0 ALLOW FILTERING", {});
|
||||
|
||||
cquery_nofail(e, "CREATE TABLE t2(p int, c int, PRIMARY KEY(p, c)) WITH CLUSTERING ORDER BY (c DESC)");
|
||||
cquery_nofail(e, "INSERT INTO t2(p,c) VALUES (0, 0)");
|
||||
cquery_nofail(e, "INSERT INTO t2(p,c) VALUES (1, 10)");
|
||||
cquery_nofail(e, "INSERT INTO t2(p,c) VALUES (2, 20)");
|
||||
require_rows(e, "SELECT * FROM t2 WHERE c>10 AND c<10 ALLOW FILTERING", {});
|
||||
require_rows(e, "SELECT * FROM t2 WHERE c>=10 AND c<=0 ALLOW FILTERING", {});
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_counter_column_added_into_non_counter_table) {
|
||||
return do_with_cql_env_thread([] (cql_test_env& e) {
|
||||
cquery_nofail(e, "CREATE TABLE t (pk int, ck int, PRIMARY KEY(pk, ck))");
|
||||
|
||||
BOOST_REQUIRE_THROW(e.execute_cql("ALTER TABLE t ADD \"c\" counter;").get(),
|
||||
exceptions::configuration_exception);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1134,6 +1134,9 @@ SEASTAR_TEST_CASE(test_filtering) {
|
||||
{ int32_type->decompose(8), int32_type->decompose(3) },
|
||||
{ int32_type->decompose(9), int32_type->decompose(3) },
|
||||
});
|
||||
require_rows(e, "SELECT k FROM cf WHERE k=12 AND (m,n)>=(4,0) ALLOW FILTERING;", {
|
||||
{ int32_type->decompose(12), int32_type->decompose(4), int32_type->decompose(5)},
|
||||
});
|
||||
}
|
||||
|
||||
// test filtering on clustering keys
|
||||
|
||||
@@ -43,6 +43,7 @@
|
||||
#include "test/lib/make_random_string.hh"
|
||||
#include "test/lib/dummy_partitioner.hh"
|
||||
#include "test/lib/reader_lifecycle_policy.hh"
|
||||
#include "test/lib/random_utils.hh"
|
||||
|
||||
#include "dht/sharder.hh"
|
||||
#include "mutation_reader.hh"
|
||||
@@ -2737,3 +2738,597 @@ SEASTAR_THREAD_TEST_CASE(test_compacting_reader_next_partition) {
|
||||
}
|
||||
reader_assertions.produces_end_of_stream();
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_auto_paused_evictable_reader_is_mutation_source) {
|
||||
auto make_populate = [] (schema_ptr s, const std::vector<mutation>& mutations, gc_clock::time_point query_time) {
|
||||
auto mt = make_lw_shared<memtable>(s);
|
||||
for (auto& mut : mutations) {
|
||||
mt->apply(mut);
|
||||
}
|
||||
auto sem = make_lw_shared<reader_concurrency_semaphore>(reader_concurrency_semaphore::no_limits());
|
||||
return mutation_source([=] (
|
||||
schema_ptr s,
|
||||
reader_permit permit,
|
||||
const dht::partition_range& range,
|
||||
const query::partition_slice& slice,
|
||||
const io_priority_class& pc,
|
||||
tracing::trace_state_ptr trace_state,
|
||||
streamed_mutation::forwarding fwd_sm,
|
||||
mutation_reader::forwarding fwd_mr) mutable {
|
||||
auto mr = make_auto_paused_evictable_reader(mt->as_data_source(), std::move(s), *sem, range, slice, pc, std::move(trace_state), fwd_mr);
|
||||
if (fwd_sm == streamed_mutation::forwarding::yes) {
|
||||
return make_forwardable(std::move(mr));
|
||||
}
|
||||
return mr;
|
||||
});
|
||||
};
|
||||
|
||||
run_mutation_source_tests(make_populate);
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_manual_paused_evictable_reader_is_mutation_source) {
|
||||
class maybe_pausing_reader : public flat_mutation_reader::impl {
|
||||
flat_mutation_reader _reader;
|
||||
std::optional<evictable_reader_handle> _handle;
|
||||
|
||||
private:
|
||||
void maybe_pause() {
|
||||
if (!tests::random::get_int(0, 4)) {
|
||||
_handle->pause();
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
maybe_pausing_reader(
|
||||
memtable& mt,
|
||||
reader_concurrency_semaphore& semaphore,
|
||||
const dht::partition_range& pr,
|
||||
const query::partition_slice& ps,
|
||||
const io_priority_class& pc,
|
||||
tracing::trace_state_ptr trace_state,
|
||||
mutation_reader::forwarding fwd_mr)
|
||||
: impl(mt.schema()), _reader(nullptr) {
|
||||
std::tie(_reader, _handle) = make_manually_paused_evictable_reader(mt.as_data_source(), mt.schema(), semaphore, pr, ps, pc,
|
||||
std::move(trace_state), fwd_mr);
|
||||
}
|
||||
virtual future<> fill_buffer(db::timeout_clock::time_point timeout) override {
|
||||
return _reader.fill_buffer(timeout).then([this] {
|
||||
_end_of_stream = _reader.is_end_of_stream();
|
||||
_reader.move_buffer_content_to(*this);
|
||||
}).then([this] {
|
||||
maybe_pause();
|
||||
});
|
||||
}
|
||||
virtual void next_partition() override {
|
||||
clear_buffer_to_next_partition();
|
||||
if (!is_buffer_empty()) {
|
||||
return;
|
||||
}
|
||||
_end_of_stream = false;
|
||||
_reader.next_partition();
|
||||
}
|
||||
virtual future<> fast_forward_to(const dht::partition_range& pr, db::timeout_clock::time_point timeout) override {
|
||||
clear_buffer();
|
||||
_end_of_stream = false;
|
||||
return _reader.fast_forward_to(pr, timeout).then([this] {
|
||||
maybe_pause();
|
||||
});
|
||||
}
|
||||
virtual future<> fast_forward_to(position_range pr, db::timeout_clock::time_point timeout) override {
|
||||
throw_with_backtrace<std::bad_function_call>();
|
||||
}
|
||||
virtual size_t buffer_size() const override {
|
||||
return flat_mutation_reader::impl::buffer_size() + _reader.buffer_size();
|
||||
}
|
||||
};
|
||||
|
||||
auto make_populate = [this] (schema_ptr s, const std::vector<mutation>& mutations, gc_clock::time_point query_time) {
|
||||
auto mt = make_lw_shared<memtable>(s);
|
||||
for (auto& mut : mutations) {
|
||||
mt->apply(mut);
|
||||
}
|
||||
auto sem = make_lw_shared<reader_concurrency_semaphore>(reader_concurrency_semaphore::no_limits());
|
||||
return mutation_source([=] (
|
||||
schema_ptr s,
|
||||
reader_permit permit,
|
||||
const dht::partition_range& range,
|
||||
const query::partition_slice& slice,
|
||||
const io_priority_class& pc,
|
||||
tracing::trace_state_ptr trace_state,
|
||||
streamed_mutation::forwarding fwd_sm,
|
||||
mutation_reader::forwarding fwd_mr) mutable {
|
||||
auto mr = make_flat_mutation_reader<maybe_pausing_reader>(*mt, *sem, range, slice, pc, std::move(trace_state), fwd_mr);
|
||||
if (fwd_sm == streamed_mutation::forwarding::yes) {
|
||||
return make_forwardable(std::move(mr));
|
||||
}
|
||||
return mr;
|
||||
});
|
||||
};
|
||||
|
||||
run_mutation_source_tests(make_populate);
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
std::deque<mutation_fragment> copy_fragments(const schema& s, const std::deque<mutation_fragment>& o) {
|
||||
std::deque<mutation_fragment> buf;
|
||||
for (const auto& mf : o) {
|
||||
buf.emplace_back(s, mf);
|
||||
}
|
||||
return buf;
|
||||
}
|
||||
|
||||
flat_mutation_reader create_evictable_reader_and_evict_after_first_buffer(
|
||||
schema_ptr schema,
|
||||
reader_concurrency_semaphore& semaphore,
|
||||
const dht::partition_range& prange,
|
||||
const query::partition_slice& slice,
|
||||
std::deque<mutation_fragment> first_buffer,
|
||||
position_in_partition_view last_fragment_position,
|
||||
std::deque<mutation_fragment> second_buffer,
|
||||
size_t max_buffer_size) {
|
||||
class factory {
|
||||
schema_ptr _schema;
|
||||
std::optional<std::deque<mutation_fragment>> _first_buffer;
|
||||
std::optional<std::deque<mutation_fragment>> _second_buffer;
|
||||
size_t _max_buffer_size;
|
||||
|
||||
private:
|
||||
std::optional<std::deque<mutation_fragment>> copy_buffer(const std::optional<std::deque<mutation_fragment>>& o) {
|
||||
if (!o) {
|
||||
return {};
|
||||
}
|
||||
return copy_fragments(*_schema, *o);
|
||||
}
|
||||
|
||||
public:
|
||||
factory(schema_ptr schema, std::deque<mutation_fragment> first_buffer, std::deque<mutation_fragment> second_buffer, size_t max_buffer_size)
|
||||
: _schema(std::move(schema)), _first_buffer(std::move(first_buffer)), _second_buffer(std::move(second_buffer)), _max_buffer_size(max_buffer_size) {
|
||||
}
|
||||
|
||||
factory(const factory& o)
|
||||
: _schema(o._schema)
|
||||
, _first_buffer(copy_buffer(o._first_buffer))
|
||||
, _second_buffer(copy_buffer(o._second_buffer)) {
|
||||
}
|
||||
factory(factory&& o) = default;
|
||||
|
||||
flat_mutation_reader operator()(
|
||||
schema_ptr s,
|
||||
reader_permit permit,
|
||||
const dht::partition_range& range,
|
||||
const query::partition_slice& slice,
|
||||
const io_priority_class& pc,
|
||||
tracing::trace_state_ptr trace_state,
|
||||
streamed_mutation::forwarding fwd_sm,
|
||||
mutation_reader::forwarding fwd_mr) {
|
||||
BOOST_REQUIRE(s == _schema);
|
||||
if (_first_buffer) {
|
||||
auto buf = *std::exchange(_first_buffer, {});
|
||||
auto rd = make_flat_mutation_reader_from_fragments(_schema, std::move(buf));
|
||||
rd.set_max_buffer_size(_max_buffer_size);
|
||||
return rd;
|
||||
}
|
||||
if (_second_buffer) {
|
||||
auto buf = *std::exchange(_second_buffer, {});
|
||||
auto rd = make_flat_mutation_reader_from_fragments(_schema, std::move(buf));
|
||||
rd.set_max_buffer_size(_max_buffer_size);
|
||||
return rd;
|
||||
}
|
||||
return make_empty_flat_reader(_schema);
|
||||
}
|
||||
};
|
||||
auto ms = mutation_source(factory(schema, std::move(first_buffer), std::move(second_buffer), max_buffer_size));
|
||||
|
||||
auto [rd, handle] = make_manually_paused_evictable_reader(
|
||||
std::move(ms),
|
||||
schema,
|
||||
semaphore,
|
||||
prange,
|
||||
slice,
|
||||
seastar::default_priority_class(),
|
||||
nullptr,
|
||||
mutation_reader::forwarding::yes);
|
||||
|
||||
rd.set_max_buffer_size(max_buffer_size);
|
||||
|
||||
rd.fill_buffer(db::no_timeout).get0();
|
||||
|
||||
const auto eq_cmp = position_in_partition::equal_compare(*schema);
|
||||
BOOST_REQUIRE(rd.is_buffer_full());
|
||||
BOOST_REQUIRE(eq_cmp(rd.buffer().back().position(), last_fragment_position));
|
||||
BOOST_REQUIRE(!rd.is_end_of_stream());
|
||||
|
||||
rd.detach_buffer();
|
||||
|
||||
handle.pause();
|
||||
|
||||
while(semaphore.try_evict_one_inactive_read());
|
||||
|
||||
return std::move(rd);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_evictable_reader_trim_range_tombstones) {
|
||||
reader_concurrency_semaphore semaphore(reader_concurrency_semaphore::no_limits{});
|
||||
simple_schema s;
|
||||
|
||||
const auto pkey = s.make_pkey();
|
||||
size_t max_buffer_size = 512;
|
||||
const int first_ck = 100;
|
||||
const int second_buffer_ck = first_ck + 100;
|
||||
|
||||
size_t mem_usage = 0;
|
||||
|
||||
std::deque<mutation_fragment> first_buffer;
|
||||
first_buffer.emplace_back(partition_start{pkey, {}});
|
||||
mem_usage = first_buffer.back().memory_usage(*s.schema());
|
||||
for (int i = 0; i < second_buffer_ck; ++i) {
|
||||
first_buffer.emplace_back(s.make_row(s.make_ckey(i++), "v"));
|
||||
mem_usage += first_buffer.back().memory_usage(*s.schema());
|
||||
}
|
||||
const auto last_fragment_position = position_in_partition(first_buffer.back().position());
|
||||
max_buffer_size = mem_usage;
|
||||
first_buffer.emplace_back(s.make_row(s.make_ckey(second_buffer_ck), "v"));
|
||||
|
||||
std::deque<mutation_fragment> second_buffer;
|
||||
second_buffer.emplace_back(partition_start{pkey, {}});
|
||||
mem_usage = second_buffer.back().memory_usage(*s.schema());
|
||||
second_buffer.emplace_back(s.make_range_tombstone(query::clustering_range::make_ending_with(s.make_ckey(second_buffer_ck + 10))));
|
||||
int ckey = second_buffer_ck;
|
||||
while (mem_usage <= max_buffer_size) {
|
||||
second_buffer.emplace_back(s.make_row(s.make_ckey(ckey++), "v"));
|
||||
mem_usage += second_buffer.back().memory_usage(*s.schema());
|
||||
}
|
||||
second_buffer.emplace_back(partition_end{});
|
||||
|
||||
auto rd = create_evictable_reader_and_evict_after_first_buffer(s.schema(), semaphore, query::full_partition_range,
|
||||
s.schema()->full_slice(), std::move(first_buffer), last_fragment_position, std::move(second_buffer), max_buffer_size);
|
||||
|
||||
rd.fill_buffer(db::no_timeout).get();
|
||||
|
||||
const auto tri_cmp = position_in_partition::tri_compare(*s.schema());
|
||||
|
||||
BOOST_REQUIRE(tri_cmp(last_fragment_position, rd.peek_buffer().position()) < 0);
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
void check_evictable_reader_validation_is_triggered(
|
||||
std::string_view test_name,
|
||||
std::string_view error_prefix, // empty str if no exception is expected
|
||||
schema_ptr schema,
|
||||
reader_concurrency_semaphore& semaphore,
|
||||
const dht::partition_range& prange,
|
||||
const query::partition_slice& slice,
|
||||
std::deque<mutation_fragment> first_buffer,
|
||||
position_in_partition_view last_fragment_position,
|
||||
std::deque<mutation_fragment> second_buffer,
|
||||
size_t max_buffer_size) {
|
||||
|
||||
testlog.info("check_evictable_reader_validation_is_triggered(): checking {} test case: {}", error_prefix.empty() ? "positive" : "negative", test_name);
|
||||
|
||||
auto rd = create_evictable_reader_and_evict_after_first_buffer(std::move(schema), semaphore, prange, slice, std::move(first_buffer),
|
||||
last_fragment_position, std::move(second_buffer), max_buffer_size);
|
||||
|
||||
const bool fail = !error_prefix.empty();
|
||||
|
||||
try {
|
||||
rd.fill_buffer(db::no_timeout).get0();
|
||||
} catch (std::runtime_error& e) {
|
||||
if (fail) {
|
||||
if (error_prefix == std::string_view(e.what(), error_prefix.size())) {
|
||||
testlog.trace("Expected exception caught: {}", std::current_exception());
|
||||
return;
|
||||
} else {
|
||||
BOOST_FAIL(fmt::format("Exception with unexpected message caught: {}", std::current_exception()));
|
||||
}
|
||||
} else {
|
||||
BOOST_FAIL(fmt::format("Unexpected exception caught: {}", std::current_exception()));
|
||||
}
|
||||
}
|
||||
if (fail) {
|
||||
BOOST_FAIL(fmt::format("Expected exception not thrown"));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_evictable_reader_self_validation) {
|
||||
set_abort_on_internal_error(false);
|
||||
auto reset_on_internal_abort = defer([] {
|
||||
set_abort_on_internal_error(true);
|
||||
});
|
||||
|
||||
reader_concurrency_semaphore semaphore(reader_concurrency_semaphore::no_limits{});
|
||||
simple_schema s;
|
||||
|
||||
auto pkeys = s.make_pkeys(4);
|
||||
boost::sort(pkeys, dht::decorated_key::less_comparator(s.schema()));
|
||||
|
||||
size_t max_buffer_size = 512;
|
||||
const int first_ck = 100;
|
||||
const int second_buffer_ck = first_ck + 100;
|
||||
const int last_ck = second_buffer_ck + 100;
|
||||
|
||||
static const char partition_error_prefix[] = "maybe_validate_partition_start(): validation failed";
|
||||
static const char position_in_partition_error_prefix[] = "validate_position_in_partition(): validation failed";
|
||||
static const char trim_range_tombstones_error_prefix[] = "maybe_trim_range_tombstone(): validation failed";
|
||||
|
||||
const auto prange = dht::partition_range::make(
|
||||
dht::partition_range::bound(pkeys[1], true),
|
||||
dht::partition_range::bound(pkeys[2], true));
|
||||
|
||||
const auto ckrange = query::clustering_range::make(
|
||||
query::clustering_range::bound(s.make_ckey(first_ck), true),
|
||||
query::clustering_range::bound(s.make_ckey(last_ck), true));
|
||||
|
||||
const auto slice = partition_slice_builder(*s.schema()).with_range(ckrange).build();
|
||||
|
||||
std::deque<mutation_fragment> first_buffer;
|
||||
first_buffer.emplace_back(partition_start{pkeys[1], {}});
|
||||
size_t mem_usage = first_buffer.back().memory_usage(*s.schema());
|
||||
for (int i = 0; i < second_buffer_ck; ++i) {
|
||||
first_buffer.emplace_back(s.make_row(s.make_ckey(i++), "v"));
|
||||
mem_usage += first_buffer.back().memory_usage(*s.schema());
|
||||
}
|
||||
max_buffer_size = mem_usage;
|
||||
auto last_fragment_position = position_in_partition(first_buffer.back().position());
|
||||
first_buffer.emplace_back(s.make_row(s.make_ckey(second_buffer_ck), "v"));
|
||||
|
||||
auto make_second_buffer = [&s, &max_buffer_size, second_buffer_ck] (dht::decorated_key pkey, std::optional<int> first_ckey = {},
|
||||
bool inject_range_tombstone = false) mutable {
|
||||
auto ckey = first_ckey ? *first_ckey : second_buffer_ck;
|
||||
std::deque<mutation_fragment> second_buffer;
|
||||
second_buffer.emplace_back(partition_start{std::move(pkey), {}});
|
||||
size_t mem_usage = second_buffer.back().memory_usage(*s.schema());
|
||||
if (inject_range_tombstone) {
|
||||
second_buffer.emplace_back(s.make_range_tombstone(query::clustering_range::make_ending_with(s.make_ckey(last_ck))));
|
||||
}
|
||||
while (mem_usage <= max_buffer_size) {
|
||||
second_buffer.emplace_back(s.make_row(s.make_ckey(ckey++), "v"));
|
||||
mem_usage += second_buffer.back().memory_usage(*s.schema());
|
||||
}
|
||||
second_buffer.emplace_back(partition_end{});
|
||||
return second_buffer;
|
||||
};
|
||||
|
||||
//
|
||||
// Continuing the same partition
|
||||
//
|
||||
|
||||
check_evictable_reader_validation_is_triggered(
|
||||
"pkey < _last_pkey; pkey ∉ prange",
|
||||
partition_error_prefix,
|
||||
s.schema(),
|
||||
semaphore,
|
||||
prange,
|
||||
slice,
|
||||
copy_fragments(*s.schema(), first_buffer),
|
||||
last_fragment_position,
|
||||
make_second_buffer(pkeys[0]),
|
||||
max_buffer_size);
|
||||
|
||||
check_evictable_reader_validation_is_triggered(
|
||||
"pkey == _last_pkey",
|
||||
"",
|
||||
s.schema(),
|
||||
semaphore,
|
||||
prange,
|
||||
slice,
|
||||
copy_fragments(*s.schema(), first_buffer),
|
||||
last_fragment_position,
|
||||
make_second_buffer(pkeys[1]),
|
||||
max_buffer_size);
|
||||
|
||||
check_evictable_reader_validation_is_triggered(
|
||||
"pkey == _last_pkey; position_in_partition ∉ ckrange (<)",
|
||||
position_in_partition_error_prefix,
|
||||
s.schema(),
|
||||
semaphore,
|
||||
prange,
|
||||
slice,
|
||||
copy_fragments(*s.schema(), first_buffer),
|
||||
last_fragment_position,
|
||||
make_second_buffer(pkeys[1], first_ck - 10),
|
||||
max_buffer_size);
|
||||
|
||||
check_evictable_reader_validation_is_triggered(
|
||||
"pkey == _last_pkey; position_in_partition ∉ ckrange (<); start with trimmable range-tombstone",
|
||||
position_in_partition_error_prefix,
|
||||
s.schema(),
|
||||
semaphore,
|
||||
prange,
|
||||
slice,
|
||||
copy_fragments(*s.schema(), first_buffer),
|
||||
last_fragment_position,
|
||||
make_second_buffer(pkeys[1], first_ck - 10, true),
|
||||
max_buffer_size);
|
||||
|
||||
check_evictable_reader_validation_is_triggered(
|
||||
"pkey == _last_pkey; position_in_partition ∉ ckrange; position_in_partition < _next_position_in_partition",
|
||||
position_in_partition_error_prefix,
|
||||
s.schema(),
|
||||
semaphore,
|
||||
prange,
|
||||
slice,
|
||||
copy_fragments(*s.schema(), first_buffer),
|
||||
last_fragment_position,
|
||||
make_second_buffer(pkeys[1], second_buffer_ck - 2),
|
||||
max_buffer_size);
|
||||
|
||||
check_evictable_reader_validation_is_triggered(
|
||||
"pkey == _last_pkey; position_in_partition ∉ ckrange; position_in_partition < _next_position_in_partition; start with trimmable range-tombstone",
|
||||
position_in_partition_error_prefix,
|
||||
s.schema(),
|
||||
semaphore,
|
||||
prange,
|
||||
slice,
|
||||
copy_fragments(*s.schema(), first_buffer),
|
||||
last_fragment_position,
|
||||
make_second_buffer(pkeys[1], second_buffer_ck - 2, true),
|
||||
max_buffer_size);
|
||||
|
||||
{
|
||||
auto second_buffer = make_second_buffer(pkeys[1], second_buffer_ck);
|
||||
second_buffer[1] = s.make_range_tombstone(query::clustering_range::make_ending_with(s.make_ckey(second_buffer_ck - 10)));
|
||||
check_evictable_reader_validation_is_triggered(
|
||||
"pkey == _last_pkey; end(range_tombstone) < _next_position_in_partition",
|
||||
trim_range_tombstones_error_prefix,
|
||||
s.schema(),
|
||||
semaphore,
|
||||
prange,
|
||||
slice,
|
||||
copy_fragments(*s.schema(), first_buffer),
|
||||
last_fragment_position,
|
||||
std::move(second_buffer),
|
||||
max_buffer_size);
|
||||
}
|
||||
|
||||
{
|
||||
auto second_buffer = make_second_buffer(pkeys[1], second_buffer_ck);
|
||||
second_buffer[1] = s.make_range_tombstone(query::clustering_range::make_ending_with(s.make_ckey(second_buffer_ck + 10)));
|
||||
check_evictable_reader_validation_is_triggered(
|
||||
"pkey == _last_pkey; end(range_tombstone) > _next_position_in_partition",
|
||||
"",
|
||||
s.schema(),
|
||||
semaphore,
|
||||
prange,
|
||||
slice,
|
||||
copy_fragments(*s.schema(), first_buffer),
|
||||
last_fragment_position,
|
||||
std::move(second_buffer),
|
||||
max_buffer_size);
|
||||
}
|
||||
|
||||
{
|
||||
auto second_buffer = make_second_buffer(pkeys[1], second_buffer_ck);
|
||||
second_buffer[1] = s.make_range_tombstone(query::clustering_range::make_starting_with(s.make_ckey(last_ck + 10)));
|
||||
check_evictable_reader_validation_is_triggered(
|
||||
"pkey == _last_pkey; start(range_tombstone) ∉ ckrange (>)",
|
||||
position_in_partition_error_prefix,
|
||||
s.schema(),
|
||||
semaphore,
|
||||
prange,
|
||||
slice,
|
||||
copy_fragments(*s.schema(), first_buffer),
|
||||
last_fragment_position,
|
||||
std::move(second_buffer),
|
||||
max_buffer_size);
|
||||
}
|
||||
|
||||
check_evictable_reader_validation_is_triggered(
|
||||
"pkey == _last_pkey; position_in_partition ∈ ckrange",
|
||||
"",
|
||||
s.schema(),
|
||||
semaphore,
|
||||
prange,
|
||||
slice,
|
||||
copy_fragments(*s.schema(), first_buffer),
|
||||
last_fragment_position,
|
||||
make_second_buffer(pkeys[1], second_buffer_ck),
|
||||
max_buffer_size);
|
||||
|
||||
check_evictable_reader_validation_is_triggered(
|
||||
"pkey == _last_pkey; position_in_partition ∉ ckrange (>)",
|
||||
position_in_partition_error_prefix,
|
||||
s.schema(),
|
||||
semaphore,
|
||||
prange,
|
||||
slice,
|
||||
copy_fragments(*s.schema(), first_buffer),
|
||||
last_fragment_position,
|
||||
make_second_buffer(pkeys[1], last_ck + 10),
|
||||
max_buffer_size);
|
||||
|
||||
check_evictable_reader_validation_is_triggered(
|
||||
"pkey > _last_pkey; pkey ∈ pkrange",
|
||||
partition_error_prefix,
|
||||
s.schema(),
|
||||
semaphore,
|
||||
prange,
|
||||
slice,
|
||||
copy_fragments(*s.schema(), first_buffer),
|
||||
last_fragment_position,
|
||||
make_second_buffer(pkeys[2]),
|
||||
max_buffer_size);
|
||||
|
||||
check_evictable_reader_validation_is_triggered(
|
||||
"pkey > _last_pkey; pkey ∉ pkrange",
|
||||
partition_error_prefix,
|
||||
s.schema(),
|
||||
semaphore,
|
||||
prange,
|
||||
slice,
|
||||
copy_fragments(*s.schema(), first_buffer),
|
||||
last_fragment_position,
|
||||
make_second_buffer(pkeys[3]),
|
||||
max_buffer_size);
|
||||
|
||||
//
|
||||
// Continuing from next partition
|
||||
//
|
||||
|
||||
first_buffer.clear();
|
||||
|
||||
first_buffer.emplace_back(partition_start{pkeys[1], {}});
|
||||
mem_usage = first_buffer.back().memory_usage(*s.schema());
|
||||
for (int i = 0; i < second_buffer_ck; ++i) {
|
||||
first_buffer.emplace_back(s.make_row(s.make_ckey(i++), "v"));
|
||||
mem_usage += first_buffer.back().memory_usage(*s.schema());
|
||||
}
|
||||
first_buffer.emplace_back(partition_end{});
|
||||
mem_usage += first_buffer.back().memory_usage(*s.schema());
|
||||
last_fragment_position = position_in_partition(first_buffer.back().position());
|
||||
max_buffer_size = mem_usage;
|
||||
first_buffer.emplace_back(partition_start{pkeys[2], {}});
|
||||
|
||||
check_evictable_reader_validation_is_triggered(
|
||||
"pkey < _last_pkey; pkey ∉ pkrange",
|
||||
partition_error_prefix,
|
||||
s.schema(),
|
||||
semaphore,
|
||||
prange,
|
||||
slice,
|
||||
copy_fragments(*s.schema(), first_buffer),
|
||||
last_fragment_position,
|
||||
make_second_buffer(pkeys[0]),
|
||||
max_buffer_size);
|
||||
|
||||
check_evictable_reader_validation_is_triggered(
|
||||
"pkey == _last_pkey",
|
||||
partition_error_prefix,
|
||||
s.schema(),
|
||||
semaphore,
|
||||
prange,
|
||||
slice,
|
||||
copy_fragments(*s.schema(), first_buffer),
|
||||
last_fragment_position,
|
||||
make_second_buffer(pkeys[1]),
|
||||
max_buffer_size);
|
||||
|
||||
check_evictable_reader_validation_is_triggered(
|
||||
"pkey > _last_pkey; pkey ∈ pkrange",
|
||||
"",
|
||||
s.schema(),
|
||||
semaphore,
|
||||
prange,
|
||||
slice,
|
||||
copy_fragments(*s.schema(), first_buffer),
|
||||
last_fragment_position,
|
||||
make_second_buffer(pkeys[2]),
|
||||
max_buffer_size);
|
||||
|
||||
check_evictable_reader_validation_is_triggered(
|
||||
"pkey > _last_pkey; pkey ∉ pkrange",
|
||||
partition_error_prefix,
|
||||
s.schema(),
|
||||
semaphore,
|
||||
prange,
|
||||
slice,
|
||||
copy_fragments(*s.schema(), first_buffer),
|
||||
last_fragment_position,
|
||||
make_second_buffer(pkeys[3]),
|
||||
max_buffer_size);
|
||||
}
|
||||
|
||||
@@ -2999,12 +2999,14 @@ static flat_mutation_reader compacted_sstable_reader(test_env& env, schema_ptr s
|
||||
tmpdir tmp;
|
||||
auto sstables = open_sstables(env, s, format("test/resource/sstables/3.x/uncompressed/{}", table_name), generations);
|
||||
auto new_generation = generations.back() + 1;
|
||||
auto new_sstable = [s, &tmp, &env, new_generation] {
|
||||
|
||||
auto desc = sstables::compaction_descriptor(std::move(sstables));
|
||||
desc.creator = [s, &tmp, &env, new_generation] (shard_id dummy) {
|
||||
return env.make_sstable(s, tmp.path().string(), new_generation,
|
||||
sstables::sstable_version_types::mc, sstable::format_types::big, 4096);
|
||||
};
|
||||
|
||||
sstables::compact_sstables(sstables::compaction_descriptor(std::move(sstables)), *cf, new_sstable, replacer_fn_no_op()).get();
|
||||
desc.replacer = replacer_fn_no_op();
|
||||
sstables::compact_sstables(std::move(desc), *cf).get();
|
||||
|
||||
auto compacted_sst = open_sstable(env, s, tmp.path().string(), new_generation);
|
||||
return compacted_sst->as_mutation_source().make_reader(s, no_reader_permit(), query::full_partition_range, s->full_slice());
|
||||
|
||||
@@ -1145,7 +1145,7 @@ SEASTAR_TEST_CASE(compact) {
|
||||
return env.make_sstable(s, tmpdir_path,
|
||||
(*gen)++, sstables::sstable::version_types::la, sstables::sstable::format_types::big);
|
||||
};
|
||||
return sstables::compact_sstables(sstables::compaction_descriptor(std::move(sstables)), *cf, new_sstable, replacer_fn_no_op()).then([&env, s, generation, cf, cm, tmpdir_path] (auto) {
|
||||
return compact_sstables(sstables::compaction_descriptor(std::move(sstables)), *cf, new_sstable).then([&env, s, generation, cf, cm, tmpdir_path] (auto) {
|
||||
// Verify that the compacted sstable has the right content. We expect to see:
|
||||
// name | age | height
|
||||
// -------+-----+--------
|
||||
@@ -1291,7 +1291,7 @@ static future<std::vector<unsigned long>> compact_sstables(test_env& env, sstrin
|
||||
auto sstables_to_compact = sstables::size_tiered_compaction_strategy::most_interesting_bucket(*sstables, min_threshold, max_threshold);
|
||||
// We do expect that all candidates were selected for compaction (in this case).
|
||||
BOOST_REQUIRE(sstables_to_compact.size() == sstables->size());
|
||||
return sstables::compact_sstables(sstables::compaction_descriptor(std::move(sstables_to_compact)), *cf, new_sstable, replacer_fn_no_op()).then([generation] (auto) {});
|
||||
return compact_sstables(sstables::compaction_descriptor(std::move(sstables_to_compact)), *cf, new_sstable).then([generation] (auto) {});
|
||||
} else if (strategy == compaction_strategy_type::leveled) {
|
||||
for (auto& sst : *sstables) {
|
||||
BOOST_REQUIRE(sst->get_sstable_level() == 0);
|
||||
@@ -1308,8 +1308,8 @@ static future<std::vector<unsigned long>> compact_sstables(test_env& env, sstrin
|
||||
BOOST_REQUIRE(candidate.level == 1);
|
||||
BOOST_REQUIRE(candidate.max_sstable_bytes == 1024*1024);
|
||||
|
||||
return sstables::compact_sstables(sstables::compaction_descriptor(std::move(candidate.sstables), candidate.level, 1024*1024),
|
||||
*cf, new_sstable, replacer_fn_no_op()).then([generation] (auto) {});
|
||||
return compact_sstables(sstables::compaction_descriptor(std::move(candidate.sstables), candidate.level, 1024*1024),
|
||||
*cf, new_sstable).then([generation] (auto) {});
|
||||
} else {
|
||||
throw std::runtime_error("unexpected strategy");
|
||||
}
|
||||
@@ -2234,7 +2234,7 @@ SEASTAR_TEST_CASE(tombstone_purge_test) {
|
||||
for (auto&& sst : all) {
|
||||
column_family_test(cf).add_sstable(sst);
|
||||
}
|
||||
return sstables::compact_sstables(sstables::compaction_descriptor(to_compact), *cf, sst_gen, replacer_fn_no_op()).get0().new_sstables;
|
||||
return compact_sstables(sstables::compaction_descriptor(to_compact), *cf, sst_gen).get0().new_sstables;
|
||||
};
|
||||
|
||||
auto next_timestamp = [] {
|
||||
@@ -2496,7 +2496,7 @@ SEASTAR_TEST_CASE(sstable_rewrite) {
|
||||
std::vector<shared_sstable> sstables;
|
||||
sstables.push_back(std::move(sstp));
|
||||
|
||||
return sstables::compact_sstables(sstables::compaction_descriptor(std::move(sstables)), *cf, creator, replacer_fn_no_op()).then([s, key, new_tables] (auto) {
|
||||
return compact_sstables(sstables::compaction_descriptor(std::move(sstables)), *cf, creator).then([s, key, new_tables] (auto) {
|
||||
BOOST_REQUIRE(new_tables->size() == 1);
|
||||
auto newsst = (*new_tables)[0];
|
||||
BOOST_REQUIRE(newsst->generation() == 52);
|
||||
@@ -2884,8 +2884,7 @@ SEASTAR_TEST_CASE(test_sstable_max_local_deletion_time_2) {
|
||||
BOOST_REQUIRE(now.time_since_epoch().count() == sst2->get_stats_metadata().max_local_deletion_time);
|
||||
|
||||
auto creator = [&env, s, tmpdir_path, version, gen = make_lw_shared<unsigned>(56)] { return env.make_sstable(s, tmpdir_path, (*gen)++, version, big); };
|
||||
auto info = sstables::compact_sstables(sstables::compaction_descriptor({sst1, sst2}), *cf,
|
||||
creator, replacer_fn_no_op()).get0();
|
||||
auto info = compact_sstables(sstables::compaction_descriptor({sst1, sst2}), *cf, creator).get0();
|
||||
BOOST_REQUIRE(info.new_sstables.size() == 1);
|
||||
BOOST_REQUIRE(((now + gc_clock::duration(100)).time_since_epoch().count()) ==
|
||||
info.new_sstables.front()->get_stats_metadata().max_local_deletion_time);
|
||||
@@ -2982,7 +2981,7 @@ SEASTAR_TEST_CASE(compaction_with_fully_expired_table) {
|
||||
expired = get_fully_expired_sstables(*cf, ssts, gc_clock::now());
|
||||
BOOST_REQUIRE(expired.size() == 0);
|
||||
|
||||
auto ret = sstables::compact_sstables(sstables::compaction_descriptor(ssts), *cf, sst_gen, replacer_fn_no_op()).get0();
|
||||
auto ret = compact_sstables(sstables::compaction_descriptor(ssts), *cf, sst_gen).get0();
|
||||
BOOST_REQUIRE(ret.start_size == sst->bytes_on_disk());
|
||||
BOOST_REQUIRE(ret.total_keys_written == 0);
|
||||
BOOST_REQUIRE(ret.new_sstables.empty());
|
||||
@@ -3442,7 +3441,7 @@ SEASTAR_TEST_CASE(min_max_clustering_key_test_2) {
|
||||
check_min_max_column_names(sst2, {"9ck101"}, {"9ck298"});
|
||||
|
||||
auto creator = [&env, s, &tmp, version] { return env.make_sstable(s, tmp.path().string(), 3, version, big); };
|
||||
auto info = sstables::compact_sstables(sstables::compaction_descriptor({sst, sst2}), *cf, creator, replacer_fn_no_op()).get0();
|
||||
auto info = compact_sstables(sstables::compaction_descriptor({sst, sst2}), *cf, creator).get0();
|
||||
BOOST_REQUIRE(info.new_sstables.size() == 1);
|
||||
check_min_max_column_names(info.new_sstables.front(), {"0ck100"}, {"9ck298"});
|
||||
}
|
||||
@@ -4080,7 +4079,7 @@ SEASTAR_TEST_CASE(sstable_expired_data_ratio) {
|
||||
sst->set_unshared();
|
||||
return sst;
|
||||
};
|
||||
auto info = sstables::compact_sstables(sstables::compaction_descriptor({ sst }), *cf, creator, replacer_fn_no_op()).get0();
|
||||
auto info = compact_sstables(sstables::compaction_descriptor({ sst }), *cf, creator).get0();
|
||||
BOOST_REQUIRE(info.new_sstables.size() == 1);
|
||||
BOOST_REQUIRE(info.new_sstables.front()->estimate_droppable_tombstone_ratio(gc_before) == 0.0f);
|
||||
BOOST_REQUIRE_CLOSE(info.new_sstables.front()->data_size(), uncompacted_size*(1-expired), 5);
|
||||
@@ -4351,8 +4350,8 @@ SEASTAR_TEST_CASE(compaction_correctness_with_partitioned_sstable_set) {
|
||||
// NEEDED for partitioned_sstable_set to actually have an effect
|
||||
std::for_each(all.begin(), all.end(), [] (auto& sst) { sst->set_sstable_level(1); });
|
||||
column_family_for_tests cf(s);
|
||||
return sstables::compact_sstables(sstables::compaction_descriptor(std::move(all), 0, 0 /*std::numeric_limits<uint64_t>::max()*/),
|
||||
*cf, sst_gen, replacer_fn_no_op()).get0().new_sstables;
|
||||
return compact_sstables(sstables::compaction_descriptor(std::move(all), 0, 0 /*std::numeric_limits<uint64_t>::max()*/),
|
||||
*cf, sst_gen).get0().new_sstables;
|
||||
};
|
||||
|
||||
auto make_insert = [&] (auto p) {
|
||||
@@ -4591,7 +4590,7 @@ SEASTAR_TEST_CASE(sstable_cleanup_correctness_test) {
|
||||
|
||||
auto descriptor = sstables::compaction_descriptor({std::move(sst)}, compaction_descriptor::default_level,
|
||||
compaction_descriptor::default_max_sstable_bytes, run_identifier, compaction_options::make_cleanup());
|
||||
auto ret = sstables::compact_sstables(std::move(descriptor), *cf, sst_gen, sstables::replacer_fn_no_op()).get0();
|
||||
auto ret = compact_sstables(std::move(descriptor), *cf, sst_gen).get0();
|
||||
|
||||
BOOST_REQUIRE(ret.total_keys_written == total_partitions);
|
||||
BOOST_REQUIRE(ret.new_sstables.size() == 1);
|
||||
@@ -4734,8 +4733,8 @@ SEASTAR_TEST_CASE(sstable_scrub_test) {
|
||||
|
||||
table->add_sstable_and_update_cache(sst).get();
|
||||
|
||||
BOOST_REQUIRE(table->candidates_for_compaction().size() == 1);
|
||||
BOOST_REQUIRE(table->candidates_for_compaction().front() == sst);
|
||||
BOOST_REQUIRE(table->non_staging_sstables().size() == 1);
|
||||
BOOST_REQUIRE(table->non_staging_sstables().front() == sst);
|
||||
|
||||
auto verify_fragments = [&] (sstables::shared_sstable sst, const std::vector<mutation_fragment>& mfs) {
|
||||
auto r = assert_that(sst->as_mutation_source().make_reader(schema));
|
||||
@@ -4756,7 +4755,7 @@ SEASTAR_TEST_CASE(sstable_scrub_test) {
|
||||
// We expect the scrub with skip_corrupted=false to stop on the first invalid fragment.
|
||||
compaction_manager.perform_sstable_scrub(table.get(), false).get();
|
||||
|
||||
BOOST_REQUIRE(table->candidates_for_compaction().size() == 1);
|
||||
BOOST_REQUIRE(table->non_staging_sstables().size() == 1);
|
||||
verify_fragments(sst, corrupt_fragments);
|
||||
|
||||
testlog.info("Scrub with --skip-corrupted=true");
|
||||
@@ -4764,9 +4763,9 @@ SEASTAR_TEST_CASE(sstable_scrub_test) {
|
||||
// We expect the scrub with skip_corrupted=true to get rid of all invalid data.
|
||||
compaction_manager.perform_sstable_scrub(table.get(), true).get();
|
||||
|
||||
BOOST_REQUIRE(table->candidates_for_compaction().size() == 1);
|
||||
BOOST_REQUIRE(table->candidates_for_compaction().front() != sst);
|
||||
verify_fragments(table->candidates_for_compaction().front(), scrubbed_fragments);
|
||||
BOOST_REQUIRE(table->non_staging_sstables().size() == 1);
|
||||
BOOST_REQUIRE(table->non_staging_sstables().front() != sst);
|
||||
verify_fragments(table->non_staging_sstables().front(), scrubbed_fragments);
|
||||
});
|
||||
}, test_cfg);
|
||||
}
|
||||
@@ -4994,7 +4993,7 @@ SEASTAR_TEST_CASE(sstable_run_based_compaction_test) {
|
||||
cf->start();
|
||||
cf->set_compaction_strategy(sstables::compaction_strategy_type::size_tiered);
|
||||
auto compact = [&, s] (std::vector<shared_sstable> all, auto replacer) -> std::vector<shared_sstable> {
|
||||
return sstables::compact_sstables(sstables::compaction_descriptor(std::move(all), 1, 0), *cf, sst_gen, replacer).get0().new_sstables;
|
||||
return compact_sstables(sstables::compaction_descriptor(std::move(all), 1, 0), *cf, sst_gen, replacer).get0().new_sstables;
|
||||
};
|
||||
auto make_insert = [&] (auto p) {
|
||||
auto key = partition_key::from_exploded(*s, {to_bytes(p.first)});
|
||||
@@ -5061,8 +5060,8 @@ SEASTAR_TEST_CASE(sstable_run_based_compaction_test) {
|
||||
auto expected_sst = sstable_run.begin();
|
||||
auto closed_sstables_tracker = sstable_run.begin();
|
||||
auto replacer = [&] (sstables::compaction_completion_desc desc) {
|
||||
auto old_sstables = std::move(desc.input_sstables);
|
||||
auto new_sstables = std::move(desc.output_sstables);
|
||||
auto old_sstables = std::move(desc.old_sstables);
|
||||
auto new_sstables = std::move(desc.new_sstables);
|
||||
BOOST_REQUIRE(expected_sst != sstable_run.end());
|
||||
if (incremental_enabled) {
|
||||
do_incremental_replace(std::move(old_sstables), std::move(new_sstables), expected_sst, closed_sstables_tracker);
|
||||
@@ -5218,7 +5217,7 @@ SEASTAR_TEST_CASE(backlog_tracker_correctness_after_stop_tracking_compaction) {
|
||||
// Start compaction, then stop tracking compaction, switch to TWCS, wait for compaction to finish and check for backlog.
|
||||
// That's done to assert backlog will work for compaction that is finished and was stopped tracking.
|
||||
|
||||
auto fut = sstables::compact_sstables(sstables::compaction_descriptor(ssts), *cf, sst_gen, replacer_fn_no_op());
|
||||
auto fut = compact_sstables(sstables::compaction_descriptor(ssts), *cf, sst_gen);
|
||||
|
||||
bool stopped_tracking = false;
|
||||
for (auto& info : cf._data->cm.get_compactions()) {
|
||||
@@ -5537,9 +5536,10 @@ SEASTAR_TEST_CASE(incremental_compaction_data_resurrection_test) {
|
||||
return m;
|
||||
};
|
||||
|
||||
auto deletion_time = gc_clock::now();
|
||||
auto make_delete = [&] (partition_key key) {
|
||||
mutation m(s, key);
|
||||
tombstone tomb(next_timestamp(), gc_clock::now());
|
||||
tombstone tomb(next_timestamp(), deletion_time);
|
||||
m.partition().apply(tomb);
|
||||
return m;
|
||||
};
|
||||
@@ -5597,15 +5597,20 @@ SEASTAR_TEST_CASE(incremental_compaction_data_resurrection_test) {
|
||||
BOOST_REQUIRE(is_partition_dead(alpha));
|
||||
|
||||
auto replacer = [&] (sstables::compaction_completion_desc desc) {
|
||||
auto old_sstables = std::move(desc.input_sstables);
|
||||
auto new_sstables = std::move(desc.output_sstables);
|
||||
auto old_sstables = std::move(desc.old_sstables);
|
||||
auto new_sstables = std::move(desc.new_sstables);
|
||||
// expired_sst is exhausted, and new sstable is written with mut 2.
|
||||
BOOST_REQUIRE(old_sstables.size() == 1);
|
||||
BOOST_REQUIRE(old_sstables.front() == expired_sst);
|
||||
BOOST_REQUIRE(new_sstables.size() == 1);
|
||||
assert_that(sstable_reader(new_sstables.front(), s))
|
||||
BOOST_REQUIRE(new_sstables.size() == 2);
|
||||
for (auto& new_sstable : new_sstables) {
|
||||
if (new_sstable->get_max_local_deletion_time() == deletion_time) { // Skipping GC SSTable.
|
||||
continue;
|
||||
}
|
||||
assert_that(sstable_reader(new_sstable, s))
|
||||
.produces(mut2)
|
||||
.produces_end_of_stream();
|
||||
}
|
||||
column_family_test(cf).rebuild_sstable_list(new_sstables, old_sstables);
|
||||
// force compaction failure after sstable containing expired tombstone is removed from set.
|
||||
throw std::runtime_error("forcing compaction failure on early replacement");
|
||||
@@ -5615,7 +5620,7 @@ SEASTAR_TEST_CASE(incremental_compaction_data_resurrection_test) {
|
||||
try {
|
||||
// The goal is to have one sstable generated for each mutation to trigger the issue.
|
||||
auto max_sstable_size = 0;
|
||||
auto result = sstables::compact_sstables(sstables::compaction_descriptor(sstables, 0, max_sstable_size), *cf, sst_gen, replacer).get0().new_sstables;
|
||||
auto result = compact_sstables(sstables::compaction_descriptor(sstables, 0, max_sstable_size), *cf, sst_gen, replacer).get0().new_sstables;
|
||||
BOOST_REQUIRE_EQUAL(2, result.size());
|
||||
} catch (...) {
|
||||
// swallow exception
|
||||
@@ -5626,3 +5631,48 @@ SEASTAR_TEST_CASE(incremental_compaction_data_resurrection_test) {
|
||||
BOOST_REQUIRE(is_partition_dead(alpha));
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(sstable_needs_cleanup_test) {
|
||||
test_env env;
|
||||
auto s = make_lw_shared(schema({}, some_keyspace, some_column_family,
|
||||
{{"p1", utf8_type}}, {}, {}, {}, utf8_type));
|
||||
|
||||
auto tokens = token_generation_for_current_shard(10);
|
||||
|
||||
auto sst_gen = [&env, s, gen = make_lw_shared<unsigned>(1)] (sstring first, sstring last) mutable {
|
||||
return sstable_for_overlapping_test(env, s, (*gen)++, first, last);
|
||||
};
|
||||
auto token = [&] (size_t index) -> dht::token {
|
||||
return tokens[index].second;
|
||||
};
|
||||
auto key_from_token = [&] (size_t index) -> sstring {
|
||||
return tokens[index].first;
|
||||
};
|
||||
auto token_range = [&] (size_t first, size_t last) -> dht::token_range {
|
||||
return dht::token_range::make(token(first), token(last));
|
||||
};
|
||||
|
||||
{
|
||||
auto local_ranges = { token_range(0, 9) };
|
||||
auto sst = sst_gen(key_from_token(0), key_from_token(9));
|
||||
BOOST_REQUIRE(!needs_cleanup(sst, local_ranges, s));
|
||||
}
|
||||
|
||||
{
|
||||
auto local_ranges = { token_range(0, 1), token_range(3, 4), token_range(5, 6) };
|
||||
|
||||
auto sst = sst_gen(key_from_token(0), key_from_token(1));
|
||||
BOOST_REQUIRE(!needs_cleanup(sst, local_ranges, s));
|
||||
|
||||
auto sst2 = sst_gen(key_from_token(2), key_from_token(2));
|
||||
BOOST_REQUIRE(needs_cleanup(sst2, local_ranges, s));
|
||||
|
||||
auto sst3 = sst_gen(key_from_token(0), key_from_token(6));
|
||||
BOOST_REQUIRE(needs_cleanup(sst3, local_ranges, s));
|
||||
|
||||
auto sst5 = sst_gen(key_from_token(7), key_from_token(7));
|
||||
BOOST_REQUIRE(needs_cleanup(sst5, local_ranges, s));
|
||||
}
|
||||
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
55
test/boost/stall_free_test.cc
Normal file
55
test/boost/stall_free_test.cc
Normal file
@@ -0,0 +1,55 @@
|
||||
/*
|
||||
* Copyright (C) 2020 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <seastar/testing/thread_test_case.hh>
|
||||
#include "utils/stall_free.hh"
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_merge1) {
|
||||
std::list<int> l1{1, 2, 5, 8};
|
||||
std::list<int> l2{3};
|
||||
std::list<int> expected{1,2,3,5,8};
|
||||
utils::merge_to_gently(l1, l2, std::less<int>());
|
||||
BOOST_CHECK(l1 == expected);
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_merge2) {
|
||||
std::list<int> l1{1};
|
||||
std::list<int> l2{3, 5, 6};
|
||||
std::list<int> expected{1,3,5,6};
|
||||
utils::merge_to_gently(l1, l2, std::less<int>());
|
||||
BOOST_CHECK(l1 == expected);
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_merge3) {
|
||||
std::list<int> l1{};
|
||||
std::list<int> l2{3, 5, 6};
|
||||
std::list<int> expected{3,5,6};
|
||||
utils::merge_to_gently(l1, l2, std::less<int>());
|
||||
BOOST_CHECK(l1 == expected);
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_merge4) {
|
||||
std::list<int> l1{1};
|
||||
std::list<int> l2{};
|
||||
std::list<int> expected{1};
|
||||
utils::merge_to_gently(l1, l2, std::less<int>());
|
||||
BOOST_CHECK(l1 == expected);
|
||||
}
|
||||
@@ -118,6 +118,25 @@ SEASTAR_TEST_CASE(test_access_and_schema) {
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_column_dropped_from_base) {
|
||||
return do_with_cql_env_thread([] (auto& e) {
|
||||
e.execute_cql("create table cf (p int, c ascii, a int, v int, primary key (p, c));").get();
|
||||
e.execute_cql("create materialized view vcf as select p, c, v from cf "
|
||||
"where v is not null and p is not null and c is not null "
|
||||
"primary key (v, p, c)").get();
|
||||
e.execute_cql("alter table cf drop a;").get();
|
||||
e.execute_cql("insert into cf (p, c, v) values (0, 'foo', 1);").get();
|
||||
eventually([&] {
|
||||
auto msg = e.execute_cql("select v from vcf").get0();
|
||||
assert_that(msg).is_rows()
|
||||
.with_size(1)
|
||||
.with_row({
|
||||
{int32_type->decompose(1)}
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_updates) {
|
||||
return do_with_cql_env_thread([] (auto& e) {
|
||||
e.execute_cql("create table base (k int, v int, primary key (k));").get();
|
||||
|
||||
29
test/cql/counters_test.cql
Normal file
29
test/cql/counters_test.cql
Normal file
@@ -0,0 +1,29 @@
|
||||
CREATE TABLE ks.tbl_cnt (pk int PRIMARY KEY, c1 counter, c2 counter);
|
||||
|
||||
-- insert some values in one column
|
||||
UPDATE ks.tbl_cnt SET c1 = c1+1 WHERE pk = 1;
|
||||
UPDATE ks.tbl_cnt SET c1 = c1+2 WHERE pk = 2;
|
||||
UPDATE ks.tbl_cnt SET c1 = c1+3 WHERE pk = 3;
|
||||
UPDATE ks.tbl_cnt SET c1 = c1+4 WHERE pk = 4;
|
||||
|
||||
-- test various filtering options on counter column
|
||||
SELECT pk, c1 FROM ks.tbl_cnt WHERE c1 < 3 ALLOW FILTERING;
|
||||
SELECT pk, c1 FROM ks.tbl_cnt WHERE c1 < 1 ALLOW FILTERING;
|
||||
SELECT pk, c1 FROM ks.tbl_cnt WHERE c1 <= 3 ALLOW FILTERING;
|
||||
SELECT pk, c1 FROM ks.tbl_cnt WHERE c1 > 2 AND pk = 4 ALLOW FILTERING;
|
||||
SELECT pk, c1 FROM ks.tbl_cnt WHERE c1 >= 3 and pk = 3 ALLOW FILTERING;
|
||||
SELECT pk, c1 FROM ks.tbl_cnt WHERE c1 > 4 ALLOW FILTERING;
|
||||
SELECT pk, c1 FROM ks.tbl_cnt WHERE c1 in (-1, 2, 3) ALLOW FILTERING;
|
||||
SELECT pk, c1 FROM ks.tbl_cnt WHERE c1 = 0 ALLOW FILTERING;
|
||||
SELECT pk, c1 FROM ks.tbl_cnt WHERE c1 = 1 ALLOW FILTERING;
|
||||
|
||||
-- now filter through untouched counters `c2` - they should appear as NULLs and evaluate as zeros
|
||||
SELECT pk, c1, c2 FROM ks.tbl_cnt WHERE c2 = 0 ALLOW FILTERING;
|
||||
SELECT pk, c2 FROM ks.tbl_cnt WHERE c2 < 0 ALLOW FILTERING;
|
||||
SELECT pk, c2 FROM ks.tbl_cnt WHERE c2 > 0 ALLOW FILTERING;
|
||||
|
||||
-- delete `c1` and make sure it doesn't appear in filtering results
|
||||
DELETE c1 from ks.tbl_cnt WHERE pk = 1;
|
||||
SELECT pk, c1 FROM ks.tbl_cnt WHERE c1 = 1 ALLOW FILTERING;
|
||||
SELECT pk, c1 FROM ks.tbl_cnt WHERE c1 <= 1000 ALLOW FILTERING;
|
||||
SELECT pk, c1 FROM ks.tbl_cnt WHERE c1 > -1000 ALLOW FILTERING;
|
||||
190
test/cql/counters_test.result
Normal file
190
test/cql/counters_test.result
Normal file
@@ -0,0 +1,190 @@
|
||||
CREATE TABLE ks.tbl_cnt (pk int PRIMARY KEY, c1 counter, c2 counter);
|
||||
{
|
||||
"status" : "ok"
|
||||
}
|
||||
|
||||
-- insert some values in one column
|
||||
UPDATE ks.tbl_cnt SET c1 = c1+1 WHERE pk = 1;
|
||||
{
|
||||
"status" : "ok"
|
||||
}
|
||||
UPDATE ks.tbl_cnt SET c1 = c1+2 WHERE pk = 2;
|
||||
{
|
||||
"status" : "ok"
|
||||
}
|
||||
UPDATE ks.tbl_cnt SET c1 = c1+3 WHERE pk = 3;
|
||||
{
|
||||
"status" : "ok"
|
||||
}
|
||||
UPDATE ks.tbl_cnt SET c1 = c1+4 WHERE pk = 4;
|
||||
{
|
||||
"status" : "ok"
|
||||
}
|
||||
|
||||
-- test various filtering options on counter column
|
||||
SELECT pk, c1 FROM ks.tbl_cnt WHERE c1 < 3 ALLOW FILTERING;
|
||||
{
|
||||
"rows" :
|
||||
[
|
||||
{
|
||||
"c1" : "1",
|
||||
"pk" : "1"
|
||||
},
|
||||
{
|
||||
"c1" : "2",
|
||||
"pk" : "2"
|
||||
}
|
||||
]
|
||||
}
|
||||
SELECT pk, c1 FROM ks.tbl_cnt WHERE c1 < 1 ALLOW FILTERING;
|
||||
{
|
||||
"rows" : null
|
||||
}
|
||||
SELECT pk, c1 FROM ks.tbl_cnt WHERE c1 <= 3 ALLOW FILTERING;
|
||||
{
|
||||
"rows" :
|
||||
[
|
||||
{
|
||||
"c1" : "1",
|
||||
"pk" : "1"
|
||||
},
|
||||
{
|
||||
"c1" : "2",
|
||||
"pk" : "2"
|
||||
},
|
||||
{
|
||||
"c1" : "3",
|
||||
"pk" : "3"
|
||||
}
|
||||
]
|
||||
}
|
||||
SELECT pk, c1 FROM ks.tbl_cnt WHERE c1 > 2 AND pk = 4 ALLOW FILTERING;
|
||||
{
|
||||
"rows" :
|
||||
[
|
||||
{
|
||||
"c1" : "4",
|
||||
"pk" : "4"
|
||||
}
|
||||
]
|
||||
}
|
||||
SELECT pk, c1 FROM ks.tbl_cnt WHERE c1 >= 3 and pk = 3 ALLOW FILTERING;
|
||||
{
|
||||
"rows" :
|
||||
[
|
||||
{
|
||||
"c1" : "3",
|
||||
"pk" : "3"
|
||||
}
|
||||
]
|
||||
}
|
||||
SELECT pk, c1 FROM ks.tbl_cnt WHERE c1 > 4 ALLOW FILTERING;
|
||||
{
|
||||
"rows" : null
|
||||
}
|
||||
SELECT pk, c1 FROM ks.tbl_cnt WHERE c1 in (-1, 2, 3) ALLOW FILTERING;
|
||||
{
|
||||
"rows" :
|
||||
[
|
||||
{
|
||||
"c1" : "2",
|
||||
"pk" : "2"
|
||||
},
|
||||
{
|
||||
"c1" : "3",
|
||||
"pk" : "3"
|
||||
}
|
||||
]
|
||||
}
|
||||
SELECT pk, c1 FROM ks.tbl_cnt WHERE c1 = 0 ALLOW FILTERING;
|
||||
{
|
||||
"rows" : null
|
||||
}
|
||||
SELECT pk, c1 FROM ks.tbl_cnt WHERE c1 = 1 ALLOW FILTERING;
|
||||
{
|
||||
"rows" :
|
||||
[
|
||||
{
|
||||
"c1" : "1",
|
||||
"pk" : "1"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
-- now filter through untouched counters `c2` - they should appear as NULLs and evaluate as zeros
|
||||
SELECT pk, c1, c2 FROM ks.tbl_cnt WHERE c2 = 0 ALLOW FILTERING;
|
||||
{
|
||||
"rows" :
|
||||
[
|
||||
{
|
||||
"c1" : "1",
|
||||
"pk" : "1"
|
||||
},
|
||||
{
|
||||
"c1" : "2",
|
||||
"pk" : "2"
|
||||
},
|
||||
{
|
||||
"c1" : "4",
|
||||
"pk" : "4"
|
||||
},
|
||||
{
|
||||
"c1" : "3",
|
||||
"pk" : "3"
|
||||
}
|
||||
]
|
||||
}
|
||||
SELECT pk, c2 FROM ks.tbl_cnt WHERE c2 < 0 ALLOW FILTERING;
|
||||
{
|
||||
"rows" : null
|
||||
}
|
||||
SELECT pk, c2 FROM ks.tbl_cnt WHERE c2 > 0 ALLOW FILTERING;
|
||||
{
|
||||
"rows" : null
|
||||
}
|
||||
|
||||
-- delete `c1` and make sure it doesn't appear in filtering results
|
||||
DELETE c1 from ks.tbl_cnt WHERE pk = 1;
|
||||
{
|
||||
"status" : "ok"
|
||||
}
|
||||
SELECT pk, c1 FROM ks.tbl_cnt WHERE c1 = 1 ALLOW FILTERING;
|
||||
{
|
||||
"rows" : null
|
||||
}
|
||||
SELECT pk, c1 FROM ks.tbl_cnt WHERE c1 <= 1000 ALLOW FILTERING;
|
||||
{
|
||||
"rows" :
|
||||
[
|
||||
{
|
||||
"c1" : "2",
|
||||
"pk" : "2"
|
||||
},
|
||||
{
|
||||
"c1" : "4",
|
||||
"pk" : "4"
|
||||
},
|
||||
{
|
||||
"c1" : "3",
|
||||
"pk" : "3"
|
||||
}
|
||||
]
|
||||
}
|
||||
SELECT pk, c1 FROM ks.tbl_cnt WHERE c1 > -1000 ALLOW FILTERING;
|
||||
{
|
||||
"rows" :
|
||||
[
|
||||
{
|
||||
"c1" : "2",
|
||||
"pk" : "2"
|
||||
},
|
||||
{
|
||||
"c1" : "4",
|
||||
"pk" : "4"
|
||||
},
|
||||
{
|
||||
"c1" : "3",
|
||||
"pk" : "3"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -121,6 +121,14 @@ token_generation_for_shard(unsigned tokens_to_generate, unsigned shard,
|
||||
return key_and_token_pair;
|
||||
}
|
||||
|
||||
future<compaction_info> compact_sstables(sstables::compaction_descriptor descriptor, column_family& cf, std::function<shared_sstable()> creator, replacer_fn replacer) {
|
||||
descriptor.creator = [creator = std::move(creator)] (shard_id dummy) mutable {
|
||||
return creator();
|
||||
};
|
||||
descriptor.replacer = std::move(replacer);
|
||||
return sstables::compact_sstables(std::move(descriptor), cf);
|
||||
}
|
||||
|
||||
std::vector<std::pair<sstring, dht::token>> token_generation_for_current_shard(unsigned tokens_to_generate) {
|
||||
return token_generation_for_shard(tokens_to_generate, engine().cpu_id());
|
||||
}
|
||||
|
||||
@@ -355,5 +355,7 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
} // namespace sstables
|
||||
|
||||
future<compaction_info> compact_sstables(sstables::compaction_descriptor descriptor, column_family& cf,
|
||||
std::function<shared_sstable()> creator, replacer_fn replacer = sstables::replacer_fn_no_op());
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user