Compare commits
72 Commits
scylla-4.6
...
next-4.6
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6c0825e2a6 | ||
|
|
db3dd3bdf6 | ||
|
|
4ad24180f5 | ||
|
|
755c7eeb6a | ||
|
|
8914ca8c58 | ||
|
|
e82e4bbed3 | ||
|
|
f9c457778e | ||
|
|
8315a7b164 | ||
|
|
291ca8db60 | ||
|
|
4da5fbaa24 | ||
|
|
fc16664d81 | ||
|
|
80bea5341e | ||
|
|
6ecc772b56 | ||
|
|
0b2e951954 | ||
|
|
f2a738497f | ||
|
|
badf7c816f | ||
|
|
bfb86f2c78 | ||
|
|
18e7a46038 | ||
|
|
cbcfa31e51 | ||
|
|
5ee69ff3a9 | ||
|
|
949103d22a | ||
|
|
549cb60f4c | ||
|
|
37633c5576 | ||
|
|
abd9f43fa7 | ||
|
|
d41d4db5c0 | ||
|
|
c500043a78 | ||
|
|
af4752a526 | ||
|
|
0aa9a8c266 | ||
|
|
85fd6ab377 | ||
|
|
7c79c513d1 | ||
|
|
9a8e73f0c3 | ||
|
|
fac0443200 | ||
|
|
6bcfef2cfa | ||
|
|
d2c67a2429 | ||
|
|
d6c2f228e7 | ||
|
|
a1b1df2074 | ||
|
|
14e13ecbd4 | ||
|
|
b8740bde6e | ||
|
|
1b23f8d038 | ||
|
|
05a228e4c5 | ||
|
|
2ec293ab0e | ||
|
|
b60f14601e | ||
|
|
284dd21ef7 | ||
|
|
8b52f1d6e7 | ||
|
|
157951f756 | ||
|
|
4f643ed4a5 | ||
|
|
b598629b7f | ||
|
|
43f82047b9 | ||
|
|
ec3c07de6e | ||
|
|
82572e8cfe | ||
|
|
2b9ed79c6f | ||
|
|
ab0b6fd372 | ||
|
|
12f1718ef4 | ||
|
|
322dfe8403 | ||
|
|
11f008e8fd | ||
|
|
fd7314a362 | ||
|
|
d27468f078 | ||
|
|
74ef1ee961 | ||
|
|
07549d159c | ||
|
|
189bbcd82d | ||
|
|
70e6921125 | ||
|
|
e314158708 | ||
|
|
46586532c9 | ||
|
|
0114244363 | ||
|
|
f154c8b719 | ||
|
|
8bf149fdd6 | ||
|
|
0265d56173 | ||
|
|
e50452ba43 | ||
|
|
a205f644cb | ||
|
|
f136b5b950 | ||
|
|
69a1325884 | ||
|
|
ab153c9b94 |
@@ -60,7 +60,7 @@ fi
|
||||
|
||||
# Default scylla product/version tags
|
||||
PRODUCT=scylla
|
||||
VERSION=4.6.2
|
||||
VERSION=4.6.11
|
||||
|
||||
if test -f version
|
||||
then
|
||||
|
||||
@@ -415,6 +415,11 @@ future<executor::request_return_type> executor::describe_table(client_state& cli
|
||||
rjson::add(table_description, "BillingModeSummary", rjson::empty_object());
|
||||
rjson::add(table_description["BillingModeSummary"], "BillingMode", "PAY_PER_REQUEST");
|
||||
rjson::add(table_description["BillingModeSummary"], "LastUpdateToPayPerRequestDateTime", rjson::value(creation_date_seconds));
|
||||
// In PAY_PER_REQUEST billing mode, provisioned capacity should return 0
|
||||
rjson::add(table_description, "ProvisionedThroughput", rjson::empty_object());
|
||||
rjson::add(table_description["ProvisionedThroughput"], "ReadCapacityUnits", 0);
|
||||
rjson::add(table_description["ProvisionedThroughput"], "WriteCapacityUnits", 0);
|
||||
rjson::add(table_description["ProvisionedThroughput"], "NumberOfDecreasesToday", 0);
|
||||
|
||||
std::unordered_map<std::string,std::string> key_attribute_types;
|
||||
// Add base table's KeySchema and collect types for AttributeDefinitions:
|
||||
@@ -2078,6 +2083,9 @@ static attrs_to_get calculate_attrs_to_get(const rjson::value& req, std::unorder
|
||||
for (auto it = attributes_to_get.Begin(); it != attributes_to_get.End(); ++it) {
|
||||
attribute_path_map_add("AttributesToGet", ret, it->GetString());
|
||||
}
|
||||
if (ret.empty()) {
|
||||
throw api_error::validation("Empty AttributesToGet is not allowed. Consider using Select=COUNT instead.");
|
||||
}
|
||||
return ret;
|
||||
} else if (has_projection_expression) {
|
||||
const rjson::value& projection_expression = req["ProjectionExpression"];
|
||||
|
||||
@@ -94,10 +94,7 @@ future<executor::request_return_type> executor::update_time_to_live(client_state
|
||||
}
|
||||
|
||||
future<executor::request_return_type> executor::describe_time_to_live(client_state& client_state, service_permit permit, rjson::value request) {
|
||||
_stats.api_operations.update_time_to_live++;
|
||||
if (!_proxy.get_db().local().features().cluster_supports_alternator_ttl()) {
|
||||
co_return api_error::unknown_operation("DescribeTimeToLive not yet supported. Experimental support is available if the 'alternator_ttl' experimental feature is enabled on all nodes.");
|
||||
}
|
||||
_stats.api_operations.describe_time_to_live++;
|
||||
schema_ptr schema = get_table(_proxy, request);
|
||||
std::map<sstring, sstring> tags_map = get_tags_of_table(schema);
|
||||
rjson::value desc = rjson::empty_object();
|
||||
|
||||
@@ -604,15 +604,21 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
||||
return make_exception_future<json::json_return_type>(
|
||||
std::runtime_error("Can not perform cleanup operation when topology changes"));
|
||||
}
|
||||
return ctx.db.invoke_on_all([keyspace, column_families] (database& db) {
|
||||
std::vector<column_family*> column_families_vec;
|
||||
auto& cm = db.get_compaction_manager();
|
||||
for (auto cf : column_families) {
|
||||
column_families_vec.push_back(&db.find_column_family(keyspace, cf));
|
||||
}
|
||||
return parallel_for_each(column_families_vec, [&cm, &db] (column_family* cf) {
|
||||
return cm.perform_cleanup(db, cf);
|
||||
return ctx.db.invoke_on_all([keyspace, column_families] (database& db) -> future<> {
|
||||
auto table_ids = boost::copy_range<std::vector<utils::UUID>>(column_families | boost::adaptors::transformed([&] (auto& table_name) {
|
||||
return db.find_uuid(keyspace, table_name);
|
||||
}));
|
||||
// cleanup smaller tables first, to increase chances of success if low on space.
|
||||
std::ranges::sort(table_ids, std::less<>(), [&] (const utils::UUID& id) {
|
||||
return db.find_column_family(id).get_stats().live_disk_space_used;
|
||||
});
|
||||
auto& cm = db.get_compaction_manager();
|
||||
// as a table can be dropped during loop below, let's find it before issuing the cleanup request.
|
||||
for (auto& id : table_ids) {
|
||||
table& t = db.find_column_family(id);
|
||||
co_await cm.perform_cleanup(db, &t);
|
||||
}
|
||||
co_return;
|
||||
}).then([]{
|
||||
return make_ready_future<json::json_return_type>(0);
|
||||
});
|
||||
|
||||
@@ -765,8 +765,12 @@ future<> generation_service::check_and_repair_cdc_streams() {
|
||||
std::optional<cdc::generation_id> latest = _gen_id;
|
||||
const auto& endpoint_states = _gossiper.get_endpoint_states();
|
||||
for (const auto& [addr, state] : endpoint_states) {
|
||||
if (!_gossiper.is_normal(addr)) {
|
||||
throw std::runtime_error(format("All nodes must be in NORMAL state while performing check_and_repair_cdc_streams"
|
||||
if (_gossiper.is_left(addr)) {
|
||||
cdc_log.info("check_and_repair_cdc_streams ignored node {} because it is in LEFT state", addr);
|
||||
continue;
|
||||
}
|
||||
if (!_gossiper.is_normal(addr)) {
|
||||
throw std::runtime_error(format("All nodes must be in NORMAL or LEFT state while performing check_and_repair_cdc_streams"
|
||||
" ({} is in state {})", addr, _gossiper.get_gossip_status(state)));
|
||||
}
|
||||
|
||||
@@ -830,6 +834,11 @@ future<> generation_service::check_and_repair_cdc_streams() {
|
||||
latest, db_clock::now());
|
||||
should_regenerate = true;
|
||||
} else {
|
||||
if (tmptr->sorted_tokens().size() != gen->entries().size()) {
|
||||
// We probably have garbage streams from old generations
|
||||
cdc_log.info("Generation size does not match the token ring, regenerating");
|
||||
should_regenerate = true;
|
||||
} else {
|
||||
std::unordered_set<dht::token> gen_ends;
|
||||
for (const auto& entry : gen->entries()) {
|
||||
gen_ends.insert(entry.token_range_end);
|
||||
@@ -841,6 +850,7 @@ future<> generation_service::check_and_repair_cdc_streams() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
20
cdc/log.cc
20
cdc/log.cc
@@ -73,7 +73,7 @@ using namespace std::chrono_literals;
|
||||
logging::logger cdc_log("cdc");
|
||||
|
||||
namespace cdc {
|
||||
static schema_ptr create_log_schema(const schema&, std::optional<utils::UUID> = {});
|
||||
static schema_ptr create_log_schema(const schema&, std::optional<utils::UUID> = {}, schema_ptr = nullptr);
|
||||
}
|
||||
|
||||
static constexpr auto cdc_group_name = "cdc";
|
||||
@@ -220,7 +220,7 @@ public:
|
||||
return;
|
||||
}
|
||||
|
||||
auto new_log_schema = create_log_schema(new_schema, log_schema ? std::make_optional(log_schema->id()) : std::nullopt);
|
||||
auto new_log_schema = create_log_schema(new_schema, log_schema ? std::make_optional(log_schema->id()) : std::nullopt, log_schema);
|
||||
|
||||
auto log_mut = log_schema
|
||||
? db::schema_tables::make_update_table_mutations(db, keyspace.metadata(), log_schema, new_log_schema, timestamp, false)
|
||||
@@ -503,7 +503,7 @@ bytes log_data_column_deleted_elements_name_bytes(const bytes& column_name) {
|
||||
return to_bytes(cdc_deleted_elements_column_prefix) + column_name;
|
||||
}
|
||||
|
||||
static schema_ptr create_log_schema(const schema& s, std::optional<utils::UUID> uuid) {
|
||||
static schema_ptr create_log_schema(const schema& s, std::optional<utils::UUID> uuid, schema_ptr old) {
|
||||
schema_builder b(s.ks_name(), log_name(s.cf_name()));
|
||||
b.with_partitioner("com.scylladb.dht.CDCPartitioner");
|
||||
b.set_compaction_strategy(sstables::compaction_strategy_type::time_window);
|
||||
@@ -590,6 +590,20 @@ static schema_ptr create_log_schema(const schema& s, std::optional<utils::UUID>
|
||||
b.set_uuid(*uuid);
|
||||
}
|
||||
|
||||
/**
|
||||
* #10473 - if we are redefining the log table, we need to ensure any dropped
|
||||
* columns are registered in "dropped_columns" table, otherwise clients will not
|
||||
* be able to read data older than now.
|
||||
*/
|
||||
if (old) {
|
||||
// not super efficient, but we don't do this often.
|
||||
for (auto& col : old->all_columns()) {
|
||||
if (!b.has_column({col.name(), col.name_as_text() })) {
|
||||
b.without_column(col.name_as_text(), col.type, api::new_timestamp());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return b.build();
|
||||
}
|
||||
|
||||
|
||||
@@ -527,16 +527,11 @@ future<> compaction_manager::stop() {
|
||||
}
|
||||
}
|
||||
|
||||
void compaction_manager::really_do_stop() {
|
||||
if (_state == state::none || _state == state::stopped) {
|
||||
return;
|
||||
}
|
||||
|
||||
_state = state::stopped;
|
||||
future<> compaction_manager::really_do_stop() {
|
||||
cmlog.info("Asked to stop");
|
||||
// Reset the metrics registry
|
||||
_metrics.clear();
|
||||
_stop_future.emplace(stop_ongoing_compactions("shutdown").then([this] () mutable {
|
||||
return stop_ongoing_compactions("shutdown").then([this] () mutable {
|
||||
reevaluate_postponed_compactions();
|
||||
return std::move(_waiting_reevalution);
|
||||
}).then([this] {
|
||||
@@ -544,12 +539,34 @@ void compaction_manager::really_do_stop() {
|
||||
_compaction_submission_timer.cancel();
|
||||
cmlog.info("Stopped");
|
||||
return _compaction_controller.shutdown();
|
||||
}));
|
||||
});
|
||||
}
|
||||
|
||||
template <typename Ex>
|
||||
requires std::is_base_of_v<std::exception, Ex> &&
|
||||
requires (const Ex& ex) {
|
||||
{ ex.code() } noexcept -> std::same_as<const std::error_code&>;
|
||||
}
|
||||
auto swallow_enospc(const Ex& ex) noexcept {
|
||||
if (ex.code().value() != ENOSPC) {
|
||||
return make_exception_future<>(std::make_exception_ptr(ex));
|
||||
}
|
||||
|
||||
cmlog.warn("Got ENOSPC on stop, ignoring...");
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
void compaction_manager::do_stop() noexcept {
|
||||
if (_state == state::none || _state == state::stopped) {
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
really_do_stop();
|
||||
_state = state::stopped;
|
||||
_stop_future = really_do_stop()
|
||||
.handle_exception_type([] (const std::system_error& ex) { return swallow_enospc(ex); })
|
||||
.handle_exception_type([] (const storage_io_error& ex) { return swallow_enospc(ex); })
|
||||
;
|
||||
} catch (...) {
|
||||
try {
|
||||
cmlog.error("Failed to stop the manager: {}", std::current_exception());
|
||||
@@ -681,6 +698,7 @@ void compaction_manager::submit_offstrategy(column_family* cf) {
|
||||
_stats.active_tasks++;
|
||||
task->setup_new_compaction();
|
||||
|
||||
return with_scheduling_group(_maintenance_sg.cpu, [this, task, cf] {
|
||||
return cf->run_offstrategy_compaction(task->compaction_data).then_wrapped([this, task] (future<> f) mutable {
|
||||
_stats.active_tasks--;
|
||||
task->finish_compaction();
|
||||
@@ -703,6 +721,7 @@ void compaction_manager::submit_offstrategy(column_family* cf) {
|
||||
_tasks.remove(task);
|
||||
return make_ready_future<stop_iteration>(stop_iteration::yes);
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -719,9 +738,20 @@ inline bool compaction_manager::check_for_cleanup(column_family* cf) {
|
||||
|
||||
future<> compaction_manager::rewrite_sstables(column_family* cf, sstables::compaction_type_options options, get_candidates_func get_func, can_purge_tombstones can_purge) {
|
||||
auto task = make_lw_shared<compaction_manager::task>(cf, options.type());
|
||||
_tasks.push_back(task);
|
||||
|
||||
auto sstables = std::make_unique<std::vector<sstables::shared_sstable>>(get_func(*cf));
|
||||
std::unique_ptr<std::vector<sstables::shared_sstable>> sstables;
|
||||
lw_shared_ptr<compacting_sstable_registration> compacting;
|
||||
|
||||
// since we might potentially have ongoing compactions, and we
|
||||
// must ensure that all sstables created before we run are included
|
||||
// in the re-write, we need to barrier out any previously running
|
||||
// compaction.
|
||||
auto get_and_register_candidates_func = [this, &sstables, &compacting, &get_func] () mutable -> future<> {
|
||||
sstables = std::make_unique<std::vector<sstables::shared_sstable>>(co_await get_func());
|
||||
compacting = make_lw_shared<compacting_sstable_registration>(this, *sstables);
|
||||
};
|
||||
|
||||
co_await cf->run_with_compaction_disabled(std::ref(get_and_register_candidates_func));
|
||||
// sort sstables by size in descending order, such that the smallest files will be rewritten first
|
||||
// (as sstable to be rewritten is popped off from the back of container), so rewrite will have higher
|
||||
// chance to succeed when the biggest files are reached.
|
||||
@@ -729,10 +759,11 @@ future<> compaction_manager::rewrite_sstables(column_family* cf, sstables::compa
|
||||
return a->data_size() > b->data_size();
|
||||
});
|
||||
|
||||
auto compacting = make_lw_shared<compacting_sstable_registration>(this, *sstables);
|
||||
auto sstables_ptr = sstables.get();
|
||||
_stats.pending_tasks += sstables->size();
|
||||
|
||||
_tasks.push_back(task);
|
||||
|
||||
task->compaction_done = do_until([this, sstables_ptr, task] { return sstables_ptr->empty() || !can_proceed(task); },
|
||||
[this, task, options, sstables_ptr, compacting, can_purge] () mutable {
|
||||
auto sst = sstables_ptr->back();
|
||||
@@ -789,7 +820,7 @@ future<> compaction_manager::rewrite_sstables(column_family* cf, sstables::compa
|
||||
_tasks.remove(task);
|
||||
});
|
||||
|
||||
return task->compaction_done.get_future().then([task] {});
|
||||
co_return co_await task->compaction_done.get_future();
|
||||
}
|
||||
|
||||
future<> compaction_manager::perform_sstable_scrub_validate_mode(column_family* cf) {
|
||||
@@ -871,31 +902,29 @@ future<> compaction_manager::perform_cleanup(database& db, column_family* cf) {
|
||||
return make_exception_future<>(std::runtime_error(format("cleanup request failed: there is an ongoing cleanup on {}.{}",
|
||||
cf->schema()->ks_name(), cf->schema()->cf_name())));
|
||||
}
|
||||
return seastar::async([this, cf, &db] {
|
||||
// FIXME: indentation
|
||||
auto sorted_owned_ranges = db.get_keyspace_local_ranges(cf->schema()->ks_name());
|
||||
auto get_sstables = [this, &db, cf, sorted_owned_ranges] () -> future<std::vector<sstables::shared_sstable>> {
|
||||
return seastar::async([this, &db, cf, sorted_owned_ranges = std::move(sorted_owned_ranges)] {
|
||||
auto schema = cf->schema();
|
||||
auto sorted_owned_ranges = db.get_keyspace_local_ranges(schema->ks_name());
|
||||
auto sstables = std::vector<sstables::shared_sstable>{};
|
||||
const auto candidates = get_candidates(*cf);
|
||||
std::copy_if(candidates.begin(), candidates.end(), std::back_inserter(sstables), [&sorted_owned_ranges, schema] (const sstables::shared_sstable& sst) {
|
||||
seastar::thread::maybe_yield();
|
||||
return sorted_owned_ranges.empty() || needs_cleanup(sst, sorted_owned_ranges, schema);
|
||||
});
|
||||
return std::tuple<dht::token_range_vector, std::vector<sstables::shared_sstable>>(sorted_owned_ranges, sstables);
|
||||
}).then_unpack([this, cf, &db] (dht::token_range_vector owned_ranges, std::vector<sstables::shared_sstable> sstables) {
|
||||
return rewrite_sstables(cf, sstables::compaction_type_options::make_cleanup(std::move(owned_ranges)),
|
||||
[sstables = std::move(sstables)] (const table&) { return sstables; });
|
||||
return sstables;
|
||||
});
|
||||
};
|
||||
return rewrite_sstables(cf, sstables::compaction_type_options::make_cleanup(std::move(sorted_owned_ranges)), std::move(get_sstables));
|
||||
}
|
||||
|
||||
// Submit a column family to be upgraded and wait for its termination.
|
||||
future<> compaction_manager::perform_sstable_upgrade(database& db, column_family* cf, bool exclude_current_version) {
|
||||
using shared_sstables = std::vector<sstables::shared_sstable>;
|
||||
return do_with(shared_sstables{}, [this, &db, cf, exclude_current_version](shared_sstables& tables) {
|
||||
// since we might potentially have ongoing compactions, and we
|
||||
// must ensure that all sstables created before we run are included
|
||||
// in the re-write, we need to barrier out any previously running
|
||||
// compaction.
|
||||
return cf->run_with_compaction_disabled([this, cf, &tables, exclude_current_version] {
|
||||
auto get_sstables = [this, &db, cf, exclude_current_version] {
|
||||
// FIXME: indentation
|
||||
std::vector<sstables::shared_sstable> tables;
|
||||
|
||||
auto last_version = cf->get_sstables_manager().get_highest_supported_format();
|
||||
|
||||
for (auto& sst : get_candidates(*cf)) {
|
||||
@@ -906,21 +935,17 @@ future<> compaction_manager::perform_sstable_upgrade(database& db, column_family
|
||||
tables.emplace_back(sst);
|
||||
}
|
||||
}
|
||||
return make_ready_future<>();
|
||||
}).then([&db, cf] {
|
||||
return db.get_keyspace_local_ranges(cf->schema()->ks_name());
|
||||
}).then([this, &db, cf, &tables] (dht::token_range_vector owned_ranges) {
|
||||
// doing a "cleanup" is about as compacting as we need
|
||||
// to be, provided we get to decide the tables to process,
|
||||
// and ignoring any existing operations.
|
||||
// Note that we potentially could be doing multiple
|
||||
// upgrades here in parallel, but that is really the user's
|
||||
// problem.
|
||||
return rewrite_sstables(cf, sstables::compaction_type_options::make_upgrade(std::move(owned_ranges)), [&](auto&) mutable {
|
||||
return std::exchange(tables, {});
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
return make_ready_future<std::vector<sstables::shared_sstable>>(tables);
|
||||
};
|
||||
|
||||
// doing a "cleanup" is about as compacting as we need
|
||||
// to be, provided we get to decide the tables to process,
|
||||
// and ignoring any existing operations.
|
||||
// Note that we potentially could be doing multiple
|
||||
// upgrades here in parallel, but that is really the user's
|
||||
// problem.
|
||||
return rewrite_sstables(cf, sstables::compaction_type_options::make_upgrade(db.get_keyspace_local_ranges(cf->schema()->ks_name())), std::move(get_sstables));
|
||||
}
|
||||
|
||||
// Submit a column family to be scrubbed and wait for its termination.
|
||||
@@ -928,14 +953,10 @@ future<> compaction_manager::perform_sstable_scrub(column_family* cf, sstables::
|
||||
if (scrub_mode == sstables::compaction_type_options::scrub::mode::validate) {
|
||||
return perform_sstable_scrub_validate_mode(cf);
|
||||
}
|
||||
// since we might potentially have ongoing compactions, and we
|
||||
// must ensure that all sstables created before we run are scrubbed,
|
||||
// we need to barrier out any previously running compaction.
|
||||
return cf->run_with_compaction_disabled([this, cf, scrub_mode] {
|
||||
return rewrite_sstables(cf, sstables::compaction_type_options::make_scrub(scrub_mode), [this] (const table& cf) {
|
||||
return get_candidates(cf);
|
||||
// FIXME: indentation
|
||||
return rewrite_sstables(cf, sstables::compaction_type_options::make_scrub(scrub_mode), [this, cf] {
|
||||
return make_ready_future<std::vector<sstables::shared_sstable>>(get_candidates(*cf));
|
||||
}, can_purge_tombstones::no);
|
||||
});
|
||||
}
|
||||
|
||||
future<> compaction_manager::remove(column_family* cf) {
|
||||
|
||||
@@ -178,7 +178,7 @@ private:
|
||||
maintenance_scheduling_group _maintenance_sg;
|
||||
size_t _available_memory;
|
||||
|
||||
using get_candidates_func = std::function<std::vector<sstables::shared_sstable>(const column_family&)>;
|
||||
using get_candidates_func = std::function<future<std::vector<sstables::shared_sstable>>()>;
|
||||
class can_purge_tombstones_tag;
|
||||
using can_purge_tombstones = bool_class<can_purge_tombstones_tag>;
|
||||
|
||||
@@ -209,7 +209,7 @@ public:
|
||||
|
||||
// Stop all fibers, without waiting. Safe to be called multiple times.
|
||||
void do_stop() noexcept;
|
||||
void really_do_stop();
|
||||
future<> really_do_stop();
|
||||
|
||||
// Submit a column family to be compacted.
|
||||
void submit(column_family* cf);
|
||||
|
||||
@@ -80,7 +80,11 @@ compaction_descriptor leveled_compaction_strategy::get_major_compaction_job(colu
|
||||
}
|
||||
|
||||
void leveled_compaction_strategy::notify_completion(const std::vector<shared_sstable>& removed, const std::vector<shared_sstable>& added) {
|
||||
if (removed.empty() || added.empty()) {
|
||||
// All the updates here are only relevant for regular compaction's round-robin picking policy, and if
|
||||
// last_compacted_keys wasn't generated by regular, it means regular is disabled since last restart,
|
||||
// therefore we can skip the updates here until regular runs for the first time. Once it runs,
|
||||
// it will be able to generate last_compacted_keys correctly by looking at metadata of files.
|
||||
if (removed.empty() || added.empty() || !_last_compacted_keys) {
|
||||
return;
|
||||
}
|
||||
auto min_level = std::numeric_limits<uint32_t>::max();
|
||||
|
||||
@@ -225,6 +225,7 @@ time_window_compaction_strategy::get_sstables_for_compaction(column_family& cf,
|
||||
auto gc_before = gc_clock::now() - cf.schema()->gc_grace_seconds();
|
||||
|
||||
if (candidates.empty()) {
|
||||
_estimated_remaining_tasks = 0;
|
||||
return compaction_descriptor();
|
||||
}
|
||||
|
||||
|
||||
@@ -1403,7 +1403,7 @@ serviceLevelOrRoleName returns [sstring name]
|
||||
std::transform($name.begin(), $name.end(), $name.begin(), ::tolower); }
|
||||
| t=STRING_LITERAL { $name = sstring($t.text); }
|
||||
| t=QUOTED_NAME { $name = sstring($t.text); }
|
||||
| k=unreserved_keyword { $name = sstring($t.text);
|
||||
| k=unreserved_keyword { $name = k;
|
||||
std::transform($name.begin(), $name.end(), $name.begin(), ::tolower);}
|
||||
| QMARK {add_recognition_error("Bind variables cannot be used for service levels or role names");}
|
||||
;
|
||||
|
||||
@@ -25,6 +25,7 @@
|
||||
|
||||
#include "cql3_type.hh"
|
||||
#include "cql3/util.hh"
|
||||
#include "exceptions/exceptions.hh"
|
||||
#include "ut_name.hh"
|
||||
#include "database.hh"
|
||||
#include "user_types_metadata.hh"
|
||||
@@ -448,7 +449,20 @@ sstring maybe_quote(const sstring& identifier) {
|
||||
}
|
||||
|
||||
if (!need_quotes) {
|
||||
return identifier;
|
||||
// A seemingly valid identifier matching [a-z][a-z0-9_]* may still
|
||||
// need quoting if it is a CQL keyword, e.g., "to" (see issue #9450).
|
||||
// While our parser Cql.g has different production rules for different
|
||||
// types of identifiers (column names, table names, etc.), all of
|
||||
// these behave identically for alphanumeric strings: they exclude
|
||||
// many keywords but allow keywords listed as "unreserved keywords".
|
||||
// So we can use any of them, for example cident.
|
||||
try {
|
||||
cql3::util::do_with_parser(identifier, std::mem_fn(&cql3_parser::CqlParser::cident));
|
||||
return identifier;
|
||||
} catch(exceptions::syntax_exception&) {
|
||||
// This alphanumeric string is not a valid identifier, so fall
|
||||
// through to have it quoted:
|
||||
}
|
||||
}
|
||||
if (num_quotes == 0) {
|
||||
return make_sstring("\"", identifier, "\"");
|
||||
|
||||
@@ -109,9 +109,7 @@ public:
|
||||
virtual seastar::future<seastar::shared_ptr<cql_transport::messages::result_message>>
|
||||
execute(query_processor& qp, service::query_state& state, const query_options& options) const = 0;
|
||||
|
||||
virtual bool depends_on_keyspace(const seastar::sstring& ks_name) const = 0;
|
||||
|
||||
virtual bool depends_on_column_family(const seastar::sstring& cf_name) const = 0;
|
||||
virtual bool depends_on(std::string_view ks_name, std::optional<std::string_view> cf_name) const = 0;
|
||||
|
||||
virtual seastar::shared_ptr<const metadata> get_result_metadata() const = 0;
|
||||
|
||||
|
||||
@@ -123,10 +123,38 @@ managed_bytes_opt get_value(const column_value& col, const column_value_eval_bag
|
||||
format("Column definition {} does not match any column in the query selection",
|
||||
cdef->name_as_text()));
|
||||
}
|
||||
const auto deserialized = cdef->type->deserialize(managed_bytes_view(*data.other_columns[index]));
|
||||
const managed_bytes_opt& serialized = data.other_columns[index];
|
||||
if (!serialized) {
|
||||
// For null[i] we return null.
|
||||
return std::nullopt;
|
||||
}
|
||||
const auto deserialized = cdef->type->deserialize(managed_bytes_view(*serialized));
|
||||
const auto& data_map = value_cast<map_type_impl::native_type>(deserialized);
|
||||
const auto key = evaluate_to_raw_view(col.sub, options);
|
||||
auto&& key_type = col_type->name_comparator();
|
||||
if (key.is_null()) {
|
||||
// For m[null] return null.
|
||||
// This is different from Cassandra - which treats m[null]
|
||||
// as an invalid request error. But m[null] -> null is more
|
||||
// consistent with our usual null treatment (e.g., both
|
||||
// null[2] and null < 2 return null). It will also allow us
|
||||
// to support non-constant subscripts (e.g., m[a]) where "a"
|
||||
// may be null in some rows and non-null in others, and it's
|
||||
// not an error.
|
||||
return std::nullopt;
|
||||
}
|
||||
if (key.is_unset_value()) {
|
||||
// An m[?] with ? bound to UNSET_VALUE is an invalid query.
|
||||
// We could have detected it earlier while binding, but since
|
||||
// we currently don't, we must protect the following code
|
||||
// which can't work with an UNSET_VALUE. Note that the
|
||||
// placement of this check here means that in an empty table,
|
||||
// where we never need to evaluate the filter expression, this
|
||||
// error will not be detected.
|
||||
throw exceptions::invalid_request_exception(
|
||||
format("Unsupported unset map key for column {}",
|
||||
cdef->name_as_text()));
|
||||
}
|
||||
const auto found = key.with_linearized([&] (bytes_view key_bv) {
|
||||
using entry = std::pair<data_value, data_value>;
|
||||
return std::find_if(data_map.cbegin(), data_map.cend(), [&] (const entry& element) {
|
||||
|
||||
@@ -970,7 +970,7 @@ bool query_processor::migration_subscriber::should_invalidate(
|
||||
sstring ks_name,
|
||||
std::optional<sstring> cf_name,
|
||||
::shared_ptr<cql_statement> statement) {
|
||||
return statement->depends_on_keyspace(ks_name) && (!cf_name || statement->depends_on_column_family(*cf_name));
|
||||
return statement->depends_on(ks_name, cf_name);
|
||||
}
|
||||
|
||||
future<> query_processor::query_internal(
|
||||
|
||||
@@ -450,11 +450,16 @@ bool result_set_builder::restrictions_filter::do_filter(const selection& selecti
|
||||
}
|
||||
|
||||
auto clustering_columns_restrictions = _restrictions->get_clustering_columns_restrictions();
|
||||
if (dynamic_pointer_cast<cql3::restrictions::multi_column_restriction>(clustering_columns_restrictions)) {
|
||||
bool has_multi_col_clustering_restrictions =
|
||||
dynamic_pointer_cast<cql3::restrictions::multi_column_restriction>(clustering_columns_restrictions) != nullptr;
|
||||
if (has_multi_col_clustering_restrictions) {
|
||||
clustering_key_prefix ckey = clustering_key_prefix::from_exploded(clustering_key);
|
||||
return expr::is_satisfied_by(
|
||||
bool multi_col_clustering_satisfied = expr::is_satisfied_by(
|
||||
clustering_columns_restrictions->expression,
|
||||
partition_key, clustering_key, static_row, row, selection, _options);
|
||||
if (!multi_col_clustering_satisfied) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
auto static_row_iterator = static_row.iterator();
|
||||
@@ -502,6 +507,13 @@ bool result_set_builder::restrictions_filter::do_filter(const selection& selecti
|
||||
if (_skip_ck_restrictions) {
|
||||
continue;
|
||||
}
|
||||
if (has_multi_col_clustering_restrictions) {
|
||||
// Mixing multi column and single column restrictions on clustering
|
||||
// key columns is forbidden.
|
||||
// Since there are multi column restrictions we have to skip
|
||||
// evaluating single column restrictions or we will get an error.
|
||||
continue;
|
||||
}
|
||||
auto clustering_key_restrictions_map = _restrictions->get_single_column_clustering_key_restrictions();
|
||||
auto restr_it = clustering_key_restrictions_map.find(cdef);
|
||||
if (restr_it == clustering_key_restrictions_map.end()) {
|
||||
|
||||
@@ -46,13 +46,7 @@ uint32_t cql3::statements::authentication_statement::get_bound_terms() const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool cql3::statements::authentication_statement::depends_on_keyspace(
|
||||
const sstring& ks_name) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool cql3::statements::authentication_statement::depends_on_column_family(
|
||||
const sstring& cf_name) const {
|
||||
bool cql3::statements::authentication_statement::depends_on(std::string_view ks_name, std::optional<std::string_view> cf_name) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@@ -55,9 +55,7 @@ public:
|
||||
|
||||
uint32_t get_bound_terms() const override;
|
||||
|
||||
bool depends_on_keyspace(const sstring& ks_name) const override;
|
||||
|
||||
bool depends_on_column_family(const sstring& cf_name) const override;
|
||||
bool depends_on(std::string_view ks_name, std::optional<std::string_view> cf_name) const override;
|
||||
|
||||
future<> check_access(service::storage_proxy& proxy, const service::client_state& state) const override;
|
||||
|
||||
|
||||
@@ -48,13 +48,7 @@ uint32_t cql3::statements::authorization_statement::get_bound_terms() const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool cql3::statements::authorization_statement::depends_on_keyspace(
|
||||
const sstring& ks_name) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool cql3::statements::authorization_statement::depends_on_column_family(
|
||||
const sstring& cf_name) const {
|
||||
bool cql3::statements::authorization_statement::depends_on(std::string_view ks_name, std::optional<std::string_view> cf_name) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@@ -59,9 +59,7 @@ public:
|
||||
|
||||
uint32_t get_bound_terms() const override;
|
||||
|
||||
bool depends_on_keyspace(const sstring& ks_name) const override;
|
||||
|
||||
bool depends_on_column_family(const sstring& cf_name) const override;
|
||||
bool depends_on(std::string_view ks_name, std::optional<std::string_view> cf_name) const override;
|
||||
|
||||
future<> check_access(service::storage_proxy& proxy, const service::client_state& state) const override;
|
||||
|
||||
|
||||
@@ -98,14 +98,9 @@ batch_statement::batch_statement(type type_,
|
||||
{
|
||||
}
|
||||
|
||||
bool batch_statement::depends_on_keyspace(const sstring& ks_name) const
|
||||
bool batch_statement::depends_on(std::string_view ks_name, std::optional<std::string_view> cf_name) const
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
bool batch_statement::depends_on_column_family(const sstring& cf_name) const
|
||||
{
|
||||
return false;
|
||||
return boost::algorithm::any_of(_statements, [&ks_name, &cf_name] (auto&& s) { return s.statement->depends_on(ks_name, cf_name); });
|
||||
}
|
||||
|
||||
uint32_t batch_statement::get_bound_terms() const
|
||||
|
||||
@@ -115,9 +115,7 @@ public:
|
||||
std::unique_ptr<attributes> attrs,
|
||||
cql_stats& stats);
|
||||
|
||||
virtual bool depends_on_keyspace(const sstring& ks_name) const override;
|
||||
|
||||
virtual bool depends_on_column_family(const sstring& cf_name) const override;
|
||||
virtual bool depends_on(std::string_view ks_name, std::optional<std::string_view> cf_name) const override;
|
||||
|
||||
virtual uint32_t get_bound_terms() const override;
|
||||
|
||||
|
||||
@@ -46,6 +46,7 @@
|
||||
#include "cdc/cdc_extension.hh"
|
||||
#include "gms/feature.hh"
|
||||
#include "gms/feature_service.hh"
|
||||
#include "utils/bloom_calculations.hh"
|
||||
|
||||
#include <boost/algorithm/string/predicate.hpp>
|
||||
|
||||
@@ -168,6 +169,16 @@ void cf_prop_defs::validate(const database& db, const schema::extensions_map& sc
|
||||
throw exceptions::configuration_exception(KW_MAX_INDEX_INTERVAL + " must be greater than " + KW_MIN_INDEX_INTERVAL);
|
||||
}
|
||||
|
||||
if (get_simple(KW_BF_FP_CHANCE)) {
|
||||
double bloom_filter_fp_chance = get_double(KW_BF_FP_CHANCE, 0/*not used*/);
|
||||
double min_bloom_filter_fp_chance = utils::bloom_calculations::min_supported_bloom_filter_fp_chance();
|
||||
if (bloom_filter_fp_chance <= min_bloom_filter_fp_chance || bloom_filter_fp_chance > 1.0) {
|
||||
throw exceptions::configuration_exception(format(
|
||||
"{} must be larger than {} and less than or equal to 1.0 (got {})",
|
||||
KW_BF_FP_CHANCE, min_bloom_filter_fp_chance, bloom_filter_fp_chance));
|
||||
}
|
||||
}
|
||||
|
||||
speculative_retry::from_sstring(get_string(KW_SPECULATIVE_RETRY, speculative_retry(speculative_retry::type::NONE, 0).to_sstring()));
|
||||
}
|
||||
|
||||
|
||||
@@ -571,12 +571,8 @@ modification_statement::validate(service::storage_proxy&, const service::client_
|
||||
}
|
||||
}
|
||||
|
||||
bool modification_statement::depends_on_keyspace(const sstring& ks_name) const {
|
||||
return keyspace() == ks_name;
|
||||
}
|
||||
|
||||
bool modification_statement::depends_on_column_family(const sstring& cf_name) const {
|
||||
return column_family() == cf_name;
|
||||
bool modification_statement::depends_on(std::string_view ks_name, std::optional<std::string_view> cf_name) const {
|
||||
return keyspace() == ks_name && (!cf_name || column_family() == *cf_name);
|
||||
}
|
||||
|
||||
void modification_statement::add_operation(::shared_ptr<operation> op) {
|
||||
|
||||
@@ -165,9 +165,7 @@ public:
|
||||
// Validate before execute, using client state and current schema
|
||||
void validate(service::storage_proxy&, const service::client_state& state) const override;
|
||||
|
||||
virtual bool depends_on_keyspace(const sstring& ks_name) const override;
|
||||
|
||||
virtual bool depends_on_column_family(const sstring& cf_name) const override;
|
||||
virtual bool depends_on(std::string_view ks_name, std::optional<std::string_view> cf_name) const override;
|
||||
|
||||
void add_operation(::shared_ptr<operation> op);
|
||||
|
||||
|
||||
@@ -67,12 +67,7 @@ future<> schema_altering_statement::grant_permissions_to_creator(const service::
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
bool schema_altering_statement::depends_on_keyspace(const sstring& ks_name) const
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
bool schema_altering_statement::depends_on_column_family(const sstring& cf_name) const
|
||||
bool schema_altering_statement::depends_on(std::string_view ks_name, std::optional<std::string_view> cf_name) const
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -79,9 +79,7 @@ protected:
|
||||
*/
|
||||
virtual future<> grant_permissions_to_creator(const service::client_state&) const;
|
||||
|
||||
virtual bool depends_on_keyspace(const sstring& ks_name) const override;
|
||||
|
||||
virtual bool depends_on_column_family(const sstring& cf_name) const override;
|
||||
virtual bool depends_on(std::string_view ks_name, std::optional<std::string_view> cf_name) const override;
|
||||
|
||||
virtual uint32_t get_bound_terms() const override;
|
||||
|
||||
|
||||
@@ -194,12 +194,8 @@ void select_statement::validate(service::storage_proxy&, const service::client_s
|
||||
// Nothing to do, all validation has been done by raw_statement::prepare()
|
||||
}
|
||||
|
||||
bool select_statement::depends_on_keyspace(const sstring& ks_name) const {
|
||||
return keyspace() == ks_name;
|
||||
}
|
||||
|
||||
bool select_statement::depends_on_column_family(const sstring& cf_name) const {
|
||||
return column_family() == cf_name;
|
||||
bool select_statement::depends_on(std::string_view ks_name, std::optional<std::string_view> cf_name) const {
|
||||
return keyspace() == ks_name && (!cf_name || column_family() == *cf_name);
|
||||
}
|
||||
|
||||
const sstring& select_statement::keyspace() const {
|
||||
|
||||
@@ -127,8 +127,7 @@ public:
|
||||
virtual uint32_t get_bound_terms() const override;
|
||||
virtual future<> check_access(service::storage_proxy& proxy, const service::client_state& state) const override;
|
||||
virtual void validate(service::storage_proxy&, const service::client_state& state) const override;
|
||||
virtual bool depends_on_keyspace(const sstring& ks_name) const override;
|
||||
virtual bool depends_on_column_family(const sstring& cf_name) const override;
|
||||
virtual bool depends_on(std::string_view ks_name, std::optional<std::string_view> cf_name) const override;
|
||||
|
||||
virtual future<::shared_ptr<cql_transport::messages::result_message>> execute(query_processor& qp,
|
||||
service::query_state& state, const query_options& options) const override;
|
||||
|
||||
@@ -30,13 +30,7 @@ uint32_t service_level_statement::get_bound_terms() const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool service_level_statement::depends_on_keyspace(
|
||||
const sstring &ks_name) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool service_level_statement::depends_on_column_family(
|
||||
const sstring &cf_name) const {
|
||||
// Dependency query for the given keyspace / optional table. The arguments are
// not examined; the result is always false.
bool service_level_statement::depends_on(std::string_view ks_name, std::optional<std::string_view> cf_name) const {
    constexpr bool has_dependency = false;
    return has_dependency;
}
|
||||
|
||||
|
||||
@@ -56,9 +56,7 @@ public:
|
||||
|
||||
uint32_t get_bound_terms() const override;
|
||||
|
||||
bool depends_on_keyspace(const sstring& ks_name) const override;
|
||||
|
||||
bool depends_on_column_family(const sstring& cf_name) const override;
|
||||
bool depends_on(std::string_view ks_name, std::optional<std::string_view> cf_name) const override;
|
||||
|
||||
future<> check_access(service::storage_proxy& sp, const service::client_state& state) const override;
|
||||
|
||||
|
||||
@@ -43,7 +43,7 @@ void sl_prop_defs::validate() {
|
||||
data_value v = duration_type->deserialize(duration_type->from_string(*repr));
|
||||
cql_duration duration = static_pointer_cast<const duration_type_impl>(duration_type)->from_value(v);
|
||||
if (duration.months || duration.days) {
|
||||
throw exceptions::invalid_request_exception("Timeout values cannot be longer than 24h");
|
||||
throw exceptions::invalid_request_exception("Timeout values cannot be expressed in days/months");
|
||||
}
|
||||
if (duration.nanoseconds % 1'000'000 != 0) {
|
||||
throw exceptions::invalid_request_exception("Timeout values must be expressed in millisecond granularity");
|
||||
|
||||
@@ -67,12 +67,7 @@ std::unique_ptr<prepared_statement> truncate_statement::prepare(database& db,cql
|
||||
return std::make_unique<prepared_statement>(::make_shared<truncate_statement>(*this));
|
||||
}
|
||||
|
||||
bool truncate_statement::depends_on_keyspace(const sstring& ks_name) const
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
bool truncate_statement::depends_on_column_family(const sstring& cf_name) const
|
||||
bool truncate_statement::depends_on(std::string_view ks_name, std::optional<std::string_view> cf_name) const
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -58,9 +58,7 @@ public:
|
||||
|
||||
virtual std::unique_ptr<prepared_statement> prepare(database& db, cql_stats& stats) override;
|
||||
|
||||
virtual bool depends_on_keyspace(const sstring& ks_name) const override;
|
||||
|
||||
virtual bool depends_on_column_family(const sstring& cf_name) const override;
|
||||
virtual bool depends_on(std::string_view ks_name, std::optional<std::string_view> cf_name) const override;
|
||||
|
||||
virtual future<> check_access(service::storage_proxy& proxy, const service::client_state& state) const override;
|
||||
|
||||
|
||||
@@ -74,12 +74,7 @@ std::unique_ptr<prepared_statement> use_statement::prepare(database& db, cql_sta
|
||||
|
||||
}
|
||||
|
||||
bool use_statement::depends_on_keyspace(const sstring& ks_name) const
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
bool use_statement::depends_on_column_family(const sstring& cf_name) const
|
||||
bool use_statement::depends_on(std::string_view ks_name, std::optional<std::string_view> cf_name) const
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -59,9 +59,7 @@ public:
|
||||
|
||||
virtual uint32_t get_bound_terms() const override;
|
||||
|
||||
virtual bool depends_on_keyspace(const seastar::sstring& ks_name) const override;
|
||||
|
||||
virtual bool depends_on_column_family(const seastar::sstring& cf_name) const override;
|
||||
virtual bool depends_on(std::string_view ks_name, std::optional<std::string_view> cf_name) const override;
|
||||
|
||||
virtual seastar::future<> check_access(service::storage_proxy& proxy, const service::client_state& state) const override;
|
||||
|
||||
|
||||
@@ -31,6 +31,8 @@
|
||||
#include "types/listlike_partial_deserializing_iterator.hh"
|
||||
#include "utils/managed_bytes.hh"
|
||||
#include "exceptions/exceptions.hh"
|
||||
#include <boost/algorithm/string/trim_all.hpp>
|
||||
#include <boost/algorithm/string.hpp>
|
||||
|
||||
static inline bool is_control_char(char c) {
|
||||
return c >= 0 && c <= 0x1F;
|
||||
@@ -212,6 +214,17 @@ struct from_json_object_visitor {
|
||||
}
|
||||
bytes operator()(const boolean_type_impl& t) {
|
||||
if (!value.IsBool()) {
|
||||
if (value.IsString()) {
|
||||
std::string str(rjson::to_string_view(value));
|
||||
boost::trim_all(str);
|
||||
boost::to_lower(str);
|
||||
|
||||
if (str == "true") {
|
||||
return t.decompose(true);
|
||||
} else if (str == "false") {
|
||||
return t.decompose(false);
|
||||
}
|
||||
}
|
||||
throw marshal_exception(format("Invalid JSON object {}", value));
|
||||
}
|
||||
return t.decompose(value.GetBool());
|
||||
|
||||
@@ -87,6 +87,13 @@ std::unique_ptr<cql3::statements::raw::select_statement> build_select_statement(
|
||||
/// forbids non-alpha-numeric characters in identifier names.
|
||||
/// Quoting involves wrapping the string in double-quotes ("). A double-quote
|
||||
/// character itself is quoted by doubling it.
|
||||
/// maybe_quote() also quotes reserved CQL keywords (e.g., "to", "where")
|
||||
/// but doesn't quote *unreserved* keywords (like ttl, int or as).
|
||||
/// Note that this means that if new reserved keywords are added to the
|
||||
/// parser, a saved output of maybe_quote() may no longer be parsable by
|
||||
/// parser. To avoid this forward-compatibility issue, use quote() instead
|
||||
/// of maybe_quote() - to unconditionally quote an identifier even if it is
|
||||
/// lowercase and not (yet) a keyword.
|
||||
sstring maybe_quote(const sstring& s);
|
||||
|
||||
// Check whether timestamp is not too far in the future as this probably
|
||||
|
||||
22
database.cc
22
database.cc
@@ -926,10 +926,9 @@ bool database::update_column_family(schema_ptr new_schema) {
|
||||
return columns_changed;
|
||||
}
|
||||
|
||||
future<> database::remove(const column_family& cf) noexcept {
|
||||
void database::remove(const table& cf) noexcept {
|
||||
auto s = cf.schema();
|
||||
auto& ks = find_keyspace(s->ks_name());
|
||||
co_await _querier_cache.evict_all_for_table(s->id());
|
||||
_column_families.erase(s->id());
|
||||
ks.metadata()->remove_column_family(s);
|
||||
_ks_cf_to_uuid.erase(std::make_pair(s->ks_name(), s->cf_name()));
|
||||
@@ -946,13 +945,20 @@ future<> database::drop_column_family(const sstring& ks_name, const sstring& cf_
|
||||
auto& ks = find_keyspace(ks_name);
|
||||
auto uuid = find_uuid(ks_name, cf_name);
|
||||
auto cf = _column_families.at(uuid);
|
||||
co_await remove(*cf);
|
||||
remove(*cf);
|
||||
cf->clear_views();
|
||||
co_return co_await cf->await_pending_ops().then([this, &ks, cf, tsf = std::move(tsf), snapshot] {
|
||||
return truncate(ks, *cf, std::move(tsf), snapshot).finally([this, cf] {
|
||||
return cf->stop();
|
||||
});
|
||||
}).finally([cf] {});
|
||||
co_await cf->await_pending_ops();
|
||||
co_await _querier_cache.evict_all_for_table(cf->schema()->id());
|
||||
std::exception_ptr ex;
|
||||
try {
|
||||
co_await truncate(ks, *cf, std::move(tsf), snapshot);
|
||||
} catch (...) {
|
||||
ex = std::current_exception();
|
||||
}
|
||||
co_await cf->stop();
|
||||
if (ex) {
|
||||
std::rethrow_exception(std::move(ex));
|
||||
}
|
||||
}
|
||||
|
||||
const utils::UUID& database::find_uuid(std::string_view ks, std::string_view cf) const {
|
||||
|
||||
@@ -1384,6 +1384,7 @@ private:
|
||||
Future update_write_metrics(Future&& f);
|
||||
void update_write_metrics_for_timed_out_write();
|
||||
future<> create_keyspace(const lw_shared_ptr<keyspace_metadata>&, bool is_bootstrap, system_keyspace system);
|
||||
void remove(const table&) noexcept;
|
||||
public:
|
||||
static utils::UUID empty_version;
|
||||
|
||||
@@ -1582,7 +1583,6 @@ public:
|
||||
|
||||
bool update_column_family(schema_ptr s);
|
||||
future<> drop_column_family(const sstring& ks_name, const sstring& cf_name, timestamp_func, bool with_snapshot = true);
|
||||
future<> remove(const column_family&) noexcept;
|
||||
|
||||
const logalloc::region_group& dirty_memory_region_group() const {
|
||||
return _dirty_memory_manager.region_group();
|
||||
|
||||
@@ -39,6 +39,7 @@
|
||||
*/
|
||||
|
||||
#include <chrono>
|
||||
#include <exception>
|
||||
#include <seastar/core/future-util.hh>
|
||||
#include <seastar/core/do_with.hh>
|
||||
#include <seastar/core/semaphore.hh>
|
||||
@@ -306,6 +307,7 @@ future<> db::batchlog_manager::replay_all_failed_batches() {
|
||||
} catch (no_such_keyspace& ex) {
|
||||
// should probably ignore and drop the batch
|
||||
} catch (...) {
|
||||
blogger.warn("Replay failed (will retry): {}", std::current_exception());
|
||||
// timeout, overload etc.
|
||||
// Do _not_ remove the batch, assuming we got a node write error.
|
||||
// Since we don't have hints (which origin is satisfied with),
|
||||
|
||||
@@ -860,6 +860,8 @@ db::config::config(std::shared_ptr<db::extensions> exts)
|
||||
"Flush tables in the system_schema keyspace after schema modification. This is required for crash recovery, but slows down tests and can be disabled for them")
|
||||
, restrict_replication_simplestrategy(this, "restrict_replication_simplestrategy", liveness::LiveUpdate, value_status::Used, db::tri_mode_restriction_t::mode::FALSE, "Controls whether to disable SimpleStrategy replication. Can be true, false, or warn.")
|
||||
, restrict_dtcs(this, "restrict_dtcs", liveness::LiveUpdate, value_status::Used, db::tri_mode_restriction_t::mode::WARN, "Controls whether to prevent setting DateTieredCompactionStrategy. Can be true, false, or warn.")
|
||||
, cache_index_pages(this, "cache_index_pages", liveness::LiveUpdate, value_status::Used, true,
|
||||
"Keep SSTable index pages in the global cache after a SSTable read. Expected to improve performance for workloads with big partitions, but may degrade performance for workloads with small partitions.")
|
||||
, default_log_level(this, "default_log_level", value_status::Used)
|
||||
, logger_log_level(this, "logger_log_level", value_status::Used)
|
||||
, log_to_stdout(this, "log_to_stdout", value_status::Used)
|
||||
|
||||
@@ -372,6 +372,8 @@ public:
|
||||
named_value<tri_mode_restriction> restrict_replication_simplestrategy;
|
||||
named_value<tri_mode_restriction> restrict_dtcs;
|
||||
|
||||
named_value<bool> cache_index_pages;
|
||||
|
||||
seastar::logging_settings logging_settings(const boost::program_options::variables_map&) const;
|
||||
|
||||
const db::extensions& extensions() const;
|
||||
|
||||
@@ -119,8 +119,9 @@ future<> snapshot_ctl::take_column_family_snapshot(sstring ks_name, std::vector<
|
||||
return check_snapshot_not_exist(ks_name, tag, tables).then([this, ks_name, tables, tag, sf] {
|
||||
return do_with(std::vector<sstring>(std::move(tables)),[this, ks_name, tag, sf](const std::vector<sstring>& tables) {
|
||||
return do_for_each(tables, [ks_name, tag, sf, this] (const sstring& table_name) {
|
||||
if (table_name.find(".") != sstring::npos) {
|
||||
throw std::invalid_argument("Cannot take a snapshot of a secondary index by itself. Run snapshot on the table that owns the index.");
|
||||
auto& cf = _db.local().find_column_family(ks_name, table_name);
|
||||
if (cf.schema()->is_view()) {
|
||||
throw std::invalid_argument("Do not take a snapshot of a materialized view or a secondary index by itself. Run snapshot on the base table instead.");
|
||||
}
|
||||
return _db.invoke_on_all([ks_name, table_name, tag, sf] (database &db) {
|
||||
auto& cf = db.find_column_family(ks_name, table_name);
|
||||
|
||||
@@ -350,7 +350,11 @@ public:
|
||||
view_filter_checking_visitor(const schema& base, const view_info& view)
|
||||
: _base(base)
|
||||
, _view(view)
|
||||
, _selection(cql3::selection::selection::wildcard(_base.shared_from_this()))
|
||||
, _selection(cql3::selection::selection::for_columns(_base.shared_from_this(),
|
||||
boost::copy_range<std::vector<const column_definition*>>(
|
||||
_base.regular_columns() | boost::adaptors::transformed([] (const column_definition& cdef) { return &cdef; }))
|
||||
)
|
||||
)
|
||||
{}
|
||||
|
||||
void accept_new_partition(const partition_key& key, uint64_t row_count) {
|
||||
@@ -887,13 +891,18 @@ void view_updates::generate_update(
|
||||
bool same_row = true;
|
||||
for (auto col_id : col_ids) {
|
||||
auto* after = update.cells().find_cell(col_id);
|
||||
// Note: multi-cell columns can't be part of the primary key.
|
||||
auto& cdef = _base->regular_column_at(col_id);
|
||||
if (existing) {
|
||||
auto* before = existing->cells().find_cell(col_id);
|
||||
// Note that this cell is necessarily atomic, because col_ids are
|
||||
// view key columns, and keys must be atomic.
|
||||
if (before && before->as_atomic_cell(cdef).is_live()) {
|
||||
if (after && after->as_atomic_cell(cdef).is_live()) {
|
||||
auto cmp = compare_atomic_cell_for_merge(before->as_atomic_cell(cdef), after->as_atomic_cell(cdef));
|
||||
// We need to compare just the values of the keys, not
|
||||
// metadata like the timestamp. This is because below,
|
||||
// if the old and new view row have the same key, we need
|
||||
// to be sure to reach the update_entry() case.
|
||||
auto cmp = compare_unsigned(before->as_atomic_cell(cdef).value(), after->as_atomic_cell(cdef).value());
|
||||
if (cmp != 0) {
|
||||
same_row = false;
|
||||
}
|
||||
@@ -913,7 +922,13 @@ void view_updates::generate_update(
|
||||
if (same_row) {
|
||||
update_entry(base_key, update, *existing, now);
|
||||
} else {
|
||||
replace_entry(base_key, update, *existing, now);
|
||||
// This code doesn't work if the old and new view row have the
|
||||
// same key, because if they do we get both data and tombstone
|
||||
// for the same timestamp (now) and the tombstone wins. This
|
||||
// is why we need the "same_row" case above - it's not just a
|
||||
// performance optimization.
|
||||
delete_old_entry(base_key, *existing, update, now);
|
||||
create_entry(base_key, update, now);
|
||||
}
|
||||
} else {
|
||||
delete_old_entry(base_key, *existing, update, now);
|
||||
@@ -1320,7 +1335,7 @@ future<> mutate_MV(
|
||||
auto mut_ptr = remote_endpoints.empty() ? std::make_unique<frozen_mutation>(std::move(mut.fm)) : std::make_unique<frozen_mutation>(mut.fm);
|
||||
tracing::trace(tr_state, "Locally applying view update for {}.{}; base token = {}; view token = {}",
|
||||
mut.s->ks_name(), mut.s->cf_name(), base_token, view_token);
|
||||
local_view_update = service::get_local_storage_proxy().mutate_locally(mut.s, *mut_ptr, std::move(tr_state), db::commitlog::force_sync::no).then_wrapped(
|
||||
local_view_update = service::get_local_storage_proxy().mutate_locally(mut.s, *mut_ptr, tr_state, db::commitlog::force_sync::no).then_wrapped(
|
||||
[s = mut.s, &stats, &cf_stats, tr_state, base_token, view_token, my_address, mut_ptr = std::move(mut_ptr),
|
||||
units = sem_units.split(sem_units.count())] (future<>&& f) {
|
||||
--stats.writes;
|
||||
|
||||
@@ -164,10 +164,7 @@ private:
|
||||
void delete_old_entry(const partition_key& base_key, const clustering_row& existing, const clustering_row& update, gc_clock::time_point now);
|
||||
void do_delete_old_entry(const partition_key& base_key, const clustering_row& existing, const clustering_row& update, gc_clock::time_point now);
|
||||
void update_entry(const partition_key& base_key, const clustering_row& update, const clustering_row& existing, gc_clock::time_point now);
|
||||
void replace_entry(const partition_key& base_key, const clustering_row& update, const clustering_row& existing, gc_clock::time_point now) {
|
||||
create_entry(base_key, update, now);
|
||||
delete_old_entry(base_key, existing, update, now);
|
||||
}
|
||||
void update_entry_for_computed_column(const partition_key& base_key, const clustering_row& update, const std::optional<clustering_row>& existing, gc_clock::time_point now);
|
||||
};
|
||||
|
||||
class view_update_builder {
|
||||
|
||||
@@ -215,6 +215,12 @@ public:
|
||||
});
|
||||
}
|
||||
|
||||
// Acquires `_max_background_work` units from
// `_background_work_flush_serializer` and, once they are granted, exchanges
// them for a flush permit through get_flush_permit().
future<flush_permit> get_all_flush_permits() {
    auto all_units = get_units(_background_work_flush_serializer, _max_background_work);
    return all_units.then([this] (auto&& acquired) {
        return this->get_flush_permit(std::move(acquired));
    });
}
|
||||
|
||||
bool has_extraneous_flushes_requested() const {
|
||||
return _extraneous_flushes > 0;
|
||||
}
|
||||
|
||||
6
dist/common/scripts/scylla_coredump_setup
vendored
6
dist/common/scripts/scylla_coredump_setup
vendored
@@ -127,10 +127,14 @@ WantedBy=multi-user.target
|
||||
# - Storage: /path/to/file (inaccessible)
|
||||
# - Storage: /path/to/file
|
||||
#
|
||||
# After systemd-v248, available coredump file output changed like this:
|
||||
# - Storage: /path/to/file (present)
|
||||
# We need to support both versions.
|
||||
#
|
||||
# reference: https://github.com/systemd/systemd/commit/47f50642075a7a215c9f7b600599cbfee81a2913
|
||||
|
||||
corefail = False
|
||||
res = re.findall(r'Storage: (.*)$', coreinfo, flags=re.MULTILINE)
|
||||
res = re.findall(r'Storage: (\S+)(?: \(.+\))?$', coreinfo, flags=re.MULTILINE)
|
||||
# v232 or later
|
||||
if res:
|
||||
corepath = res[0]
|
||||
|
||||
1
dist/docker/debian/build_docker.sh
vendored
1
dist/docker/debian/build_docker.sh
vendored
@@ -101,6 +101,7 @@ run bash -ec "cat /scylla_bashrc >> /etc/bash.bashrc"
|
||||
run mkdir -p /etc/supervisor.conf.d
|
||||
run mkdir -p /var/log/scylla
|
||||
run chown -R scylla:scylla /var/lib/scylla
|
||||
run sed -i -e 's/^SCYLLA_ARGS=".*"$/SCYLLA_ARGS="--log-to-syslog 0 --log-to-stdout 1 --default-log-level info --network-stack posix"/' /etc/default/scylla-server
|
||||
|
||||
run mkdir -p /opt/scylladb/supervisor
|
||||
run touch /opt/scylladb/SCYLLA-CONTAINER-FILE
|
||||
|
||||
41
dist/docker/etc/sysconfig/scylla-server
vendored
41
dist/docker/etc/sysconfig/scylla-server
vendored
@@ -1,41 +0,0 @@
|
||||
# choose following mode: virtio, dpdk, posix
|
||||
NETWORK_MODE=posix
|
||||
|
||||
# tap device name(virtio)
|
||||
TAP=tap0
|
||||
|
||||
# bridge device name (virtio)
|
||||
BRIDGE=virbr0
|
||||
|
||||
# ethernet device name
|
||||
IFNAME=eth0
|
||||
|
||||
# setup NIC's and disks' interrupts, RPS, XPS, nomerges and I/O scheduler (posix)
|
||||
SET_NIC_AND_DISKS=no
|
||||
|
||||
# ethernet device driver (dpdk)
|
||||
ETHDRV=
|
||||
|
||||
# ethernet device PCI ID (dpdk)
|
||||
ETHPCIID=
|
||||
|
||||
# number of hugepages
|
||||
NR_HUGEPAGES=64
|
||||
|
||||
# user for process (must be root for dpdk)
|
||||
USER=scylla
|
||||
|
||||
# group for process
|
||||
GROUP=scylla
|
||||
|
||||
# scylla home dir
|
||||
SCYLLA_HOME=/var/lib/scylla
|
||||
|
||||
# scylla config dir
|
||||
SCYLLA_CONF=/etc/scylla
|
||||
|
||||
# scylla arguments
|
||||
SCYLLA_ARGS="--log-to-syslog 0 --log-to-stdout 1 --default-log-level info --network-stack posix"
|
||||
|
||||
# setup as AMI instance
|
||||
AMI=no
|
||||
@@ -45,7 +45,7 @@
|
||||
logging::logger fmr_logger("flat_mutation_reader");
|
||||
|
||||
flat_mutation_reader& flat_mutation_reader::operator=(flat_mutation_reader&& o) noexcept {
|
||||
if (_impl) {
|
||||
if (_impl && _impl->is_close_required()) {
|
||||
impl* ip = _impl.get();
|
||||
// Abort to enforce calling close() before readers are closed
|
||||
// to prevent leaks and potential use-after-free due to background
|
||||
@@ -58,7 +58,7 @@ flat_mutation_reader& flat_mutation_reader::operator=(flat_mutation_reader&& o)
|
||||
}
|
||||
|
||||
flat_mutation_reader::~flat_mutation_reader() {
|
||||
if (_impl) {
|
||||
if (_impl && _impl->is_close_required()) {
|
||||
impl* ip = _impl.get();
|
||||
// Abort to enforce calling close() before readers are closed
|
||||
// to prevent leaks and potential use-after-free due to background
|
||||
@@ -1344,7 +1344,7 @@ void mutation_fragment_stream_validating_filter::on_end_of_stream() {
|
||||
}
|
||||
|
||||
flat_mutation_reader_v2& flat_mutation_reader_v2::operator=(flat_mutation_reader_v2&& o) noexcept {
|
||||
if (_impl) {
|
||||
if (_impl && _impl->is_close_required()) {
|
||||
impl* ip = _impl.get();
|
||||
// Abort to enforce calling close() before readers are closed
|
||||
// to prevent leaks and potential use-after-free due to background
|
||||
@@ -1357,7 +1357,7 @@ flat_mutation_reader_v2& flat_mutation_reader_v2::operator=(flat_mutation_reader
|
||||
}
|
||||
|
||||
flat_mutation_reader_v2::~flat_mutation_reader_v2() {
|
||||
if (_impl) {
|
||||
if (_impl && _impl->is_close_required()) {
|
||||
impl* ip = _impl.get();
|
||||
// Abort to enforce calling close() before readers are closed
|
||||
// to prevent leaks and potential use-after-free due to background
|
||||
|
||||
@@ -142,6 +142,7 @@ public:
|
||||
private:
|
||||
tracked_buffer _buffer;
|
||||
size_t _buffer_size = 0;
|
||||
bool _close_required = false;
|
||||
protected:
|
||||
size_t max_buffer_size_in_bytes = default_max_buffer_size_in_bytes();
|
||||
bool _end_of_stream = false;
|
||||
@@ -175,6 +176,8 @@ public:
|
||||
bool is_end_of_stream() const { return _end_of_stream; }
|
||||
bool is_buffer_empty() const { return _buffer.empty(); }
|
||||
bool is_buffer_full() const { return _buffer_size >= max_buffer_size_in_bytes; }
|
||||
bool is_close_required() const { return _close_required; }
|
||||
void set_close_required() { _close_required = true; }
|
||||
static constexpr size_t default_max_buffer_size_in_bytes() { return 8 * 1024; }
|
||||
|
||||
mutation_fragment pop_mutation_fragment() {
|
||||
@@ -506,9 +509,15 @@ public:
|
||||
//
|
||||
// Can be used to skip over entire partitions if interleaved with
|
||||
// `operator()()` calls.
|
||||
future<> next_partition() { return _impl->next_partition(); }
|
||||
future<> next_partition() {
|
||||
_impl->set_close_required();
|
||||
return _impl->next_partition();
|
||||
}
|
||||
|
||||
future<> fill_buffer() { return _impl->fill_buffer(); }
|
||||
future<> fill_buffer() {
|
||||
_impl->set_close_required();
|
||||
return _impl->fill_buffer();
|
||||
}
|
||||
|
||||
// Changes the range of partitions to pr. The range can only be moved
|
||||
// forwards. pr.begin() needs to be larger than pr.end() of the previously
|
||||
@@ -517,6 +526,7 @@ public:
|
||||
// pr needs to be valid until the reader is destroyed or fast_forward_to()
|
||||
// is called again.
|
||||
future<> fast_forward_to(const dht::partition_range& pr) {
|
||||
_impl->set_close_required();
|
||||
return _impl->fast_forward_to(pr);
|
||||
}
|
||||
// Skips to a later range of rows.
|
||||
@@ -546,6 +556,7 @@ public:
|
||||
// In particular one must first enter a partition by fetching a `partition_start`
|
||||
// fragment before calling `fast_forward_to`.
|
||||
future<> fast_forward_to(position_range cr) {
|
||||
_impl->set_close_required();
|
||||
return _impl->fast_forward_to(std::move(cr));
|
||||
}
|
||||
// Closes the reader.
|
||||
|
||||
@@ -177,6 +177,7 @@ public:
|
||||
private:
|
||||
tracked_buffer _buffer;
|
||||
size_t _buffer_size = 0;
|
||||
bool _close_required = false;
|
||||
protected:
|
||||
size_t max_buffer_size_in_bytes = default_max_buffer_size_in_bytes();
|
||||
|
||||
@@ -216,6 +217,8 @@ public:
|
||||
bool is_end_of_stream() const { return _end_of_stream; }
|
||||
bool is_buffer_empty() const { return _buffer.empty(); }
|
||||
bool is_buffer_full() const { return _buffer_size >= max_buffer_size_in_bytes; }
|
||||
bool is_close_required() const { return _close_required; }
|
||||
void set_close_required() { _close_required = true; }
|
||||
static constexpr size_t default_max_buffer_size_in_bytes() { return 8 * 1024; }
|
||||
|
||||
mutation_fragment_v2 pop_mutation_fragment() {
|
||||
@@ -547,9 +550,15 @@ public:
|
||||
//
|
||||
// Can be used to skip over entire partitions if interleaved with
|
||||
// `operator()()` calls.
|
||||
future<> next_partition() { return _impl->next_partition(); }
|
||||
future<> next_partition() {
|
||||
_impl->set_close_required();
|
||||
return _impl->next_partition();
|
||||
}
|
||||
|
||||
future<> fill_buffer() { return _impl->fill_buffer(); }
|
||||
future<> fill_buffer() {
|
||||
_impl->set_close_required();
|
||||
return _impl->fill_buffer();
|
||||
}
|
||||
|
||||
// Changes the range of partitions to pr. The range can only be moved
|
||||
// forwards. pr.begin() needs to be larger than pr.end() of the previously
|
||||
@@ -558,6 +567,7 @@ public:
|
||||
// pr needs to be valid until the reader is destroyed or fast_forward_to()
|
||||
// is called again.
|
||||
future<> fast_forward_to(const dht::partition_range& pr) {
|
||||
_impl->set_close_required();
|
||||
return _impl->fast_forward_to(pr);
|
||||
}
|
||||
// Skips to a later range of rows.
|
||||
@@ -587,6 +597,7 @@ public:
|
||||
// In particular one must first enter a partition by fetching a `partition_start`
|
||||
// fragment before calling `fast_forward_to`.
|
||||
future<> fast_forward_to(position_range cr) {
|
||||
_impl->set_close_required();
|
||||
return _impl->fast_forward_to(std::move(cr));
|
||||
}
|
||||
// Closes the reader.
|
||||
|
||||
@@ -1672,6 +1672,10 @@ bool gossiper::is_normal(const inet_address& endpoint) const {
|
||||
return get_gossip_status(endpoint) == sstring(versioned_value::STATUS_NORMAL);
|
||||
}
|
||||
|
||||
bool gossiper::is_left(const inet_address& endpoint) const {
|
||||
return get_gossip_status(endpoint) == sstring(versioned_value::STATUS_LEFT);
|
||||
}
|
||||
|
||||
bool gossiper::is_normal_ring_member(const inet_address& endpoint) const {
|
||||
auto status = get_gossip_status(endpoint);
|
||||
return status == sstring(versioned_value::STATUS_NORMAL) || status == sstring(versioned_value::SHUTDOWN);
|
||||
|
||||
@@ -571,6 +571,7 @@ public:
|
||||
bool is_seed(const inet_address& endpoint) const;
|
||||
bool is_shutdown(const inet_address& endpoint) const;
|
||||
bool is_normal(const inet_address& endpoint) const;
|
||||
bool is_left(const inet_address& endpoint) const;
|
||||
// Check if a node is in NORMAL or SHUTDOWN status which means the node is
|
||||
// part of the token ring from the gossip point of view and operates in
|
||||
// normal status or was in normal status but is shutdown.
|
||||
|
||||
@@ -61,6 +61,10 @@ azure_snitch::azure_snitch(const sstring& fname, unsigned io_cpuid) : production
|
||||
}
|
||||
|
||||
future<> azure_snitch::load_config() {
|
||||
if (this_shard_id() != io_cpu_id()) {
|
||||
co_return;
|
||||
}
|
||||
|
||||
sstring region = co_await azure_api_call(REGION_NAME_QUERY_PATH);
|
||||
sstring azure_zone = co_await azure_api_call(ZONE_NAME_QUERY_PATH);
|
||||
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
#include "locator/ec2_snitch.hh"
|
||||
#include <seastar/core/seastar.hh>
|
||||
#include <seastar/core/sleep.hh>
|
||||
#include <seastar/core/do_with.hh>
|
||||
|
||||
#include <boost/algorithm/string/classification.hpp>
|
||||
#include <boost/algorithm/string/split.hpp>
|
||||
@@ -67,6 +69,30 @@ future<> ec2_snitch::start() {
|
||||
}
|
||||
|
||||
future<sstring> ec2_snitch::aws_api_call(sstring addr, uint16_t port, sstring cmd) {
|
||||
return do_with(int(0), [this, addr, port, cmd] (int& i) {
|
||||
return repeat_until_value([this, addr, port, cmd, &i]() -> future<std::optional<sstring>> {
|
||||
++i;
|
||||
return aws_api_call_once(addr, port, cmd).then([] (auto res) {
|
||||
return make_ready_future<std::optional<sstring>>(std::move(res));
|
||||
}).handle_exception([&i] (auto ep) {
|
||||
try {
|
||||
std::rethrow_exception(ep);
|
||||
} catch (const std::system_error &e) {
|
||||
logger().error(e.what());
|
||||
if (i >= AWS_API_CALL_RETRIES - 1) {
|
||||
logger().error("Maximum number of retries exceeded");
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
return sleep(AWS_API_CALL_RETRY_INTERVAL).then([] {
|
||||
return make_ready_future<std::optional<sstring>>(std::nullopt);
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
future<sstring> ec2_snitch::aws_api_call_once(sstring addr, uint16_t port, sstring cmd) {
|
||||
return connect(socket_address(inet_address{addr}, port))
|
||||
.then([this, addr, cmd] (connected_socket fd) {
|
||||
_sd = std::move(fd);
|
||||
|
||||
@@ -29,6 +29,8 @@ public:
|
||||
static constexpr const char* ZONE_NAME_QUERY_REQ = "/latest/meta-data/placement/availability-zone";
|
||||
static constexpr const char* AWS_QUERY_SERVER_ADDR = "169.254.169.254";
|
||||
static constexpr uint16_t AWS_QUERY_SERVER_PORT = 80;
|
||||
static constexpr int AWS_API_CALL_RETRIES = 5;
|
||||
static constexpr auto AWS_API_CALL_RETRY_INTERVAL = std::chrono::seconds{5};
|
||||
|
||||
ec2_snitch(const sstring& fname = "", unsigned io_cpu_id = 0);
|
||||
virtual future<> start() override;
|
||||
@@ -45,5 +47,6 @@ private:
|
||||
output_stream<char> _out;
|
||||
http_response_parser _parser;
|
||||
sstring _zone_req;
|
||||
future<sstring> aws_api_call_once(sstring addr, uint16_t port, const sstring cmd);
|
||||
};
|
||||
} // namespace locator
|
||||
|
||||
6
main.cc
6
main.cc
@@ -562,6 +562,12 @@ int main(int ac, char** av) {
|
||||
|
||||
cfg->broadcast_to_all_shards().get();
|
||||
|
||||
// We pass this piece of config through a global as a temporary hack.
|
||||
// See the comment at the definition of sstables::global_cache_index_pages.
|
||||
smp::invoke_on_all([&cfg] {
|
||||
sstables::global_cache_index_pages = cfg->cache_index_pages.operator utils::updateable_value<bool>();
|
||||
}).get();
|
||||
|
||||
::sighup_handler sighup_handler(opts, *cfg);
|
||||
auto stop_sighup_handler = defer_verbose_shutdown("sighup", [&] {
|
||||
sighup_handler.stop().get();
|
||||
|
||||
@@ -30,6 +30,7 @@
|
||||
#include <seastar/core/io_priority_class.hh>
|
||||
|
||||
class memtable;
|
||||
class reader_permit;
|
||||
class flat_mutation_reader;
|
||||
|
||||
namespace sstables {
|
||||
|
||||
@@ -442,6 +442,8 @@ static constexpr unsigned do_get_rpc_client_idx(messaging_verb verb) {
|
||||
case messaging_verb::GOSSIP_ECHO:
|
||||
case messaging_verb::GOSSIP_GET_ENDPOINT_STATES:
|
||||
case messaging_verb::GET_SCHEMA_VERSION:
|
||||
// ATTN -- if moving GOSSIP_ verbs elsewhere, mind updating the tcp_nodelay
|
||||
// setting in get_rpc_client(), which assumes gossiper verbs live in idx 0
|
||||
return 0;
|
||||
case messaging_verb::PREPARE_MESSAGE:
|
||||
case messaging_verb::PREPARE_DONE_MESSAGE:
|
||||
@@ -689,7 +691,7 @@ shared_ptr<messaging_service::rpc_protocol_client_wrapper> messaging_service::ge
|
||||
}();
|
||||
|
||||
auto must_tcp_nodelay = [&] {
|
||||
if (idx == 1) {
|
||||
if (idx == 0) {
|
||||
return true; // gossip
|
||||
}
|
||||
if (_cfg.tcp_nodelay == tcp_nodelay_what::local) {
|
||||
@@ -710,10 +712,7 @@ shared_ptr<messaging_service::rpc_protocol_client_wrapper> messaging_service::ge
|
||||
}
|
||||
opts.tcp_nodelay = must_tcp_nodelay;
|
||||
opts.reuseaddr = true;
|
||||
// We send cookies only for non-default statement tenant clients.
|
||||
if (idx > 3) {
|
||||
opts.isolation_cookie = _scheduling_info_for_connection_index[idx].isolation_cookie;
|
||||
}
|
||||
opts.isolation_cookie = _scheduling_info_for_connection_index[idx].isolation_cookie;
|
||||
|
||||
auto client = must_encrypt ?
|
||||
::make_shared<rpc_protocol_client_wrapper>(_rpc->protocol(), std::move(opts),
|
||||
|
||||
@@ -283,8 +283,8 @@ public:
|
||||
|
||||
future<> lookup_readers(db::timeout_clock::time_point timeout);
|
||||
|
||||
future<> save_readers(flat_mutation_reader::tracked_buffer unconsumed_buffer, detached_compaction_state compaction_state,
|
||||
std::optional<clustering_key_prefix> last_ckey);
|
||||
future<> save_readers(flat_mutation_reader::tracked_buffer unconsumed_buffer, std::optional<detached_compaction_state> compaction_state,
|
||||
dht::decorated_key last_pkey, std::optional<clustering_key_prefix> last_ckey);
|
||||
|
||||
future<> stop();
|
||||
};
|
||||
@@ -583,19 +583,22 @@ future<> read_context::lookup_readers(db::timeout_clock::time_point timeout) {
|
||||
});
|
||||
}
|
||||
|
||||
future<> read_context::save_readers(flat_mutation_reader::tracked_buffer unconsumed_buffer, detached_compaction_state compaction_state,
|
||||
std::optional<clustering_key_prefix> last_ckey) {
|
||||
future<> read_context::save_readers(flat_mutation_reader::tracked_buffer unconsumed_buffer, std::optional<detached_compaction_state> compaction_state,
|
||||
dht::decorated_key last_pkey, std::optional<clustering_key_prefix> last_ckey) {
|
||||
if (_cmd.query_uuid == utils::UUID{}) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
auto last_pkey = compaction_state.partition_start.key();
|
||||
|
||||
const auto cb_stats = dismantle_combined_buffer(std::move(unconsumed_buffer), last_pkey);
|
||||
tracing::trace(_trace_state, "Dismantled combined buffer: {}", cb_stats);
|
||||
|
||||
const auto cs_stats = dismantle_compaction_state(std::move(compaction_state));
|
||||
tracing::trace(_trace_state, "Dismantled compaction state: {}", cs_stats);
|
||||
auto cs_stats = dismantle_buffer_stats{};
|
||||
if (compaction_state) {
|
||||
cs_stats = dismantle_compaction_state(std::move(*compaction_state));
|
||||
tracing::trace(_trace_state, "Dismantled compaction state: {}", cs_stats);
|
||||
} else {
|
||||
tracing::trace(_trace_state, "No compaction state to dismantle, partition exhausted", cs_stats);
|
||||
}
|
||||
|
||||
return do_with(std::move(last_pkey), std::move(last_ckey), [this] (const dht::decorated_key& last_pkey,
|
||||
const std::optional<clustering_key_prefix>& last_ckey) {
|
||||
@@ -694,16 +697,18 @@ future<typename ResultBuilder::result_type> do_query(
|
||||
ResultBuilder&& result_builder) {
|
||||
auto ctx = seastar::make_shared<read_context>(db, s, cmd, ranges, trace_state, timeout);
|
||||
|
||||
co_await ctx->lookup_readers(timeout);
|
||||
|
||||
std::exception_ptr ex;
|
||||
|
||||
try {
|
||||
co_await ctx->lookup_readers(timeout);
|
||||
|
||||
auto [last_ckey, result, unconsumed_buffer, compaction_state] = co_await read_page<ResultBuilder>(ctx, s, cmd, ranges, trace_state,
|
||||
std::move(result_builder));
|
||||
|
||||
if (compaction_state->are_limits_reached() || result.is_short_read()) {
|
||||
co_await ctx->save_readers(std::move(unconsumed_buffer), std::move(*compaction_state).detach_state(), std::move(last_ckey));
|
||||
// Must be called before calling `detach_state()`.
|
||||
auto last_pkey = *compaction_state->current_partition();
|
||||
co_await ctx->save_readers(std::move(unconsumed_buffer), std::move(*compaction_state).detach_state(), std::move(last_pkey), std::move(last_ckey));
|
||||
}
|
||||
|
||||
co_await ctx->stop();
|
||||
|
||||
@@ -175,6 +175,9 @@ class compact_mutation_state {
|
||||
std::unique_ptr<mutation_compactor_garbage_collector> _collector;
|
||||
|
||||
compaction_stats _stats;
|
||||
|
||||
// Remember if we requested to stop mid-partition.
|
||||
stop_iteration _stop = stop_iteration::no;
|
||||
private:
|
||||
static constexpr bool only_live() {
|
||||
return OnlyLive == emit_only_live_rows::yes;
|
||||
@@ -270,6 +273,7 @@ public:
|
||||
}
|
||||
|
||||
void consume_new_partition(const dht::decorated_key& dk) {
|
||||
_stop = stop_iteration::no;
|
||||
auto& pk = dk.key();
|
||||
_dk = &dk;
|
||||
_return_static_content_on_partition_with_no_rows =
|
||||
@@ -323,9 +327,9 @@ public:
|
||||
_static_row_live = is_live;
|
||||
if (is_live || (!only_live() && !sr.empty())) {
|
||||
partition_is_not_empty(consumer);
|
||||
return consumer.consume(std::move(sr), current_tombstone, is_live);
|
||||
_stop = consumer.consume(std::move(sr), current_tombstone, is_live);
|
||||
}
|
||||
return stop_iteration::no;
|
||||
return _stop;
|
||||
}
|
||||
|
||||
template <typename Consumer, typename GCConsumer>
|
||||
@@ -370,23 +374,22 @@ public:
|
||||
|
||||
if (only_live() && is_live) {
|
||||
partition_is_not_empty(consumer);
|
||||
auto stop = consumer.consume(std::move(cr), t, true);
|
||||
_stop = consumer.consume(std::move(cr), t, true);
|
||||
if (++_rows_in_current_partition == _current_partition_limit) {
|
||||
return stop_iteration::yes;
|
||||
_stop = stop_iteration::yes;
|
||||
}
|
||||
return stop;
|
||||
return _stop;
|
||||
} else if (!only_live()) {
|
||||
auto stop = stop_iteration::no;
|
||||
if (!cr.empty()) {
|
||||
partition_is_not_empty(consumer);
|
||||
stop = consumer.consume(std::move(cr), t, is_live);
|
||||
_stop = consumer.consume(std::move(cr), t, is_live);
|
||||
}
|
||||
if (!sstable_compaction() && is_live && ++_rows_in_current_partition == _current_partition_limit) {
|
||||
return stop_iteration::yes;
|
||||
_stop = stop_iteration::yes;
|
||||
}
|
||||
return stop;
|
||||
return _stop;
|
||||
}
|
||||
return stop_iteration::no;
|
||||
return _stop;
|
||||
}
|
||||
|
||||
template <typename Consumer, typename GCConsumer>
|
||||
@@ -398,13 +401,13 @@ public:
|
||||
if (rt.tomb > _range_tombstones.get_partition_tombstone()) {
|
||||
if (can_purge_tombstone(rt.tomb)) {
|
||||
partition_is_not_empty_for_gc_consumer(gc_consumer);
|
||||
return gc_consumer.consume(std::move(rt));
|
||||
_stop = gc_consumer.consume(std::move(rt));
|
||||
} else {
|
||||
partition_is_not_empty(consumer);
|
||||
return consumer.consume(std::move(rt));
|
||||
_stop = consumer.consume(std::move(rt));
|
||||
}
|
||||
}
|
||||
return stop_iteration::no;
|
||||
return _stop;
|
||||
}
|
||||
|
||||
template <typename Consumer, typename GCConsumer>
|
||||
@@ -492,9 +495,24 @@ public:
|
||||
/// compactor will result in the new compactor being in the same state *this
|
||||
/// is (given the same outside parameters of course). Practically this
|
||||
/// allows the compaction state to be stored in the compacted reader.
|
||||
detached_compaction_state detach_state() && {
|
||||
/// If the currently compacted partition is exhausted a disengaged optional
|
||||
/// is returned -- in this case there is no state to detach.
|
||||
std::optional<detached_compaction_state> detach_state() && {
|
||||
// If we exhausted the partition, there is no need to detach-restore the
|
||||
// compaction state.
|
||||
// We exhausted the partition if `consume_partition_end()` was called
|
||||
// without us requesting the consumption to stop (remembered in _stop)
|
||||
// from one of the consume() overloads.
|
||||
// The consume algorithm calls `consume_partition_end()` in two cases:
|
||||
// * on a partition-end fragment
|
||||
// * consume() requested to stop
|
||||
// In the latter case, the partition is not exhausted. Even if the next
|
||||
// fragment to process is a partition-end, it will not be consumed.
|
||||
if (!_stop) {
|
||||
return {};
|
||||
}
|
||||
partition_start ps(std::move(_last_dk), _range_tombstones.get_partition_tombstone());
|
||||
return {std::move(ps), std::move(_last_static_row), std::move(_range_tombstones).range_tombstones()};
|
||||
return detached_compaction_state{std::move(ps), std::move(_last_static_row), std::move(_range_tombstones).range_tombstones()};
|
||||
}
|
||||
|
||||
const compaction_stats& stats() const { return _stats; }
|
||||
|
||||
@@ -843,7 +843,6 @@ public:
|
||||
|
||||
void apply(shadowable_tombstone deleted_at) {
|
||||
_deleted_at.apply(deleted_at, _marker);
|
||||
maybe_shadow();
|
||||
}
|
||||
|
||||
void apply(row_tombstone deleted_at) {
|
||||
|
||||
@@ -305,14 +305,23 @@ class partition_snapshot_flat_reader : public flat_mutation_reader::impl, public
|
||||
const std::optional<position_in_partition>& last_row,
|
||||
const std::optional<position_in_partition>& last_rts,
|
||||
position_in_partition_view pos) {
|
||||
if (!_rt_stream.empty()) {
|
||||
return _rt_stream.get_next(std::move(pos));
|
||||
}
|
||||
return in_alloc_section([&] () -> mutation_fragment_opt {
|
||||
maybe_refresh_state(ck_range_snapshot, last_row, last_rts);
|
||||
|
||||
position_in_partition::less_compare rt_less(_query_schema);
|
||||
|
||||
// The while below moves range tombstones from partition versions
|
||||
// into _rt_stream, just enough to produce the next range tombstone
|
||||
// The main goal behind moving to _rt_stream is to deoverlap range tombstones
|
||||
// which have the same starting position. This is not in order to satisfy
|
||||
// flat_mutation_reader stream requirements, the reader can emit range tombstones
|
||||
// which have the same position incrementally. This is to guarantee forward
|
||||
// progress in the case iterators get invalidated and maybe_refresh_state()
|
||||
// above needs to restore them. It does so using last_rts, which tracks
|
||||
// the position of the last emitted range tombstone. All range tombstones
|
||||
// with positions <= last_rts are skipped on refresh. To make progress,
|
||||
// we need to make sure that all range tombstones with duplicated positions
|
||||
// are emitted before maybe_refresh_state().
|
||||
while (has_more_range_tombstones()
|
||||
&& !rt_less(pos, peek_range_tombstone().position())
|
||||
&& (_rt_stream.empty() || !rt_less(_rt_stream.peek_next().position(), peek_range_tombstone().position()))) {
|
||||
|
||||
@@ -325,7 +325,7 @@ public:
|
||||
// When throws, the cursor is invalidated and its position is not changed.
|
||||
bool advance_to(position_in_partition_view lower_bound) {
|
||||
prepare_heap(lower_bound);
|
||||
bool found = no_clustering_row_between(_schema, lower_bound, _heap[0].it->position());
|
||||
bool found = no_clustering_row_between_weak(_schema, lower_bound, _heap[0].it->position());
|
||||
recreate_current_row();
|
||||
return found;
|
||||
}
|
||||
|
||||
@@ -575,6 +575,20 @@ bool no_clustering_row_between(const schema& s, position_in_partition_view a, po
|
||||
}
|
||||
}
|
||||
|
||||
// Returns true if and only if there can't be any clustering_row with position >= a and < b.
|
||||
// It is assumed that a <= b.
|
||||
inline
|
||||
bool no_clustering_row_between_weak(const schema& s, position_in_partition_view a, position_in_partition_view b) {
|
||||
clustering_key_prefix::equality eq(s);
|
||||
if (a.has_key() && b.has_key()) {
|
||||
return eq(a.key(), b.key())
|
||||
&& (a.get_bound_weight() == bound_weight::after_all_prefixed
|
||||
|| b.get_bound_weight() != bound_weight::after_all_prefixed);
|
||||
} else {
|
||||
return !a.has_key() && !b.has_key();
|
||||
}
|
||||
}
|
||||
|
||||
// Includes all position_in_partition objects "p" for which: start <= p < end
|
||||
// And only those.
|
||||
class position_range {
|
||||
@@ -659,3 +673,9 @@ inline
|
||||
bool position_range::is_all_clustered_rows(const schema& s) const {
|
||||
return _start.is_before_all_clustered_rows(s) && _end.is_after_all_clustered_rows(s);
|
||||
}
|
||||
|
||||
// Assumes that the bounds of `r` are of 'clustered' type
|
||||
// and that `r` is non-empty (the left bound is smaller than the right bound).
|
||||
//
|
||||
// If `r` does not contain any keys, returns nullopt.
|
||||
std::optional<query::clustering_range> position_range_to_clustering_range(const position_range& r, const schema&);
|
||||
|
||||
49
query.cc
49
query.cc
@@ -379,3 +379,52 @@ foreign_ptr<lw_shared_ptr<query::result>> result_merger::get() {
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
std::optional<query::clustering_range> position_range_to_clustering_range(const position_range& r, const schema& s) {
|
||||
assert(r.start().get_type() == partition_region::clustered);
|
||||
assert(r.end().get_type() == partition_region::clustered);
|
||||
|
||||
if (r.start().has_key() && r.end().has_key()
|
||||
&& clustering_key_prefix::equality(s)(r.start().key(), r.end().key())) {
|
||||
assert(r.start().get_bound_weight() != r.end().get_bound_weight());
|
||||
|
||||
if (r.end().get_bound_weight() == bound_weight::after_all_prefixed
|
||||
&& r.start().get_bound_weight() != bound_weight::after_all_prefixed) {
|
||||
// [before x, after x) and [for x, after x) get converted to [x, x].
|
||||
return query::clustering_range::make_singular(r.start().key());
|
||||
}
|
||||
|
||||
// [before x, for x) does not contain any keys.
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
// position_range -> clustering_range
|
||||
// (recall that position_ranges are always left-closed, right-open):
|
||||
// [before x, ...), [for x, ...) -> [x, ...
|
||||
// [after x, ...) -> (x, ...
|
||||
// [..., before x), [..., for x) -> ..., x)
|
||||
// [..., after x) -> ..., x]
|
||||
|
||||
auto to_bound = [&s] (const position_in_partition& p, bool left) -> std::optional<query::clustering_range::bound> {
|
||||
if (p.is_before_all_clustered_rows(s)) {
|
||||
assert(left);
|
||||
return {};
|
||||
}
|
||||
|
||||
if (p.is_after_all_clustered_rows(s)) {
|
||||
assert(!left);
|
||||
return {};
|
||||
}
|
||||
|
||||
assert(p.has_key());
|
||||
|
||||
auto bw = p.get_bound_weight();
|
||||
bool inclusive = left
|
||||
? bw != bound_weight::after_all_prefixed
|
||||
: bw == bound_weight::after_all_prefixed;
|
||||
|
||||
return query::clustering_range::bound{p.key(), inclusive};
|
||||
};
|
||||
|
||||
return query::clustering_range{to_bound(r.start(), true), to_bound(r.end(), false)};
|
||||
}
|
||||
|
||||
@@ -109,7 +109,7 @@ void range_tombstone_list::insert_from(const schema& s,
|
||||
if (cmp(end, it->position()) < 0) {
|
||||
// not overlapping
|
||||
if (it->tombstone().tomb == tomb && cmp(end, it->position()) == 0) {
|
||||
rev.update(it, {std::move(start), std::move(start), tomb});
|
||||
rev.update(it, {std::move(start), std::move(end), tomb});
|
||||
} else {
|
||||
auto rt = construct_range_tombstone_entry(std::move(start), std::move(end), tomb);
|
||||
rev.insert(it, *rt);
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "mutation_fragment.hh"
|
||||
#include "mutation_fragment_v2.hh"
|
||||
#include "converting_mutation_partition_applier.hh"
|
||||
|
||||
// A StreamedMutationTransformer which transforms the stream to a different schema
|
||||
|
||||
2
seastar
2
seastar
Submodule seastar updated: 34e58f9995...6217d6ff4e
@@ -1155,7 +1155,7 @@ private:
|
||||
}
|
||||
index_reader& get_index_reader() {
|
||||
if (!_index_reader) {
|
||||
auto caching = use_caching(!_slice.options.contains(query::partition_slice::option::bypass_cache));
|
||||
auto caching = use_caching(global_cache_index_pages && !_slice.options.contains(query::partition_slice::option::bypass_cache));
|
||||
_index_reader = std::make_unique<index_reader>(_sst, _consumer.permit(), _consumer.io_priority(),
|
||||
_consumer.trace_state(), caching);
|
||||
}
|
||||
|
||||
@@ -1308,7 +1308,7 @@ private:
|
||||
}
|
||||
index_reader& get_index_reader() {
|
||||
if (!_index_reader) {
|
||||
auto caching = use_caching(!_slice.options.contains(query::partition_slice::option::bypass_cache));
|
||||
auto caching = use_caching(global_cache_index_pages && !_slice.options.contains(query::partition_slice::option::bypass_cache));
|
||||
_index_reader = std::make_unique<index_reader>(_sst, _consumer.permit(), _consumer.io_priority(),
|
||||
_consumer.trace_state(), caching);
|
||||
}
|
||||
@@ -1745,9 +1745,7 @@ public:
|
||||
_monitor.on_read_started(_context->reader_position());
|
||||
}
|
||||
public:
|
||||
void on_out_of_clustering_range() override {
|
||||
push_mutation_fragment(mutation_fragment_v2(*_schema, _permit, partition_end()));
|
||||
}
|
||||
void on_out_of_clustering_range() override { }
|
||||
virtual future<> fast_forward_to(const dht::partition_range& pr) override {
|
||||
on_internal_error(sstlog, "mx_crawling_sstable_mutation_reader: doesn't support fast_forward_to(const dht::partition_range&)");
|
||||
}
|
||||
|
||||
@@ -68,7 +68,12 @@ private:
|
||||
entry(entry&&) noexcept = default;
|
||||
|
||||
~entry() {
|
||||
assert(!is_referenced());
|
||||
if (is_referenced()) {
|
||||
// Live entry_ptr should keep the entry alive, except when the entry failed on loading.
|
||||
// In that case, entry_ptr holders are not supposed to use the pointer, so it's safe
|
||||
// to nullify those entry_ptrs.
|
||||
assert(!ready());
|
||||
}
|
||||
}
|
||||
|
||||
void on_evicted() noexcept override;
|
||||
|
||||
@@ -400,10 +400,15 @@ void time_series_sstable_set::for_each_sstable(std::function<void(const shared_s
|
||||
|
||||
// O(log n)
|
||||
void time_series_sstable_set::insert(shared_sstable sst) {
|
||||
try {
|
||||
auto min_pos = sst->min_position();
|
||||
auto max_pos_reversed = sst->max_position().reversed();
|
||||
_sstables->emplace(std::move(min_pos), sst);
|
||||
_sstables_reversed->emplace(std::move(max_pos_reversed), std::move(sst));
|
||||
} catch (...) {
|
||||
erase(sst);
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
// O(n) worst case, but should be close to O(log n) most of the time
|
||||
|
||||
@@ -94,6 +94,18 @@ thread_local disk_error_signal_type sstable_write_error;
|
||||
|
||||
namespace sstables {
|
||||
|
||||
// The below flag governs the mode of index file page caching used by the index
|
||||
// reader.
|
||||
//
|
||||
// If set to true, the reader will read and/or populate a common global cache,
|
||||
// which shares its capacity with the row cache. If false, the reader will use
|
||||
// BYPASS CACHE semantics for index caching.
|
||||
//
|
||||
// This flag is intended to be a temporary hack. The goal is to eventually
|
||||
// solve index caching problems via a smart cache replacement policy.
|
||||
//
|
||||
thread_local utils::updateable_value<bool> global_cache_index_pages(false);
|
||||
|
||||
logging::logger sstlog("sstable");
|
||||
|
||||
// Because this is a noop and won't hold any state, it is better to use a global than a
|
||||
|
||||
@@ -61,6 +61,7 @@
|
||||
#include "sstables/open_info.hh"
|
||||
#include "query-request.hh"
|
||||
#include "mutation_fragment_stream_validator.hh"
|
||||
#include "utils/updateable_value.hh"
|
||||
|
||||
#include <seastar/util/optimized_optional.hh>
|
||||
|
||||
@@ -70,6 +71,8 @@ class cached_file;
|
||||
|
||||
namespace sstables {
|
||||
|
||||
extern thread_local utils::updateable_value<bool> global_cache_index_pages;
|
||||
|
||||
namespace mc {
|
||||
class writer;
|
||||
}
|
||||
|
||||
3
table.cc
3
table.cc
@@ -1493,13 +1493,14 @@ bool table::can_flush() const {
|
||||
}
|
||||
|
||||
future<> table::clear() {
|
||||
auto permits = co_await _config.dirty_memory_manager->get_all_flush_permits();
|
||||
if (_commitlog) {
|
||||
for (auto& t : *_memtables) {
|
||||
_commitlog->discard_completed_segments(_schema->id(), t->get_and_discard_rp_set());
|
||||
}
|
||||
}
|
||||
_memtables->clear_and_add();
|
||||
return _cache.invalidate(row_cache::external_updater([] { /* There is no underlying mutation source */ }));
|
||||
co_await _cache.invalidate(row_cache::external_updater([] { /* There is no underlying mutation source */ }));
|
||||
}
|
||||
|
||||
// NOTE: does not need to be futurized, but might eventually, depending on
|
||||
|
||||
@@ -107,9 +107,9 @@ def test_describe_table_size(test_table):
|
||||
# Test the ProvisionedThroughput attribute returned by DescribeTable.
|
||||
# This is a very partial test: Our test table is configured without
|
||||
# provisioned throughput, so obviously it will not have interesting settings
|
||||
# for it. DynamoDB returns zeros for some of the attributes, even though
|
||||
# the documentation suggests missing values should have been fine too.
|
||||
@pytest.mark.xfail(reason="DescribeTable does not return provisioned throughput")
|
||||
# for it. But DynamoDB documents that zeros be returned for WriteCapacityUnits
|
||||
# and ReadCapacityUnits, and does this in practice as well - and some
|
||||
# applications assume these numbers are always there (even if 0).
|
||||
def test_describe_table_provisioned_throughput(test_table):
|
||||
got = test_table.meta.client.describe_table(TableName=test_table.name)['Table']
|
||||
assert got['ProvisionedThroughput']['NumberOfDecreasesToday'] == 0
|
||||
|
||||
@@ -438,6 +438,126 @@ def test_gsi_update_second_regular_base_column(test_table_gsi_3):
|
||||
KeyConditions={'a': {'AttributeValueList': [items[3]['a']], 'ComparisonOperator': 'EQ'},
|
||||
'b': {'AttributeValueList': [items[3]['b']], 'ComparisonOperator': 'EQ'}})
|
||||
|
||||
# Test reproducing issue #11801: In issue #5006 we noticed that in the special
|
||||
# case of a GSI with two non-key attributes as keys (test_table_gsi_3),
|
||||
# an update of the second attribute forgot to delete the old row. We fixed
|
||||
# that bug, but a bug remained for updates which update the value to the *same*
|
||||
# value - in that case the old row shouldn't be deleted, but we did - as
|
||||
# noticed in issue #11801.
|
||||
def test_11801(test_table_gsi_3):
|
||||
p = random_string()
|
||||
a = random_string()
|
||||
b = random_string()
|
||||
item = {'p': p, 'a': a, 'b': b, 'd': random_string()}
|
||||
test_table_gsi_3.put_item(Item=item)
|
||||
assert_index_query(test_table_gsi_3, 'hello', [item],
|
||||
KeyConditions={'a': {'AttributeValueList': [a], 'ComparisonOperator': 'EQ'},
|
||||
'b': {'AttributeValueList': [b], 'ComparisonOperator': 'EQ'}})
|
||||
# Update the attribute 'b' to the same value b that it already had.
|
||||
# This shouldn't change anything in the base table or in the GSI
|
||||
test_table_gsi_3.update_item(Key={'p': p}, AttributeUpdates={'b': {'Value': b, 'Action': 'PUT'}})
|
||||
assert item == test_table_gsi_3.get_item(Key={'p': p}, ConsistentRead=True)['Item']
|
||||
# In issue #11801, the following assertion failed (the view row was
|
||||
# deleted and nothing matched the query).
|
||||
assert_index_query(test_table_gsi_3, 'hello', [item],
|
||||
KeyConditions={'a': {'AttributeValueList': [a], 'ComparisonOperator': 'EQ'},
|
||||
'b': {'AttributeValueList': [b], 'ComparisonOperator': 'EQ'}})
|
||||
# Above we checked that setting 'b' to the same value didn't remove
|
||||
# the old GSI row. But the same update may actually modify the GSI row
|
||||
# (e.g., an unrelated attribute d) - check this modification took place:
|
||||
item['d'] = random_string()
|
||||
test_table_gsi_3.update_item(Key={'p': p},
|
||||
AttributeUpdates={'b': {'Value': b, 'Action': 'PUT'},
|
||||
'd': {'Value': item['d'], 'Action': 'PUT'}})
|
||||
assert item == test_table_gsi_3.get_item(Key={'p': p}, ConsistentRead=True)['Item']
|
||||
assert_index_query(test_table_gsi_3, 'hello', [item],
|
||||
KeyConditions={'a': {'AttributeValueList': [a], 'ComparisonOperator': 'EQ'},
|
||||
'b': {'AttributeValueList': [b], 'ComparisonOperator': 'EQ'}})
|
||||
|
||||
# This test is the same as test_11801, but updating the first attribute (a)
|
||||
# instead of the second (b). This test didn't fail, showing that issue #11801
|
||||
# is - like #5006 - specific to the case of updating the second attribute.
|
||||
def test_11801_variant1(test_table_gsi_3):
|
||||
p = random_string()
|
||||
a = random_string()
|
||||
b = random_string()
|
||||
d = random_string()
|
||||
item = {'p': p, 'a': a, 'b': b, 'd': d}
|
||||
test_table_gsi_3.put_item(Item=item)
|
||||
assert_index_query(test_table_gsi_3, 'hello', [item],
|
||||
KeyConditions={'a': {'AttributeValueList': [a], 'ComparisonOperator': 'EQ'},
|
||||
'b': {'AttributeValueList': [b], 'ComparisonOperator': 'EQ'}})
|
||||
test_table_gsi_3.update_item(Key={'p': p}, AttributeUpdates={'a': {'Value': a, 'Action': 'PUT'}})
|
||||
assert_index_query(test_table_gsi_3, 'hello', [item],
|
||||
KeyConditions={'a': {'AttributeValueList': [a], 'ComparisonOperator': 'EQ'},
|
||||
'b': {'AttributeValueList': [b], 'ComparisonOperator': 'EQ'}})
|
||||
|
||||
# This test is the same as test_11801, but updates b to a different value
|
||||
# (newb) instead of to the same one. This test didn't fail, showing that
|
||||
# issue #11801 is specific to updates to the same value. This test basically
|
||||
# reproduces the already-fixed #5006 (we also have another test above which
|
||||
# reproduces that issue - test_gsi_update_second_regular_base_column())
|
||||
def test_11801_variant2(test_table_gsi_3):
|
||||
p = random_string()
|
||||
a = random_string()
|
||||
b = random_string()
|
||||
item = {'p': p, 'a': a, 'b': b, 'd': random_string()}
|
||||
test_table_gsi_3.put_item(Item=item)
|
||||
assert_index_query(test_table_gsi_3, 'hello', [item],
|
||||
KeyConditions={'a': {'AttributeValueList': [a], 'ComparisonOperator': 'EQ'},
|
||||
'b': {'AttributeValueList': [b], 'ComparisonOperator': 'EQ'}})
|
||||
newb = random_string()
|
||||
item['b'] = newb
|
||||
test_table_gsi_3.update_item(Key={'p': p}, AttributeUpdates={'b': {'Value': newb, 'Action': 'PUT'}})
|
||||
assert_index_query(test_table_gsi_3, 'hello', [],
|
||||
KeyConditions={'a': {'AttributeValueList': [a], 'ComparisonOperator': 'EQ'},
|
||||
'b': {'AttributeValueList': [b], 'ComparisonOperator': 'EQ'}})
|
||||
assert_index_query(test_table_gsi_3, 'hello', [item],
|
||||
KeyConditions={'a': {'AttributeValueList': [a], 'ComparisonOperator': 'EQ'},
|
||||
'b': {'AttributeValueList': [newb], 'ComparisonOperator': 'EQ'}})
|
||||
|
||||
# This test is the same as test_11801, but uses a different table schema
|
||||
# (test_table_gsi_5) where there is only one new key column in the view (x).
|
||||
# This test passed, showing that issue #11801 was specific to the special
|
||||
# case of a view with two new key columns (test_table_gsi_3).
|
||||
def test_11801_variant3(test_table_gsi_5):
|
||||
p = random_string()
|
||||
c = random_string()
|
||||
x = random_string()
|
||||
item = {'p': p, 'c': c, 'x': x, 'd': random_string()}
|
||||
test_table_gsi_5.put_item(Item=item)
|
||||
assert_index_query(test_table_gsi_5, 'hello', [item],
|
||||
KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'},
|
||||
'x': {'AttributeValueList': [x], 'ComparisonOperator': 'EQ'}})
|
||||
test_table_gsi_5.update_item(Key={'p': p, 'c': c}, AttributeUpdates={'x': {'Value': x, 'Action': 'PUT'}})
|
||||
assert item == test_table_gsi_5.get_item(Key={'p': p, 'c': c}, ConsistentRead=True)['Item']
|
||||
assert_index_query(test_table_gsi_5, 'hello', [item],
|
||||
KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'},
|
||||
'x': {'AttributeValueList': [x], 'ComparisonOperator': 'EQ'}})
|
||||
|
||||
# Variant of test_11801 in which the view key columns (a and b) are not
# touched by the update at all: only the regular column d is modified.
# This variant passed, showing that issue #11801 is specific to updating a
# view key column to the value it already had.
def test_11801_variant4(test_table_gsi_3):
    p = random_string()
    a = random_string()
    b = random_string()
    item = {'p': p, 'a': a, 'b': b, 'd': random_string()}

    def index_conditions():
        # Build a fresh condition dict for every query.
        return {'a': {'AttributeValueList': [a], 'ComparisonOperator': 'EQ'},
                'b': {'AttributeValueList': [b], 'ComparisonOperator': 'EQ'}}

    test_table_gsi_3.put_item(Item=item)
    assert_index_query(test_table_gsi_3, 'hello', [item],
        KeyConditions=index_conditions())

    # Change only the regular column d; the GSI keys a and b stay as they are.
    item['d'] = random_string()
    test_table_gsi_3.update_item(
        Key={'p': p},
        AttributeUpdates={'d': {'Value': item['d'], 'Action': 'PUT'}})

    stored = test_table_gsi_3.get_item(Key={'p': p}, ConsistentRead=True)['Item']
    assert item == stored
    assert_index_query(test_table_gsi_3, 'hello', [item],
        KeyConditions=index_conditions())
|
||||
|
||||
# Test that when a table has a GSI, if the indexed attribute is missing, the
|
||||
# item is added to the base table but not the index.
|
||||
# This is the same feature we already tested in test_gsi_missing_attribute()
|
||||
|
||||
@@ -374,6 +374,14 @@ def test_getitem_attributes_to_get_duplicate(dynamodb, test_table):
|
||||
with pytest.raises(ClientError, match='ValidationException.*Duplicate'):
|
||||
test_table.get_item(Key={'p': p, 'c': c}, AttributesToGet=['a', 'a'], ConsistentRead=True)
|
||||
|
||||
# GetItem with an empty AttributesToGet list must be rejected with a
# ValidationException. Reproduces issue #10332.
def test_getitem_attributes_to_get_empty(dynamodb, test_table):
    p = random_string()
    c = random_string()
    key = {'p': p, 'c': c}
    with pytest.raises(ClientError, match='ValidationException'):
        test_table.get_item(Key=key, AttributesToGet=[], ConsistentRead=True)
|
||||
|
||||
# Basic test for DeleteItem, with hash key only
|
||||
def test_delete_item_hash(test_table_s):
|
||||
p = random_string()
|
||||
|
||||
@@ -170,6 +170,13 @@ def test_query_attributes_to_get(dynamodb, test_table):
|
||||
expected_items = [{k: x[k] for k in wanted if k in x} for x in items]
|
||||
assert multiset(expected_items) == multiset(got_items)
|
||||
|
||||
# Query with an empty AttributesToGet list must be rejected with a
# ValidationException. Reproduces issue #10332.
def test_query_attributes_to_get_empty(dynamodb, test_table):
    p = random_string()
    key_conditions = {'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}}
    with pytest.raises(ClientError, match='ValidationException'):
        full_query(test_table, KeyConditions=key_conditions, AttributesToGet=[])
|
||||
|
||||
# Test that in a table with both hash key and sort key, which keys we can
|
||||
# Query by: We can Query by the hash key, by a combination of both hash and
|
||||
# sort keys, but *cannot* query by just the sort key, and obviously not
|
||||
|
||||
@@ -25,6 +25,8 @@
|
||||
#include <deque>
|
||||
#include <random>
|
||||
#include "utils/lsa/chunked_managed_vector.hh"
|
||||
#include "utils/managed_ref.hh"
|
||||
#include "test/lib/log.hh"
|
||||
|
||||
#include <boost/range/algorithm/sort.hpp>
|
||||
#include <boost/range/algorithm/equal.hpp>
|
||||
@@ -216,3 +218,106 @@ SEASTAR_TEST_CASE(tests_reserve_partial) {
|
||||
});
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
// Appends the values 1..3999 to a chunked_managed_vector of managed_ref
// elements and then empties it with clear_and_release(). The test has no
// explicit assertions; it passes if nothing crashes along the way.
SEASTAR_TEST_CASE(test_clear_and_release) {
    region reg;
    allocating_section as;

    with_allocator(reg.allocator(), [&] {
        lsa::chunked_managed_vector<managed_ref<uint64_t>> values;

        uint64_t next = 1;
        while (next < 4000) {
            // Each append goes through the allocating section.
            as(reg, [&] {
                values.emplace_back(make_managed<uint64_t>(next));
            });
            ++next;
        }

        values.clear_and_release();
    });

    return make_ready_future<>();
}
|
||||
|
||||
// Exercises reserve() followed by a run of emplace_back() calls for several
// (reserve size, push count) combinations. After every append the first
// element must still hold the seed value, and once filling is done the
// whole sequence must read back as seed, seed + 1, ...
SEASTAR_TEST_CASE(test_chunk_reserve) {
    region reg;
    allocating_section as;

    // Each entry is (reserve size, push count).
    const auto configs = {
        std::make_pair(0, 4000),
        std::make_pair(100, 4000),
        std::make_pair(200, 4000),
        std::make_pair(1000, 4000),
        std::make_pair(2000, 4000),
        std::make_pair(3000, 4000),
        std::make_pair(5000, 4000),
        std::make_pair(500, 8000),
        std::make_pair(1000, 8000),
        std::make_pair(2000, 8000),
        std::make_pair(8000, 500),
    };

    for (const auto& conf : configs) {
        with_allocator(reg.allocator(), [&] {
            const auto reserve_size = conf.first;
            const auto push_count = conf.second;
            testlog.info("Testing reserve({}), {}x emplace_back()", reserve_size, push_count);

            lsa::chunked_managed_vector<managed_ref<uint64_t>> values;
            values.reserve(reserve_size);

            uint64_t seed = rand();
            uint64_t pushed = 0;
            while (pushed < push_count) {
                as(reg, [&] {
                    values.emplace_back(make_managed<uint64_t>(seed + pushed));
                    // The front element must not be disturbed by appends.
                    BOOST_REQUIRE(**values.begin() == seed);
                });
                ++pushed;
            }

            // Read everything back in insertion order.
            auto it = values.begin();
            uint64_t checked = 0;
            while (checked < push_count) {
                BOOST_REQUIRE(**it == seed + checked);
                ++it;
                ++checked;
            }

            values.clear_and_release();
        });
    }

    return make_ready_future<>();
}
|
||||
|
||||
// Tests the case of make_room() invoked with last_chunk_capacity_deficit but _size not in
|
||||
// the last reserved chunk.
|
||||
SEASTAR_TEST_CASE(test_shrinking_and_expansion_involving_chunk_boundary) {
|
||||
region region;
|
||||
allocating_section as;
|
||||
|
||||
with_allocator(region.allocator(), [&] {
|
||||
lsa::chunked_managed_vector<managed_ref<uint64_t>> v;
|
||||
|
||||
// Fill two chunks
|
||||
v.reserve(2000);
|
||||
for (uint64_t i = 0; i < 2000; ++i) {
|
||||
as(region, [&] {
|
||||
v.emplace_back(make_managed<uint64_t>(i));
|
||||
});
|
||||
}
|
||||
|
||||
// Make the last chunk smaller than max size to trigger the last_chunk_capacity_deficit path in make_room()
|
||||
v.shrink_to_fit();
|
||||
|
||||
// Leave the last chunk reserved but empty
|
||||
for (uint64_t i = 0; i < 1000; ++i) {
|
||||
v.pop_back();
|
||||
}
|
||||
|
||||
// Try to reserve more than the currently reserved capacity and trigger last_chunk_capacity_deficit path
|
||||
// with _size not in the last chunk. Should not sigsegv.
|
||||
v.reserve(8000);
|
||||
|
||||
for (uint64_t i = 0; i < 2000; ++i) {
|
||||
as(region, [&] {
|
||||
v.emplace_back(make_managed<uint64_t>(i));
|
||||
});
|
||||
}
|
||||
|
||||
v.clear_and_release();
|
||||
});
|
||||
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
|
||||
@@ -191,3 +191,32 @@ BOOST_AUTO_TEST_CASE(tests_reserve_partial) {
|
||||
BOOST_REQUIRE_EQUAL(v.capacity(), orig_size);
|
||||
}
|
||||
}
|
||||
|
||||
// Tests the case of make_room() invoked with last_chunk_capacity_deficit but _size not in
|
||||
// the last reserved chunk.
|
||||
BOOST_AUTO_TEST_CASE(test_shrinking_and_expansion_involving_chunk_boundary) {
|
||||
using vector_type = utils::chunked_vector<std::unique_ptr<uint64_t>>;
|
||||
vector_type v;
|
||||
|
||||
// Fill two chunks
|
||||
v.reserve(vector_type::max_chunk_capacity() * 3 / 2);
|
||||
for (uint64_t i = 0; i < vector_type::max_chunk_capacity() * 3 / 2; ++i) {
|
||||
v.emplace_back(std::make_unique<uint64_t>(i));
|
||||
}
|
||||
|
||||
// Make the last chunk smaller than max size to trigger the last_chunk_capacity_deficit path in make_room()
|
||||
v.shrink_to_fit();
|
||||
|
||||
// Leave the last chunk reserved but empty
|
||||
for (uint64_t i = 0; i < vector_type::max_chunk_capacity(); ++i) {
|
||||
v.pop_back();
|
||||
}
|
||||
|
||||
// Try to reserve more than the currently reserved capacity and trigger last_chunk_capacity_deficit path
|
||||
// with _size not in the last chunk. Should not sigsegv.
|
||||
v.reserve(vector_type::max_chunk_capacity() * 4);
|
||||
|
||||
for (uint64_t i = 0; i < vector_type::max_chunk_capacity() * 2; ++i) {
|
||||
v.emplace_back(std::make_unique<uint64_t>(i));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -32,7 +32,7 @@
|
||||
#include "cql3/util.hh"
|
||||
|
||||
//
|
||||
// Test basic CQL string quoting
|
||||
// Test basic CQL identifier quoting
|
||||
//
|
||||
BOOST_AUTO_TEST_CASE(maybe_quote) {
|
||||
std::string s(65536, 'x');
|
||||
@@ -67,6 +67,16 @@ BOOST_AUTO_TEST_CASE(maybe_quote) {
|
||||
BOOST_REQUIRE_EQUAL(cql3::util::maybe_quote("\"\""), "\"\"\"\"\"\"");
|
||||
BOOST_REQUIRE_EQUAL(cql3::util::maybe_quote("\"hell0\""), "\"\"\"hell0\"\"\"");
|
||||
BOOST_REQUIRE_EQUAL(cql3::util::maybe_quote("hello \"my\" world"), "\"hello \"\"my\"\" world\"");
|
||||
|
||||
// Reproducer for issue #9450. Reserved keywords like "to" or "where"
|
||||
// need quoting, but unreserved keywords like "ttl", "int" or "as",
|
||||
// do not.
|
||||
BOOST_REQUIRE_EQUAL(cql3::util::maybe_quote("to"), "\"to\"");
|
||||
BOOST_REQUIRE_EQUAL(cql3::util::maybe_quote("where"), "\"where\"");
|
||||
BOOST_REQUIRE_EQUAL(cql3::util::maybe_quote("ttl"), "ttl");
|
||||
BOOST_REQUIRE_EQUAL(cql3::util::maybe_quote("int"), "int");
|
||||
BOOST_REQUIRE_EQUAL(cql3::util::maybe_quote("as"), "as");
|
||||
BOOST_REQUIRE_EQUAL(cql3::util::maybe_quote("ttl hi"), "\"ttl hi\"");
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
@@ -784,3 +784,38 @@ SEASTAR_TEST_CASE(upgrade_sstables) {
|
||||
}).get();
|
||||
});
|
||||
}
|
||||
|
||||
// Checks that dropping a table also removes that table's entries from the
// querier cache, even when an entry is inserted while the drop is still
// in flight.
SEASTAR_TEST_CASE(database_drop_column_family_clears_querier_cache) {
    return do_with_cql_env_thread([] (cql_test_env& e) {
        e.execute_cql("create table ks.cf (k text, v int, primary key (k));").get();
        auto& db = e.local_db();
        const auto drop_time = db_clock::now();
        auto& tbl = db.find_column_family("ks", "cf");

        // Hold a read-in-progress token for the table; it is released
        // further down to let the drop finish.
        auto read_guard = std::optional(tbl.read_in_progress());
        auto tbl_schema = tbl.schema();
        auto querier = query::data_querier(
                tbl.as_mutation_source(),
                tbl.schema(),
                database_test(db).get_user_read_concurrency_semaphore().make_tracking_only_permit(tbl_schema.get(), "test", db::no_timeout),
                query::full_partition_range,
                tbl_schema->full_slice(),
                default_priority_class(),
                nullptr);

        auto drop_done = e.db().invoke_on_all([drop_time] (database& db) {
            return db.drop_column_family("ks", "cf", [drop_time] { return make_ready_future<db_clock::time_point>(drop_time); });
        });

        // we add a querier to the querier cache while the drop is ongoing
        auto& cache = db.get_querier_cache();
        cache.insert(utils::make_random_uuid(), std::move(querier), nullptr);
        BOOST_REQUIRE_EQUAL(cache.get_stats().population, 1);

        read_guard.reset(); // this should allow the drop to finish
        drop_done.get();

        // the drop should have cleaned up all entries belonging to that table
        BOOST_REQUIRE_EQUAL(cache.get_stats().population, 0);
    });
}
|
||||
|
||||
@@ -941,3 +941,28 @@ SEASTAR_THREAD_TEST_CASE(test_reverse_reader_is_mutation_source) {
|
||||
};
|
||||
run_mutation_source_tests(populate);
|
||||
}
|
||||
|
||||
// Creates one v1 and one v2 flat mutation reader backed by do-nothing
// implementations and lets both go out of scope without close() being called.
SEASTAR_THREAD_TEST_CASE(test_allow_reader_early_destruction) {
    // Minimal v1 reader implementation: every operation completes immediately.
    struct test_reader_impl : public flat_mutation_reader::impl {
        using flat_mutation_reader::impl::impl;

        virtual future<> fill_buffer() override {
            return make_ready_future<>();
        }
        virtual future<> next_partition() override {
            return make_ready_future<>();
        }
        virtual future<> fast_forward_to(const dht::partition_range&) override {
            return make_ready_future<>();
        }
        virtual future<> fast_forward_to(position_range) override {
            return make_ready_future<>();
        }
        virtual future<> close() noexcept override {
            return make_ready_future<>();
        }
    };

    // Same as above, for the v2 reader interface.
    struct test_reader_v2_impl : public flat_mutation_reader_v2::impl {
        using flat_mutation_reader_v2::impl::impl;

        virtual future<> fill_buffer() override {
            return make_ready_future<>();
        }
        virtual future<> next_partition() override {
            return make_ready_future<>();
        }
        virtual future<> fast_forward_to(const dht::partition_range&) override {
            return make_ready_future<>();
        }
        virtual future<> fast_forward_to(position_range) override {
            return make_ready_future<>();
        }
        virtual future<> close() noexcept override {
            return make_ready_future<>();
        }
    };

    simple_schema s;
    tests::reader_concurrency_semaphore_wrapper semaphore;
    // Neither reader is closed, but neither started any operation either,
    // so destroying them as they are is safe.
    auto reader = make_flat_mutation_reader<test_reader_impl>(s.schema(), semaphore.make_permit());
    auto reader_v2 = make_flat_mutation_reader_v2<test_reader_v2_impl>(s.schema(), semaphore.make_permit());
}
|
||||
|
||||
@@ -391,3 +391,87 @@ SEASTAR_TEST_CASE(test_loading_cache_reload_during_eviction) {
|
||||
BOOST_REQUIRE_EQUAL(loading_cache.size(), 1);
|
||||
});
|
||||
}
|
||||
|
||||
// Verifies that remove() and remove_if() leave no stale entry behind in the
// loading cache, in two situations:
//   1. the removal happens while the load of that key is still in flight;
//   2. a pointer to the removed value is still held by the caller.
// In both cases a subsequent get_ptr() must load a fresh value.
SEASTAR_THREAD_TEST_CASE(test_loading_cache_remove_leaves_no_old_entries_behind) {
    using namespace std::chrono;
    load_count = 0;

    auto load_v1 = [] (auto key) { return make_ready_future<sstring>("v1"); };
    auto load_v2 = [] (auto key) { return make_ready_future<sstring>("v2"); };
    auto load_v3 = [] (auto key) { return make_ready_future<sstring>("v3"); };

    // Test remove()
    {
        utils::loading_cache<int, sstring> loading_cache(num_loaders, 100s, testlog);
        auto stop_cache_reload = seastar::defer([&loading_cache] { loading_cache.stop().get(); });

        //
        // Test remove() concurrent with loading
        //

        auto f = loading_cache.get_ptr(0, [&](auto key) {
            // The continuation runs after this loader lambda has returned, so
            // `key` (a by-value parameter) must be captured by value; a
            // by-reference capture would dangle. load_v1 lives in the
            // enclosing test scope and is safe to capture by reference.
            return later().then([&load_v1, key] {
                return load_v1(key);
            });
        });

        loading_cache.remove(0);

        BOOST_REQUIRE_EQUAL(loading_cache.find(0), nullptr);
        BOOST_REQUIRE_EQUAL(loading_cache.size(), 0);

        // The in-flight load still delivers its value to the caller...
        auto ptr1 = f.get0();
        BOOST_REQUIRE_EQUAL(*ptr1, "v1");

        // ...but the removed key must not reappear in the cache.
        BOOST_REQUIRE_EQUAL(loading_cache.find(0), nullptr);
        BOOST_REQUIRE_EQUAL(loading_cache.size(), 0);

        ptr1 = loading_cache.get_ptr(0, load_v2).get0();
        loading_cache.remove(0);
        BOOST_REQUIRE_EQUAL(*ptr1, "v2");

        //
        // Test that live ptr1, removed from cache, does not prevent reload of new value
        //
        auto ptr2 = loading_cache.get_ptr(0, load_v3).get0();
        ptr1 = nullptr;
        BOOST_REQUIRE_EQUAL(*ptr2, "v3");
    }

    // Test remove_if()
    {
        utils::loading_cache<int, sstring> loading_cache(num_loaders, 100s, testlog);
        auto stop_cache_reload = seastar::defer([&loading_cache] { loading_cache.stop().get(); });

        //
        // Test remove_if() concurrent with loading
        //
        auto f = loading_cache.get_ptr(0, [&](auto key) {
            // See above: `key` is captured by value because the continuation
            // outlives the loader lambda's parameter.
            return later().then([&load_v1, key] {
                return load_v1(key);
            });
        });

        loading_cache.remove_if([] (auto&& v) { return v == "v1"; });

        BOOST_REQUIRE_EQUAL(loading_cache.find(0), nullptr);
        BOOST_REQUIRE_EQUAL(loading_cache.size(), 0);

        auto ptr1 = f.get0();
        BOOST_REQUIRE_EQUAL(*ptr1, "v1");

        BOOST_REQUIRE_EQUAL(loading_cache.find(0), nullptr);
        BOOST_REQUIRE_EQUAL(loading_cache.size(), 0);

        ptr1 = loading_cache.get_ptr(0, load_v2).get0();
        loading_cache.remove_if([] (auto&& v) { return v == "v2"; });
        BOOST_REQUIRE_EQUAL(*ptr1, "v2");

        //
        // Test that live ptr1, removed from cache, does not prevent reload of new value
        //
        auto ptr2 = loading_cache.get_ptr(0, load_v3).get0();
        ptr1 = nullptr;
        BOOST_REQUIRE_EQUAL(*ptr2, "v3");
        ptr2 = nullptr;
    }
}
|
||||
|
||||
@@ -39,6 +39,9 @@
|
||||
#include "test/lib/random_utils.hh"
|
||||
#include "test/lib/log.hh"
|
||||
#include "test/lib/reader_concurrency_semaphore.hh"
|
||||
#include "test/lib/simple_schema.hh"
|
||||
#include "test/lib/make_random_string.hh"
|
||||
#include "utils/error_injection.hh"
|
||||
|
||||
static api::timestamp_type next_timestamp() {
|
||||
static thread_local api::timestamp_type next_timestamp = 1;
|
||||
@@ -528,6 +531,74 @@ SEASTAR_TEST_CASE(test_exception_safety_of_single_partition_reads) {
|
||||
});
|
||||
}
|
||||
|
||||
// Applies three mutations carrying overlapping range tombstones to one
// memtable partition, with a reader opened (and left mid-stream) between
// consecutive applies. A fresh reader must then emit fragments in monotonic
// position order and produce exactly m1 + m2 + m3.
SEASTAR_THREAD_TEST_CASE(test_tombstone_merging_with_multiple_versions) {
    tests::reader_concurrency_semaphore_wrapper semaphore;
    simple_schema ss;
    auto s = ss.schema();
    auto mt = make_lw_shared<memtable>(ss.schema());

    auto pk = ss.make_pkey(0);
    auto pr = dht::partition_range::make_singular(pk);

    // Created in this order, so t0 is the oldest tombstone and t3 the newest
    // as far as creation order goes.
    auto t0 = ss.new_tombstone();
    auto t1 = ss.new_tombstone();
    auto t2 = ss.new_tombstone();
    auto t3 = ss.new_tombstone();

    // m1: t1 from before ck(0) up to ck(3), plus rows.
    mutation m1(s, pk);
    ss.delete_range(m1, *position_range_to_clustering_range(position_range(
            position_in_partition::before_key(ss.make_ckey(0)),
            position_in_partition::for_key(ss.make_ckey(3))), *s), t1);
    ss.add_row(m1, ss.make_ckey(0), "v");
    ss.add_row(m1, ss.make_ckey(1), "v");

    // Fill so that rd1 stays in the partition snapshot
    int n_rows = 1000;
    auto filler = make_random_string(512);
    for (int row = 0; row < n_rows; ++row) {
        ss.add_row(m1, ss.make_ckey(row), filler);
    }

    // m2: t2 over [before ck(0), before ck(1)) and t3 from before ck(1) up to ck(3).
    mutation m2(s, pk);
    ss.delete_range(m2, *position_range_to_clustering_range(position_range(
            position_in_partition::before_key(ss.make_ckey(0)),
            position_in_partition::before_key(ss.make_ckey(1))), *s), t2);
    ss.delete_range(m2, *position_range_to_clustering_range(position_range(
            position_in_partition::before_key(ss.make_ckey(1)),
            position_in_partition::for_key(ss.make_ckey(3))), *s), t3);

    // m3: t0 from before ck(0) up to ck(4).
    mutation m3(s, pk);
    ss.delete_range(m3, *position_range_to_clustering_range(position_range(
            position_in_partition::before_key(ss.make_ckey(0)),
            position_in_partition::for_key(ss.make_ckey(4))), *s), t0);

    mt->apply(m1);

    auto rd1 = mt->make_flat_reader(s, semaphore.make_permit(), pr, s->full_slice(), default_priority_class(),
            nullptr, streamed_mutation::forwarding::no, mutation_reader::forwarding::no);
    auto close_rd1 = defer([&] { rd1.close().get(); });

    rd1.fill_buffer().get();
    BOOST_REQUIRE(!rd1.is_end_of_stream()); // rd1 must keep the m1 version alive

    mt->apply(m2);

    auto rd2 = mt->make_flat_reader(s, semaphore.make_permit(), pr, s->full_slice(), default_priority_class(),
            nullptr, streamed_mutation::forwarding::no, mutation_reader::forwarding::no);
    auto close_rd2 = defer([&] { rd2.close().get(); });

    rd2.fill_buffer().get();
    // rd2 must be left mid-stream as well.
    // NOTE(review): presumably so that the version it reads stays alive - confirm.
    BOOST_REQUIRE(!rd2.is_end_of_stream());

    mt->apply(m3);

    assert_that(mt->make_flat_reader(s, semaphore.make_permit(), pr))
        .has_monotonic_positions();

    assert_that(mt->make_flat_reader(s, semaphore.make_permit(), pr))
        .produces(m1 + m2 + m3);
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_hash_is_cached) {
|
||||
return seastar::async([] {
|
||||
auto s = schema_builder("ks", "cf")
|
||||
|
||||
@@ -560,7 +560,7 @@ SEASTAR_TEST_CASE(test_apply_to_incomplete_respects_continuity) {
|
||||
static mutation_partition read_using_cursor(partition_snapshot& snap) {
|
||||
tests::reader_concurrency_semaphore_wrapper semaphore;
|
||||
partition_snapshot_row_cursor cur(*snap.schema(), snap);
|
||||
cur.maybe_refresh();
|
||||
cur.advance_to(position_in_partition::before_all_clustered_rows());
|
||||
auto mp = read_partition_from(*snap.schema(), cur);
|
||||
for (auto&& rt : snap.range_tombstones()) {
|
||||
mp.apply_delete(*snap.schema(), rt);
|
||||
|
||||
@@ -763,9 +763,8 @@ SEASTAR_THREAD_TEST_CASE(multi_col_in) {
|
||||
cquery_nofail(e, "insert into t(pk,ck1,ck2,r) values (4,13,23,'a')");
|
||||
require_rows(e, "select pk from t where (ck1,ck2) in ((13,23)) allow filtering", {{I(3)}, {I(4)}});
|
||||
require_rows(e, "select pk from t where (ck1) in ((13),(33),(44)) allow filtering", {{I(3)}, {I(4)}});
|
||||
// TODO: uncomment when #6200 is fixed.
|
||||
// require_rows(e, "select pk from t where (ck1,ck2) in ((13,23)) and r='a' allow filtering",
|
||||
// {{I(4), I(13), F(23), T("a")}});
|
||||
require_rows(e, "select pk from t where (ck1,ck2) in ((13,23)) and r='a' allow filtering",
|
||||
{{I(4), I(13), F(23), T("a")}});
|
||||
cquery_nofail(e, "delete from t where pk=4");
|
||||
require_rows(e, "select pk from t where (ck1,ck2) in ((13,23)) allow filtering", {{I(3)}});
|
||||
auto stmt = e.prepare("select ck1 from t where (ck1,ck2) in ? allow filtering").get0();
|
||||
|
||||
@@ -1242,9 +1242,13 @@ SEASTAR_TEST_CASE(test_update_failure) {
|
||||
class throttle {
|
||||
unsigned _block_counter = 0;
|
||||
promise<> _p; // valid when _block_counter != 0, resolves when goes down to 0
|
||||
std::optional<promise<>> _entered;
|
||||
bool _one_shot;
|
||||
public:
|
||||
// one_shot means whether only the first enter() after block() will block.
|
||||
throttle(bool one_shot = false) : _one_shot(one_shot) {}
|
||||
future<> enter() {
|
||||
if (_block_counter) {
|
||||
if (_block_counter && (!_one_shot || _entered)) {
|
||||
promise<> p1;
|
||||
promise<> p2;
|
||||
|
||||
@@ -1256,16 +1260,21 @@ public:
|
||||
p3.set_value();
|
||||
});
|
||||
_p = std::move(p2);
|
||||
|
||||
if (_entered) {
|
||||
_entered->set_value();
|
||||
_entered.reset();
|
||||
}
|
||||
return f1;
|
||||
} else {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
}
|
||||
|
||||
void block() {
|
||||
future<> block() {
|
||||
++_block_counter;
|
||||
_p = promise<>();
|
||||
_entered = promise<>();
|
||||
return _entered->get_future();
|
||||
}
|
||||
|
||||
void unblock() {
|
||||
@@ -1410,7 +1419,7 @@ SEASTAR_TEST_CASE(test_cache_population_and_update_race) {
|
||||
mt2->apply(m);
|
||||
}
|
||||
|
||||
thr.block();
|
||||
auto f = thr.block();
|
||||
|
||||
auto m0_range = dht::partition_range::make_singular(ring[0].ring_position());
|
||||
auto rd1 = cache.make_reader(s, semaphore.make_permit(), m0_range);
|
||||
@@ -1421,6 +1430,7 @@ SEASTAR_TEST_CASE(test_cache_population_and_update_race) {
|
||||
rd2.set_max_buffer_size(1);
|
||||
auto rd2_fill_buffer = rd2.fill_buffer();
|
||||
|
||||
f.get();
|
||||
sleep(10ms).get();
|
||||
|
||||
// This update should miss on all partitions
|
||||
@@ -1548,12 +1558,13 @@ SEASTAR_TEST_CASE(test_cache_population_and_clear_race) {
|
||||
mt2->apply(m);
|
||||
}
|
||||
|
||||
thr.block();
|
||||
auto f = thr.block();
|
||||
|
||||
auto rd1 = cache.make_reader(s, semaphore.make_permit());
|
||||
rd1.set_max_buffer_size(1);
|
||||
auto rd1_fill_buffer = rd1.fill_buffer();
|
||||
|
||||
f.get();
|
||||
sleep(10ms).get();
|
||||
|
||||
// This update should miss on all partitions
|
||||
@@ -3777,3 +3788,81 @@ SEASTAR_TEST_CASE(test_scans_erase_dummies) {
|
||||
BOOST_REQUIRE_EQUAL(tracker.get_stats().rows, 2);
|
||||
});
|
||||
}
|
||||
|
||||
// Blocks a cache read of [0, 2] inside the throttled underlying source,
// then populates (2, 3] and evicts two rows before letting the read go on.
// Both the blocked read and a later read of [0, 3] must return the same
// data as the memtable backing the cache.
SEASTAR_TEST_CASE(test_eviction_of_upper_bound_of_population_range) {
    return seastar::async([] {
        simple_schema s;
        tests::reader_concurrency_semaphore_wrapper semaphore;
        auto cache_mt = make_lw_shared<memtable>(s.schema());

        auto pkey = s.make_pkey("pk");

        mutation m1(s.schema(), pkey);
        s.add_row(m1, s.make_ckey(1), "v1");
        s.add_row(m1, s.make_ckey(2), "v2");
        cache_mt->apply(m1);

        cache_tracker tracker;
        throttle thr(true);
        auto cache_source = make_decorated_snapshot_source(
            snapshot_source([&] { return cache_mt->as_data_source(); }),
            [&] (mutation_source src) {
                return throttled_mutation_source(thr, std::move(src));
            });
        row_cache cache(s.schema(), cache_source, tracker);

        auto pr = dht::partition_range::make_singular(pkey);

        // Reads the clustering range [start, end] through the cache and
        // directly from the memtable, and requires both results to match.
        auto read = [&] (int start, int end) {
            auto slice = partition_slice_builder(*s.schema())
                .with_range(query::clustering_range::make(s.make_ckey(start), s.make_ckey(end)))
                .build();
            auto rd = cache.make_reader(s.schema(), semaphore.make_permit(), pr, slice);
            auto close_rd = deferred_close(rd);
            auto from_cache = read_mutation_from_flat_mutation_reader(rd).get0();
            close_rd.close_now();
            rd = cache_mt->make_flat_reader(s.schema(), semaphore.make_permit(), pr, slice);
            auto close_rd2 = deferred_close(rd);
            auto from_memtable = read_mutation_from_flat_mutation_reader(rd).get0();
            BOOST_REQUIRE(from_memtable);
            assert_that(from_cache).has_mutation().is_equal_to(*from_memtable);
        };

        // populate [2]
        {
            auto slice = partition_slice_builder(*s.schema())
                .with_range(query::clustering_range::make_singular(s.make_ckey(2)))
                .build();
            assert_that(cache.make_reader(s.schema(), semaphore.make_permit(), pr, slice))
                .has_monotonic_positions();
        }

        auto arrived = thr.block();

        // Read [0, 2]
        auto pending_read = seastar::async([&] {
            read(0, 2);
        });

        // Wait for the future handed out by block() before proceeding.
        arrived.get();

        // populate (2, 3]
        {
            auto slice = partition_slice_builder(*s.schema())
                .with_range(query::clustering_range::make(query::clustering_range::bound(s.make_ckey(2), false),
                                                          query::clustering_range::bound(s.make_ckey(3), true)))
                .build();
            assert_that(cache.make_reader(s.schema(), semaphore.make_permit(), pr, slice))
                .has_monotonic_positions();
        }

        testlog.trace("Evicting");
        evict_one_row(tracker); // Evicts before(0)
        evict_one_row(tracker); // Evicts ck(2)
        testlog.trace("Unblocking");

        thr.unblock();
        pending_read.get();

        read(0, 3);
    });
}
|
||||
|
||||
@@ -72,6 +72,7 @@
|
||||
#include "test/lib/reader_concurrency_semaphore.hh"
|
||||
#include "test/lib/sstable_utils.hh"
|
||||
#include "test/lib/random_utils.hh"
|
||||
#include "test/lib/random_schema.hh"
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
@@ -3003,3 +3004,58 @@ SEASTAR_TEST_CASE(sstable_reader_with_timeout) {
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
// Writes an sstable holding one partition with three rows followed by a
// range tombstone that starts at the fourth clustering key and has an
// empty key as its end bound, then checks that the crawling reader emits
// fragments in monotonic position order.
SEASTAR_TEST_CASE(test_crawling_reader_out_of_range_last_range_tombstone_change) {
    return test_env::do_with_async([] (test_env& env) {
        simple_schema table;

        auto mut = table.new_mutation("pk0");
        auto ckeys = table.make_ckeys(4);
        table.add_row(mut, ckeys[0], "v0");
        table.add_row(mut, ckeys[1], "v1");
        table.add_row(mut, ckeys[2], "v2");

        using bound = query::clustering_range::bound;
        auto deleted_range = query::clustering_range::make(
                bound{ckeys[3], true},
                bound{clustering_key::make_empty(), true});
        table.delete_range(mut, deleted_range, tombstone(1, gc_clock::now()));

        auto tmp = tmpdir();
        auto sst_gen = [&env, &table, &tmp] () {
            return env.make_sstable(table.schema(), tmp.path().string(), 1, sstables::get_highest_sstable_version(), big);
        };
        auto sst = make_sstable_containing(sst_gen, {mut});

        auto crawler = sst->make_crawling_reader(table.schema(), env.make_reader_permit());
        assert_that(std::move(crawler)).has_monotonic_positions();
    });
}
|
||||
|
||||
// Generates a random schema and 20 random mutations, writes them into an
// sstable, and checks that the crawling reader (a) produces every mutation
// in the generated order and (b) emits fragments in monotonic position order.
SEASTAR_TEST_CASE(test_crawling_reader_random_schema_random_mutations) {
    return test_env::do_with_async([this] (test_env& env) {
        auto random_spec = tests::make_random_schema_specification(
                get_name(),
                std::uniform_int_distribution<size_t>(1, 4),
                std::uniform_int_distribution<size_t>(2, 4),
                std::uniform_int_distribution<size_t>(2, 8),
                std::uniform_int_distribution<size_t>(2, 8));
        auto random_schema = tests::random_schema{tests::random::get_int<uint32_t>(), *random_spec};
        auto schema = random_schema.schema();

        // Log the schema so the generated table definition appears in the test output.
        testlog.info("Random schema:\n{}", random_schema.cql());

        const auto muts = tests::generate_random_mutations(random_schema, 20).get();

        auto tmp = tmpdir();
        auto sst_gen = [&env, schema, &tmp] () {
            return env.make_sstable(schema, tmp.path().string(), 1, sstables::get_highest_sstable_version(), big);
        };
        auto sst = make_sstable_containing(sst_gen, muts);

        // First pass: content check, one mutation at a time.
        {
            auto reader_assert = assert_that(sst->make_crawling_reader(schema, env.make_reader_permit()));

            for (const auto& expected : muts) {
                reader_assert.produces(expected);
            }
        }

        // Second pass: ordering check on a fresh reader.
        assert_that(sst->make_crawling_reader(schema, env.make_reader_permit())).has_monotonic_positions();
    });
}
|
||||
|
||||
@@ -49,10 +49,18 @@ static void add_entry(logalloc::region& r,
|
||||
|
||||
// Builds a partition_index_page with four entries, for partition keys 0..3
// with the values 0..3 passed as the last argument of add_entry().
// If building the page fails part-way, the partially built page is
// destroyed under the region's allocator.
static partition_index_page make_page0(logalloc::region& r, simple_schema& s) {
    partition_index_page page;

    // Cleanup for the failure path: moving the page into a local inside
    // with_allocator() makes its destructor run with r's allocator active.
    auto destroy_page = defer([&] {
        with_allocator(r.allocator(), [&] {
            auto doomed = std::move(page);
        });
    });

    for (int i = 0; i < 4; ++i) {
        add_entry(r, *s.schema(), page, s.make_pkey(i).key(), i);
    }

    // All entries were added; the caller takes over the page.
    destroy_page.cancel();
    return page;
}
|
||||
|
||||
@@ -143,6 +151,47 @@ SEASTAR_THREAD_TEST_CASE(test_caching) {
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static future<> ignore_result(future<T>&& f) {
|
||||
return f.then_wrapped([] (auto&& f) {
|
||||
try {
|
||||
f.get();
|
||||
} catch (...) {
|
||||
// expected, silence warnings about ignored failed futures
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Runs two concurrent get_or_load() calls for the same key under injected
// allocation failures, ignoring their outcome, and then checks that a
// regular load of that key still yields page 0.
SEASTAR_THREAD_TEST_CASE(test_exception_while_loading) {
    ::lru lru;
    simple_schema s;
    logalloc::region r;
    partition_index_cache cache(lru, r);

    // On scope exit, evict everything from the LRU under the region's allocator.
    auto clear_lru = defer([&] {
        with_allocator(r.allocator(), [&] {
            lru.evict_all();
        });
    });

    // Loader that resolves only after a later(), producing page 0.
    auto page0_loader = [&] (partition_index_cache::key_type k) {
        return later().then([&] {
            return make_page0(r, s);
        });
    };

    memory::with_allocation_failures([&] {
        cache.evict_gently().get();
        auto first = ignore_result(cache.get_or_load(0, page0_loader));
        auto second = ignore_result(cache.get_or_load(0, page0_loader));
        first.get();
        second.get();
    });

    // With failure injection over, the load must succeed.
    auto ptr = cache.get_or_load(0, page0_loader).get0();
    has_page0(ptr);
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_auto_clear) {
|
||||
::lru lru;
|
||||
simple_schema s;
|
||||
|
||||
@@ -19,6 +19,7 @@ from cassandra.cluster import ConsistencyLevel
|
||||
from cassandra.query import SimpleStatement
|
||||
|
||||
from util import new_test_table
|
||||
from nodetool import flush
|
||||
|
||||
def test_cdc_log_entries_use_cdc_streams(scylla_only, cql, test_keyspace):
|
||||
'''Test that the stream IDs chosen for CDC log entries come from the CDC generation
|
||||
@@ -44,3 +45,16 @@ def test_cdc_log_entries_use_cdc_streams(scylla_only, cql, test_keyspace):
|
||||
|
||||
assert(log_stream_ids.issubset(stream_ids))
|
||||
|
||||
|
||||
# Test for #10473: after a column is dropped from the base table, reading
# the CDC log entries written before the drop (flushed to sstables) must
# still work. The final SELECT carries no assertion on its result; the
# test passes as long as it does not raise.
def test_cdc_alter_table_drop_column(scylla_only, cql, test_keyspace):
    schema = "pk int primary key, v int"
    extra = " with cdc = {'enabled': true}"
    with new_test_table(cql, test_keyspace, schema, extra) as table:
        log_table = table + "_scylla_cdc_log"
        cql.execute(f"insert into {table} (pk, v) values (0, 0)")
        cql.execute(f"insert into {table} (pk, v) values (1, null)")
        # Flush both the base table and its CDC log.
        flush(cql, table)
        flush(cql, log_table)
        cql.execute(f"alter table {table} drop v")
        cql.execute(f"select * from {log_table}")
|
||||
|
||||
@@ -24,9 +24,11 @@
|
||||
# is or isn't necessary.
|
||||
|
||||
import pytest
|
||||
import re
|
||||
from util import new_test_table
|
||||
from cassandra.protocol import InvalidRequest
|
||||
from cassandra.connection import DRIVER_NAME, DRIVER_VERSION
|
||||
from cassandra.query import UNSET_VALUE
|
||||
|
||||
# When filtering for "x > 0" or "x < 0", rows with an unset value for x
|
||||
# should not match the filter.
|
||||
@@ -141,3 +143,118 @@ def test_index_with_in_relation(scylla_only, cql, test_keyspace):
|
||||
cql.execute(f"insert into {table} (p,c,v) values ({p}, {c}, {v})")
|
||||
res = cql.execute(f"select * from {table} where p in (0,1) and v = False ALLOW FILTERING")
|
||||
assert set(res) == set([(0,1,False),(0,3,False),(1,1,False), (1,3,False)])
|
||||
|
||||
# Test that LIKE operator works fine as a filter when the filtered column
|
||||
# has descending order. Regression test for issue #10183, when it was incorrectly
|
||||
# rejected as a "non-string" column.
|
||||
def test_filter_like_on_desc_column(cql, test_keyspace):
|
||||
with new_test_table(cql, test_keyspace, "a int, b text, primary key(a, b)",
|
||||
extra="with clustering order by (b desc)") as table:
|
||||
cql.execute(f"INSERT INTO {table} (a, b) VALUES (1, 'one')")
|
||||
res = cql.execute(f"SELECT b FROM {table} WHERE b LIKE '%%%' ALLOW FILTERING")
|
||||
assert res.one().b == "one"
|
||||
|
||||
# Test that IN restrictions are supported with filtering and return the
|
||||
# correct results.
|
||||
# We mark this test "cassandra_bug" because Cassandra could support this
|
||||
# feature but doesn't yet: It reports "IN predicates on non-primary-key
|
||||
# columns (v) is not yet supported" when v is a regular column, or "IN
|
||||
# restrictions are not supported when the query involves filtering" on
|
||||
# partition-key columns p1 or p2. By the way, it does support IN restrictions
|
||||
# on a clustering-key column.
|
||||
def test_filtering_with_in_relation(cql, test_keyspace, cassandra_bug):
|
||||
schema = 'p1 int, p2 int, c int, v int, primary key ((p1, p2),c)'
|
||||
with new_test_table(cql, test_keyspace, schema) as table:
|
||||
cql.execute(f"INSERT INTO {table} (p1, p2, c, v) VALUES (1, 2, 3, 4)")
|
||||
cql.execute(f"INSERT INTO {table} (p1, p2, c, v) VALUES (2, 3, 4, 5)")
|
||||
cql.execute(f"INSERT INTO {table} (p1, p2, c, v) VALUES (3, 4, 5, 6)")
|
||||
cql.execute(f"INSERT INTO {table} (p1, p2, c, v) VALUES (4, 5, 6, 7)")
|
||||
res = cql.execute(f"select * from {table} where p1 in (2,4) ALLOW FILTERING")
|
||||
assert set(res) == set([(2,3,4,5), (4,5,6,7)])
|
||||
res = cql.execute(f"select * from {table} where p2 in (2,4) ALLOW FILTERING")
|
||||
assert set(res) == set([(1,2,3,4), (3,4,5,6)])
|
||||
res = cql.execute(f"select * from {table} where c in (3,5) ALLOW FILTERING")
|
||||
assert set(res) == set([(1,2,3,4), (3,4,5,6)])
|
||||
res = cql.execute(f"select * from {table} where v in (5,7) ALLOW FILTERING")
|
||||
assert set(res) == set([(2,3,4,5), (4,5,6,7)])
|
||||
|
||||
# Test that subscripts in expressions work as expected. They should only work
|
||||
# on map columns, and must have the correct type. Test that they also work
|
||||
# as expected for null or unset subscripts.
|
||||
# Cassandra considers the null subscript 'm[null]' to be an invalid request.
|
||||
# In Scylla we decided to do it differently (we think better): m[null] is simply
|
||||
# a null, so the filter 'WHERE m[null] = 2' is not an error - it just doesn't
|
||||
# match anything. This is more consistent with our usual null handling
|
||||
# (null[2] and null < 2 are both defined as returning null), and will also
|
||||
# allow us in the future to support non-constant subscript - for example m[a]
|
||||
# where the column a can be null for some rows and non-null for other rows.
|
||||
# Because we decided that our behavior is better than Cassandra's, this test
|
||||
# fails on Cassandra and is marked with cassandra_bug.
|
||||
# This test is a superset of test test_null.py::test_map_subscript_null which
|
||||
# tests only the special case of a null subscript.
|
||||
# Reproduces #10361
|
||||
def test_filtering_with_subscript(cql, test_keyspace, cassandra_bug):
|
||||
with new_test_table(cql, test_keyspace,
|
||||
"p int, m1 map<int, int>, m2 map<text, text>, s set<int>, PRIMARY KEY (p)") as table:
|
||||
# Check for *errors* in subscript expressions - such as wrong type or
|
||||
# null - with an empty table. This will force the implementation to
|
||||
# check for these errors before actually evaluating the filter
|
||||
# expression - because there will be no rows to filter.
|
||||
|
||||
# A subscript is not allowed on a non-map column (in this case, a set)
|
||||
with pytest.raises(InvalidRequest, match='cannot be used as a map'):
|
||||
cql.execute(f"SELECT p FROM {table} WHERE s[2] = 3 ALLOW FILTERING")
|
||||
# Passing a wrong type for the subscript is not allowed
|
||||
with pytest.raises(InvalidRequest, match=re.escape('key(m1)')):
|
||||
cql.execute(f"select p from {table} where m1['black'] = 2 ALLOW FILTERING")
|
||||
with pytest.raises(InvalidRequest, match=re.escape('key(m2)')):
|
||||
cql.execute(f"select p from {table} where m2[1] = 2 ALLOW FILTERING")
|
||||
# See discussion of m1[null] above. Reproduces #10361, and fails
|
||||
# on Cassandra (Cassandra deliberately returns an error here -
|
||||
# an InvalidRequest with "Unsupported null map key for column m1").
|
||||
assert list(cql.execute(f"select p from {table} where m1[null] = 2 ALLOW FILTERING")) == []
|
||||
assert list(cql.execute(f"select p from {table} where m2[null] = 'hi' ALLOW FILTERING")) == []
|
||||
# Similar to above checks, but using a prepared statement. We can't
|
||||
# cause the driver to send the wrong type to a bound variable, so we
|
||||
# can't check that case unfortunately, but we have a new UNSET_VALUE
|
||||
# case.
|
||||
stmt = cql.prepare(f"select p from {table} where m1[?] = 2 ALLOW FILTERING")
|
||||
assert list(cql.execute(stmt, [None])) == []
|
||||
# The expression m1[UNSET_VALUE] should be an error, but because the
|
||||
# table is empty, we do not actually need to evaluate the expression
|
||||
# and the error might not be caught. So this test is commented
|
||||
# out. We'll do it below, after we add some data to ensure that the
|
||||
# expression does need to be evaluated.
|
||||
#with pytest.raises(InvalidRequest, match='Unsupported unset map key for column m1'):
|
||||
# cql.execute(stmt, [UNSET_VALUE])
|
||||
|
||||
# Finally, check for successful filtering with subscripts. For that we
|
||||
# need to add some data:
|
||||
cql.execute("INSERT INTO "+table+" (p, m1, m2) VALUES (1, {1:2, 3:4}, {'dog':'cat', 'hi':'hello'})")
|
||||
cql.execute("INSERT INTO "+table+" (p, m1, m2) VALUES (2, {2:3, 4:5}, {'man':'woman', 'black':'white'})")
|
||||
res = cql.execute(f"select p from {table} where m1[1] = 2 ALLOW FILTERING")
|
||||
assert list(res) == [(1,)]
|
||||
res = cql.execute(f"select p from {table} where m2['black'] = 'white' ALLOW FILTERING")
|
||||
assert list(res) == [(2,)]
|
||||
res = cql.execute(stmt, [1])
|
||||
assert list(res) == [(1,)]
|
||||
|
||||
# Try again the null-key request (reproduces #10361) that we did
|
||||
# earlier when there was no data in the table. Now there is, and
|
||||
# the scan brings up several rows, it may exercise different code
|
||||
# paths.
|
||||
assert list(cql.execute(f"select p from {table} where m1[null] = 2 ALLOW FILTERING")) == []
|
||||
with pytest.raises(InvalidRequest, match='Unsupported unset map key for column m1'):
|
||||
cql.execute(stmt, [UNSET_VALUE])
|
||||
|
||||
# Beyond the tests of map subscript expressions above, also test what happens
|
||||
# when the expression is fine (e.g., m[2] = 3) but the *data* itself is null.
|
||||
# We used to have a bug there where we attempted to incorrectly deserialize
|
||||
# this null and get marshaling errors or even crashes - see issue #10417.
|
||||
# This test reproduces #10417, but not always - run with "--count" to
|
||||
# reproduce failures.
|
||||
def test_filtering_null_map_with_subscript(cql, test_keyspace):
|
||||
schema = 'p text primary key, m map<int, int>'
|
||||
with new_test_table(cql, test_keyspace, schema) as table:
|
||||
cql.execute(f"INSERT INTO {table} (p) VALUES ('dog')")
|
||||
assert list(cql.execute(f"SELECT p FROM {table} WHERE m[2] = 3 ALLOW FILTERING")) == []
|
||||
|
||||
@@ -98,3 +98,51 @@ def test_mv_empty_string_partition_key(cql, test_keyspace):
|
||||
# because Cassandra forbids an empty partition key on select
|
||||
with pytest.raises(InvalidRequest, match='Key may not be empty'):
|
||||
cql.execute(f"SELECT * FROM {mv} WHERE v=''")
|
||||
|
||||
# Refs #10851. The code used to create a wildcard selection for all columns,
|
||||
# which erroneously also includes static columns if such are present in the
|
||||
# base table. Currently views only operate on regular columns and the filtering
|
||||
# code assumes that. TODO: once we implement static column support for materialized
|
||||
# views, this test case will be a nice regression test to ensure that everything still
|
||||
# works if the static columns are *not* used in the view.
|
||||
def test_filter_with_unused_static_column(cql, test_keyspace):
|
||||
schema = 'p int, c int, v int, s int static, primary key (p,c)'
|
||||
with new_test_table(cql, test_keyspace, schema) as table:
|
||||
with new_materialized_view(cql, table, select='p,c,v', pk='p,c,v', where='p IS NOT NULL and c IS NOT NULL and v = 44') as mv:
|
||||
cql.execute(f"INSERT INTO {table} (p,c,v) VALUES (42,43,44)")
|
||||
cql.execute(f"INSERT INTO {table} (p,c,v) VALUES (1,2,3)")
|
||||
assert list(cql.execute(f"SELECT * FROM {mv}")) == [(42, 43, 44)]
|
||||
|
||||
# Reproducer for issue #9450 - when a view's key column name is a (quoted)
|
||||
# keyword, writes used to fail because they generated internally broken CQL
|
||||
# with the column name not quoted.
|
||||
def test_mv_quoted_column_names(cql, test_keyspace):
|
||||
for colname in ['"dog"', '"Dog"', 'DOG', '"to"', 'int']:
|
||||
with new_test_table(cql, test_keyspace, f'p int primary key, {colname} int') as table:
|
||||
with new_materialized_view(cql, table, '*', f'{colname}, p', f'{colname} is not null and p is not null') as mv:
|
||||
cql.execute(f'INSERT INTO {table} (p, {colname}) values (1, 2)')
|
||||
# Validate that not only the write didn't fail, it actually
|
||||
# wrote the right thing to the view. NOTE: on a single-node
|
||||
# Scylla, view update is synchronous so we can just read and
|
||||
# don't need to wait or retry.
|
||||
assert list(cql.execute(f'SELECT * from {mv}')) == [(2, 1)]
|
||||
|
||||
# Same as test_mv_quoted_column_names above (reproducing issue #9450), just
|
||||
# check *view building* - i.e., pre-existing data in the base table that
|
||||
# needs to be copied to the view. The view building cannot return an error
|
||||
# to the user, but can fail to write the desired data into the view.
|
||||
def test_mv_quoted_column_names_build(cql, test_keyspace):
|
||||
for colname in ['"dog"', '"Dog"', 'DOG', '"to"', 'int']:
|
||||
with new_test_table(cql, test_keyspace, f'p int primary key, {colname} int') as table:
|
||||
cql.execute(f'INSERT INTO {table} (p, {colname}) values (1, 2)')
|
||||
with new_materialized_view(cql, table, '*', f'{colname}, p', f'{colname} is not null and p is not null') as mv:
|
||||
# When Scylla's view builder fails as it did in issue #9450,
|
||||
# there is no way to tell this state apart from a view build
|
||||
# that simply hasn't completed (besides looking at the logs,
|
||||
# which we don't). This means, unfortunately, that a failure
|
||||
# of this test is slow - it needs to wait for a timeout.
|
||||
start_time = time.time()
|
||||
while time.time() < start_time + 30:
|
||||
if list(cql.execute(f'SELECT * from {mv}')) == [(2, 1)]:
|
||||
break
|
||||
assert list(cql.execute(f'SELECT * from {mv}')) == [(2, 1)]
|
||||
|
||||
@@ -28,7 +28,7 @@ from util import unique_name, random_string, new_test_table
|
||||
@pytest.fixture(scope="module")
|
||||
def table1(cql, test_keyspace):
|
||||
table = test_keyspace + "." + unique_name()
|
||||
cql.execute(f"CREATE TABLE {table} (p text, c text, v text, primary key (p, c))")
|
||||
cql.execute(f"CREATE TABLE {table} (p text, c text, v text, i int, s set<int>, m map<int, int>, primary key (p, c))")
|
||||
yield table
|
||||
cql.execute("DROP TABLE " + table)
|
||||
|
||||
@@ -188,3 +188,65 @@ def test_empty_string_key2(cql, test_keyspace):
|
||||
cql.execute(f"INSERT INTO {table} (p1,p2,c,v) VALUES ('', '', '', 'cat')")
|
||||
cql.execute(f"INSERT INTO {table} (p1,p2,c,v) VALUES ('x', 'y', 'z', 'dog')")
|
||||
assert list(cql.execute(f"SELECT v FROM {table} WHERE p1='' AND p2='' AND c=''")) == [('cat',)]
|
||||
|
||||
# Cassandra considers the null subscript 'm[null]' to be an invalid request.
|
||||
# In Scylla we decided to do it differently (we think better): m[null] is simply
|
||||
# a null, so the filter 'WHERE m[null] = 3' is not an error - it just doesn't
|
||||
# match anything. This is more consistent with our usual null handling (null[2]
|
||||
# and null < 2 are both defined as returning null), and will also allow us
|
||||
# in the future to support non-constant subscript - for example m[a] where
|
||||
# the column a can be null for some rows and non-null for other rows.
|
||||
# Before we implemented the above decision, we had multiple bugs in this case,
|
||||
# resulting in bizarre errors and even crashes (see #10361, #10399 and #10417).
|
||||
#
|
||||
# Because this test uses a shared table (table1), then depending on how it's
|
||||
# run, it sometimes sees an empty table and sometimes a table with data
|
||||
# (and null values for the map m...), so this test mixes several different
|
||||
# concerns and problems. The same problems are better covered separately
|
||||
# by test_filtering.py::test_filtering_with_subscript and
|
||||
# test_filtering.py::test_filtering_null_map_with_subscript so this test
|
||||
# should eventually be deleted.
|
||||
def test_map_subscript_null(cql, table1, cassandra_bug):
|
||||
assert list(cql.execute(f"SELECT p FROM {table1} WHERE m[null] = 3 ALLOW FILTERING")) == []
|
||||
assert list(cql.execute(cql.prepare(f"SELECT p FROM {table1} WHERE m[?] = 3 ALLOW FILTERING"), [None])) == []
|
||||
|
||||
# Similarly, CONTAINS restriction with NULL should also match nothing.
|
||||
# Reproduces #10359.
|
||||
@pytest.mark.xfail(reason="Issue #10359")
|
||||
def test_filtering_contains_null(cassandra_bug, cql, table1):
|
||||
p = unique_key_string()
|
||||
cql.execute(f"INSERT INTO {table1} (p,c,s) VALUES ('{p}', '1', {{1, 2}})")
|
||||
cql.execute(f"INSERT INTO {table1} (p,c,s) VALUES ('{p}', '2', {{3, 4}})")
|
||||
cql.execute(f"INSERT INTO {table1} (p,c) VALUES ('{p}', '3')")
|
||||
assert list(cql.execute(f"SELECT c FROM {table1} WHERE p='{p}' AND s CONTAINS NULL ALLOW FILTERING")) == []
|
||||
|
||||
# Similarly, CONTAINS KEY restriction with NULL should also match nothing.
|
||||
# Reproduces #10359.
|
||||
@pytest.mark.xfail(reason="Issue #10359")
|
||||
def test_filtering_contains_key_null(cassandra_bug, cql, table1):
|
||||
p = unique_key_string()
|
||||
cql.execute(f"INSERT INTO {table1} (p,c,m) VALUES ('{p}', '1', {{1: 2}})")
|
||||
cql.execute(f"INSERT INTO {table1} (p,c,m) VALUES ('{p}', '2', {{3: 4}})")
|
||||
cql.execute(f"INSERT INTO {table1} (p,c) VALUES ('{p}', '3')")
|
||||
assert list(cql.execute(f"SELECT c FROM {table1} WHERE p='{p}' AND m CONTAINS KEY NULL ALLOW FILTERING")) == []
|
||||
|
||||
# The above tests test_filtering_eq_null and test_filtering_inequality_null
|
||||
# have WHERE x=NULL or x>NULL where "x" is a regular column. Such a
|
||||
# comparison requires ALLOW FILTERING for non-NULL parameters, so we also
|
||||
# require it for NULL. Unlike the previous tests, this one also passed on
|
||||
# Cassandra.
|
||||
def test_filtering_null_comparison_no_filtering(cql, table1):
|
||||
with pytest.raises(InvalidRequest, match='ALLOW FILTERING'):
|
||||
cql.execute(f"SELECT c FROM {table1} WHERE p='x' AND i=NULL")
|
||||
with pytest.raises(InvalidRequest, match='ALLOW FILTERING'):
|
||||
cql.execute(f"SELECT c FROM {table1} WHERE p='x' AND i>NULL")
|
||||
with pytest.raises(InvalidRequest, match='ALLOW FILTERING'):
|
||||
cql.execute(f"SELECT c FROM {table1} WHERE p='x' AND i>=NULL")
|
||||
with pytest.raises(InvalidRequest, match='ALLOW FILTERING'):
|
||||
cql.execute(f"SELECT c FROM {table1} WHERE p='x' AND i<NULL")
|
||||
with pytest.raises(InvalidRequest, match='ALLOW FILTERING'):
|
||||
cql.execute(f"SELECT c FROM {table1} WHERE p='x' AND i<=NULL")
|
||||
with pytest.raises(InvalidRequest, match='ALLOW FILTERING'):
|
||||
cql.execute(f"SELECT c FROM {table1} WHERE p='x' AND s CONTAINS NULL")
|
||||
with pytest.raises(InvalidRequest, match='ALLOW FILTERING'):
|
||||
cql.execute(f"SELECT c FROM {table1} WHERE p='x' AND m CONTAINS KEY NULL")
|
||||
|
||||
64
test/cql-pytest/test_scan.py
Normal file
64
test/cql-pytest/test_scan.py
Normal file
@@ -0,0 +1,64 @@
|
||||
# Copyright 2022-present ScyllaDB
|
||||
#
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
|
||||
#############################################################################
|
||||
# Tests for scanning SELECT requests (which read many rows and/or many
|
||||
# partitions).
|
||||
# We have a separate test file test_filtering.py for scans which also involve
|
||||
# filtering, and test_allow_filtering.py for checking when "ALLOW FILTERING"
|
||||
# is needed in scan. test_secondary_index.py also contains tests for scanning
|
||||
# using a secondary index.
|
||||
#############################################################################
|
||||
|
||||
import pytest
|
||||
from util import new_test_table
|
||||
from cassandra.query import SimpleStatement
|
||||
|
||||
# Regression test for #9482
|
||||
def test_scan_ending_with_static_row(cql, test_keyspace):
|
||||
with new_test_table(cql, test_keyspace, "pk int, ck int, s int STATIC, v int, PRIMARY KEY (pk, ck)") as table:
|
||||
stmt = cql.prepare(f"UPDATE {table} SET s = ? WHERE pk = ?")
|
||||
for pk in range(100):
|
||||
cql.execute(stmt, (0, pk))
|
||||
|
||||
statement = SimpleStatement(f"SELECT * FROM {table}", fetch_size=10)
|
||||
# This will trigger an error in either processing or building the query
|
||||
# results. The success criteria for this test is the query finishing
|
||||
# without errors.
|
||||
res = list(cql.execute(statement))
|
||||
|
||||
|
||||
# Test that if we have multi-column restrictions on the clustering key
|
||||
# and additional filtering on regular columns, both restrictions are obeyed.
|
||||
# Reproduces #6200.
|
||||
def test_multi_column_restrictions_and_filtering(cql, test_keyspace):
|
||||
with new_test_table(cql, test_keyspace, "p int, c1 int, c2 int, r int, PRIMARY KEY (p, c1, c2)") as table:
|
||||
stmt = cql.prepare(f"INSERT INTO {table} (p, c1, c2, r) VALUES (1, ?, ?, ?)")
|
||||
for i in range(2):
|
||||
for j in range(2):
|
||||
cql.execute(stmt, [i, j, j])
|
||||
assert list(cql.execute(f"SELECT c1,c2,r FROM {table} WHERE p=1 AND (c1, c2) = (0,1)")) == [(0,1,1)]
|
||||
# Since in that result r=1, adding "AND r=1" should return the same
|
||||
# result, and adding "AND r=0" should return nothing.
|
||||
assert list(cql.execute(f"SELECT c1,c2,r FROM {table} WHERE p=1 AND (c1, c2) = (0,1) AND r=1 ALLOW FILTERING")) == [(0,1,1)]
|
||||
# Reproduces #6200:
|
||||
assert list(cql.execute(f"SELECT c1,c2,r FROM {table} WHERE p=1 AND (c1, c2) = (0,1) AND r=0 ALLOW FILTERING")) == []
|
||||
|
||||
# Test that if we have a range multi-column restriction on the clustering key
|
||||
# and additional filtering on regular columns, both restrictions are obeyed.
|
||||
# Similar to test_multi_column_restrictions_and_filtering, but uses a range
|
||||
# restriction on the clustering key columns.
|
||||
# Reproduces #12014, the code is taken from a reproducer provided by a user.
|
||||
def test_multi_column_range_restrictions_and_filtering(cql, test_keyspace):
|
||||
with new_test_table(cql, test_keyspace, "pk int, ts timestamp, id int, processed boolean, PRIMARY KEY (pk, ts, id)") as table:
|
||||
cql.execute(f"INSERT INTO {table} (pk, ts, id, processed) VALUES (0, currentTimestamp(), 0, true)")
|
||||
cql.execute(f"INSERT INTO {table} (pk, ts, id, processed) VALUES (0, currentTimestamp(), 1, true)")
|
||||
cql.execute(f"INSERT INTO {table} (pk, ts, id, processed) VALUES (0, currentTimestamp(), 2, false)")
|
||||
cql.execute(f"INSERT INTO {table} (pk, ts, id, processed) VALUES (0, currentTimestamp(), 3, false)")
|
||||
# This select doesn't use multi-column restrictions, the result shouldn't change when it does.
|
||||
rows1 = list(cql.execute(f"SELECT id, processed FROM {table} WHERE pk = 0 AND ts >= 0 AND processed = false ALLOW FILTERING"))
|
||||
assert rows1 == [(2, False), (3, False)]
|
||||
# Reproduces #12014
|
||||
rows2 = list(cql.execute(f"SELECT id, processed FROM {table} WHERE pk = 0 AND (ts, id) >= (0, 0) AND processed = false ALLOW FILTERING"))
|
||||
assert rows1 == rows2
|
||||
@@ -102,6 +102,19 @@ def new_materialized_view(cql, table, select, pk, where):
|
||||
finally:
|
||||
cql.execute(f"DROP MATERIALIZED VIEW {mv}")
|
||||
|
||||
# A utility function for creating a new temporary secondary index of
|
||||
# an existing table.
|
||||
@contextmanager
|
||||
def new_secondary_index(cql, table, column, name='', extra=''):
|
||||
keyspace = table.split('.')[0]
|
||||
if not name:
|
||||
name = unique_name()
|
||||
cql.execute(f"CREATE INDEX {name} ON {table} ({column}) {extra}")
|
||||
try:
|
||||
yield f"{keyspace}.{name}"
|
||||
finally:
|
||||
cql.execute(f"DROP INDEX {keyspace}.{name}")
|
||||
|
||||
def project(column_name_string, rows):
|
||||
"""Returns a list of column values from each of the rows."""
|
||||
return [getattr(r, column_name_string) for r in rows]
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user