Mirror of https://github.com/scylladb/scylladb.git (synced 2026-05-12 19:02:12 +00:00)

Compare commits: fast-serve...ykaul/late (51 commits)
| SHA1 |
|---|
| 4d44ee3bb4 |
| 8ca0f2dd54 |
| 4c3a86c515 |
| ccd92c0b6b |
| a7f204ee45 |
| e2217c143f |
| 3219786ab8 |
| a30ce98bc4 |
| ab12083525 |
| 454a8e6966 |
| eb241a7048 |
| 5385df02ec |
| 25fd1001c2 |
| ec4b483e88 |
| cab043323d |
| d6346e68c1 |
| 1dccfeb988 |
| 8d22ef3058 |
| 321006ecbd |
| 65eabda833 |
| 207de967fb |
| 4722be1289 |
| 5c5306c692 |
| df69a5c79b |
| c44625ebdf |
| 797bc28aae |
| c00fee0316 |
| 9f692857be |
| efe0e39d85 |
| b69d00b0a7 |
| 5cdfdd9ba3 |
| b70beb3e13 |
| 8d65a03951 |
| 5895dff03b |
| d2b695aa64 |
| 385915c101 |
| e88ce09372 |
| 11b838e71e |
| 3875d79ac6 |
| 918130befd |
| a97502920b |
| 0b9a2844bd |
| ac89cffc9f |
| 01b4292f87 |
| 923ff9abc9 |
| 6a25f52473 |
| baca685629 |
| 716202b839 |
| a4ebe16517 |
| 4b637226a7 |
| 7c09f35ddf |
@@ -1362,6 +1362,33 @@ static int get_dimensions(const rjson::value& vector_attribute, std::string_view
    return dimensions_v->GetInt();
}

+// As noted in issue #5052, in Alternator the CreateTable and UpdateTable are
+// currently synchronous - they return only after the operation is complete.
+// After announce() of the new schema finished, the schema change is committed
+// and a majority of nodes know it - but it's possible that some live nodes
+// have not yet applied the new schema. If we return to the user now, and the
+// user sends a new request that relies on the new schema, it might fail.
+// So before returning, we must verify that *all* nodes have applied the new
+// schema. This is what wait_for_schema_agreement_after_ddl() does.
+//
+// Note that wait_for_schema_agreement_after_ddl() has a timeout (currently
+// hard-coded to 30 seconds). If the timeout is reached an InternalServerError
+// is returned. The user, who doesn't know if the CreateTable succeeded or not,
+// can retry the request and will get a ResourceInUseException and know the
+// table already exists. So a CreateTable that returns a ResourceInUseException
+// should also call wait_for_schema_agreement_after_ddl().
+//
+// When issue #5052 is resolved, this function can be removed - we will need
+// to check if we reached schema agreement, but not to *wait* for it.
+static future<> wait_for_schema_agreement_after_ddl(service::migration_manager& mm, const replica::database& db) {
+    static constexpr auto schema_agreement_seconds = 30;
+    try {
+        co_await mm.wait_for_schema_agreement(db, db::timeout_clock::now() + std::chrono::seconds(schema_agreement_seconds), nullptr);
+    } catch (const service::migration_manager::schema_agreement_timeout&) {
+        throw api_error::internal(fmt::format("The operation was successful, but unable to confirm cluster-wide schema agreement after {} seconds. Please retry the operation, and wait for the retry to report an error since the operation was already done.", schema_agreement_seconds));
+    }
+}
+
future<executor::request_return_type> executor::create_table_on_shard0(service::client_state&& client_state, tracing::trace_state_ptr trace_state, rjson::value request, bool enforce_authorization, bool warn_authorization,
        const db::tablets_mode_t::mode tablets_mode, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
    throwing_assert(this_shard_id() == 0);
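The hunk above implements a poll-until-deadline wait and converts its timeout into an error that tells the client to retry. A minimal standalone sketch of that pattern in plain C++ — `cluster_in_agreement()` is a hypothetical stand-in for the real schema-version check, and the success-on-third-poll behavior is fabricated for the demo:

```cpp
#include <chrono>
#include <cstdio>
#include <stdexcept>
#include <thread>

// Hypothetical stand-in for the real check that all live nodes report
// the same schema version; not the ScyllaDB implementation.
static bool cluster_in_agreement() {
    static int polls = 0;
    return ++polls >= 3; // pretend agreement is reached on the third poll
}

struct schema_agreement_timeout : std::runtime_error {
    schema_agreement_timeout() : std::runtime_error("Unable to reach schema agreement") {}
};

// Poll every 500ms until the predicate holds or the deadline passes,
// mirroring the wait_for_schema_agreement() loop shown in this diff.
void wait_for_agreement(std::chrono::steady_clock::time_point deadline) {
    while (!cluster_in_agreement()) {
        if (std::chrono::steady_clock::now() > deadline) {
            throw schema_agreement_timeout();
        }
        std::this_thread::sleep_for(std::chrono::milliseconds(500));
    }
}

int main() {
    constexpr auto schema_agreement_seconds = 30;
    try {
        wait_for_agreement(std::chrono::steady_clock::now() + std::chrono::seconds(schema_agreement_seconds));
        std::puts("schema agreement reached");
    } catch (const schema_agreement_timeout&) {
        // The DDL itself succeeded; only the confirmation timed out, so the
        // caller should be told to retry and expect an "already exists" error.
        std::printf("DDL done, but no cluster-wide agreement after %d seconds; retry the request\n",
                    schema_agreement_seconds);
    }
}
```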
@@ -1695,13 +1722,26 @@ future<executor::request_return_type> executor::create_table_on_shard0(service::
            }
        }
    }
+   bool table_already_exists = false;
    try {
        schema_mutations = service::prepare_new_keyspace_announcement(_proxy.local_db(), ksm, ts);
    } catch (exceptions::already_exists_exception&) {
        if (_proxy.data_dictionary().has_schema(keyspace_name, table_name)) {
-           co_return api_error::resource_in_use(fmt::format("Table {} already exists", table_name));
+           table_already_exists = true;
        }
    }
+   if (table_already_exists) {
+       // The user may have retried a CreateTable operation after it timed
+       // out in wait_for_schema_agreement_after_ddl(). So before we may
+       // return ResourceInUseException (which can lead the user to start
+       // using the table that it now knows exists), we need to wait for
+       // schema agreement, just like the original CreateTable did. Again
+       // we fail with InternalServerError if schema agreement still cannot
+       // be reached. We can release group0_guard before waiting.
+       release_guard(std::move(group0_guard));
+       co_await wait_for_schema_agreement_after_ddl(_mm, _proxy.local_db());
+       co_return api_error::resource_in_use(fmt::format("Table {} already exists", table_name));
+   }
    if (_proxy.data_dictionary().try_find_table(schema->id())) {
        // This should never happen, the ID is supposed to be unique
        co_return api_error::internal(format("Table with ID {} already exists", schema->id()));

@@ -1750,7 +1790,7 @@ future<executor::request_return_type> executor::create_table_on_shard0(service::
        }
    }

-   co_await _mm.wait_for_schema_agreement(_proxy.local_db(), db::timeout_clock::now() + 10s, nullptr);
+   co_await wait_for_schema_agreement_after_ddl(_mm, _proxy.local_db());
    rjson::value status = rjson::empty_object();
    executor::supplement_table_info(request, *schema, _proxy);
    rjson::add(status, "TableDescription", std::move(request));

@@ -2189,7 +2229,7 @@ future<executor::request_return_type> executor::update_table(client_state& clien
            throw;
        }
    }
-   co_await mm.wait_for_schema_agreement(p.local().local_db(), db::timeout_clock::now() + 10s, nullptr);
+   co_await wait_for_schema_agreement_after_ddl(mm, p.local().local_db());

    rjson::value status = rjson::empty_object();
    supplement_table_info(request, *schema, p.local());
@@ -185,24 +185,14 @@ future<lw_shared_ptr<cache::role_record>> cache::fetch_role(const role_name_t& r
        static const sstring q = format("SELECT role, name, value FROM {}.{} WHERE role = ?", db::system_keyspace::NAME, ROLE_ATTRIBUTES_CF);
        auto rs = co_await fetch(q);
        for (const auto& r : *rs) {
            if (!r.has("value")) {
                continue;
            }
            rec->attributes[r.get_as<sstring>("name")] =
                r.get_as<sstring>("value");
            co_await coroutine::maybe_yield();
        }
    }
    // permissions
    {
        static const sstring q = format("SELECT role, resource, permissions FROM {}.{} WHERE role = ?", db::system_keyspace::NAME, PERMISSIONS_CF);
        auto rs = co_await fetch(q);
        for (const auto& r : *rs) {
            auto resource = r.get_as<sstring>("resource");
            auto perms_strings = r.get_set<sstring>("permissions");
            std::unordered_set<sstring> perms_set(perms_strings.begin(), perms_strings.end());
            auto pset = permissions::from_strings(perms_set);
            rec->permissions[std::move(resource)] = std::move(pset);
            co_await coroutine::maybe_yield();
        }
    }
    co_return rec;
}

@@ -44,7 +44,6 @@ public:
    std::unordered_set<role_name_t> members;
    sstring salted_hash;
    std::unordered_map<sstring, sstring, sstring_hash, sstring_eq> attributes;
    std::unordered_map<sstring, permission_set, sstring_hash, sstring_eq> permissions;
private:
    friend cache;
    // cached permissions include effects of role's inheritance

@@ -76,7 +76,11 @@ default_authorizer::authorize(const role_or_anonymous& maybe_role, const resourc
    if (results->empty()) {
        co_return permissions::NONE;
    }
-   co_return permissions::from_strings(results->one().get_set<sstring>(PERMISSIONS_NAME));
+   const auto& row = results->one();
+   if (!row.has(PERMISSIONS_NAME)) {
+       co_return permissions::NONE;
+   }
+   co_return permissions::from_strings(row.get_set<sstring>(PERMISSIONS_NAME));
}

future<>
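The authorizer fix above guards against a row that exists while its set-valued column is null. A minimal sketch of that defensive pattern, with a hypothetical `row` type standing in for the CQL result row (this is not the ScyllaDB API):

```cpp
#include <cstdio>
#include <optional>
#include <string>
#include <unordered_set>

using permission_set = std::unordered_set<std::string>;

// Hypothetical row type: a column is either absent (null) or a set of strings.
struct row {
    std::optional<permission_set> permissions;
};

// Mirrors the fix: check presence before extracting the set, and fall
// back to "no permissions" instead of throwing on a null column.
permission_set permissions_of(const row& r) {
    if (!r.permissions.has_value()) {
        return {}; // permissions::NONE equivalent
    }
    return *r.permissions;
}

int main() {
    row with_perms{permission_set{"SELECT", "MODIFY"}};
    row null_perms{std::nullopt}; // row exists, column is null

    std::printf("with_perms: %zu entries\n", permissions_of(with_perms).size());
    std::printf("null_perms: %zu entries\n", permissions_of(null_perms).size());
}
```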
@@ -136,9 +136,9 @@ public:
    {}

    future<> insert(auth::authenticated_user user, cql3::prepared_cache_key_type prep_cache_key, value_type v) noexcept {
-       return _cache.get_ptr(key_type(std::move(user), std::move(prep_cache_key)), [v = std::move(v)] (const cache_key_type&) mutable {
+       return _cache.insert(key_type(std::move(user), std::move(prep_cache_key)), [v = std::move(v)] (const cache_key_type&) mutable {
            return make_ready_future<value_type>(std::move(v));
-       }).discard_result();
+       });
    }

    value_ptr find(const auth::authenticated_user& user, const cql3::prepared_cache_key_type& prep_cache_key) {
@@ -275,9 +275,12 @@ future<shared_ptr<cql_transport::messages::result_message>> batch_statement::do_
        _statements[i].statement->restrictions().validate_primary_key(options.for_statement(i));
    }

+   // Set the histogram for deferred latency marking.
+   auto& stats = qp.proxy().get_stats();
    if (_has_conditions) {
        ++_stats.cas_batches;
        _stats.statements_in_cas_batches += _statements.size();
+       query_state.set_latency_histogram(stats.cas_write);
        return execute_with_conditions(qp, options, query_state).then([guardrail_state, cl] (auto result) {
            if (guardrail_state == query_processor::write_consistency_guardrail_state::WARN) {
                result->add_warning(format("Using write consistency level {} listed on the "

@@ -290,10 +293,13 @@ future<shared_ptr<cql_transport::messages::result_message>> batch_statement::do_
    ++_stats.batches;
    _stats.statements_in_batches += _statements.size();

+   query_state.set_latency_histogram(stats.write);
+
    auto timeout = db::timeout_clock::now() + get_timeout(query_state.get_client_state(), options);
+   auto defer_latency = query_state.has_deferred_latency();
    return get_mutations(qp, options, timeout, local, now, query_state).then([this, &qp, cl, timeout, tr_state = query_state.get_trace_state(),
-           permit = query_state.get_permit()] (utils::chunked_vector<mutation> ms) mutable {
-       return execute_without_conditions(qp, std::move(ms), cl, timeout, std::move(tr_state), std::move(permit));
+           permit = query_state.get_permit(), defer_latency] (utils::chunked_vector<mutation> ms) mutable {
+       return execute_without_conditions(qp, std::move(ms), cl, timeout, std::move(tr_state), std::move(permit), defer_latency);
    }).then([guardrail_state, cl] (coordinator_result<> res) {
        if (!res) {
            return make_ready_future<shared_ptr<cql_transport::messages::result_message>>(

@@ -314,7 +320,8 @@ future<coordinator_result<>> batch_statement::execute_without_conditions(
        db::consistency_level cl,
        db::timeout_clock::time_point timeout,
        tracing::trace_state_ptr tr_state,
-       service_permit permit) const
+       service_permit permit,
+       bool defer_coordinator_latency_mark) const
{
    // FIXME: do we need to do this?
#if 0

@@ -341,7 +348,9 @@ future<coordinator_result<>> batch_statement::execute_without_conditions(
            mutate_atomic = false;
        }
    }
-   return qp.proxy().mutate_with_triggers(std::move(mutations), cl, timeout, mutate_atomic, std::move(tr_state), std::move(permit), db::allow_per_partition_rate_limit::yes);
+   return qp.proxy().mutate_with_triggers(std::move(mutations), cl, timeout, mutate_atomic, std::move(tr_state), std::move(permit), db::allow_per_partition_rate_limit::yes, false, {
+       .defer_coordinator_latency_mark = defer_coordinator_latency_mark,
+   });
}

future<shared_ptr<cql_transport::messages::result_message>> batch_statement::execute_with_conditions(

@@ -402,7 +411,7 @@ future<shared_ptr<cql_transport::messages::result_message>> batch_statement::exe

    auto* request_ptr = request.get();
    return qp.proxy().cas(schema, std::move(cas_shard), *request_ptr, request->read_command(qp), request->key(),
-           {read_timeout, qs.get_permit(), qs.get_client_state(), qs.get_trace_state()},
+           {read_timeout, qs.get_permit(), qs.get_client_state(), qs.get_trace_state(), {}, {}, service::node_local_only::no, qs.has_deferred_latency()},
            std::move(cl_for_paxos).assume_value(), cl_for_learn, batch_timeout, cas_timeout).then([this, request = std::move(request)] (bool is_applied) {
        return request->build_cas_result_set(_metadata, _columns_of_cas_result_set, is_applied);
    });

@@ -490,4 +499,3 @@ audit::statement_category batch_statement::category() const {

}
@@ -140,7 +140,8 @@ private:
        db::consistency_level cl,
        db::timeout_clock::time_point timeout,
        tracing::trace_state_ptr tr_state,
-       service_permit permit) const;
+       service_permit permit,
+       bool defer_coordinator_latency_mark) const;

    future<shared_ptr<cql_transport::messages::result_message>> execute_with_conditions(
        query_processor& qp,
@@ -281,7 +281,10 @@ modification_statement::do_execute(query_processor& qp, service::query_state& qs

    _restrictions->validate_primary_key(options);

+   // Set the histogram for deferred latency marking.
+   auto& stats = qp.proxy().get_stats();
    if (has_conditions()) {
+       qs.set_latency_histogram(stats.cas_write);
        auto result = co_await execute_with_condition(qp, qs, options);
        if (guardrail_state == query_processor::write_consistency_guardrail_state::WARN) {
            result->add_warning(format("Using write consistency level {} listed on the "

@@ -290,6 +293,8 @@ modification_statement::do_execute(query_processor& qp, service::query_state& qs
        co_return result;
    }

+   qs.set_latency_histogram(stats.write);
+
    json_cache_opt json_cache = maybe_prepare_json_cache(options);
    std::vector<dht::partition_range> keys = build_partition_keys(options, json_cache);

@@ -334,7 +339,8 @@ modification_statement::execute_without_condition(query_processor& qp, service::
    }

    return qp.proxy().mutate_with_triggers(std::move(mutations), cl, timeout, false, qs.get_trace_state(), qs.get_permit(), db::allow_per_partition_rate_limit::yes, this->is_raw_counter_shard_write(), {
-       .node_local_only = options.get_specific_options().node_local_only
+       .node_local_only = options.get_specific_options().node_local_only,
+       .defer_coordinator_latency_mark = qs.has_deferred_latency(),
    });
    });
}

@@ -450,7 +456,7 @@ modification_statement::execute_with_condition(query_processor& qp, service::que
    }

    return qp.proxy().cas(s, std::move(cas_shard), *request_ptr, request->read_command(qp), request->key(),
-       {read_timeout, qs.get_permit(), qs.get_client_state(), qs.get_trace_state()},
+       {read_timeout, qs.get_permit(), qs.get_client_state(), qs.get_trace_state(), {}, {}, service::node_local_only::no, qs.has_deferred_latency()},
        std::move(cl_for_paxos).assume_value(), cl_for_learn, statement_timeout, cas_timeout).then([this, request = std::move(request), tablet_replicas = std::move(tablet_info->tablet_replicas), token_range = tablet_info->token_range] (bool is_applied) {
        auto result = request->build_cas_result_set(_metadata, _columns_of_cas_result_set, is_applied);
        result->add_tablet_info(tablet_replicas, token_range);
@@ -482,6 +482,19 @@ select_statement::do_execute(query_processor& qp,

    auto key_ranges = _restrictions->get_partition_key_ranges(options);

+   // Set the histogram for deferred latency marking.
+   // Serial consistency reads go through paxos (cas_read histogram),
+   // non-serial reads use read or range histograms.
+   {
+       auto& stats = qp.proxy().get_stats();
+       if (db::is_serial_consistency(options.get_consistency())) {
+           state.set_latency_histogram(stats.cas_read);
+       } else {
+           bool is_range = key_ranges.empty() || !query::is_single_partition(key_ranges.front());
+           state.set_latency_histogram(is_range ? stats.range : stats.read);
+       }
+   }
+
    auto token = dht::token();
    std::optional<locator::tablet_routing_info> tablet_info = {};

@@ -765,7 +778,7 @@ view_indexed_table_select_statement::do_execute_base_query(
    if (previous_result_size < query::result_memory_limiter::maximum_result_size && concurrency < max_base_table_query_concurrency) {
        concurrency *= 2;
    }
-   coordinator_result<service::storage_proxy::coordinator_query_result> rqr = co_await qp.proxy().query_result(_schema, command, std::move(prange), options.get_consistency(), {timeout, state.get_permit(), state.get_client_state(), state.get_trace_state()});
+   coordinator_result<service::storage_proxy::coordinator_query_result> rqr = co_await qp.proxy().query_result(_schema, command, std::move(prange), options.get_consistency(), {timeout, state.get_permit(), state.get_client_state(), state.get_trace_state(), {}, {}, service::node_local_only::no, state.has_deferred_latency()});
    if (!rqr.has_value()) {
        co_return std::move(rqr).as_failure();
    }

@@ -837,7 +850,7 @@ view_indexed_table_select_statement::do_execute_base_query(
        command->slice._row_ranges.push_back(query::clustering_range::make_singular(key.clustering));
    }
    coordinator_result<service::storage_proxy::coordinator_query_result> rqr
-       = co_await qp.proxy().query_result(_schema, command, {dht::partition_range::make_singular(key.partition)}, options.get_consistency(), {timeout, state.get_permit(), state.get_client_state(), state.get_trace_state()});
+       = co_await qp.proxy().query_result(_schema, command, {dht::partition_range::make_singular(key.partition)}, options.get_consistency(), {timeout, state.get_permit(), state.get_client_state(), state.get_trace_state(), {}, {}, service::node_local_only::no, state.has_deferred_latency()});
    if (!rqr.has_value()) {
        co_return std::move(rqr).as_failure();
    }

@@ -912,7 +925,7 @@ select_statement::execute_without_checking_exception_message_non_aggregate_unpag
        command,
        std::move(prange),
        options.get_consistency(),
-       {timeout, state.get_permit(), state.get_client_state(), state.get_trace_state(), {}, {}, options.get_specific_options().node_local_only},
+       {timeout, state.get_permit(), state.get_client_state(), state.get_trace_state(), {}, {}, options.get_specific_options().node_local_only, state.has_deferred_latency()},
        cas_shard).then(utils::result_wrap([] (service::storage_proxy::coordinator_query_result qr) {
        return make_ready_future<coordinator_result<foreign_ptr<lw_shared_ptr<query::result>>>>(std::move(qr.query_result));
    }));

@@ -921,7 +934,7 @@ select_statement::execute_without_checking_exception_message_non_aggregate_unpag
            return this->process_results(std::move(result), cmd, options, now);
        }));
    } else {
-       return qp.proxy().query_result(_query_schema, cmd, std::move(partition_ranges), options.get_consistency(), {timeout, state.get_permit(), state.get_client_state(), state.get_trace_state(), {}, {}, options.get_specific_options().node_local_only}, std::move(cas_shard))
+       return qp.proxy().query_result(_query_schema, cmd, std::move(partition_ranges), options.get_consistency(), {timeout, state.get_permit(), state.get_client_state(), state.get_trace_state(), {}, {}, options.get_specific_options().node_local_only, state.has_deferred_latency()}, std::move(cas_shard))
            .then(wrap_result_to_error_message([this, &options, now, cmd] (service::storage_proxy::coordinator_query_result qr) {
                return this->process_results(std::move(qr.query_result), cmd, options, now);
            }));

@@ -1199,6 +1212,10 @@ view_indexed_table_select_statement::actually_do_execute(query_processor& qp,

    validate_for_read(cl);

+   // Secondary index reads always go through proxy().query_result().
+   // Mark as read since these are always single-partition lookups by primary key.
+   state.set_latency_histogram(qp.proxy().get_stats().read);
+
    auto now = gc_clock::now();

    ++_stats.secondary_index_reads;

@@ -1431,7 +1448,7 @@ view_indexed_table_select_statement::read_posting_list(query_processor& qp,

    int32_t page_size = options.get_page_size();
    if (page_size <= 0 || !service::pager::query_pagers::may_need_paging(*_view_schema, page_size, *cmd, partition_ranges)) {
-       return qp.proxy().query_result(_view_schema, cmd, std::move(partition_ranges), options.get_consistency(), {timeout, state.get_permit(), state.get_client_state(), state.get_trace_state()})
+       return qp.proxy().query_result(_view_schema, cmd, std::move(partition_ranges), options.get_consistency(), {timeout, state.get_permit(), state.get_client_state(), state.get_trace_state(), {}, {}, service::node_local_only::no, state.has_deferred_latency()})
            .then(utils::result_wrap([this, now, &options, selection = std::move(selection), partition_slice = std::move(partition_slice)] (service::storage_proxy::coordinator_query_result qr)
                -> coordinator_result<::shared_ptr<cql_transport::messages::result_message::rows>> {
        cql3::selection::result_set_builder builder(*selection, now, &options);

@@ -1883,7 +1900,7 @@ mutation_fragments_select_statement::do_execute(query_processor& qp, service::qu
            || !service::pager::query_pagers::may_need_paging(*_schema, page_size,
                *command, key_ranges))) {
        return do_query(erm_keepalive, {}, qp.proxy(), _schema, command, std::move(key_ranges), cl,
-           {timeout, state.get_permit(), state.get_client_state(), state.get_trace_state(), {}, {}})
+           {timeout, state.get_permit(), state.get_client_state(), state.get_trace_state(), {}, {}, service::node_local_only::no, state.has_deferred_latency()})
            .then(wrap_result_to_error_message([this, erm_keepalive, now, &options, slice = command->slice] (service::storage_proxy_coordinator_query_result&& qr) mutable {
        cql3::selection::result_set_builder builder(*_selection, now, &options);
        query::result_view::consume(*qr.query_result, std::move(slice),

@@ -2116,6 +2133,9 @@ future<shared_ptr<cql_transport::messages::result_message>> vector_indexed_table
    tracing::add_table_name(state.get_trace_state(), keyspace(), column_family());
    validate_for_read(options.get_consistency());

+   // Vector index reads go through proxy().query_result() with single-partition lookups.
+   state.set_latency_histogram(qp.proxy().get_stats().read);
+
    _query_start_time_point = gc_clock::now();

    update_stats();

@@ -2208,7 +2228,7 @@ future<::shared_ptr<cql_transport::messages::result_message>> vector_indexed_tab
    cmd->slice._row_ranges = query::clustering_row_ranges{query::clustering_range::make_singular(key.clustering)};
    coordinator_result<service::storage_proxy::coordinator_query_result> rqr =
        co_await qp.proxy().query_result(_schema, cmd, {dht::partition_range::make_singular(key.partition)}, options.get_consistency(),
-           {timeout, state.get_permit(), state.get_client_state(), state.get_trace_state()});
+           {timeout, state.get_permit(), state.get_client_state(), state.get_trace_state(), {}, {}, service::node_local_only::no, state.has_deferred_latency()});
    if (!rqr) {
        co_return std::move(rqr).as_failure();
    }

@@ -2228,7 +2248,7 @@ future<::shared_ptr<cql_transport::messages::result_message>> vector_indexed_tab

    co_return co_await qp.proxy()
        .query_result(_query_schema, command, std::move(partition_ranges), options.get_consistency(),
-           {timeout, state.get_permit(), state.get_client_state(), state.get_trace_state(), {}, {}, options.get_specific_options().node_local_only},
+           {timeout, state.get_permit(), state.get_client_state(), state.get_trace_state(), {}, {}, options.get_specific_options().node_local_only, state.has_deferred_latency()},
            std::nullopt)
        .then(wrap_result_to_error_message([this, &options, command](service::storage_proxy::coordinator_query_result qr) {
            command->set_row_limit(get_limit(options, _limit));
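The select, batch, and modification hunks above all route a request to one of five latency histograms. A compact sketch of that classification rule, with simplified types (this is not the ScyllaDB implementation):

```cpp
#include <cstdio>

enum class histogram { read, range, write, cas_read, cas_write };

struct request {
    bool is_write;
    bool is_serial;          // LWT / serial consistency -> paxos
    bool single_partition;   // distinguishes read vs range for reads
};

// Writes with conditions mark cas_write, plain writes mark write;
// serial reads go through paxos (cas_read), and non-serial reads
// split into single-partition (read) vs multi-partition (range).
histogram pick_histogram(const request& r) {
    if (r.is_write) {
        return r.is_serial ? histogram::cas_write : histogram::write;
    }
    if (r.is_serial) {
        return histogram::cas_read;
    }
    return r.single_partition ? histogram::read : histogram::range;
}

int main() {
    std::printf("%d\n", (int)pick_histogram({false, false, true}));  // read
    std::printf("%d\n", (int)pick_histogram({false, false, false})); // range
    std::printf("%d\n", (int)pick_histogram({false, true, true}));   // cas_read
}
```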
@@ -776,7 +776,7 @@ class db::commitlog::segment : public enable_shared_from_this<segment>, public c
    friend std::ostream& operator<<(std::ostream&, const segment&);
    friend class segment_manager;

-   size_t sector_overhead(size_t size) const {
+   constexpr size_t sector_overhead(size_t size) const {
        return (size / (_alignment - detail::sector_overhead_size)) * detail::sector_overhead_size;
    }

@@ -1028,18 +1028,21 @@ public:
        co_return me;
    }

-   /**
-    * Allocate a new buffer
-    */
-   void new_buffer(size_t s) {
-       SCYLLA_ASSERT(_buffer.empty());
-
+   std::tuple<size_t, size_t> buffer_usage_size(size_t s) const {
        auto overhead = segment_overhead_size;
        if (_file_pos == 0) {
            overhead += descriptor_header_size;
        }
-
-       s += overhead;
+       return {s + overhead, overhead};
+   }
+
+   /**
+    * Allocate a new buffer
+    */
+   void new_buffer(size_t size_in) {
+       SCYLLA_ASSERT(_buffer.empty());
+       auto [s, overhead] = buffer_usage_size(size_in);
        // add bookkeep data reqs.
        auto a = align_up(s + sector_overhead(s), _alignment);
        auto k = std::max(a, default_size);
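For a concrete feel of the `sector_overhead()` formula above: with an assumed 4096-byte alignment and an assumed 8 bytes of per-sector bookkeeping (both values are illustrative, not the real constants), each sector carries 4088 payload bytes, so the overhead grows by 8 bytes per full payload chunk the data spans:

```cpp
#include <cstddef>
#include <cstdio>

constexpr size_t alignment = 4096;            // assumed sector size
constexpr size_t sector_overhead_size = 8;    // assumed bookkeeping bytes per sector

// Same shape as the segment::sector_overhead() shown in the diff:
// payload capacity per sector is (alignment - overhead), and each
// full payload chunk costs one unit of overhead.
constexpr size_t sector_overhead(size_t size) {
    return (size / (alignment - sector_overhead_size)) * sector_overhead_size;
}

int main() {
    std::printf("%zu\n", sector_overhead(4088));   // 8: exactly one full sector of payload
    std::printf("%zu\n", sector_overhead(100000)); // 192: 100000 / 4088 = 24 chunks, 24 * 8 bytes
}
```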
@@ -1427,6 +1430,9 @@ public:

    position_type next_position(size_t size) const {
        auto used = _buffer_ostream_size - _buffer_ostream.size();
+       if (used == 0) { // new chunk/segment
+           std::tie(size, std::ignore) = buffer_usage_size(size);
+       }
        used += size;
        return _file_pos + used + sector_overhead(used);
    }

@@ -1570,7 +1576,6 @@ future<> db::commitlog::segment_manager::oversized_allocation(entry_writer& writ
    clogger.debug("Attempting oversized alloc of {} entry writer", writer.num_entries);

    auto size = writer.size();
-   auto max_file_size = cfg.commitlog_segment_size_in_mb * 1024 * 1024;

    // check if this cannot be written at all...
    if (!cfg.allow_going_over_size_limit) {

@@ -1579,11 +1584,11 @@ future<> db::commitlog::segment_manager::oversized_allocation(entry_writer& writ
    // more worst case
    auto size_with_meta_overhead = size_with_sector_overhead
        + (1 + size_with_sector_overhead/max_mutation_size) * (segment::entry_overhead_size + segment::fragmented_entry_overhead_size + segment::segment_overhead_size)
-       * (1 + size_with_sector_overhead/max_file_size) * segment::descriptor_header_size
+       * (1 + size_with_sector_overhead/max_size) * segment::descriptor_header_size
        ;
    // this is not really true. We could have some space in current segment,
    // but again, lets be conservative.
-   auto max_file_size_avail = max_disk_size - max_file_size;
+   auto max_file_size_avail = max_disk_size - max_size;

    if (size_with_meta_overhead > max_file_size_avail) {
        throw std::invalid_argument(fmt::format("Mutation of {} bytes is too large for potentially available disk space of {}", size, max_file_size_avail));

@@ -1770,11 +1775,13 @@ future<> db::commitlog::segment_manager::oversized_allocation(entry_writer& writ
        co_await s->close();
        s = co_await get_segment();
    }
-   // bytes not counting overhead
-   auto buf_rem = std::min(max_size - s->position(), s->_buffer_ostream.size());
+   // bytes not counting overhead
+   auto pos = s->position();
+   auto max = std::max<size_t>(pos, max_size);
+   auto buf_rem = std::min(max_size - max, s->_buffer_ostream.size());

    size_t avail;
-   if (buf_rem > align) {
+   if (buf_rem >= align) {
        auto rem2 = buf_rem - (1 + buf_rem/sector_size) * detail::sector_overhead_size;
        avail = std::min(rem2, max_mutation_size)
            - segment::entry_overhead_size

@@ -1784,7 +1791,7 @@ future<> db::commitlog::segment_manager::oversized_allocation(entry_writer& writ
    } else {
        co_await s->cycle();
        auto pos = s->position();
-       auto max = std::max<size_t>(pos, max_file_size);
+       auto max = std::max<size_t>(pos, max_size);
        auto file_rem = max - pos;

        if (file_rem < align) {
@@ -965,8 +965,7 @@ future<> gossiper::failure_detector_loop_for_node(locator::host_id host_id, gene
    diff = now - last;
    if (!failed) {
        last = now;
-   }
-   if (diff > max_duration) {
+   } else if (diff > max_duration) {
        logger.info("failure_detector_loop: Mark node {}/{} as DOWN", host_id, node);
        co_await container().invoke_on(0, [host_id] (gms::gossiper& g) {
            return g.convict(host_id);
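The `else if` fix above matters because `diff` is computed before `last` is reset: with the old unconditional `if`, a node that was responding could still hit the conviction branch whenever the loop itself stalled past `max_duration`. A runnable miniature of the corrected flow (the timings and failure schedule are made up for illustration):

```cpp
#include <chrono>
#include <cstdio>
#include <thread>

int main() {
    using clock = std::chrono::steady_clock;
    const auto max_duration = std::chrono::milliseconds(300);
    auto last = clock::now();

    for (int i = 0; i < 10; ++i) {
        std::this_thread::sleep_for(std::chrono::milliseconds(100));
        bool failed = i >= 3; // pretend the node stops responding on the 4th probe
        auto now = clock::now();
        auto diff = now - last;
        if (!failed) {
            last = now; // `last` advances only while the node responds
        } else if (diff > max_duration) {
            // conviction now applies only to probes that actually failed
            std::puts("mark node as DOWN");
            break;
        }
    }
}
```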
@@ -16,7 +16,6 @@ Usage:
import argparse, os, sys
from typing import Sequence

-from test.pylib.driver_utils import safe_driver_shutdown

def read_statements(path: str) -> list[tuple[int, str]]:
    stms: list[tuple[int, str]] = []

@@ -58,7 +57,7 @@ def exec_statements(statements: list[tuple[int, str]], socket_path: str, timeout
        print(f"ERROR executing statement from file line {lineno}: {s}\n{e}", file=sys.stderr)
        return 1
    finally:
-       safe_driver_shutdown(cluster)
+       cluster.shutdown()
    return 0

def main(argv: Sequence[str]) -> int:
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:524c54493b72c5e1b783f14dfa49d733e21b24cc2ec776e9c6e578095073162d
-size 6646304
+oid sha256:8b22f9a548a03c88250d31e97ea3e8f77b4d90c502bcf74336c24056557f947f
+size 6698412

@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:fec2bb253d43139da954cee3441fc8bc74824246b080f23bf1f824714d0adc45
-size 6646576
+oid sha256:31e515a62f006649b0dc4671b51b2643fba9a70884c09b90fbc2237044954254
+size 6707108
@@ -239,7 +239,10 @@ private:

    // Drop waiters that we lost track of, can happen due to a snapshot transfer,
    // or a leader removed from cluster while some entries added on it are uncommitted.
-   void drop_waiters(std::optional<index_t> idx = {});
+   // When `snp` is provided (snapshot transfer case), waiters whose term matches
+   // the snapshot term are resolved successfully, since the snapshot-term match proves
+   // they were committed and included in the snapshot (by the Log Matching Property).
+   void drop_waiters(const snapshot_descriptor* snp = nullptr);

    // Wake up all waiters that wait for entries with idx smaller or equal to the one provided
    // to be applied.

@@ -556,12 +559,10 @@ future<> server_impl::wait_for_entry(entry_id eid, wait_type type, seastar::abor
    auto snap_term = _fsm->log_term_for(snap_idx);
    SCYLLA_ASSERT(snap_term);
    SCYLLA_ASSERT(snap_idx >= eid.idx);
-   if (type == wait_type::committed && snap_term == eid.term) {
+   if (snap_term == eid.term) {
        logger.trace("[{}] wait_for_entry {}.{}: entry got truncated away, but has the snapshot's term"
            " (snapshot index: {})", id(), eid.term, eid.idx, snap_idx);
        co_return;
-
-       // We don't do this for `wait_type::applied` - see below why.
    }

    logger.trace("[{}] wait_for_entry {}.{}: entry got truncated away", id(), eid.term, eid.idx);

@@ -572,20 +573,6 @@ future<> server_impl::wait_for_entry(entry_id eid, wait_type type, seastar::abor
        throw dropped_entry();
    }

-   if (type == wait_type::applied && _fsm->log_last_snapshot_idx() >= eid.idx) {
-       // We know the entry was committed but the wait type is `applied`
-       // and we don't know if the entry was applied with `state_machine::apply`
-       // (we may've loaded a snapshot before we managed to apply the entry).
-       // As specified by `add_entry`, throw `commit_status_unknown` in this case.
-       //
-       // FIXME: replace this with a different exception type - `commit_status_unknown`
-       // gives too much uncertainty while we know that the entry was committed
-       // and had to be applied on at least one server. Some callers of `add_entry`
-       // need to know only that the current state includes that entry, whether it was done
-       // through `apply` on this server or through receiving a snapshot.
-       throw commit_status_unknown();
-   }
-
        co_return;
    }
}

@@ -760,6 +747,8 @@ future<> server_impl::add_entry(command command, wait_type type, seastar::abort_
        throw not_a_leader{leader};
    }
    auto eid = co_await add_entry_on_leader(std::move(command), as);
+   co_await utils::get_local_injector().inject("block_raft_add_entry_before_wait_for_entry",
+       utils::wait_for_message(std::chrono::minutes(5)));
    co_return co_await wait_for_entry(eid, type, as);
}

@@ -995,17 +984,24 @@ void server_impl::notify_waiters(std::map<index_t, op_status>& waiters,
        }
    }

-void server_impl::drop_waiters(std::optional<index_t> idx) {
+void server_impl::drop_waiters(const snapshot_descriptor* snp) {
    auto drop = [&] (std::map<index_t, op_status>& waiters) {
        while (waiters.size() != 0) {
            auto it = waiters.begin();
-           if (idx && it->first > *idx) {
+           if (snp && it->first > snp->idx) {
                break;
            }
            auto [entry_idx, status] = std::move(*it);
            waiters.erase(it);
-           status.done.set_exception(commit_status_unknown());
-           _stats.waiters_dropped++;
+           if (snp && status.term == snp->term) {
+               // entry_idx <= snapshot index and the entry's term matches the snapshot term.
+               // By the Log Matching Property the entry was committed and included in the snapshot.
+               status.done.set_value();
+               _stats.waiters_awoken++;
+           } else {
+               status.done.set_exception(commit_status_unknown());
+               _stats.waiters_dropped++;
+           }
        }
    };
    drop(_awaited_commits);
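A standalone sketch of the new `drop_waiters()` rule — resolve waiters at or below the snapshot index whose term equals the snapshot term (committed by the Log Matching Property), drop the rest with an unknown status. The types here are simplified stand-ins, not the raft implementation:

```cpp
#include <cstdio>
#include <map>

struct snapshot_descriptor { unsigned long idx; unsigned long term; };
struct op_status { unsigned long term; };

void drop_waiters(std::map<unsigned long, op_status>& waiters, const snapshot_descriptor* snp) {
    auto it = waiters.begin();
    while (it != waiters.end()) {
        if (snp && it->first > snp->idx) {
            break; // entries past the snapshot may still commit normally
        }
        if (snp && it->second.term == snp->term) {
            // term matches the snapshot term: the entry was committed
            // and is included in the snapshot, so resolve successfully
            std::printf("waiter at idx %lu: resolved\n", it->first);
        } else {
            std::printf("waiter at idx %lu: commit_status_unknown\n", it->first);
        }
        it = waiters.erase(it);
    }
}

int main() {
    std::map<unsigned long, op_status> waiters{{5, {2}}, {7, {3}}, {12, {3}}};
    snapshot_descriptor snp{10, 3}; // snapshot covers idx <= 10 at term 3
    drop_waiters(waiters, &snp);    // idx 5 (term 2): unknown; idx 7 (term 3): resolved; idx 12: kept
}
```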
@@ -1431,7 +1427,7 @@ future<> server_impl::applier_fiber() {
    // Apply the snapshot to the state machine
    logger.trace("[{}] apply_fiber applying snapshot {}", _id, snp.id);
    co_await _state_machine->load_snapshot(snp.id);
-   drop_waiters(snp.idx);
+   drop_waiters(&snp);
    _applied_idx = snp.idx;
    _applied_index_changed.broadcast();
    _stats.sm_load_snapshot++;

@@ -79,18 +79,18 @@ public:
    // The caller may pass a pointer to an abort_source to make the operation abortable.
    // If it passes nullptr, the operation is unabortable.
    //
-   // Successful `add_entry` with `wait_type::committed` does not guarantee that `state_machine::apply` will be called
-   // locally for this entry. Between the commit and the application we may receive a snapshot containing this entry,
-   // so the state machine's state 'jumps' forward in time, skipping the entry application.
-   // However, for `wait_type::applied`, we guarantee that the entry will be applied locally with `state_machine::apply`.
-   // If a snapshot causes the state machine to jump over the entry, `add_entry` will return `commit_status_unknown`
-   // (even if the snapshot included that entry).
+   // Successful `add_entry` does not guarantee that `state_machine::apply` will be called
+   // locally for this entry. Between the commit and the application we may load a snapshot
+   // containing this entry, so the state machine's state 'jumps' forward in time, skipping
+   // the local entry application. For `wait_type::applied` this should be fine, because
+   // state machine implementations shouldn't care whether an entry was applied via
+   // `state_machine::apply` or via a snapshot load.
    //
    // Exceptions:
    // raft::commit_status_unknown
    //      Thrown if the leader has changed and the log entry has either
    //      been replaced by the new leader or the server has lost track of it.
-   //      It may also be thrown in case of a transport error while forwarding add_entry to the leader.L
+   //      It may also be thrown in case of a transport error while forwarding add_entry to the leader.
    // raft::dropped_entry
    //      Thrown if the entry was replaced because of a leader change.
    // raft::request_aborted
@@ -239,7 +239,7 @@ future<> migration_manager::wait_for_schema_agreement(const replica::database& d
            as->check();
        }
        if (db::timeout_clock::now() > deadline) {
-           throw std::runtime_error("Unable to reach schema agreement");
+           throw schema_agreement_timeout();
        }
        co_await (as ? sleep_abortable(std::chrono::milliseconds(500), *as) : sleep(std::chrono::milliseconds(500)));
    }

@@ -14,6 +14,7 @@
#include "gms/endpoint_state.hh"
#include <seastar/core/abort_source.hh>
#include <seastar/core/gate.hh>
+#include <seastar/core/timed_out_error.hh>
#include "gms/inet_address.hh"
#include "gms/feature.hh"
#include "gms/i_endpoint_state_change_subscriber.hh"

@@ -133,6 +134,19 @@ public:
     * Known peers in the cluster have the same schema version as us.
     */
    bool have_schema_agreement();
+   // Thrown by wait_for_schema_agreement() when the deadline is reached.
+   struct schema_agreement_timeout : public seastar::timed_out_error {
+       const char* what() const noexcept override {
+           return "Unable to reach schema agreement";
+       }
+   };
+   /**
+    * Waits until all known live peers have the same schema version as this
+    * node. Returns normally once agreement is reached, or throws
+    * schema_agreement_timeout if the deadline is reached before agreement.
+    * If as != nullptr, can also throw abort_requested_exception if the abort
+    * source fires.
+    */
    future<> wait_for_schema_agreement(const replica::database& db, db::timeout_clock::time_point deadline, seastar::abort_source* as);

    // Maximum number of retries one should attempt when trying to perform
@@ -197,7 +197,7 @@ future<result<service::storage_proxy::coordinator_query_result>> query_pager::do
        std::move(command),
        std::move(ranges),
        _options.get_consistency(),
-       {timeout, _state.get_permit(), _state.get_client_state(), _state.get_trace_state(), std::move(_last_replicas), _query_read_repair_decision, _options.get_specific_options().node_local_only},
+       {timeout, _state.get_permit(), _state.get_client_state(), _state.get_trace_state(), std::move(_last_replicas), _query_read_repair_decision, _options.get_specific_options().node_local_only, _state.has_deferred_latency()},
        std::move(cas_shard));
}
@@ -14,17 +14,35 @@
#include "tracing/tracing.hh"
#include "tracing/trace_state.hh"
#include "service_permit.hh"
+#include "utils/latency.hh"

+namespace utils {
+class timed_rate_moving_average_summary_and_histogram;
+}
+
namespace qos {
class service_level_controller;
}
namespace service {

+// Carries a started latency counter and a pointer to the histogram
+// that should be marked when the response is flushed to the client.
+// The counter is started at the transport layer when the request
+// arrives and the histogram pointer is set by the statement layer
+// once the operation type (read/write/range/cas) is known.
+// The transport layer stops the counter and marks the histogram
+// after the response has been flushed to the OS socket.
+struct deferred_latency_mark {
+    utils::latency_counter lc;
+    utils::timed_rate_moving_average_summary_and_histogram* histogram = nullptr;
+};
+
class query_state final {
private:
    client_state& _client_state;
    tracing::trace_state_ptr _trace_state_ptr;
    service_permit _permit;
+   std::optional<deferred_latency_mark> _deferred_latency;

public:
    query_state(client_state& client_state, service_permit permit)

@@ -70,6 +88,33 @@ public:
        return _client_state.get_service_level_controller();
    }

+   // Start the latency counter. Called from the transport layer
+   // when the request first arrives.
+   void start_latency() {
+       _deferred_latency.emplace();
+       _deferred_latency->lc.start();
+   }
+
+   // Set the histogram that should be marked when the response
+   // is flushed. Called from the statement layer once the
+   // operation type is known.
+   void set_latency_histogram(utils::timed_rate_moving_average_summary_and_histogram& hist) {
+       if (_deferred_latency) {
+           _deferred_latency->histogram = &hist;
+       }
+   }
+
+   bool has_deferred_latency() const {
+       return _deferred_latency.has_value();
+   }
+
+   // Extract the deferred latency mark. The transport layer
+   // calls this to take ownership and mark the histogram after
+   // the response is flushed.
+   std::optional<deferred_latency_mark> take_deferred_latency() {
+       return std::exchange(_deferred_latency, std::nullopt);
+   }

};

}
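Putting the `deferred_latency_mark` pieces together, here is a minimal end-to-end sketch of the three phases — start at the transport layer, set the histogram at the statement layer, take-and-mark after the response is flushed — using `steady_clock` and a function pointer as stand-ins for the real `latency_counter` and histogram types:

```cpp
#include <chrono>
#include <cstdio>
#include <optional>
#include <thread>
#include <utility>

using histogram_fn = void (*)(double seconds);

struct deferred_latency_mark {
    std::chrono::steady_clock::time_point start;
    histogram_fn histogram = nullptr;
};

static void mark_write(double s) { std::printf("write latency: %.3fs\n", s); }

int main() {
    // (1) request arrives at the transport layer: start the counter
    std::optional<deferred_latency_mark> mark;
    mark.emplace();
    mark->start = std::chrono::steady_clock::now();

    // (2) statement layer: operation type now known, pick the histogram
    mark->histogram = &mark_write;

    // ... execute the request and flush the response to the socket ...
    std::this_thread::sleep_for(std::chrono::milliseconds(20));

    // (3) transport layer: take ownership of the mark and record latency
    if (auto m = std::exchange(mark, std::nullopt); m && m->histogram) {
        std::chrono::duration<double> d = std::chrono::steady_clock::now() - m->start;
        m->histogram(d.count());
    }
}
```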
@@ -9,6 +9,7 @@
#include "service/session.hh"
#include "utils/log.hh"
#include <seastar/core/coroutine.hh>
+#include <seastar/core/timer.hh>

namespace service {

@@ -58,18 +59,35 @@ void session_manager::initiate_close_of_sessions_except(const std::unordered_set
}

future<> session_manager::drain_closing_sessions() {
+   slogger.info("drain_closing_sessions: waiting for lock");
+   seastar::timer<lowres_clock> lock_timer([this] {
+       slogger.warn("drain_closing_sessions: still waiting for lock, available units {}",
+           _session_drain_sem.available_units());
+   });
+   lock_timer.arm_periodic(std::chrono::minutes(5));
    auto lock = co_await get_units(_session_drain_sem, 1);
+   lock_timer.cancel();
+   auto n = std::distance(_closing_sessions.begin(), _closing_sessions.end());
+   slogger.info("drain_closing_sessions: acquired lock, {} sessions to drain", n);
    auto i = _closing_sessions.begin();
    while (i != _closing_sessions.end()) {
        session& s = *i;
        ++i;
        auto id = s.id();
-       slogger.debug("draining session {}", id);
+       slogger.info("drain_closing_sessions: waiting for session {} to close, gate count {}", id, s.gate_count());
+       std::optional<seastar::timer<lowres_clock>> warn_timer;
+       warn_timer.emplace([&s, id] {
+           slogger.warn("drain_closing_sessions: session {} still not closed, gate count {}",
+               id, s.gate_count());
+       });
+       warn_timer->arm_periodic(std::chrono::minutes(5));
        co_await s.close();
+       warn_timer.reset();
        if (_sessions.erase(id)) {
-           slogger.debug("session {} closed", id);
+           slogger.info("drain_closing_sessions: session {} closed", id);
        }
    }
+   slogger.info("drain_closing_sessions: done");
}

} // namespace service

@@ -95,6 +95,10 @@ public:
        return _id;
    }

+   size_t gate_count() const {
+       return _gate.get_count();
+   }
+
    /// Post-condition of successfully resolved future: There are no guards alive for this session,
    /// and it's impossible to create more such guards later.
    /// Can be called concurrently.
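The drain loop above arms a periodic timer purely to log progress while an await is stuck. The same watchdog pattern rebuilt with plain threads instead of `seastar::timer`, as an illustrative sketch:

```cpp
#include <chrono>
#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <thread>

// Logs a message every `period` until destroyed; destruction cancels
// the watchdog and joins its thread, like warn_timer.reset() above.
class watchdog {
    std::mutex _m;
    std::condition_variable _cv;
    bool _done = false;
    std::thread _t;
public:
    watchdog(std::chrono::seconds period, const char* msg)
        : _t([this, period, msg] {
            std::unique_lock lk(_m);
            while (!_cv.wait_for(lk, period, [this] { return _done; })) {
                std::printf("still waiting: %s\n", msg);
            }
        }) {}
    ~watchdog() {
        { std::lock_guard lk(_m); _done = true; }
        _cv.notify_one();
        _t.join();
    }
};

int main() {
    watchdog wd(std::chrono::seconds(1), "session not closed yet");
    // stand-in for a slow co_await s.close()
    std::this_thread::sleep_for(std::chrono::milliseconds(3500));
    std::puts("session closed");
}
```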
@@ -3979,9 +3979,8 @@ future<result<>> storage_proxy::mutate_begin(unique_response_handler_vector ids,

// this function should be called with a future that holds result of mutation attempt (usually
// future returned by mutate_begin()). The future should be ready when function is called.
-future<result<>> storage_proxy::mutate_end(future<result<>> mutate_result, utils::latency_counter lc, write_stats& stats, tracing::trace_state_ptr trace_state) {
+future<result<>> storage_proxy::mutate_end(future<result<>> mutate_result, write_stats& stats, tracing::trace_state_ptr trace_state) {
    SCYLLA_ASSERT(mutate_result.available());
-   stats.write.mark(lc.stop().latency());

    return utils::result_futurize_try([&] {
        auto&& res = mutate_result.get();

@@ -4208,14 +4207,27 @@ future<> storage_proxy::mutate(utils::chunked_vector<mutation> mutations, db::co
}

future<result<>> storage_proxy::mutate_result(utils::chunked_vector<mutation> mutations, db::consistency_level cl, clock_type::time_point timeout, tracing::trace_state_ptr tr_state, service_permit permit, db::allow_per_partition_rate_limit allow_limit, bool raw_counters, coordinator_mutate_options options) {
+   std::optional<utils::latency_counter> lc;
+   if (!options.defer_coordinator_latency_mark) {
+       lc.emplace();
+       lc->start();
+   }
    if (_cdc && _cdc->needs_cdc_augmentation(mutations)) {
-       return _cdc->augment_mutation_call(timeout, std::move(mutations), tr_state, cl, std::move(options.cdc_options)).then([this, cl, timeout, tr_state, permit = std::move(permit), raw_counters, cdc = _cdc->shared_from_this(), allow_limit, options = std::move(options)](std::tuple<utils::chunked_vector<mutation>, lw_shared_ptr<cdc::operation_result_tracker>>&& t) mutable {
+       return _cdc->augment_mutation_call(timeout, std::move(mutations), tr_state, cl, std::move(options.cdc_options)).then([this, cl, timeout, tr_state, permit = std::move(permit), raw_counters, cdc = _cdc->shared_from_this(), allow_limit, options = std::move(options), lc = std::move(lc)](std::tuple<utils::chunked_vector<mutation>, lw_shared_ptr<cdc::operation_result_tracker>>&& t) mutable {
            auto mutations = std::move(std::get<0>(t));
            auto tracker = std::move(std::get<1>(t));
-           return _mutate_stage(this, std::move(mutations), cl, timeout, std::move(tr_state), std::move(permit), raw_counters, allow_limit, std::move(tracker), std::move(options));
+           return _mutate_stage(this, std::move(mutations), cl, timeout, std::move(tr_state), std::move(permit), raw_counters, allow_limit, std::move(tracker), std::move(options)).finally([this, lc = std::move(lc)] () mutable {
+               if (lc) {
+                   get_stats().write.mark(lc->stop().latency());
+               }
+           });
        });
    }
-   return _mutate_stage(this, std::move(mutations), cl, timeout, std::move(tr_state), std::move(permit), raw_counters, allow_limit, nullptr, std::move(options));
+   return _mutate_stage(this, std::move(mutations), cl, timeout, std::move(tr_state), std::move(permit), raw_counters, allow_limit, std::move(options)).finally([this, lc = std::move(lc)] () mutable {
+       if (lc) {
+           get_stats().write.mark(lc->stop().latency());
+       }
+   });
}

future<result<>> storage_proxy::do_mutate(utils::chunked_vector<mutation> mutations, db::consistency_level cl, clock_type::time_point timeout, tracing::trace_state_ptr tr_state, service_permit permit, bool raw_counters, db::allow_per_partition_rate_limit allow_limit, lw_shared_ptr<cdc::operation_result_tracker> cdc_tracker, coordinator_mutate_options options) {
@@ -4262,15 +4274,12 @@ storage_proxy::mutate_internal(Range mutations, db::consistency_level cl, tracin
    // special handling, e.g. counters. otherwise, a default type is used.
    auto type = type_opt.value_or(std::next(std::begin(mutations)) == std::end(mutations) ? db::write_type::SIMPLE : db::write_type::UNLOGGED_BATCH);

-   utils::latency_counter lc;
-   lc.start();
-
    return mutate_prepare(mutations, cl, type, tr_state, std::move(permit), allow_limit, std::move(options)).then(utils::result_wrap([this, cl, timeout_opt, tracker = std::move(cdc_tracker),
            tr_state] (storage_proxy::unique_response_handler_vector ids) mutable {
        register_cdc_operation_result_tracker(ids, tracker);
        return mutate_begin(std::move(ids), cl, tr_state, timeout_opt);
-   })).then_wrapped([this, p = shared_from_this(), lc, tr_state] (future<result<>> f) mutable {
-       return p->mutate_end(std::move(f), lc, get_stats(), std::move(tr_state));
+   })).then_wrapped([this, p = shared_from_this(), tr_state] (future<result<>> f) mutable {
+       return p->mutate_end(std::move(f), get_stats(), std::move(tr_state));
    });
}

@@ -4379,8 +4388,6 @@ static host_id_vector_replica_set endpoint_filter(

future<result<>>
storage_proxy::mutate_atomically_result(utils::chunked_vector<mutation> mutations, db::consistency_level cl, clock_type::time_point timeout, tracing::trace_state_ptr tr_state, service_permit permit, coordinator_mutate_options options) {
-   utils::latency_counter lc;
-   lc.start();

    class context {
        storage_proxy& _p;

@@ -4494,8 +4501,8 @@ storage_proxy::mutate_atomically_result(utils::chunked_vector<mutation> mutation
            return make_exception_future<lw_shared_ptr<context>>(std::current_exception());
        }
    };
-   auto cleanup = [p = shared_from_this(), lc, tr_state] (future<result<>> f) mutable {
-       return p->mutate_end(std::move(f), lc, p->get_stats(), std::move(tr_state));
+   auto cleanup = [p = shared_from_this(), tr_state] (future<result<>> f) mutable {
+       return p->mutate_end(std::move(f), p->get_stats(), std::move(tr_state));
    };

    if (_cdc && _cdc->needs_cdc_augmentation(mutations)) {
@@ -4532,9 +4539,6 @@ future<> storage_proxy::send_to_endpoint(
        write_stats& stats,
        allow_hints allow_hints,
        is_cancellable cancellable) {
-   utils::latency_counter lc;
-   lc.start();
-
    std::optional<clock_type::time_point> timeout;
    db::consistency_level cl = allow_hints ? db::consistency_level::ANY : db::consistency_level::ONE;
    if (type == db::write_type::VIEW) {

@@ -4543,6 +4547,9 @@ future<> storage_proxy::send_to_endpoint(
        timeout = clock_type::now() + 5min;
    }

+   utils::latency_counter lc;
+   lc.start();
+
    return mutate_prepare(std::array{std::move(m)},
        [this, tr_state, erm = std::move(ermp), target = std::array{target}, pending_endpoints, &stats, cancellable, cl, type, /* does view building should hold a real permit */ permit = empty_service_permit()] (std::unique_ptr<mutation_holder>& m) mutable {
        host_id_vector_replica_set targets;

@@ -4570,8 +4577,11 @@ future<> storage_proxy::send_to_endpoint(
        cancellable);
    }).then(utils::result_wrap([this, cl, tr_state = std::move(tr_state), timeout = std::move(timeout)] (unique_response_handler_vector ids) mutable {
        return mutate_begin(std::move(ids), cl, std::move(tr_state), std::move(timeout));
-   })).then_wrapped([p = shared_from_this(), lc, &stats] (future<result<>> f) {
-       return p->mutate_end(std::move(f), lc, stats, nullptr).then(utils::result_into_future<result<>>);
+   })).then_wrapped([p = shared_from_this(), lc, &stats] (future<result<>> f) mutable {
+       // Internal writes (hints, view updates) don't go through the CQL
+       // transport layer, so mark latency here directly.
+       stats.write.mark(lc.stop().latency());
+       return p->mutate_end(std::move(f), stats, nullptr).then(utils::result_into_future<result<>>);
    });
}
@@ -6739,28 +6749,41 @@ storage_proxy::do_query(schema_ptr s,
        auto f = do_query_with_paxos(std::move(s), std::move(cmd), std::move(partition_ranges), cl, std::move(query_options), std::move(*cas_shard));
        return utils::then_ok_result<result<storage_proxy::coordinator_query_result>>(std::move(f));
    } else {
-       utils::latency_counter lc;
-       lc.start();
+       std::optional<utils::latency_counter> lc;
+       if (!query_options.defer_coordinator_latency_mark) {
+           lc.emplace();
+           lc->start();
+       }
        auto p = shared_from_this();

        if (query::is_single_partition(partition_ranges[0])) { // do not support mixed partitions (yet?)
            try {
-               return query_singular(cmd,
+               auto f = query_singular(cmd,
                    std::move(partition_ranges),
                    cl,
-                   std::move(query_options)).finally([lc, p] () mutable {
+                   std::move(query_options));
+               if (!lc) {
+                   return f;
+               }
+               return std::move(f).finally([lc = std::move(*lc), p] () mutable {
                    p->get_stats().read.mark(lc.stop().latency());
                });
            } catch (const replica::no_such_column_family&) {
-               get_stats().read.mark(lc.stop().latency());
+               if (lc) {
+                   get_stats().read.mark(lc->stop().latency());
+               }
                return make_empty();
            }
        }

-       return query_partition_key_range(cmd,
+       auto f = query_partition_key_range(cmd,
            std::move(partition_ranges),
            cl,
-           std::move(query_options)).finally([lc, p] () mutable {
+           std::move(query_options));
+       if (!lc) {
+           return f;
+       }
+       return std::move(f).finally([lc = std::move(*lc), p] () mutable {
            p->get_stats().range.mark(lc.stop().latency());
        });
    }

@@ -6929,15 +6952,21 @@ future<bool> storage_proxy::cas(schema_ptr schema, cas_shard cas_shard, cas_requ

    unsigned contentions = 0;

-   utils::latency_counter lc;
-   lc.start();
+   std::optional<utils::latency_counter> lc;
+   if (!query_options.defer_coordinator_latency_mark) {
+       lc.emplace();
+       lc->start();
+   }

    bool condition_met;

    try {
        auto update_stats = seastar::defer ([&] {
            get_stats().cas_foreground--;
-           write ? get_stats().cas_write.mark(lc.stop().latency()) : get_stats().cas_read.mark(lc.stop().latency());
+           if (lc) {
+               auto latency = lc->stop().latency();
+               (write ? get_stats().cas_write : get_stats().cas_read).mark(latency);
+           }
            if (contentions > 0) {
                write ? get_stats().cas_write_contention.add(contentions) : get_stats().cas_read_contention.add(contentions);
            }

@@ -6964,7 +6993,11 @@ future<bool> storage_proxy::cas(schema_ptr schema, cas_shard cas_shard, cas_requ
        ++get_stats().cas_failed_read_round_optimization;

        auto pr = partition_ranges; // cannot move original because it can be reused during retry
-       auto cqr = co_await query(schema, cmd, std::move(pr), cl, query_options);
+       // Always defer read latency marking for the internal read inside CAS;
+       // CAS has its own latency histogram (cas_read/cas_write) marked in update_stats above.
+       auto internal_query_options = query_options;
+       internal_query_options.defer_coordinator_latency_mark = true;
+       auto cqr = co_await query(schema, cmd, std::move(pr), cl, std::move(internal_query_options));
        qr = std::move(cqr.query_result);
    }
@@ -130,6 +130,7 @@ public:
     replicas_per_token_range preferred_replicas;
     std::optional<db::read_repair_decision> read_repair_decision;
     node_local_only node_local_only;
+    bool defer_coordinator_latency_mark = false;

     storage_proxy_coordinator_query_options(storage_proxy_clock_type::time_point timeout,
             service_permit permit_,
@@ -137,14 +138,16 @@ public:
             tracing::trace_state_ptr trace_state = nullptr,
             replicas_per_token_range preferred_replicas = { },
             std::optional<db::read_repair_decision> read_repair_decision = { },
-            service::node_local_only node_local_only_ = service::node_local_only::no)
+            service::node_local_only node_local_only_ = service::node_local_only::no,
+            bool defer_coordinator_latency_mark_ = false)
         : _timeout(timeout)
         , permit(std::move(permit_))
         , cstate(client_state_)
         , trace_state(std::move(trace_state))
         , preferred_replicas(std::move(preferred_replicas))
         , read_repair_decision(read_repair_decision)
-        , node_local_only(node_local_only_) {
+        , node_local_only(node_local_only_)
+        , defer_coordinator_latency_mark(defer_coordinator_latency_mark_) {
     }

     storage_proxy_clock_type::time_point timeout(storage_proxy& sp) const {
@@ -169,6 +172,7 @@ struct storage_proxy_coordinator_query_result {
 struct storage_proxy_coordinator_mutate_options {
     cdc::per_request_options cdc_options;
     node_local_only node_local_only = node_local_only::no;
+    bool defer_coordinator_latency_mark = false;
 };

 class cas_request;
@@ -467,7 +471,7 @@ private:
     template<typename Range>
     future<result<unique_response_handler_vector>> mutate_prepare(Range&& mutations, db::consistency_level cl, db::write_type type, tracing::trace_state_ptr tr_state, service_permit permit, db::allow_per_partition_rate_limit allow_limit, coordinator_mutate_options options);
     future<result<>> mutate_begin(unique_response_handler_vector ids, db::consistency_level cl, tracing::trace_state_ptr trace_state, std::optional<clock_type::time_point> timeout_opt = { });
-    future<result<>> mutate_end(future<result<>> mutate_result, utils::latency_counter, write_stats& stats, tracing::trace_state_ptr trace_state);
+    future<result<>> mutate_end(future<result<>> mutate_result, write_stats& stats, tracing::trace_state_ptr trace_state);
     future<result<>> schedule_repair(locator::effective_replication_map_ptr ermp, mutations_per_partition_key_map diffs, db::consistency_level cl, tracing::trace_state_ptr trace_state, service_permit permit);
     bool need_throttle_writes() const;
     void unthrottle();

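The header change relies on a standard C++ compatibility trick: the new flag is a trailing parameter with a default, so every existing constructor call keeps compiling unchanged and only the CAS path passes the extra argument. A tiny sketch of that pattern (hypothetical types, for illustration only):

    #include <string>

    struct options {
        std::string trace;
        bool defer = false;
        // Trailing defaulted parameter: old call sites are unaffected.
        explicit options(std::string trace_, bool defer_ = false)
            : trace(std::move(trace_))
            , defer(defer_) {
        }
    };

    int main() {
        options a("existing-caller");      // pre-existing call site, unchanged
        options b("cas-internal", true);   // new caller opting in to deferred marking
    }
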
@@ -4494,10 +4494,20 @@ future<> storage_service::local_topology_barrier() {
                     version, current_version)));
         }

-        co_await ss._shared_token_metadata.stale_versions_in_use();
+        rtlogger.info("raft_topology_cmd::barrier_and_drain version {}: waiting for stale token metadata versions to be released", version);
+        {
+            seastar::timer<lowres_clock> warn_timer([&ss, version] {
+                rtlogger.warn("raft_topology_cmd::barrier_and_drain version {}: still waiting for stale versions, "
+                              "stale versions (version: use_count): {}",
+                              version, ss._shared_token_metadata.describe_stale_versions());
+            });
+            warn_timer.arm_periodic(std::chrono::minutes(5));
+            co_await ss._shared_token_metadata.stale_versions_in_use();
+        }
+        rtlogger.info("raft_topology_cmd::barrier_and_drain version {}: stale versions released, draining closing sessions", version);
         co_await get_topology_session_manager().drain_closing_sessions();

-        rtlogger.info("raft_topology_cmd::barrier_and_drain done");
+        rtlogger.info("raft_topology_cmd::barrier_and_drain version {}: done", version);
     });
 }

@@ -4509,7 +4519,9 @@ future<raft_topology_cmd_result> storage_service::raft_topology_cmd_handler(raft
         auto& raft_server = _group0->group0_server();
         auto group0_holder = _group0->hold_group0_gate();
         // do barrier to make sure we always see the latest topology
+        rtlogger.info("topology cmd rpc {} index={}: starting read_barrier, term={}", cmd.cmd, cmd_index, term);
         co_await raft_server.read_barrier(&_group0_as);
+        rtlogger.info("topology cmd rpc {} index={}: read_barrier completed", cmd.cmd, cmd_index);
         if (raft_server.get_current_term() != term) {
             // Return an error since the command is from outdated leader
             co_return result;

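The barrier change wraps a potentially unbounded co_await in a block scope with a periodic timer, so an operator gets a warning every five minutes naming the stale token-metadata versions that are pinning the barrier; the timer is cancelled by its destructor when the scope exits. A schematic distillation of that shape, reusing only the seastar calls visible above (the function name and callback are placeholders):

    #include <chrono>
    #include <functional>
    #include <seastar/core/coroutine.hh>
    #include <seastar/core/lowres_clock.hh>
    #include <seastar/core/timer.hh>

    // Sketch: await a long-running future while logging progress periodically.
    seastar::future<> await_with_progress_warning(seastar::future<> wait_for_it,
                                                  std::function<void()> log_still_waiting) {
        seastar::timer<seastar::lowres_clock> warn_timer(std::move(log_still_waiting));
        warn_timer.arm_periodic(std::chrono::minutes(5));
        co_await std::move(wait_for_it);
        // warn_timer's destructor cancels the periodic callback here.
    }
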
@@ -122,7 +122,7 @@ SEASTAR_TEST_CASE(test_reclaimed_bloom_filter_deletion_from_disk) {

     auto mut1 = mutation(s, pks[0]);
     mut1.partition().apply_insert(*s, ss.make_ckey(0), ss.new_timestamp());
-    auto sst = make_sstable_containing(env.make_sstable(s), {std::move(mut1)});
+    auto sst = make_sstable_containing(env.make_sstable(s), {std::move(mut1)}).get();
     auto sst_test = sstables::test(sst);

     const auto filter_path = (env.tempdir().path() / sst_test.filename(component_type::Filter)).native();
@@ -269,7 +269,7 @@ SEASTAR_TEST_CASE(test_bloom_filter_reload_after_unlink) {
     mut.partition().apply_insert(*schema, ss.make_ckey(1), ss.new_timestamp());

     // bloom filter will be reclaimed automatically due to low memory
-    auto sst = make_sstable_containing(env.make_sstable(schema), {mut});
+    auto sst = make_sstable_containing(env.make_sstable(schema), {mut}).get();
     auto& sst_mgr = env.manager();
     BOOST_REQUIRE_EQUAL(sst->filter_memory_size(), 0);

@@ -325,7 +325,7 @@ SEASTAR_TEST_CASE(test_bloom_filter_reclaim_after_unlink) {
     }

     // create one sst; there is sufficient memory for the bloom filter, so it won't be reclaimed
-    auto sst1 = make_sstable_containing(env.make_sstable(schema), mutations);
+    auto sst1 = make_sstable_containing(env.make_sstable(schema), mutations).get();
     auto& sst_mgr = env.manager();
     auto sst1_filename = sst1->get_filename();
     BOOST_REQUIRE(sst1->filter_memory_size() != 0);
@@ -358,7 +358,7 @@ SEASTAR_TEST_CASE(test_bloom_filter_reclaim_after_unlink) {

     // create another sst and unlink it to trigger reload of components.
     // the reload should not attempt to load sst'1 bloom filter into memory depsite its presence in the _active list.
-    auto sst2 = make_sstable_containing(env.make_sstable(schema), {mutations[0]});
+    auto sst2 = make_sstable_containing(env.make_sstable(schema), {mutations[0]}).get();
     sst2->unlink().get();
     sst2.release();

@@ -48,7 +48,7 @@ static sstables::shared_sstable generate_sstable(schema_ptr s, std::function<sha
             muts.push_back(make_insert(k));
         }
     }
-    return make_sstable_containing(sst_gen, std::move(muts));
+    return make_sstable_containing(sst_gen, std::move(muts)).get();
 }

 static sstables::shared_sstable sstable_that_needs_split(schema_ptr s, std::function<shared_sstable()> sst_gen) {

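From this point on, most hunks are one mechanical adjustment: the test helpers make_sstable_containing() (and, later, make_memtable()) now return a future instead of a ready object, and the tests, which run inside a seastar thread (SEASTAR_THREAD_TEST_CASE or seastar::async), synchronize with .get(). A minimal sketch of that calling convention, with a hypothetical async factory standing in for the helpers:

    #include <chrono>
    #include <seastar/core/coroutine.hh>
    #include <seastar/core/future.hh>
    #include <seastar/core/sleep.hh>

    // Hypothetical async factory, standing in for make_sstable_containing()/make_memtable().
    seastar::future<int> make_value() {
        co_await seastar::sleep(std::chrono::milliseconds(1));
        co_return 42;
    }

    // In a seastar thread context, future::get() suspends only the
    // thread-style fiber, not the reactor, until the value is ready.
    int use_value_in_thread_context() {
        return make_value().get();
    }
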
@@ -176,7 +176,7 @@ SEASTAR_TEST_CASE(incremental_compaction_test) {
         // Generate 4 sstable runs composed of 4 fragments each after 4 compactions.
         // All fragments non-overlapping.
         for (auto i = 0U; i < tokens.size(); i++) {
-            auto sst = make_sstable_containing(sst_gen, { make_insert(tokens[i]) });
+            auto sst = make_sstable_containing(sst_gen, { make_insert(tokens[i]) }).get();
             sst->set_sstable_level(1);
             BOOST_REQUIRE(sst->get_sstable_level() == 1);
             column_family_test(cf).add_sstable(sst).get();
@@ -338,7 +338,7 @@ SEASTAR_TEST_CASE(basic_garbage_collection_test) {
         auto sst = env.make_sstable(s, tmp.path().string(), env.new_generation(), sstables::get_highest_sstable_version(), big);
         return sst;
     };
-    auto sst = make_sstable_containing(creator, std::move(mutations));
+    auto sst = make_sstable_containing(creator, std::move(mutations)).get();
     column_family_test(cf).add_sstable(sst).get();

     const auto& stats = sst->get_stats_metadata();
@@ -445,7 +445,7 @@ SEASTAR_TEST_CASE(ics_reshape_test) {
         std::vector<sstables::shared_sstable> sstables;
         sstables.reserve(sstable_count);
         for (unsigned i = 0; i < sstable_count; i++) {
-            auto sst = make_sstable_containing(sst_gen, {make_row(0)});
+            auto sst = make_sstable_containing(sst_gen, {make_row(0)}).get();
             sstables.push_back(std::move(sst));
         }

@@ -460,7 +460,7 @@ SEASTAR_TEST_CASE(ics_reshape_test) {
         std::vector<sstables::shared_sstable> sstables;
         sstables.reserve(disjoint_sstable_count);
         for (unsigned i = 0; i < disjoint_sstable_count; i++) {
-            auto sst = make_sstable_containing(sst_gen, {make_row(i)});
+            auto sst = make_sstable_containing(sst_gen, {make_row(i)}).get();
             sstables.push_back(std::move(sst));
         }

@@ -474,7 +474,7 @@ SEASTAR_TEST_CASE(ics_reshape_test) {
         std::vector<sstables::shared_sstable> sstables;
         sstables.reserve(disjoint_sstable_count);
         for (unsigned i = 0; i < disjoint_sstable_count; i++) {
-            auto sst = make_sstable_containing(sst_gen, {make_row(i)});
+            auto sst = make_sstable_containing(sst_gen, {make_row(i)}).get();
             sstables::test(sst).set_run_identifier(sstable_run_id);
             sstables.push_back(std::move(sst));
         }
@@ -488,7 +488,7 @@ SEASTAR_TEST_CASE(ics_reshape_test) {
         std::vector<sstables::shared_sstable> sstables;
         sstables.reserve(disjoint_sstable_count);
         for (unsigned i = 0; i < disjoint_sstable_count; i++) {
-            auto sst = make_sstable_containing(sst_gen, {make_row(0)});
+            auto sst = make_sstable_containing(sst_gen, {make_row(0)}).get();
             sstables.push_back(std::move(sst));
         }

@@ -512,7 +512,7 @@ SEASTAR_TEST_CASE(gc_tombstone_with_grace_seconds_test) {
     mutation mut(schema, tests::generate_partition_key(schema, local_shard_only::yes));
     auto live_cell = atomic_cell::make_live(*byte_type, 0, to_bytes("a"), gc_clock::time_point(gc_clock::duration(expiration_time)), gc_clock::duration(1));
     mut.set_clustered_cell(clustering_key::make_empty(), *schema->get_column_definition("value"), std::move(live_cell));
-    auto sst = make_sstable_containing(env.make_sst_factory(schema), {mut});
+    auto sst = make_sstable_containing(env.make_sst_factory(schema), {mut}).get();

     table_for_tests cf = env.make_table_for_tests(schema);
     auto close_cf = deferred_stop(cf);
@@ -575,7 +575,7 @@ SEASTAR_TEST_CASE(gc_sstable_incremental_release_test) {
         bool expired = (key_idx % 4) == 0;
         mutations.push_back(make_mutation(key_idx, !expired));
     }
-    auto sst = make_sstable_containing(env.make_sst_factory(schema), std::move(mutations));
+    auto sst = make_sstable_containing(env.make_sst_factory(schema), std::move(mutations)).get();
     sstables::test(sst).set_run_identifier(run_id);
     column_family_test(cf).add_sstable(sst).get();
     input_sstables.push_back(std::move(sst));
@@ -668,7 +668,7 @@ SEASTAR_TEST_CASE(gc_sstable_no_premature_release_with_overlapping_inputs_test)
         }
         mutations.push_back(std::move(mut));
     }
-    auto sst = make_sstable_containing(env.make_sst_factory(schema), std::move(mutations));
+    auto sst = make_sstable_containing(env.make_sst_factory(schema), std::move(mutations)).get();
     sstables::test(sst).set_run_identifier(run_id);
     column_family_test(cf).add_sstable(sst).get();
     input_sstables.push_back(std::move(sst));
@@ -685,7 +685,7 @@ SEASTAR_TEST_CASE(gc_sstable_no_premature_release_with_overlapping_inputs_test)
         mut.set_clustered_cell(ck, cdef, std::move(live_cell));
         mutations.push_back(std::move(mut));
     }
-    auto sst = make_sstable_containing(env.make_sst_factory(schema), std::move(mutations));
+    auto sst = make_sstable_containing(env.make_sst_factory(schema), std::move(mutations)).get();
     sstables::test(sst).set_run_identifier(run_id);
     column_family_test(cf).add_sstable(sst).get();
     input_sstables.push_back(std::move(sst));

@@ -823,4 +823,42 @@ SEASTAR_TEST_CASE(test_prepared_statement_small_cache) {
     }, small_cache_config);
 }

+SEASTAR_THREAD_TEST_CASE(test_loading_cache_insert) {
+    using namespace std::chrono;
+    loader loader;
+    loading_cache_for_test<int, sstring> loading_cache(num_loaders, 1h, testlog);
+    auto stop_cache = seastar::defer([&loading_cache] { loading_cache.stop().get(); });
+
+    // insert() must populate the cache and invoke the loader exactly once.
+    loading_cache.insert(0, loader.get()).get();
+    BOOST_REQUIRE_EQUAL(loader.load_count(), 1);
+    BOOST_REQUIRE_EQUAL(loading_cache.size(), 1);
+    auto vp = loading_cache.find(0);
+    BOOST_REQUIRE(vp != nullptr);
+    BOOST_REQUIRE_EQUAL(*vp, test_string);
+
+    // A second insert() for the same key must not re-invoke the loader.
+    loading_cache.insert(0, loader.get()).get();
+    BOOST_REQUIRE_EQUAL(loader.load_count(), 1);
+    BOOST_REQUIRE_EQUAL(loading_cache.size(), 1);
+}
+
+// Regression test for SCYLLADB-1699: insert() on a cache constructed with
+// expiry == 0 (caching disabled) must be a no-op rather than asserting in
+// loading_cache::get_ptr().
+SEASTAR_THREAD_TEST_CASE(test_loading_cache_insert_caching_disabled) {
+    using namespace std::chrono;
+    loader loader;
+    loading_cache_for_test<int, sstring> loading_cache(num_loaders, 0ms, testlog);
+    auto stop_cache = seastar::defer([&loading_cache] { loading_cache.stop().get(); });
+
+    auto f = loading_cache.insert(0, loader.get());
+    loading_cache.insert(0, loader.get()).get();
+
+    // The loader must not have been invoked and the cache must remain empty.
+    BOOST_REQUIRE_EQUAL(loader.load_count(), 0);
+    BOOST_REQUIRE_EQUAL(loading_cache.size(), 0);
+    BOOST_REQUIRE(loading_cache.find(0) == nullptr);
+}
+
 BOOST_AUTO_TEST_SUITE_END()

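The second test pins down the semantics behind the SCYLLADB-1699 fix: a zero expiry means caching is disabled, and insert() must then complete as a no-op, without invoking the loader or tripping the assertion in get_ptr(). A self-contained toy sketch of that guard, using plain std containers rather than the real loading_cache (illustration only):

    #include <cassert>
    #include <chrono>
    #include <functional>
    #include <string>
    #include <unordered_map>

    class toy_cache {
        std::chrono::milliseconds _expiry;
        std::unordered_map<int, std::string> _map;
    public:
        explicit toy_cache(std::chrono::milliseconds expiry) : _expiry(expiry) {}
        bool caching_enabled() const { return _expiry.count() != 0; }

        void insert(int key, const std::function<std::string()>& load) {
            if (!caching_enabled()) {
                return; // no-op instead of asserting deeper in the lookup path
            }
            if (!_map.contains(key)) {
                _map.emplace(key, load()); // loader runs only for the first insert of a key
            }
        }
        std::size_t size() const { return _map.size(); }
    };

    int main() {
        toy_cache disabled(std::chrono::milliseconds(0));
        disabled.insert(0, [] { return std::string("never loaded"); });
        assert(disabled.size() == 0);

        toy_cache enabled(std::chrono::hours(1));
        enabled.insert(0, [] { return std::string("loaded once"); });
        enabled.insert(0, [] { return std::string("not loaded again"); });
        assert(enabled.size() == 1);
    }
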
@@ -80,7 +80,7 @@ utils::chunked_vector<mutation> make_ring(schema_ptr s, int n_mutations) {
 SEASTAR_TEST_CASE(test_memtable_conforms_to_mutation_source) {
     return seastar::async([] {
         run_mutation_source_tests([](schema_ptr s, const utils::chunked_vector<mutation>& partitions) {
-            auto mt = make_memtable(s, partitions);
+            auto mt = make_memtable(s, partitions).get();
             logalloc::shard_tracker().full_compaction();
             return mt->as_data_source();
         });
@@ -481,8 +481,8 @@ SEASTAR_TEST_CASE(test_fast_forward_to_after_memtable_is_flushed) {
     tests::reader_concurrency_semaphore_wrapper semaphore;

     utils::chunked_vector<mutation> ring = make_ring(s, 5);
-    auto mt = make_memtable(s, ring);
-    auto mt2 = make_memtable(s, ring);
+    auto mt = make_memtable(s, ring).get();
+    auto mt2 = make_memtable(s, ring).get();

     auto rd = assert_that(mt->make_mutation_reader(s, semaphore.make_permit()));
     rd.produces(ring[0]);
@@ -501,7 +501,7 @@ SEASTAR_TEST_CASE(test_exception_safety_of_partition_range_reads) {
     tests::reader_concurrency_semaphore_wrapper semaphore;
     utils::chunked_vector<mutation> ms = gen(2);

-    auto mt = make_memtable(s, ms);
+    auto mt = make_memtable(s, ms).get();
     memory::with_allocation_failures([&] {
         assert_that(mt->make_mutation_reader(s, semaphore.make_permit(), query::full_partition_range))
             .produces(ms);
@@ -516,7 +516,7 @@ SEASTAR_TEST_CASE(test_exception_safety_of_flush_reads) {
     tests::reader_concurrency_semaphore_wrapper semaphore;
     utils::chunked_vector<mutation> ms = gen(2);

-    auto mt = make_memtable(s, ms);
+    auto mt = make_memtable(s, ms).get();
     memory::with_allocation_failures([&] {
         auto revert = defer([&] {
             mt->revert_flushed_memory();
@@ -534,7 +534,7 @@ SEASTAR_TEST_CASE(test_exception_safety_of_single_partition_reads) {
     tests::reader_concurrency_semaphore_wrapper semaphore;
     utils::chunked_vector<mutation> ms = gen(2);

-    auto mt = make_memtable(s, ms);
+    auto mt = make_memtable(s, ms).get();
     memory::with_allocation_failures([&] {
         assert_that(mt->make_mutation_reader(s, semaphore.make_permit(), dht::partition_range::make_singular(ms[1].decorated_key())))
             .produces(ms[1]);

@@ -771,11 +771,11 @@ SEASTAR_TEST_CASE(combined_mutation_reader_test) {
         };

         std::vector<sstables::shared_sstable> sstable_list = {
-            make_sstable_containing(sst_factory(0), std::move(sstable_level_0_0_mutations)),
-            make_sstable_containing(sst_factory(1), std::move(sstable_level_1_0_mutations)),
-            make_sstable_containing(sst_factory(1), std::move(sstable_level_1_1_mutations)),
-            make_sstable_containing(sst_factory(2), std::move(sstable_level_2_0_mutations)),
-            make_sstable_containing(sst_factory(2), std::move(sstable_level_2_1_mutations)),
+            make_sstable_containing(sst_factory(0), std::move(sstable_level_0_0_mutations)).get(),
+            make_sstable_containing(sst_factory(1), std::move(sstable_level_1_0_mutations)).get(),
+            make_sstable_containing(sst_factory(1), std::move(sstable_level_1_1_mutations)).get(),
+            make_sstable_containing(sst_factory(2), std::move(sstable_level_2_0_mutations)).get(),
+            make_sstable_containing(sst_factory(2), std::move(sstable_level_2_1_mutations)).get(),
         };

         auto cs = compaction::make_compaction_strategy(compaction::compaction_strategy_type::leveled, {});
@@ -1062,7 +1062,7 @@ SEASTAR_TEST_CASE(test_fast_forwarding_combined_reader_is_consistent_with_slicin
                 combined[j++].apply(m);
             }
         }
-        mutation_source ds = make_sstable_containing(env.make_sstable(s), muts)->as_mutation_source();
+        mutation_source ds = make_sstable_containing(env.make_sstable(s), muts).get()->as_mutation_source();
         reader_ranges.push_back(dht::partition_range::make({keys[0]}, {keys[0]}));
         readers.push_back(ds.make_mutation_reader(s,
             permit,
@@ -1133,8 +1133,8 @@ SEASTAR_TEST_CASE(test_combined_reader_slicing_with_overlapping_range_tombstones

         std::vector<mutation_reader> readers;

-        mutation_source ds1 = make_sstable_containing(env.make_sstable(s), {m1})->as_mutation_source();
-        mutation_source ds2 = make_sstable_containing(env.make_sstable(s), {m2})->as_mutation_source();
+        mutation_source ds1 = make_sstable_containing(env.make_sstable(s), {m1}).get()->as_mutation_source();
+        mutation_source ds2 = make_sstable_containing(env.make_sstable(s), {m2}).get()->as_mutation_source();

         // upper bound ends before the row in m2, so that the raw is fetched after next fast forward.
         auto range = ss.make_ckey_range(0, 3);
@@ -2580,7 +2580,7 @@ SEASTAR_THREAD_TEST_CASE(test_queue_reader) {
 SEASTAR_THREAD_TEST_CASE(test_compacting_reader_as_mutation_source) {
     auto make_populate = [] (bool single_fragment_buffer) {
         return [single_fragment_buffer] (schema_ptr s, const utils::chunked_vector<mutation>& mutations, gc_clock::time_point query_time) mutable {
-            auto mt = make_memtable(s, mutations);
+            auto mt = make_memtable(s, mutations).get();
             return mutation_source([=] (
                     schema_ptr s,
                     reader_permit permit,
@@ -2705,7 +2705,7 @@ SEASTAR_THREAD_TEST_CASE(test_compacting_reader_is_consistent_with_compaction) {

 SEASTAR_THREAD_TEST_CASE(test_auto_paused_evictable_reader_is_mutation_source) {
     auto make_populate = [] (schema_ptr s, const utils::chunked_vector<mutation>& mutations, gc_clock::time_point query_time) {
-        auto mt = make_memtable(s, mutations);
+        auto mt = make_memtable(s, mutations).get();
         return mutation_source([=] (
                 schema_ptr s,
                 reader_permit permit,
@@ -2782,7 +2782,7 @@ SEASTAR_THREAD_TEST_CASE(test_manual_paused_evictable_reader_is_mutation_source)
     };

     auto make_populate = [] (schema_ptr s, const utils::chunked_vector<mutation>& mutations, gc_clock::time_point query_time) {
-        auto mt = make_memtable(s, mutations);
+        auto mt = make_memtable(s, mutations).get();
         return mutation_source([=] (
                 schema_ptr s,
                 reader_permit permit,
@@ -3947,7 +3947,7 @@ static future<> do_test_clustering_order_merger_sstable_set(bool reversed) {
     for (auto& mb: scenario.readers_data) {
         sstables::shared_sstable sst;
         if (mb.m) {
-            sst = make_sstable_containing(sst_factory, {*mb.m});
+            sst = make_sstable_containing(sst_factory, {*mb.m}).get();
             sst_set.insert(sst);
         } else {
             // We want to have an sstable that won't return any fragments when we query it
@@ -3956,7 +3956,7 @@ static future<> do_test_clustering_order_merger_sstable_set(bool reversed) {
             auto pk = pkeys[1];
             SCYLLA_ASSERT(!pk.equal(*g._s, g._pk));

-            sst = make_sstable_containing(sst_factory, {mutation(table_schema, pk)});
+            sst = make_sstable_containing(sst_factory, {mutation(table_schema, pk)}).get();
             sst_set.insert(sst);
         }

@@ -526,8 +526,8 @@ SEASTAR_TEST_CASE(reader_concurrency_semaphore_timeout) {
 }

 SEASTAR_THREAD_TEST_CASE(reader_concurrency_semaphore_abort) {
-    const auto preemptive_abort_factor = 0.5f;
-    reader_concurrency_semaphore semaphore(reader_concurrency_semaphore::for_tests{}, get_name(), 1, replica::new_reader_base_cost,
+    const auto preemptive_abort_factor = 1.0f;
+    reader_concurrency_semaphore semaphore(reader_concurrency_semaphore::for_tests{}, get_name(), 1, 2 * replica::new_reader_base_cost,
             100, utils::updateable_value(std::numeric_limits<uint32_t>::max()), utils::updateable_value(std::numeric_limits<uint32_t>::max()),
             utils::updateable_value<uint32_t>(1), utils::updateable_value<float>(preemptive_abort_factor));
     auto stop_sem = deferred_stop(semaphore);
@@ -535,26 +535,26 @@ SEASTAR_THREAD_TEST_CASE(reader_concurrency_semaphore_abort) {
     {
         BOOST_REQUIRE(semaphore.get_stats().total_reads_shed_due_to_overload == 0);

-        auto timeout = db::timeout_clock::now() + 500ms;
+        auto timeout = db::timeout_clock::now() + 60min;

         reader_permit_opt permit1 = semaphore.obtain_permit(nullptr, "permit1", replica::new_reader_base_cost, timeout, {}).get();

         auto permit2_fut = semaphore.obtain_permit(nullptr, "permit2", replica::new_reader_base_cost, timeout, {});
         BOOST_REQUIRE_EQUAL(semaphore.get_stats().waiters, 1);

-        // The permits are rejected when the remaining time is less than half of its timeout when arrived to the semaphore.
-        // Hence, sleep 300ms to reject the permits in the waitlist during admission.
-        seastar::sleep(300ms).get();
         BOOST_REQUIRE_EQUAL(semaphore.get_stats().reads_enqueued_for_admission, 1);
         BOOST_REQUIRE_EQUAL(semaphore.get_stats().reads_enqueued_for_memory, 0);
+        BOOST_REQUIRE_EQUAL(semaphore.get_stats().reads_queued_because_count_resources, 1);
+        BOOST_REQUIRE_EQUAL(semaphore.get_stats().reads_queued_because_memory_resources, 0);
+        BOOST_REQUIRE_EQUAL(semaphore.available_resources().memory, replica::new_reader_base_cost);

         permit1 = {};
-        const auto futures_failed = eventually_true([&] { return permit2_fut.failed(); });
-        BOOST_CHECK(futures_failed);
-        BOOST_CHECK_THROW(std::rethrow_exception(permit2_fut.get_exception()), semaphore_aborted);
+        BOOST_REQUIRE(eventually_true([&] { return permit2_fut.available(); }));
+        BOOST_REQUIRE_THROW(permit2_fut.get(), semaphore_aborted);
+        BOOST_CHECK(semaphore.get_stats().total_reads_shed_due_to_overload > 0);
     }

     // All units should have been deposited back.
-    REQUIRE_EVENTUALLY_EQUAL<ssize_t>([&] { return semaphore.available_resources().memory; }, replica::new_reader_base_cost);
+    REQUIRE_EVENTUALLY_EQUAL<ssize_t>([&] { return semaphore.available_resources().memory; }, 2 * replica::new_reader_base_cost);
 }

 SEASTAR_TEST_CASE(reader_concurrency_semaphore_max_queue_length) {

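The removed comment documents the rejection rule this test exercises: a queued permit is shed during admission once less than a configured fraction of its original timeout budget remains. With the factor raised from 0.5 to 1.0, any waiter that has spent time in the queue is immediately eligible for shedding, which is why the 300ms sleep disappears and releasing permit1 is enough to make permit2 fail with semaphore_aborted. A sketch of that predicate as I read it (the real semaphore logic may differ in detail):

    #include <chrono>

    // Shed a waiter when less than `factor` of its original timeout budget remains.
    // factor == 0.5: shed once half the budget has been spent waiting;
    // factor == 1.0: shed any waiter that has waited at all.
    bool should_shed(std::chrono::steady_clock::time_point arrived,
                     std::chrono::steady_clock::time_point deadline,
                     float factor) {
        const std::chrono::duration<float> total = deadline - arrived;
        const std::chrono::duration<float> remaining = deadline - std::chrono::steady_clock::now();
        return remaining < total * factor;
    }
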
@@ -93,7 +93,7 @@ repair_rows_on_wire make_random_repair_rows_on_wire(random_mutation_generator& g

     for (mutation& mut : muts) {
         partition_key pk = mut.key();
-        auto m2 = make_memtable(s, {mut});
+        auto m2 = make_memtable(s, {mut}).get();
         m->apply(mut);
         auto reader = mutation_fragment_v1_stream(m2->make_mutation_reader(s, permit));
         auto close_reader = deferred_close(reader);

@@ -385,7 +385,7 @@ SEASTAR_TEST_CASE(test_cache_delegates_to_underlying_only_once_multiple_mutation
     std::move(all_partitions.begin() + 1, all_partitions.end() - 1, std::back_inserter(partitions));

     cache_tracker tracker;
-    auto mt = make_memtable(s, partitions);
+    auto mt = make_memtable(s, partitions).get();

     auto make_cache = [&tracker, &mt](schema_ptr s, int& secondary_calls_count) -> lw_shared_ptr<row_cache> {
         auto secondary = mutation_source([&mt, &secondary_calls_count] (schema_ptr s, reader_permit permit, const dht::partition_range& range,
@@ -557,7 +557,7 @@ SEASTAR_TEST_CASE(test_query_of_incomplete_range_goes_to_underlying) {

     utils::chunked_vector<mutation> mutations = make_ring(s, 3);

-    auto mt = make_memtable(s, mutations);
+    auto mt = make_memtable(s, mutations).get();

     cache_tracker tracker;
     row_cache cache(s, snapshot_source_from_snapshot(mt->as_data_source()), tracker);
@@ -603,7 +603,7 @@ SEASTAR_TEST_CASE(test_single_key_queries_after_population_in_reverse_order) {
     tests::reader_concurrency_semaphore_wrapper semaphore;

     utils::chunked_vector<mutation> mutations = make_ring(s, 3);
-    auto mt = make_memtable(s, mutations);
+    auto mt = make_memtable(s, mutations).get();

     cache_tracker tracker;
     row_cache cache(s, snapshot_source_from_snapshot(mt->as_data_source()), tracker);
@@ -639,7 +639,7 @@ SEASTAR_TEST_CASE(test_partition_range_population_with_concurrent_memtable_flush
     tests::reader_concurrency_semaphore_wrapper semaphore;

     utils::chunked_vector<mutation> mutations = make_ring(s, 3);
-    auto mt = make_memtable(s, mutations);
+    auto mt = make_memtable(s, mutations).get();

     cache_tracker tracker;
     row_cache cache(s, snapshot_source_from_snapshot(mt->as_data_source()), tracker);
@@ -694,7 +694,7 @@ SEASTAR_TEST_CASE(test_row_cache_conforms_to_mutation_source) {
     cache_tracker tracker;

     run_mutation_source_tests([&tracker](schema_ptr s, const utils::chunked_vector<mutation>& mutations) -> mutation_source {
-        auto mt = make_memtable(s, mutations);
+        auto mt = make_memtable(s, mutations).get();
         auto cache = make_lw_shared<row_cache>(s, snapshot_source_from_snapshot(mt->as_data_source()), tracker);
         return mutation_source([cache] (schema_ptr s,
                 reader_permit permit,
@@ -1275,7 +1275,7 @@ SEASTAR_TEST_CASE(test_continuity_flag_and_invalidate_race) {
     tests::reader_concurrency_semaphore_wrapper semaphore;

     auto ring = make_ring(s, 4);
-    auto mt = make_memtable(s, ring);
+    auto mt = make_memtable(s, ring).get();

     cache_tracker tracker;
     row_cache cache(s, snapshot_source_from_snapshot(mt->as_data_source()), tracker);
@@ -1328,7 +1328,7 @@ SEASTAR_TEST_CASE(test_cache_invalidation_with_filter) {
     tests::reader_concurrency_semaphore_wrapper semaphore;

     auto ring = make_ring(s, 5);
-    auto mt = make_memtable(s, ring);
+    auto mt = make_memtable(s, ring).get();

     cache_tracker tracker;

@@ -1469,13 +1469,13 @@ SEASTAR_TEST_CASE(test_cache_population_and_update_race) {
     cache_tracker tracker;

     auto ring = make_ring(s, 3);
-    auto mt1 = make_memtable(s, ring);
+    auto mt1 = make_memtable(s, ring).get();
     memtables.apply(*mt1);

     row_cache cache(s, cache_source, tracker);

     auto ring2 = updated_ring(ring);
-    auto mt2 = make_memtable(s, ring2);
+    auto mt2 = make_memtable(s, ring2).get();

     auto f = thr.block();

@@ -1602,13 +1602,13 @@ SEASTAR_TEST_CASE(test_cache_population_and_clear_race) {
     cache_tracker tracker;

     auto ring = make_ring(s, 3);
-    auto mt1 = make_memtable(s, ring);
+    auto mt1 = make_memtable(s, ring).get();
     memtables.apply(*mt1);

     row_cache cache(s, std::move(cache_source), tracker);

     auto ring2 = updated_ring(ring);
-    auto mt2 = make_memtable(s, ring2);
+    auto mt2 = make_memtable(s, ring2).get();

     auto f = thr.block();

@@ -1768,7 +1768,7 @@ SEASTAR_TEST_CASE(test_slicing_mutation_reader) {
             to_bytes("v"), data_value(i), api::new_timestamp());
     }

-    auto mt = make_memtable(s, {m});
+    auto mt = make_memtable(s, {m}).get();
     cache_tracker tracker;
     row_cache cache(s, snapshot_source_from_snapshot(mt->as_data_source()), tracker);

@@ -2667,7 +2667,7 @@ SEASTAR_TEST_CASE(test_exception_safety_of_update_from_memtable) {
             .produces_end_of_stream();
     });

-    auto mt = make_memtable(cache.schema(), muts2);
+    auto mt = make_memtable(cache.schema(), muts2).get();

     // Make snapshot on pkeys[2]
     auto pr = dht::partition_range::make_singular(pkeys[2]);
@@ -2675,9 +2675,9 @@ SEASTAR_TEST_CASE(test_exception_safety_of_update_from_memtable) {
     snap->set_max_buffer_size(1);
     snap->fill_buffer().get();

+    auto mt2 = make_memtable(cache.schema(), muts2).get();
     cache.update(row_cache::external_updater([&] {
         memory::scoped_critical_alloc_section dfg;
-        auto mt2 = make_memtable(cache.schema(), muts2);
         underlying.apply(std::move(mt2));
     }), *mt).get();

@@ -43,7 +43,7 @@ future <> test_schema_changes_int(sstable_version_types sstable_vtype) {
     shared_sstable created_with_base_schema;
     shared_sstable created_with_changed_schema;
     if (it == cache.end()) {
-        created_with_base_schema = make_sstable_containing(env.make_sstable(base), base_mutations);
+        created_with_base_schema = make_sstable_containing(env.make_sstable(base), base_mutations).get();
         cache.emplace(base, created_with_base_schema);
     } else {
         created_with_base_schema = it->second;

@@ -3220,7 +3220,7 @@ static sstables::shared_sstable write_and_compare_sstables(test_env& env, schema
 }

 static sstables::shared_sstable write_sstables(test_env& env, schema_ptr s, lw_shared_ptr<replica::memtable> mt, sstable_version_types version) {
-    auto sst = make_sstable_containing(env.make_sstable(s, version), mt);
+    auto sst = make_sstable_containing(env.make_sstable(s, version), mt).get();
     BOOST_TEST_MESSAGE(format("write_sstable from memtable: {}", sst->get_filename()));
     return sst;
 }
@@ -3253,7 +3253,7 @@ constexpr std::array<sstable_version_types, 4> test_sstable_versions = {

 static void write_mut_and_compare_sstables_version(test_env& env, schema_ptr s, mutation& mut, const sstring& table_name,
         sstable_version_types version) {
-    lw_shared_ptr<replica::memtable> mt = make_memtable(s, {mut});
+    lw_shared_ptr<replica::memtable> mt = make_memtable(s, {mut}).get();
     write_and_compare_sstables(env, s, mt, table_name, version);
 }

@@ -3265,8 +3265,8 @@ static void write_mut_and_compare_sstables(test_env& env, schema_ptr s, mutation

 static void write_muts_and_compare_sstables_version(test_env& env, schema_ptr s, mutation& mut1, mutation& mut2, const sstring& table_name,
         sstable_version_types version) {
-    lw_shared_ptr<replica::memtable> mt1 = make_memtable(s, {mut1});
-    lw_shared_ptr<replica::memtable> mt2 = make_memtable(s, {mut2});
+    lw_shared_ptr<replica::memtable> mt1 = make_memtable(s, {mut1}).get();
+    lw_shared_ptr<replica::memtable> mt2 = make_memtable(s, {mut2}).get();
     write_and_compare_sstables(env, s, mt1, mt2, table_name, version);
 }

@@ -3326,7 +3326,7 @@ using validate_stats_metadata = bool_class<validate_stats_metadata_tag>;

 static void write_mut_and_validate_version(test_env& env, schema_ptr s, const sstring& table_name, mutation& mut,
         sstable_version_types version, validate_stats_metadata validate_flag) {
-    lw_shared_ptr<replica::memtable> mt = make_memtable(s, {mut});
+    lw_shared_ptr<replica::memtable> mt = make_memtable(s, {mut}).get();
     auto sst = write_and_compare_sstables(env, s, mt, table_name, version);
     auto written_sst = validate_read(env, sst, {mut});
     if (validate_flag) {
@@ -3348,7 +3348,7 @@ static void write_mut_and_validate_version(test_env& env, schema_ptr s, const ss
     // but cannot now because flat reader version transforms rearrange
     // range tombstones. Revisit once the reader v2 migration is
     // complete and those version transforms are gone
-    auto sst = make_sstable_containing(env.make_sstable(s, version), {mut});
+    auto sst = make_sstable_containing(env.make_sstable(s, version), {mut}).get();
     auto written_sst = validate_read(env, sst, {mut});
     check_min_max_column_names(written_sst, std::move(min_components), std::move(max_components));
 }
@@ -3362,7 +3362,7 @@ static void write_mut_and_validate(test_env& env, schema_ptr s, const sstring& t

 static void write_mut_and_validate_version(test_env& env, schema_ptr s, const sstring& table_name, utils::chunked_vector<mutation> muts,
         sstable_version_types version, validate_stats_metadata validate_flag) {
-    lw_shared_ptr<replica::memtable> mt = make_memtable(s, muts);
+    lw_shared_ptr<replica::memtable> mt = make_memtable(s, muts).get();
     auto sst = write_and_compare_sstables(env, s, mt, table_name, version);
     auto written_sst = validate_read(env, sst, muts);
     if (validate_flag) {
@@ -3701,7 +3701,7 @@ static future<> test_write_many_partitions(sstring table_name, tombstone partiti

         bool compressed = cp.get_algorithm() != compression_parameters::algorithm::none;
         for (auto version : test_sstable_versions) {
-            lw_shared_ptr<replica::memtable> mt = make_memtable(s, muts);
+            lw_shared_ptr<replica::memtable> mt = make_memtable(s, muts).get();
             auto sst = compressed ? write_sstables(env, s, mt, version) : write_and_compare_sstables(env, s, mt, table_name, version);
             std::ranges::sort(muts, mutation_decorated_key_less_comparator());
             validate_read(env, sst, muts);
@@ -5016,7 +5016,7 @@ SEASTAR_TEST_CASE(test_write_empty_static_row) {
     mut2.set_cell(ckey, "rc", data_value{3}, ts);

     for (auto version : test_sstable_versions) {
-        lw_shared_ptr<replica::memtable> mt = make_memtable(s, {mut1, mut2});
+        lw_shared_ptr<replica::memtable> mt = make_memtable(s, {mut1, mut2}).get();
         auto sst = write_and_compare_sstables(env, s, mt, table_name, version);
         validate_read(env, sst, {mut2, mut1}); // Mutations are re-ordered according to decorated_key order
     }
@@ -5069,7 +5069,7 @@ SEASTAR_TEST_CASE(test_sstable_reader_on_unknown_column) {
         partition.set_cell(ckey, "val2", data_value{200 + i}, write_timestamp);
     };
     for (auto version : test_sstable_versions) {
-        auto mt = make_memtable(write_schema, {partition});
+        auto mt = make_memtable(write_schema, {partition}).get();
         for (auto index_block_size : {1, 128, 64*1024}) {
             auto _ = env.tempdir().make_sweeper();
             sstable_writer_config cfg = env.manager().configure_writer();
@@ -5190,7 +5190,7 @@ SEASTAR_THREAD_TEST_CASE(test_sstable_write_large_row) {
     auto ck2 = s.make_ckey("cv2");
     s.add_row(partition, ck2, "foo bar");
     for (auto version : test_sstable_versions) {
-        auto mt = make_memtable(s.schema(), {partition});
+        auto mt = make_memtable(s.schema(), {partition}).get();
         test_sstable_write_large_row_f(s.schema(), semaphore.make_permit(), *mt, pk, {nullptr, &ck1, &ck2}, 21, version);
         test_sstable_write_large_row_f(s.schema(), semaphore.make_permit(), *mt, pk, {nullptr, &ck2}, 22, version);
     }
@@ -5271,7 +5271,7 @@ SEASTAR_THREAD_TEST_CASE(test_sstable_write_large_cell) {
     auto ck2 = s.make_ckey("cv2");
     s.add_row(partition, ck2, "foo bar");
    for (auto version : test_sstable_versions) {
-        auto mt = make_memtable(s.schema(), {partition});
+        auto mt = make_memtable(s.schema(), {partition}).get();
         test_sstable_write_large_cell_f(s.schema(), semaphore.make_permit(), *mt, pk, {nullptr, &ck1, &ck2}, 13, version);
         test_sstable_write_large_cell_f(s.schema(), semaphore.make_permit(), *mt, pk, {nullptr, &ck2}, 14, version);
     }
@@ -5291,7 +5291,7 @@ static void test_sstable_log_too_many_rows_f(int rows, int range_tombstones, uin
     }

     schema_ptr sc = s.schema();
-    auto mt = make_memtable(sc, {p});
+    auto mt = make_memtable(sc, {p}).get();

     // rows_count_threshold = threshold; all other thresholds at MAX.
     large_data_test_handler handler(std::numeric_limits<uint64_t>::max(),
@@ -5457,7 +5457,7 @@ static void test_sstable_too_many_collection_elements_f(int elements, uint64_t t
     }
     s.add_row_with_collection(p, s.make_ckey("ck1"), kv_map);
     schema_ptr sc = s.schema();
-    auto mt = make_memtable(sc, {p});
+    auto mt = make_memtable(sc, {p}).get();

     BOOST_TEST_MESSAGE(format("elements={} threshold={} expected={}", elements, threshold, expected));
     // collection_elements_threshold = threshold; all other thresholds at MAX.
@@ -5530,7 +5530,7 @@ SEASTAR_THREAD_TEST_CASE(test_large_data_records_round_trip) {
     auto ck = ss.make_ckey("ck1");
     ss.add_row(m, ck, "a_value_that_is_larger_than_one_byte");

-    auto mt = make_memtable(s, {m});
+    auto mt = make_memtable(s, {m}).get();
     auto sst = env.make_sstable(s, version);
     sst->write_components(mt->make_mutation_reader(s, env.make_reader_permit()),
             1, s, env.manager().configure_writer("test"), encoding_stats{}).get();
@@ -5646,7 +5646,7 @@ SEASTAR_THREAD_TEST_CASE(test_large_data_records_top_n_bounded) {
         muts.push_back(std::move(m));
     }

-    auto mt = make_memtable(s, muts);
+    auto mt = make_memtable(s, muts).get();
     auto sst = env.make_sstable(s, version);
     sst->write_components(mt->make_mutation_reader(s, env.make_reader_permit()),
             6, s, env.manager().configure_writer("test"), encoding_stats{}).get();
@@ -5699,7 +5699,7 @@ SEASTAR_THREAD_TEST_CASE(test_large_data_records_none_when_below_threshold) {
     mutation m(s, pk);
     ss.add_row(m, ss.make_ckey("ck1"), "small_value");

-    auto mt = make_memtable(s, {m});
+    auto mt = make_memtable(s, {m}).get();
     auto sst = env.make_sstable(s, version);
     sst->write_components(mt->make_mutation_reader(s, env.make_reader_permit()),
             1, s, env.manager().configure_writer("test"), encoding_stats{}).get();

@@ -12,6 +12,7 @@
 #include <seastar/core/future-util.hh>
 #include <seastar/core/align.hh>
 #include <seastar/core/aligned_buffer.hh>
+#include <seastar/core/loop.hh>
 #include <seastar/util/closeable.hh>
 #include <seastar/util/short_streams.hh>
 #include <seastar/core/coroutine.hh>
@@ -195,7 +196,7 @@ void compaction_manager_basic(test_env& env) {
         mutation m(s, key);
         m.set_clustered_cell(c_key, r1_col, make_atomic_cell(int32_type, int32_type->decompose(1)));

-        auto sst = make_sstable_containing(sst_gen, {std::move(m)});
+        auto sst = make_sstable_containing(sst_gen, {std::move(m)}).get();
         column_family_test(cf).add_sstable(sst).get();
     }

@@ -403,7 +404,7 @@ static future<compact_sstables_result> compact_sstables(test_env& env, std::vect
|
||||
mutation m(s, key);
|
||||
m.set_clustered_cell(c_key, r1_col, make_atomic_cell(utf8_type, bytes(min_sstable_size, 'a')));
|
||||
|
||||
sst = make_sstable_containing(sst, {std::move(m)});
|
||||
sst = make_sstable_containing(sst, {std::move(m)}).get();
|
||||
sstables.push_back(sst);
|
||||
}
|
||||
}
|
||||
@@ -610,14 +611,14 @@ static void compact_corrupted_by_compression_mode(const std::string& tname,
|
||||
testlog.info("Compacting {}compressed SSTable with invalid checksums", compress ? "" : "un");
|
||||
|
||||
const auto muts = tests::generate_random_mutations(random_schema, 2).get();
|
||||
auto sst = make_sstable_containing(env.make_sstable(schema), muts);
|
||||
auto sst = make_sstable_containing(env.make_sstable(schema), muts).get();
|
||||
corrupt_sstable(sst);
|
||||
|
||||
test_failing_compact(schema, {sst}, error_msg, "failed checksum");
|
||||
|
||||
testlog.info("Compacting {}compressed SSTable with invalid digest", compress ? "" : "un");
|
||||
|
||||
sst = make_sstable_containing(env.make_sstable(schema), muts);
|
||||
sst = make_sstable_containing(env.make_sstable(schema), muts).get();
|
||||
{
|
||||
auto f = sstables::test(sst).open_file(component_type::Digest, {}, {}).get();
|
||||
auto stream = make_file_input_stream(f);
|
||||
@@ -1430,8 +1431,8 @@ future<> tombstone_purge(test_env& env) {
|
||||
auto mut3 = make_delete(alpha);
|
||||
|
||||
std::vector<shared_sstable> sstables = {
|
||||
make_sstable_containing(sst_gen, {mut1, mut2}),
|
||||
make_sstable_containing(sst_gen, {mut3})
|
||||
make_sstable_containing(sst_gen, {mut1, mut2}).get(),
|
||||
make_sstable_containing(sst_gen, {mut3}).get()
|
||||
};
|
||||
|
||||
forward_jump_clocks(std::chrono::seconds(ttl));
|
||||
@@ -1449,8 +1450,8 @@ future<> tombstone_purge(test_env& env) {
|
||||
auto mut2 = make_insert(alpha);
|
||||
auto mut3 = make_delete(alpha);
|
||||
|
||||
auto sst1 = make_sstable_containing(sst_gen, {mut1});
|
||||
auto sst2 = make_sstable_containing(sst_gen, {mut2, mut3});
|
||||
auto sst1 = make_sstable_containing(sst_gen, {mut1}).get();
|
||||
auto sst2 = make_sstable_containing(sst_gen, {mut2, mut3}).get();
|
||||
|
||||
forward_jump_clocks(std::chrono::seconds(ttl));
|
||||
|
||||
@@ -1468,8 +1469,8 @@ future<> tombstone_purge(test_env& env) {
|
||||
auto mut3 = make_insert(beta);
|
||||
auto mut4 = make_insert(alpha);
|
||||
|
||||
auto sst1 = make_sstable_containing(sst_gen, {mut1, mut2, mut3});
|
||||
auto sst2 = make_sstable_containing(sst_gen, {mut4});
|
||||
auto sst1 = make_sstable_containing(sst_gen, {mut1, mut2, mut3}).get();
|
||||
auto sst2 = make_sstable_containing(sst_gen, {mut4}).get();
|
||||
|
||||
forward_jump_clocks(std::chrono::seconds(ttl));
|
||||
|
||||
@@ -1487,8 +1488,8 @@ future<> tombstone_purge(test_env& env) {
|
||||
auto mut3 = make_insert(beta);
|
||||
auto mut4 = make_insert(beta);
|
||||
|
||||
auto sst1 = make_sstable_containing(sst_gen, {mut1, mut2, mut3});
|
||||
auto sst2 = make_sstable_containing(sst_gen, {mut4});
|
||||
auto sst1 = make_sstable_containing(sst_gen, {mut1, mut2, mut3}).get();
|
||||
auto sst2 = make_sstable_containing(sst_gen, {mut4}).get();
|
||||
|
||||
forward_jump_clocks(std::chrono::seconds(ttl));
|
||||
|
||||
@@ -1505,8 +1506,8 @@ future<> tombstone_purge(test_env& env) {
|
||||
auto mut1 = make_insert(alpha);
|
||||
auto mut2 = make_expiring(alpha, ttl);
|
||||
|
||||
auto sst1 = make_sstable_containing(sst_gen, {mut1});
|
||||
auto sst2 = make_sstable_containing(sst_gen, {mut2});
|
||||
auto sst1 = make_sstable_containing(sst_gen, {mut1}).get();
|
||||
auto sst2 = make_sstable_containing(sst_gen, {mut2}).get();
|
||||
|
||||
forward_jump_clocks(std::chrono::seconds(ttl));
|
||||
|
||||
@@ -1521,8 +1522,8 @@ future<> tombstone_purge(test_env& env) {
|
||||
auto mut1 = make_insert(alpha);
|
||||
auto mut2 = make_expiring(beta, ttl);
|
||||
|
||||
auto sst1 = make_sstable_containing(sst_gen, {mut1});
|
||||
auto sst2 = make_sstable_containing(sst_gen, {mut2});
|
||||
auto sst1 = make_sstable_containing(sst_gen, {mut1}).get();
|
||||
auto sst2 = make_sstable_containing(sst_gen, {mut2}).get();
|
||||
|
||||
forward_jump_clocks(std::chrono::seconds(ttl));
|
||||
|
||||
@@ -1534,8 +1535,8 @@ future<> tombstone_purge(test_env& env) {
|
||||
auto mut2 = make_expiring(alpha, ttl);
|
||||
auto mut3 = make_insert(beta);
|
||||
|
||||
auto sst1 = make_sstable_containing(sst_gen, {mut1});
|
||||
auto sst2 = make_sstable_containing(sst_gen, {mut2, mut3});
|
||||
auto sst1 = make_sstable_containing(sst_gen, {mut1}).get();
|
||||
auto sst2 = make_sstable_containing(sst_gen, {mut2, mut3}).get();
|
||||
|
||||
forward_jump_clocks(std::chrono::seconds(ttl));
|
||||
|
||||
@@ -1569,7 +1570,7 @@ future<> tombstone_purge(test_env& env) {
|
||||
auto sst1 = make_sstable_containing(sst_gen,
|
||||
{make_insert(alpha),
|
||||
make_delete(alpha, deletion_time)},
|
||||
validate::no);
|
||||
validate::no).get();
|
||||
auto result = compact({sst1}, {sst1});
|
||||
BOOST_CHECK_EQUAL(1, sstables_stats::get_shard_stats().capped_tombstone_deletion_time);
|
||||
}
|
||||
@@ -1579,8 +1580,8 @@ future<> tombstone_purge(test_env& env) {
|
||||
auto mut2 = make_delete(alpha);
|
||||
auto mut3 = make_insert(beta);
|
||||
|
||||
auto sst1 = make_sstable_containing(sst_gen, {mut1});
|
||||
auto sst2 = make_sstable_containing(sst_gen, {mut2, mut3});
|
||||
auto sst1 = make_sstable_containing(sst_gen, {mut1}).get();
|
||||
auto sst2 = make_sstable_containing(sst_gen, {mut2, mut3}).get();
|
||||
|
||||
forward_jump_clocks(std::chrono::seconds(1));
|
||||
|
||||
@@ -1597,8 +1598,8 @@ future<> tombstone_purge(test_env& env) {
|
||||
auto mut2 = make_delete(alpha);
|
||||
auto mut3 = make_insert(beta);
|
||||
|
||||
auto sst1 = make_sstable_containing(sst_gen, {mut1});
|
||||
auto sst2 = make_sstable_containing(sst_gen, {mut2, mut3});
|
||||
auto sst1 = make_sstable_containing(sst_gen, {mut1}).get();
|
||||
auto sst2 = make_sstable_containing(sst_gen, {mut2, mut3}).get();
|
||||
|
||||
forward_jump_clocks(std::chrono::seconds(1));
|
||||
|
||||
@@ -1613,8 +1614,8 @@ future<> tombstone_purge(test_env& env) {
|
||||
auto mut1 = make_insert(alpha);
|
||||
auto mut2 = make_expiring(alpha, ttl);
|
||||
|
||||
auto sst1 = make_sstable_containing(sst_gen, {mut1});
|
||||
auto sst2 = make_sstable_containing(sst_gen, {mut2});
|
||||
auto sst1 = make_sstable_containing(sst_gen, {mut1}).get();
|
||||
auto sst2 = make_sstable_containing(sst_gen, {mut2}).get();
|
||||
|
||||
forward_jump_clocks(std::chrono::seconds(ttl));
|
||||
|
||||
@@ -1627,8 +1628,8 @@ future<> tombstone_purge(test_env& env) {
|
||||
auto mut1 = make_delete(alpha);
|
||||
auto mut2 = make_expiring(alpha, ttl);
|
||||
|
||||
auto sst1 = make_sstable_containing(sst_gen, {mut1});
|
||||
auto sst2 = make_sstable_containing(sst_gen, {mut2});
|
||||
auto sst1 = make_sstable_containing(sst_gen, {mut1}).get();
|
||||
auto sst2 = make_sstable_containing(sst_gen, {mut2}).get();
|
||||
|
||||
forward_jump_clocks(std::chrono::seconds(ttl));
|
||||
|
||||
@@ -1731,9 +1732,9 @@ future<> mv_tombstone_purge(test_env& env) {
|
||||
auto mut4 = make_delete_row(alpha, 2, gc_clock::now(), api::timestamp_type(2));
|
||||
auto mut5 = make_insert(alpha, 3, 1, api::timestamp_type(1), api::timestamp_type(3));
|
||||
|
||||
auto sst1 = make_sstable_containing(sst_gen, {mut1});
|
||||
auto sst2 = make_sstable_containing(sst_gen, {mut2, mut3});
|
||||
auto sst3 = make_sstable_containing(sst_gen, {mut4, mut5});
|
||||
auto sst1 = make_sstable_containing(sst_gen, {mut1}).get();
|
||||
auto sst2 = make_sstable_containing(sst_gen, {mut2, mut3}).get();
|
||||
auto sst3 = make_sstable_containing(sst_gen, {mut4, mut5}).get();
|
||||
|
||||
forward_jump_clocks(std::chrono::seconds(1));
|
||||
|
||||
@@ -1774,7 +1775,7 @@ future<> sstable_rewrite(test_env& env) {
|
||||
mutation mut(s, key_for_this_shard[0]);
|
||||
mut.set_clustered_cell(c_key, r1_col, make_atomic_cell(utf8_type, bytes("a")));
|
||||
|
||||
auto sstp = make_sstable_containing(sst_gen, {std::move(mut)});
|
||||
auto sstp = make_sstable_containing(sst_gen, {std::move(mut)}).get();
|
||||
auto key = key_for_this_shard[0];
|
||||
std::vector<sstables::shared_sstable> new_tables;
|
||||
auto creator = [&] {
|
||||
@@ -1844,7 +1845,7 @@ future<> sstable_max_local_deletion_time_2(test_env& env) {
|
||||
add_row(m, to_bytes("deletecolumn" + to_sstring(i)), 100);
|
||||
}
|
||||
add_row(m, to_bytes("todelete"), 1000);
|
||||
auto sst1 = make_sstable_containing(sst_gen, mt);
|
||||
auto sst1 = make_sstable_containing(sst_gen, mt).get();
|
||||
BOOST_REQUIRE(last_expiry == sst1->get_stats_metadata().max_local_deletion_time);
|
||||
|
||||
mt = make_lw_shared<replica::memtable>(s);
|
||||
@@ -1852,7 +1853,7 @@ future<> sstable_max_local_deletion_time_2(test_env& env) {
|
||||
tombstone tomb(api::new_timestamp(), now);
|
||||
m.partition().apply_delete(*s, clustering_key::from_exploded(*s, {to_bytes("todelete")}), tomb);
|
||||
mt->apply(std::move(m));
|
||||
auto sst2 = make_sstable_containing(sst_gen, mt);
|
||||
auto sst2 = make_sstable_containing(sst_gen, mt).get();
|
||||
BOOST_REQUIRE(now.time_since_epoch().count() == sst2->get_stats_metadata().max_local_deletion_time);
|
||||
|
||||
auto creator = sst_gen;
|
||||
@@ -1955,7 +1956,7 @@ void compaction_with_fully_expired_table_fn(test_env& env) {
|
||||
mutation m(s, key);
|
||||
tombstone tomb(api::new_timestamp(), gc_clock::now() - std::chrono::seconds(3600));
|
||||
m.partition().apply_delete(*s, c_key, tomb);
|
||||
auto sst = make_sstable_containing(sst_gen, {std::move(m)});
|
||||
auto sst = make_sstable_containing(sst_gen, {std::move(m)}).get();
|
||||
|
||||
auto cf = env.make_table_for_tests(s);
|
||||
auto close_cf = deferred_stop(cf);
|
||||
@@ -2084,14 +2085,14 @@ void time_window_strategy_correctness_fn(test_env& env) {
|
||||
for (api::timestamp_type t = 0; t < 3; t++) {
|
||||
auto key = partition_key::from_exploded(*s, {to_bytes("key" + to_sstring(t))});
|
||||
auto mut = make_insert(std::move(key), t);
|
||||
sstables.push_back(make_sstable_containing(env.make_sstable(s), {std::move(mut)}));
|
||||
sstables.push_back(make_sstable_containing(env.make_sstable(s), {std::move(mut)}).get());
|
||||
}
|
||||
// Decrement the timestamp to simulate a timestamp in the past hour
|
||||
for (api::timestamp_type t = 3; t < 5; t++) {
|
||||
// And add progressively more cells into each sstable
|
||||
auto key = partition_key::from_exploded(*s, {to_bytes("key" + to_sstring(t))});
|
||||
auto mut = make_insert(std::move(key), t);
|
||||
sstables.push_back(make_sstable_containing(env.make_sstable(s), {std::move(mut)}));
|
||||
sstables.push_back(make_sstable_containing(env.make_sstable(s), {std::move(mut)}).get());
|
||||
}
|
||||
|
||||
std::map<sstring, sstring> options;
|
||||
@@ -2139,7 +2140,7 @@ void time_window_strategy_correctness_fn(test_env& env) {
|
||||
for (int i = 0 ; i < r ; i++) {
|
||||
mutations.push_back(make_insert(key, tstamp + r));
|
||||
}
|
||||
sstables.push_back(make_sstable_containing(env.make_sstable(s), std::move(mutations)));
|
||||
sstables.push_back(make_sstable_containing(env.make_sstable(s), std::move(mutations)).get());
|
||||
}
|
||||
|
||||
// Reset the buckets, overfill it now
|
||||
@@ -2198,7 +2199,7 @@ void time_window_strategy_size_tiered_behavior_correctness_fn(test_env& env) {
|
||||
auto add_new_sstable_to_bucket = [&] (api::timestamp_type ts, api::timestamp_type window_ts) {
|
||||
auto key = partition_key::from_exploded(*s, {to_bytes("key" + to_sstring(ts))});
|
||||
auto mut = make_insert(std::move(key), ts);
|
||||
auto sst = make_sstable_containing(sst_gen, {std::move(mut)});
|
||||
auto sst = make_sstable_containing(sst_gen, {std::move(mut)}).get();
|
||||
auto bound = compaction::time_window_compaction_strategy::get_window_lower_bound(window_size, window_ts);
|
||||
buckets[bound].push_back(std::move(sst));
|
||||
};
|
||||
@@ -2303,7 +2304,7 @@ future<> min_max_clustering_key_2(test_env& env) {
|
||||
}
|
||||
mt->apply(std::move(m));
|
||||
}
|
||||
auto sst = make_sstable_containing(sst_gen, mt);
|
||||
auto sst = make_sstable_containing(sst_gen, mt).get();
|
||||
check_min_max_column_names(sst, {"0ck100"}, {"7ck149"});
|
||||
|
||||
mt = make_lw_shared<replica::memtable>(s);
|
||||
@@ -2314,7 +2315,7 @@ future<> min_max_clustering_key_2(test_env& env) {
|
||||
m.set_clustered_cell(c_key, r1_col, make_atomic_cell(int32_type, int32_type->decompose(1)));
|
||||
}
|
||||
mt->apply(std::move(m));
|
||||
auto sst2 = make_sstable_containing(sst_gen, mt);
|
||||
auto sst2 = make_sstable_containing(sst_gen, mt).get();
|
||||
check_min_max_column_names(sst2, {"9ck101"}, {"9ck298"});
|
||||
|
||||
auto creator = sst_gen;
|
||||
@@ -2416,7 +2417,7 @@ void sstable_expired_data_ratio(test_env& env) {
|
||||
for (auto i = 0; i < remaining; i++) {
|
||||
insert_key(to_bytes("key" + to_sstring(i)), 3600, expiration_time);
|
||||
}
|
||||
auto sst = make_sstable_containing(sst_gen, mt);
|
||||
auto sst = make_sstable_containing(sst_gen, mt).get();
|
||||
const auto& stats = sst->get_stats_metadata();
|
||||
BOOST_REQUIRE(stats.estimated_tombstone_drop_time.bin.size() == sstables::TOMBSTONE_HISTOGRAM_BIN_SIZE);
|
||||
auto uncompacted_size = sst->data_size();
|
||||
@@ -2542,8 +2543,8 @@ void compaction_correctness_with_partitioned_sstable_set_fn(test_env& env) {
|
||||
|
||||
{
|
||||
std::vector<shared_sstable> sstables = {
|
||||
make_sstable_containing(sst_gen, {mut1, mut2}),
|
||||
make_sstable_containing(sst_gen, {mut3, mut4})
|
||||
make_sstable_containing(sst_gen, {mut1, mut2}).get(),
|
||||
make_sstable_containing(sst_gen, {mut3, mut4}).get()
|
||||
};
|
||||
|
||||
auto result = compact(std::move(sstables));
|
||||
@@ -2568,9 +2569,9 @@ void compaction_correctness_with_partitioned_sstable_set_fn(test_env& env) {
// [mut1, mut2]
// (mut2, mut3]
std::vector<shared_sstable> sstables = {
make_sstable_containing(sst_gen, {mut1, mut2}),
make_sstable_containing(sst_gen, {mut2, mut3}),
make_sstable_containing(sst_gen, {mut3, mut4})
make_sstable_containing(sst_gen, {mut1, mut2}).get(),
make_sstable_containing(sst_gen, {mut2, mut3}).get(),
make_sstable_containing(sst_gen, {mut3, mut4}).get()
};

auto result = compact(std::move(sstables));
@@ -2593,8 +2594,8 @@ void compaction_correctness_with_partitioned_sstable_set_fn(test_env& env) {
{
// with gap between tables
std::vector<shared_sstable> sstables = {
make_sstable_containing(sst_gen, {mut1, mut2}),
make_sstable_containing(sst_gen, {mut4, mut4})
make_sstable_containing(sst_gen, {mut1, mut2}).get(),
make_sstable_containing(sst_gen, {mut4, mut4}).get()
};

auto result = compact(std::move(sstables));
@@ -2649,7 +2650,7 @@ void sstable_cleanup_correctness_fn(cql_test_env& cql_env, test_env& env) {
for (auto i = 0U; i < total_partitions; i++) {
mutations.push_back(make_insert(local_keys.at(i)));
}
auto sst = make_sstable_containing(sst_gen, mutations);
auto sst = make_sstable_containing(sst_gen, mutations).get();
auto run_identifier = sst->run_identifier();

auto cf = env.make_table_for_tests(s);
@@ -3944,7 +3945,7 @@ void scrubbed_sstable_removal_fn(test_env& env) {

auto mut1 = mutation(s, pk);
mut1.partition().apply_insert(*s, ss.make_ckey(0), ss.new_timestamp());
auto sst = make_sstable_containing(env.make_sstable(s), {std::move(mut1)});
auto sst = make_sstable_containing(env.make_sstable(s), {std::move(mut1)}).get();

auto cf = env.make_table_for_tests(s);
auto close_cf = deferred_stop(cf);
@@ -4002,7 +4003,7 @@ void compact_uncompressed_sstable_during_scrub_validate_fn(test_env& env) {
for (int i = 0; i < 2; i++) {
auto mut = mutation(s, tests::generate_partition_key(s));
mut.partition().apply_insert(*s, tests::generate_clustering_key(s), timestamp++);
auto sst = make_sstable_containing(env.make_sstable(s), {std::move(mut)});
auto sst = make_sstable_containing(env.make_sstable(s), {std::move(mut)}).get();
cf->add_sstable_and_update_cache(std::move(sst)).get();
}

@@ -4158,7 +4159,7 @@ void sstable_run_based_compaction_fn(test_env& env) {
// Generate 4 sstable runs composed of 4 fragments each after 4 compactions.
// All fragments non-overlapping.
for (auto i = 0U; i < keys.size(); i++) {
auto sst = make_sstable_containing(sst_gen, { make_insert(keys[i]) });
auto sst = make_sstable_containing(sst_gen, { make_insert(keys[i]) }).get();
sst->set_sstable_level(1);
BOOST_REQUIRE_EQUAL(sst->get_sstable_level(), 1);
column_family_test(cf).add_sstable(sst).get();
@@ -4203,9 +4204,9 @@ void compaction_strategy_aware_major_compaction_fn(test_env& env) {
};

auto alpha = partition_key::from_exploded(*s, {to_bytes("alpha")});
auto sst = make_sstable_containing(env.make_sstable(s), {make_insert(alpha)});
auto sst = make_sstable_containing(env.make_sstable(s), {make_insert(alpha)}).get();
sst->set_sstable_level(2);
auto sst2 = make_sstable_containing(env.make_sstable(s), {make_insert(alpha)});
auto sst2 = make_sstable_containing(env.make_sstable(s), {make_insert(alpha)}).get();
sst2->set_sstable_level(3);
auto candidates = std::vector<sstables::shared_sstable>({ sst, sst2 });

@@ -4265,8 +4266,8 @@ void backlog_tracker_correctness_after_changing_compaction_strategy_fn(test_env&
auto mut3 = make_insert(keys[2]);
auto mut4 = make_insert(keys[3]);
std::vector<shared_sstable> ssts = {
make_sstable_containing(sst_gen, {mut1, mut2}),
make_sstable_containing(sst_gen, {mut3, mut4})
make_sstable_containing(sst_gen, {mut1, mut2}).get(),
make_sstable_containing(sst_gen, {mut3, mut4}).get()
};

for (auto& sst : ssts) {
@@ -4477,8 +4478,8 @@ void purged_tombstone_consumer_sstable_fn(test_env& env) {
auto [mut3, mut3_tombstone] = make_delete(alpha);

std::vector<shared_sstable> sstables = {
make_sstable_containing(env.make_sstable(s), {mut1, mut2}),
make_sstable_containing(env.make_sstable(s), {mut3})
make_sstable_containing(env.make_sstable(s), {mut1, mut2}).get(),
make_sstable_containing(env.make_sstable(s), {mut3}).get()
};

forward_jump_clocks(std::chrono::seconds(ttl));
@@ -4567,9 +4568,9 @@ void incremental_compaction_data_resurrection_fn(test_env& env) {
auto mut4 = make_insert(zetta);
auto mut1_deletion = make_delete(alpha);

auto non_expired_sst = make_sstable_containing(sst_gen, {mut1, mut2, mut3});
auto non_expired_sst_2 = make_sstable_containing(sst_gen, {mut4});
auto expired_sst = make_sstable_containing(sst_gen, {mut1_deletion});
auto non_expired_sst = make_sstable_containing(sst_gen, {mut1, mut2, mut3}).get();
auto non_expired_sst_2 = make_sstable_containing(sst_gen, {mut4}).get();
auto expired_sst = make_sstable_containing(sst_gen, {mut1_deletion}).get();

std::vector<shared_sstable> sstables = {
non_expired_sst,
@@ -4697,12 +4698,12 @@ void twcs_major_compaction_fn(test_env& env) {
cf->start();
cf->set_compaction_strategy(compaction::compaction_strategy_type::time_window);

auto original_together = make_sstable_containing(sst_gen, {mut3, mut4});
auto original_together = make_sstable_containing(sst_gen, {mut3, mut4}).get();

auto ret = compact_sstables(env, compaction::compaction_descriptor({original_together}), cf, sst_gen, replacer_fn_no_op()).get();
BOOST_REQUIRE(ret.new_sstables.size() == 1);

auto original_apart = make_sstable_containing(sst_gen, {mut1, mut2});
auto original_apart = make_sstable_containing(sst_gen, {mut1, mut2}).get();
ret = compact_sstables(env, compaction::compaction_descriptor({original_apart}), cf, sst_gen, replacer_fn_no_op()).get();
BOOST_REQUIRE(ret.new_sstables.size() == 2);
}
@@ -4749,7 +4750,7 @@ void autocompaction_control_fn(test_env& env) {
const auto keys = tests::generate_partition_keys(1, s);
for (auto i = 0; i < 2 * min_threshold; ++i) {
auto mut = make_insert(keys[0]);
auto sst = make_sstable_containing(env.make_sstable(s), {mut});
auto sst = make_sstable_containing(env.make_sstable(s), {mut}).get();
cf->add_sstable_and_update_cache(sst).wait();
}

@@ -4840,8 +4841,8 @@ void test_bug_6472_fn(test_env& env) {
// Reproduce issue 6472 by making an input set which causes both interposer and GC writer to be enabled
//
std::vector<shared_sstable> sstables_spanning_many_windows = {
make_sstable_containing(sst_gen, muts),
make_sstable_containing(sst_gen, muts),
make_sstable_containing(sst_gen, muts).get(),
make_sstable_containing(sst_gen, muts).get(),
};
sstables::run_id run_id = sstables::run_id::create_random_id();
for (auto& sst : sstables_spanning_many_windows) {
@@ -4939,7 +4940,7 @@ void test_twcs_partition_estimate_fn(test_env& env) {

auto keys = tests::generate_partition_keys(4, s);

auto make_sstable = [&] (int sstable_idx) {
auto make_sstable = [&] (int sstable_idx) -> future<shared_sstable> {
static thread_local int32_t value = 1;

auto key = keys[sstable_idx];
@@ -4949,7 +4950,7 @@ void test_twcs_partition_estimate_fn(test_env& env) {
auto c_key = clustering_key::from_exploded(*s, {int32_type->decompose(value++)});
m.set_clustered_cell(c_key, bytes("value"), data_value(int32_t(value)), next_timestamp(sstable_idx, ck));
}
return make_sstable_containing(sst_gen, {m});
co_return co_await make_sstable_containing(sst_gen, {m});
};

auto cf = env.make_table_for_tests(s);
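In the hunk above, the make_sstable helper lambda becomes a coroutine just by declaring a future<shared_sstable> return type and replacing return with co_return co_await on the now-asynchronous factory. A sketch of the same idiom with illustrative names (async_source() is hypothetical, not part of the test code):

#include <seastar/core/coroutine.hh>
#include <seastar/core/future.hh>

seastar::future<int> async_source();  // hypothetical future-returning helper

auto make_adder() {
    // The declared return type turns the lambda into a coroutine, so its
    // body may co_await other futures and co_return the final value.
    return [](int i) -> seastar::future<int> {
        co_return (co_await async_source()) + i;
    };
}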
@@ -4978,12 +4979,10 @@ void test_twcs_partition_estimate_fn(test_env& env) {
estimation_test(s, compaction::time_window_compaction_strategy::max_data_segregation_window_count);
}

std::vector<shared_sstable> sstables_spanning_many_windows = {
make_sstable(0),
make_sstable(1),
make_sstable(2),
make_sstable(3),
};
std::vector<shared_sstable> sstables_spanning_many_windows(4);
parallel_for_each(std::views::iota(0, 4), [&](int i) -> future<> {
sstables_spanning_many_windows[i] = co_await make_sstable(i);
}).get();

auto ret = compact_sstables(env, compaction::compaction_descriptor(sstables_spanning_many_windows), cf, sst_gen, replacer_fn_no_op()).get();
// The real test here is that we don't SCYLLA_ASSERT() in
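This hunk introduces the conversion used repeatedly below: sequential initializer-list construction becomes concurrent creation, with a pre-sized vector and one coroutine per slot launched by parallel_for_each, so each task writes a distinct element and no synchronization is needed. A self-contained sketch of the idiom, with illustrative work in place of the sstable factories:

#include <chrono>
#include <ranges>
#include <vector>
#include <seastar/core/coroutine.hh>
#include <seastar/core/loop.hh>
#include <seastar/core/sleep.hh>

void fill_concurrently() {
    // Assumes a seastar thread context, as in the tests, so .get() may wait.
    std::vector<int> results(4);
    seastar::parallel_for_each(std::views::iota(0, 4), [&](int i) -> seastar::future<> {
        co_await seastar::sleep(std::chrono::milliseconds(1));  // stand-in for real async work
        results[i] = i * i;  // each coroutine owns exactly one slot
    }).get();
}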
@@ -5211,7 +5210,7 @@ void test_twcs_compaction_across_buckets_fn(test_env& env) {
sstables_spanning_many_windows.reserve(windows + 1);

for (unsigned w = 0; w < windows; w++) {
sstables_spanning_many_windows.push_back(make_sstable_containing(sst_gen, {make_row(std::chrono::hours((w + 1) * 2))}));
sstables_spanning_many_windows.push_back(make_sstable_containing(sst_gen, {make_row(std::chrono::hours((w + 1) * 2))}).get());
}
auto deletion_mut = [&] () {
mutation m(s, pkey);
@@ -5219,7 +5218,7 @@ void test_twcs_compaction_across_buckets_fn(test_env& env) {
m.partition().apply(tomb);
return m;
}();
sstables_spanning_many_windows.push_back(make_sstable_containing(sst_gen, {deletion_mut}));
sstables_spanning_many_windows.push_back(make_sstable_containing(sst_gen, {deletion_mut}).get());

auto ret = compact_sstables(env, compaction::compaction_descriptor(std::move(sstables_spanning_many_windows)), cf, sst_gen, replacer_fn_no_op(), can_purge_tombstones::no).get();

@@ -5263,8 +5262,12 @@ void test_offstrategy_sstable_compaction_fn(test_env& env) {

cf->start();

for (auto i = 0; i < cf->schema()->max_compaction_threshold(); i++) {
auto sst = make_sstable_containing(sst_gen, {mut});
auto threshold = cf->schema()->max_compaction_threshold();
std::vector<sstables::shared_sstable> ssts(threshold);
parallel_for_each(std::views::iota(0, threshold), [&](int i) -> future<> {
ssts[i] = co_await make_sstable_containing(sst_gen, {mut});
}).get();
for (auto& sst : ssts) {
cf->add_sstable_and_update_cache(std::move(sst), sstables::offstrategy::yes).get();
}
BOOST_REQUIRE(cf->perform_offstrategy_compaction(tasks::task_info{}).get());
@@ -5285,8 +5288,7 @@ SEASTAR_FIXTURE_TEST_CASE(test_offstrategy_sstable_compaction_gcs, gcs_fixture,
test_env_config{.storage = make_test_object_storage_options("GS")});
}

void twcs_reshape_with_disjoint_set_fn(test_env& env) {
static constexpr unsigned disjoint_sstable_count = 256;
void twcs_reshape_with_disjoint_set_fn(test_env& env, unsigned disjoint_sstable_count = 256) {
auto builder = schema_builder("tests", "twcs_reshape_test")
.with_column("id", utf8_type, column_kind::partition_key)
.with_column("cl", ::timestamp_type, column_kind::clustering_key)
@@ -5343,27 +5345,23 @@ void twcs_reshape_with_disjoint_set_fn(test_env& env) {
auto sst_gen = env.make_sst_factory(s);

{
// create set of 256 disjoint ssts that belong to the same time window and expect that twcs reshape allows them all to be compacted at once
// create set of disjoint ssts that belong to the same time window and expect that twcs reshape allows them all to be compacted at once

std::vector<sstables::shared_sstable> sstables;
sstables.reserve(disjoint_sstable_count);
for (unsigned i = 0; i < disjoint_sstable_count; i++) {
auto sst = make_sstable_containing(sst_gen, {make_row(i, std::chrono::hours(1))});
sstables.push_back(std::move(sst));
}
std::vector<sstables::shared_sstable> sstables(disjoint_sstable_count);
parallel_for_each(std::views::iota(0u, disjoint_sstable_count), [&](unsigned i) -> future<> {
sstables[i] = co_await make_sstable_containing(sst_gen, {make_row(i, std::chrono::hours(1))});
}).get();

BOOST_REQUIRE_EQUAL(get_reshaping_job(cs, sstables, s, compaction::reshape_mode::strict).sstables.size(), disjoint_sstable_count);
}

{
// create set of 256 disjoint ssts that belong to different windows and expect that twcs reshape allows them all to be compacted at once
// create set of disjoint ssts that belong to different windows and expect that twcs reshape allows them all to be compacted at once

std::vector<sstables::shared_sstable> sstables;
sstables.reserve(disjoint_sstable_count);
for (unsigned i = 0; i < disjoint_sstable_count; i++) {
auto sst = make_sstable_containing(sst_gen, {make_row(i, std::chrono::hours(i))});
sstables.push_back(std::move(sst));
}
std::vector<sstables::shared_sstable> sstables(disjoint_sstable_count);
parallel_for_each(std::views::iota(0u, disjoint_sstable_count), [&](unsigned i) -> future<> {
sstables[i] = co_await make_sstable_containing(sst_gen, {make_row(i, std::chrono::hours(i))});
}).get();

auto reshaping_count = get_reshaping_job(cs, sstables, s, compaction::reshape_mode::strict).sstables.size();
BOOST_REQUIRE_GE(reshaping_count, disjoint_sstable_count - min_threshold + 1);
@@ -5371,30 +5369,25 @@ void twcs_reshape_with_disjoint_set_fn(test_env& env) {
}

{
// create set of 256 disjoint ssts that belong to different windows with none over the threshold and expect that twcs reshape selects none of them
// create set of disjoint ssts that belong to different windows with none over the threshold and expect that twcs reshape selects none of them

std::vector<sstables::shared_sstable> sstables;
sstables.reserve(disjoint_sstable_count);
for (unsigned i = 0; i < disjoint_sstable_count; i++) {
auto sst = make_sstable_containing(sst_gen, {make_row(i, std::chrono::hours(24*i))});
sstables.push_back(std::move(sst));
i++;
sst = make_sstable_containing(sst_gen, {make_row(i, std::chrono::hours(24*i + 1))});
sstables.push_back(std::move(sst));
}
std::vector<sstables::shared_sstable> sstables(disjoint_sstable_count);
parallel_for_each(std::views::iota(0u, disjoint_sstable_count / 2), [&](unsigned pair_idx) -> future<> {
unsigned i = pair_idx * 2;
sstables[i] = co_await make_sstable_containing(sst_gen, {make_row(i, std::chrono::hours(24*i))});
sstables[i+1] = co_await make_sstable_containing(sst_gen, {make_row(i+1, std::chrono::hours(24*(i+1) + 1))});
}).get();

BOOST_REQUIRE_EQUAL(get_reshaping_job(cs, sstables, s, compaction::reshape_mode::strict).sstables.size(), 0);
}

{
// create set of 256 overlapping ssts that belong to the same time window and expect that twcs reshape allows only 32 to be compacted at once
// create set of overlapping ssts that belong to the same time window and expect that twcs reshape allows only 32 to be compacted at once

std::vector<sstables::shared_sstable> sstables;
sstables.reserve(disjoint_sstable_count);
for (unsigned i = 0; i < disjoint_sstable_count; i++) {
auto sst = make_sstable_containing(sst_gen, {make_row(0, std::chrono::hours(1))});
sstables.push_back(std::move(sst));
}
std::vector<sstables::shared_sstable> sstables(disjoint_sstable_count);
parallel_for_each(std::views::iota(0u, disjoint_sstable_count), [&](unsigned i) -> future<> {
sstables[i] = co_await make_sstable_containing(sst_gen, {make_row(0, std::chrono::hours(1))});
}).get();

BOOST_REQUIRE_EQUAL(get_reshaping_job(cs, sstables, s, compaction::reshape_mode::strict).sstables.size(), uint64_t(s->max_compaction_threshold()));
}
@@ -5413,21 +5406,21 @@ void twcs_reshape_with_disjoint_set_fn(test_env& env) {

std::unordered_set<sstables::generation_type> generations_for_small_files;

std::vector<sstables::shared_sstable> sstables;
sstables.reserve(64);
std::vector<sstables::shared_sstable> sstables(64);

// Track which indices are small files
parallel_for_each(std::views::iota(0u, 64u), [&](unsigned i) -> future<> {
if (i % 2 == 0) {
sstables[i] = co_await make_sstable_containing(sst_gen, mutations_for_small_files);
} else {
sstables[i] = co_await make_sstable_containing(sst_gen, mutations_for_big_files);
}
}).get();

for (unsigned i = 0; i < 64; i++) {
sstables::shared_sstable sst;
//
// intermix big and small files, to make sure STCS logic is really applied to favor similar-sized reshape jobs.
//
if (i % 2 == 0) {
sst = make_sstable_containing(sst_gen, mutations_for_small_files);
generations_for_small_files.insert(sst->generation());
} else {
sst = make_sstable_containing(sst_gen, mutations_for_big_files);
generations_for_small_files.insert(sstables[i]->generation());
}
sstables.push_back(std::move(sst));
}

auto check_mode_correctness = [&] (compaction::reshape_mode mode) {
@@ -5445,19 +5438,17 @@ void twcs_reshape_with_disjoint_set_fn(test_env& env) {
}

{
// create set of 256 disjoint ssts that spans multiple windows (essentially what happens in off-strategy during node op)
// create set of disjoint ssts that spans multiple windows (essentially what happens in off-strategy during node op)

std::vector<sstables::shared_sstable> sstables;
sstables.reserve(disjoint_sstable_count);
for (auto i = 0U; i < disjoint_sstable_count; i++) {
std::vector<sstables::shared_sstable> sstables(disjoint_sstable_count);
parallel_for_each(std::views::iota(0u, disjoint_sstable_count), [&](unsigned i) -> future<> {
utils::chunked_vector<mutation> muts;
muts.reserve(5);
for (auto j = 0; j < 5; j++) {
muts.push_back(make_row(i, std::chrono::hours(j * 8)));
}
auto sst = make_sstable_containing(sst_gen, std::move(muts));
sstables.push_back(std::move(sst));
}
sstables[i] = co_await make_sstable_containing(sst_gen, std::move(muts));
}).get();

auto job_size = [] (auto&& sst_range) {
return std::ranges::fold_left(sst_range | std::views::transform(std::mem_fn(&sstable::bytes_on_disk)), uint64_t(0), std::plus{});
@@ -5489,23 +5480,16 @@ SEASTAR_TEST_CASE(twcs_reshape_with_disjoint_set_test) {
}

SEASTAR_TEST_CASE(twcs_reshape_with_disjoint_set_s3_test, *boost::unit_test::precondition(tests::has_scylla_test_env)) {
// TODO: Deeper investigation needed to figure out why it takes 4+ minutes to run on S3 storage, while it runs in seconds on local storage. For now,
// skipping the test for S3.
testlog.info("cleanup_during_offstrategy_incremental_compaction_test_s3 is not supported for S3 storage yet, skipping test");
return make_ready_future();
#if 0
return test_env::do_with_async([](test_env& env) { twcs_reshape_with_disjoint_set_fn(env); },
return test_env::do_with_async([](test_env& env) { twcs_reshape_with_disjoint_set_fn(env, 64); },
test_env_config{.storage = make_test_object_storage_options("S3")});
#endif
}

SEASTAR_FIXTURE_TEST_CASE(twcs_reshape_with_disjoint_set_gcs_test, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) {
return test_env::do_with_async([](test_env& env) { twcs_reshape_with_disjoint_set_fn(env); },
return test_env::do_with_async([](test_env& env) { twcs_reshape_with_disjoint_set_fn(env, 64); },
test_env_config{.storage = make_test_object_storage_options("GS")});
}

void stcs_reshape_overlapping_fn(test_env& env) {
static constexpr unsigned disjoint_sstable_count = 256;
void stcs_reshape_overlapping_fn(test_env& env, unsigned disjoint_sstable_count = 256) {
auto builder = schema_builder("tests", "stcs_reshape_test")
.with_column("id", utf8_type, column_kind::partition_key)
.with_column("cl", ::timestamp_type, column_kind::clustering_key)
@@ -5531,27 +5515,23 @@ void stcs_reshape_overlapping_fn(test_env& env) {
auto sst_gen = env.make_sst_factory(s);

{
// create set of 256 disjoint ssts and expect that stcs reshape allows them all to be compacted at once
// create set of disjoint ssts and expect that stcs reshape allows them all to be compacted at once

std::vector<sstables::shared_sstable> sstables;
sstables.reserve(disjoint_sstable_count);
for (unsigned i = 0; i < disjoint_sstable_count; i++) {
auto sst = make_sstable_containing(sst_gen, {make_row(i)});
sstables.push_back(std::move(sst));
}
std::vector<sstables::shared_sstable> sstables(disjoint_sstable_count);
parallel_for_each(std::views::iota(0u, disjoint_sstable_count), [&](unsigned i) -> future<> {
sstables[i] = co_await make_sstable_containing(sst_gen, {make_row(i)});
}).get();

BOOST_REQUIRE(get_reshaping_job(cs, sstables, s, compaction::reshape_mode::strict).sstables.size() == disjoint_sstable_count);
}

{
// create set of 256 overlapping ssts and expect that stcs reshape allows only 32 to be compacted at once
// create set of overlapping ssts and expect that stcs reshape allows only 32 to be compacted at once

std::vector<sstables::shared_sstable> sstables;
sstables.reserve(disjoint_sstable_count);
for (unsigned i = 0; i < disjoint_sstable_count; i++) {
auto sst = make_sstable_containing(sst_gen, {make_row(0)});
sstables.push_back(std::move(sst));
}
std::vector<sstables::shared_sstable> sstables(disjoint_sstable_count);
parallel_for_each(std::views::iota(0u, disjoint_sstable_count), [&](unsigned i) -> future<> {
sstables[i] = co_await make_sstable_containing(sst_gen, {make_row(0)});
}).get();

BOOST_REQUIRE(get_reshaping_job(cs, sstables, s, compaction::reshape_mode::strict).sstables.size() == uint64_t(s->max_compaction_threshold()));
}
@@ -5562,11 +5542,11 @@ SEASTAR_TEST_CASE(stcs_reshape_overlapping_test) {
}

SEASTAR_TEST_CASE(stcs_reshape_overlapping_s3_test, *boost::unit_test::precondition(tests::has_scylla_test_env)) {
return test_env::do_with_async([](test_env& env) { stcs_reshape_overlapping_fn(env); }, test_env_config{.storage = make_test_object_storage_options("S3")});
return test_env::do_with_async([](test_env& env) { stcs_reshape_overlapping_fn(env, 64); }, test_env_config{.storage = make_test_object_storage_options("S3")});
}

SEASTAR_FIXTURE_TEST_CASE(stcs_reshape_overlapping_gcs_test, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) {
return test_env::do_with_async([](test_env& env) { stcs_reshape_overlapping_fn(env); }, test_env_config{.storage = make_test_object_storage_options("GS")});
return test_env::do_with_async([](test_env& env) { stcs_reshape_overlapping_fn(env, 64); }, test_env_config{.storage = make_test_object_storage_options("GS")});
}

// Regression test for #8432
@@ -5587,8 +5567,8 @@ void test_twcs_single_key_reader_filtering_fn(test_env& env) {
return m;
};

auto sst1 = make_sstable_containing(sst_gen, {make_row(0, 0)});
auto sst2 = make_sstable_containing(sst_gen, {make_row(0, 1)});
auto sst1 = make_sstable_containing(sst_gen, {make_row(0, 0)}).get();
auto sst2 = make_sstable_containing(sst_gen, {make_row(0, 1)}).get();
auto dkey = sst1->get_first_decorated_key();

auto cf = env.make_table_for_tests(s);
@@ -5708,7 +5688,7 @@ void max_ongoing_compaction_fn(test_env& env) {
auto s = schemas[idx];
auto cf = tables[idx];
auto muts = { make_expiring_cell(s, std::chrono::hours(1)) };
auto sst = make_sstable_containing([&sst_gen, idx] { return sst_gen(idx); }, muts);
auto sst = make_sstable_containing([&sst_gen, idx] { return sst_gen(idx); }, muts).get();
column_family_test(cf).add_sstable(sst).get();
};

@@ -5758,7 +5738,7 @@ void max_ongoing_compaction_fn(test_env& env) {
auto cft = column_family_test(cf);
for (size_t i = 0; i < num_sstables; i++) {
auto muts = { make_expiring_cell(s, std::chrono::hours(1)) };
cft.add_sstable(make_sstable_containing([&sst_gen, idx] { return sst_gen(idx); }, muts)).get();
cft.add_sstable(make_sstable_containing([&sst_gen, idx] { return sst_gen(idx); }, muts).get()).get();
}
};

@@ -5947,8 +5927,8 @@ void twcs_single_key_reader_through_compound_set_fn(test_env& env) {
auto sst_gen = env.make_sst_factory(s);

// sstables with same key but belonging to different windows
auto sst1 = make_sstable_containing(sst_gen, {make_row(std::chrono::hours(1))});
auto sst2 = make_sstable_containing(sst_gen, {make_row(std::chrono::hours(5))});
auto sst1 = make_sstable_containing(sst_gen, {make_row(std::chrono::hours(1))}).get();
auto sst2 = make_sstable_containing(sst_gen, {make_row(std::chrono::hours(5))}).get();
BOOST_REQUIRE(sst1->get_first_decorated_key().token() == sst2->get_last_decorated_key().token());
auto dkey = sst1->get_first_decorated_key();

@@ -6064,7 +6044,7 @@ void test_major_does_not_miss_data_in_memtable_fn(test_env& env) {
m.set_clustered_cell(c_key, bytes("value"), data_value(int32_t(value)), gc_clock::now().time_since_epoch().count());
return m;
}();
auto sst = make_sstable_containing(sst_gen, {std::move(row_mut)});
auto sst = make_sstable_containing(sst_gen, {std::move(row_mut)}).get();
cf->add_sstable_and_update_cache(sst).get();
assert_table_sstable_count(cf, 1);

@@ -6267,10 +6247,9 @@ SEASTAR_FIXTURE_TEST_CASE(simple_backlog_controller_test_incremental_gcs, gcs_fi
return run_controller_test(compaction::compaction_strategy_type::incremental, test_env_config{.storage = make_test_object_storage_options("GS")});
}

void test_compaction_strategy_cleanup_method_fn(test_env& env) {
constexpr size_t all_files = 64;
void test_compaction_strategy_cleanup_method_fn(test_env& env, size_t all_files = 64) {

auto get_cleanup_jobs = [&env] (compaction::compaction_strategy_type compaction_strategy_type,
auto get_cleanup_jobs = [&env, all_files] (compaction::compaction_strategy_type compaction_strategy_type,
std::map<sstring, sstring> strategy_options = {},
const api::timestamp_clock::duration step_base = 0ms,
unsigned sstable_level = 0) {
@@ -6301,14 +6280,13 @@ void test_compaction_strategy_cleanup_method_fn(test_env& env) {
return m;
};

std::vector<sstables::shared_sstable> candidates;
candidates.reserve(all_files);
for (size_t i = 0; i < all_files; i++) {
std::vector<sstables::shared_sstable> candidates(all_files);
parallel_for_each(std::views::iota(size_t(0), all_files), [&](size_t i) -> future<> {
auto current_step = duration_cast<microseconds>(step_base) * i;
auto sst = make_sstable_containing(sst_gen, {make_mutation(i, next_timestamp(current_step))});
auto sst = co_await make_sstable_containing(sst_gen, {make_mutation(i, next_timestamp(current_step))});
sst->set_sstable_level(sstable_level);
candidates.push_back(std::move(sst));
}
candidates[i] = std::move(sst);
}).get();

auto strategy = cf->get_compaction_strategy();
auto jobs = strategy.get_cleanup_compaction_jobs(cf.as_compaction_group_view(), candidates);
@@ -6432,7 +6410,7 @@ void test_large_partition_splitting_on_compaction_fn(test_env& env) {
mutations.push_back(make_open_ended_range_tombstone());
}

auto sst = make_sstable_containing(sst_gen, std::move(mutations));
auto sst = make_sstable_containing(sst_gen, std::move(mutations)).get();

auto desc = compaction::compaction_descriptor({ sst });
// With max_sstable_bytes of 1, we'll perform the splitting of the partition as soon as possible.
@@ -6524,7 +6502,7 @@ void check_table_sstable_set_includes_maintenance_sstables_fn(test_env& env) {

auto mut1 = mutation(s, pks[0]);
mut1.partition().apply_insert(*s, ss.make_ckey(0), ss.new_timestamp());
auto sst = make_sstable_containing(env.make_sstable(s), {std::move(mut1)});
auto sst = make_sstable_containing(env.make_sstable(s), {std::move(mut1)}).get();

auto cf = env.make_table_for_tests(s);
auto close_cf = deferred_stop(cf);
@@ -6596,11 +6574,11 @@ void test_print_shared_sstables_vector_fn(test_env& env) {

auto mut0 = mutation(s, pks[0]);
mut0.partition().apply_insert(*s, ss.make_ckey(0), ss.new_timestamp());
ssts[0] = make_sstable_containing(sst_gen, {std::move(mut0)});
ssts[0] = make_sstable_containing(sst_gen, {std::move(mut0)}).get();

auto mut1 = mutation(s, pks[1]);
mut1.partition().apply_insert(*s, ss.make_ckey(1), ss.new_timestamp());
ssts[1] = make_sstable_containing(sst_gen, {std::move(mut1)});
ssts[1] = make_sstable_containing(sst_gen, {std::move(mut1)}).get();

std::string msg = seastar::format("{}", ssts);
for (const auto& sst : ssts) {
@@ -6680,8 +6658,8 @@ void tombstone_gc_disabled_fn(test_env& env) {
auto mut2 = make_delete(alpha);
auto mut3 = make_insert(beta);

auto sst1 = make_sstable_containing(sst_gen, {mut1});
auto sst2 = make_sstable_containing(sst_gen, {mut2, mut3});
auto sst1 = make_sstable_containing(sst_gen, {mut1}).get();
auto sst2 = make_sstable_containing(sst_gen, {mut2, mut3}).get();

forward_jump_clocks(std::chrono::seconds(1));

@@ -6760,8 +6738,8 @@ void compaction_optimization_to_avoid_bloom_filter_checks_fn(test_env& env) {
return m;
};

auto uncompacting = make_sstable_containing(sst_gen, { make_insert(partition_key::from_exploded(*s, {to_bytes("pk1")}) )});
auto compacting = make_sstable_containing(sst_gen, { make_delete(partition_key::from_exploded(*s, {to_bytes("pk1")}) )});
auto uncompacting = make_sstable_containing(sst_gen, { make_insert(partition_key::from_exploded(*s, {to_bytes("pk1")}) )}).get();
auto compacting = make_sstable_containing(sst_gen, { make_delete(partition_key::from_exploded(*s, {to_bytes("pk1")}) )}).get();

auto result = compact({uncompacting, compacting}, {compacting});
BOOST_REQUIRE_EQUAL(1, result.new_sstables.size());
@@ -6825,24 +6803,32 @@ static future<> run_incremental_compaction_test(sstables::offstrategy offstrateg

std::unordered_set<sstables::generation_type> gens; // input sstable generations
run_id run_identifier = run_id::create_random_id();

// Pre-extract mutation pairs for parallel SSTable creation
std::vector<std::pair<mutation, mutation>> mutation_pairs;
mutation_pairs.reserve(sstables_nr);
auto merged_it = merged.begin();
for (unsigned i = 0; i < sstables_nr; i++) {
auto mut1 = std::move(*merged_it);
merged_it++;
auto mut2 = std::move(*merged_it);
merged_it++;
auto sst = make_sstable_containing(sst_gen, {
std::move(mut1),
std::move(mut2)
mutation_pairs.emplace_back(std::move(mut1), std::move(mut2));
}

ssts.resize(sstables_nr);
parallel_for_each(std::views::iota(size_t(0), sstables_nr), [&](size_t i) -> future<> {
ssts[i] = co_await make_sstable_containing(sst_gen, {
std::move(mutation_pairs[i].first),
std::move(mutation_pairs[i].second)
});
sstables::test(sst).set_run_identifier(run_identifier); // in order to produce multi-fragment run.
sst->set_sstable_level(offstrategy ? 0 : 1);
sstables::test(ssts[i]).set_run_identifier(run_identifier);
ssts[i]->set_sstable_level(offstrategy ? 0 : 1);
}).get();

// every sstable will be eligible for cleanup, by having both an owned and unowned token.
owned_token_ranges.push_back(dht::token_range::make_singular(sst->get_last_decorated_key().token()));

gens.insert(sst->generation());
ssts.push_back(std::move(sst));
for (unsigned i = 0; i < sstables_nr; i++) {
owned_token_ranges.push_back(dht::token_range::make_singular(ssts[i]->get_last_decorated_key().token()));
gens.insert(ssts[i]->generation());
}

size_t last_input_sstable_count = sstables_nr;
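Worth noting in the hunk above is the two-phase structure: the merged iterator must be consumed in order, so the mutation pairs are first extracted sequentially, and only then are the sstables built concurrently, one per pre-extracted pair. A sketch of the shape of that transformation, with plain ints standing in for mutations and build() as a hypothetical async builder:

#include <ranges>
#include <utility>
#include <vector>
#include <seastar/core/coroutine.hh>
#include <seastar/core/loop.hh>

seastar::future<int> build(std::pair<int, int> p);  // hypothetical async builder

seastar::future<std::vector<int>> build_all(std::vector<int> merged) {
    // Phase 1: sequential extraction (iterator order matters).
    std::vector<std::pair<int, int>> pairs;
    for (size_t i = 0; i + 1 < merged.size(); i += 2) {
        pairs.emplace_back(merged[i], merged[i + 1]);
    }
    // Phase 2: concurrent construction, one task per pre-extracted pair.
    std::vector<int> out(pairs.size());
    co_await seastar::parallel_for_each(std::views::iota(size_t(0), pairs.size()),
            [&](size_t i) -> seastar::future<> {
        out[i] = co_await build(pairs[i]);
    });
    co_return out;
}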
@@ -6971,26 +6957,32 @@ void cleanup_during_offstrategy_incremental_compaction_fn(test_env& env) {
}

std::unordered_set<sstables::generation_type> gens; // input sstable generations

// Pre-extract mutation pairs for parallel SSTable creation
std::vector<std::pair<mutation, mutation>> mutation_pairs;
mutation_pairs.reserve(sstables_nr);
auto merged_it = merged.begin();
for (unsigned i = 0; i < sstables_nr; i++) {
auto mut1 = std::move(*merged_it);
merged_it++;
auto mut2 = std::move(*merged_it);
merged_it++;
auto sst = make_sstable_containing(sst_gen, {
std::move(mut1),
std::move(mut2)
mutation_pairs.emplace_back(std::move(mut1), std::move(mut2));
}

ssts.resize(sstables_nr);
parallel_for_each(std::views::iota(size_t(0), sstables_nr), [&](size_t i) -> future<> {
ssts[i] = co_await make_sstable_containing(sst_gen, {
std::move(mutation_pairs[i].first),
std::move(mutation_pairs[i].second)
});
// Force a new run_id to trigger offstrategy compaction
sstables::test(sst).set_run_identifier(run_id::create_random_id());
// Set level to 0 to trigger offstrategy compaction
sst->set_sstable_level(0);
sstables::test(ssts[i]).set_run_identifier(run_id::create_random_id());
ssts[i]->set_sstable_level(0);
}).get();

// every sstable will be eligible for cleanup, by having both an owned and unowned token.
owned_token_ranges.push_back(dht::token_range::make_singular(sst->get_last_decorated_key().token()));

gens.insert(sst->generation());
ssts.push_back(std::move(sst));
for (unsigned i = 0; i < sstables_nr; i++) {
owned_token_ranges.push_back(dht::token_range::make_singular(ssts[i]->get_last_decorated_key().token()));
gens.insert(ssts[i]->generation());
}

{
@@ -7077,11 +7069,11 @@ future<> test_sstables_excluding_staging_correctness(test_env_config cfg) {

auto sst_gen = env.make_sst_factory(s);

auto staging_sst = make_sstable_containing(sst_gen, {*sorted_muts.begin()});
auto staging_sst = make_sstable_containing(sst_gen, {*sorted_muts.begin()}).get();
staging_sst->change_state(sstables::sstable_state::staging).get();
BOOST_REQUIRE(staging_sst->requires_view_building());

auto regular_sst = make_sstable_containing(sst_gen, {*sorted_muts.rbegin()});
auto regular_sst = make_sstable_containing(sst_gen, {*sorted_muts.rbegin()}).get();

t->add_sstable_and_update_cache(staging_sst).get();
t->add_sstable_and_update_cache(regular_sst).get();
@@ -7150,7 +7142,7 @@ void produces_optimal_filter_by_estimating_correctly_partitions_per_sstable_fn(t
for (auto i = 0; i < keys; i++) {
muts.push_back(make_insert(partition_key::from_exploded(*s, {to_bytes(shared_key_prefix + to_sstring(i))})));
}
auto sst = make_sstable_containing(sst_gen, std::move(muts));
auto sst = make_sstable_containing(sst_gen, std::move(muts)).get();

testlog.info("index size: {}, data_size: {}", sst->index_size(), sst->ondisk_data_size());

@@ -7220,7 +7212,7 @@ void splitting_compaction_fn(test_env& env) {
auto close_table = deferred_stop(t);
t->start();

auto input = make_sstable_containing(sst_gen, std::move(muts));
auto input = make_sstable_containing(sst_gen, std::move(muts)).get();

std::unordered_set<int64_t> groups;
auto classify_fn = [&groups] (dht::token t) -> mutation_writer::token_group_id {
@@ -7354,7 +7346,7 @@ void sstable_clone_leaving_unsealed_dest_sstable_fn(test_env& env) {

auto mut1 = mutation(s, pk);
mut1.partition().apply_insert(*s, ss.make_ckey(0), ss.new_timestamp());
auto sst = make_sstable_containing(env.make_sstable(s), {std::move(mut1)});
auto sst = make_sstable_containing(env.make_sstable(s), {std::move(mut1)}).get();

auto table = env.make_table_for_tests(s);
auto close_table = deferred_stop(table);
@@ -7389,7 +7381,7 @@ void object_storage_sstable_clone_leaving_unsealed_dest_sstable(test_env& env) {

auto mut1 = mutation(s, pk);
mut1.partition().apply_insert(*s, ss.make_ckey(0), ss.new_timestamp());
auto sst = make_sstable_containing(env.make_sstable(s), {std::move(mut1)});
auto sst = make_sstable_containing(env.make_sstable(s), {std::move(mut1)}).get();

auto table = env.make_table_for_tests(s);
auto close_table = deferred_stop(table);
@@ -7456,7 +7448,7 @@ void failure_when_adding_new_sstable_fn(test_env& env) {

auto mut1 = mutation(s, pk);
mut1.partition().apply_insert(*s, ss.make_ckey(0), ss.new_timestamp());
auto sst = make_sstable_containing(env.make_sstable(s), {mut1});
auto sst = make_sstable_containing(env.make_sstable(s), {mut1}).get();

auto table = env.make_table_for_tests(s);
auto close_table = deferred_stop(table);
@@ -7467,8 +7459,8 @@ void failure_when_adding_new_sstable_fn(test_env& env) {
// Verify new sstable was unlinked on failure.
BOOST_REQUIRE(!sst->get_storage().exists(*sst, sstables::component_type::Data).get());

auto sst2 = make_sstable_containing(env.make_sstable(s), {mut1});
auto sst3 = make_sstable_containing(env.make_sstable(s), {mut1});
auto sst2 = make_sstable_containing(env.make_sstable(s), {mut1}).get();
auto sst3 = make_sstable_containing(env.make_sstable(s), {mut1}).get();
BOOST_REQUIRE_THROW(table->add_new_sstables_and_update_cache({sst2, sst3}, on_add).get(), std::runtime_error);

// Verify both sstables are unlinked on failure.
@@ -7498,7 +7490,7 @@ static future<> test_perform_component_rewrite_single_sstable(compaction::compac

auto mut1 = mutation(s, pk);
mut1.partition().apply_insert(*s, ss.make_ckey(0), ss.new_timestamp());
auto original_sst = make_sstable_containing(env.make_sstable(s), {mut1});
auto original_sst = make_sstable_containing(env.make_sstable(s), {mut1}).get();

BOOST_REQUIRE(original_sst->get_sstable_level() == 0);

@@ -7567,7 +7559,7 @@ SEASTAR_TEST_CASE(test_perform_component_rewrite_multiple_sstables) {
auto pk = ss.make_pkey(i);
auto mut = mutation(s, pk);
mut.partition().apply_insert(*s, ss.make_ckey(0), ss.new_timestamp());
auto sst = make_sstable_containing(env.make_sstable(s), {mut});
auto sst = make_sstable_containing(env.make_sstable(s), {mut}).get();
all_sstables.push_back(sst);
}

@@ -33,7 +33,7 @@ static
mutation_source make_sstable_mutation_source(sstables::test_env& env, schema_ptr s, sstring dir, utils::chunked_vector<mutation> mutations,
sstable_writer_config cfg, sstables::sstable::version_types version, gc_clock::time_point query_time = gc_clock::now()) {
auto sst = env.make_sstable(s, dir, env.new_generation(), version, sstable_format_types::big, default_sstable_buffer_size, to_db_clock(query_time));
auto mt = make_memtable(s, mutations);
auto mt = make_memtable(s, mutations).get();
auto mr = mt->make_mutation_reader(s, env.make_reader_permit());
sst->write_components(std::move(mr), mutations.size(), s, cfg, mt->get_encoding_stats()).get();
sst->load(s->get_sharder()).get();

@@ -100,7 +100,7 @@ SEASTAR_TEST_CASE(datafile_generation_09) {
mutation m(s, key);
m.set_clustered_cell(c_key, r1_col, make_atomic_cell(int32_type, int32_type->decompose(1)));

auto sst = make_sstable_containing(env.make_sstable(s), {std::move(m)});
auto sst = make_sstable_containing(env.make_sstable(s), {std::move(m)}).get();
auto sst2 = env.reusable_sst(sst).get();

sstables::test(sst2).read_summary().get();
@@ -151,7 +151,7 @@ SEASTAR_TEST_CASE(datafile_generation_11) {

m2.set_clustered_cell(c_key, set_col, set_mut_single.serialize(*set_col.type));

auto mt = make_memtable(s, {std::move(m), std::move(m2)});
auto mt = make_memtable(s, {std::move(m), std::move(m2)}).get();

auto verifier = [s, set_col, c_key] (auto& mutation) {
auto& mp = mutation->partition();
@@ -189,14 +189,14 @@ SEASTAR_TEST_CASE(datafile_generation_11) {
// The clustered set
auto m = verifier(mutation);
verify_set(m);
});
}).get();

verify_mutation(env, sstp, "key2", [&] (mutation_opt& mutation) {
auto m = verifier(mutation);
BOOST_REQUIRE(!m.tomb);
BOOST_REQUIRE(m.cells.size() == 1);
BOOST_REQUIRE(m.cells[0].first == to_bytes("4"));
});
}).get();
});
}

@@ -212,7 +212,7 @@ SEASTAR_TEST_CASE(datafile_generation_12) {
tombstone tomb(api::new_timestamp(), gc_clock::now());
m.partition().apply_delete(*s, cp, tomb);

auto mt = make_memtable(s, {std::move(m)});
auto mt = make_memtable(s, {std::move(m)}).get();

verify_mutation(env, env.make_sstable(s), mt, "key1", [&] (mutation_opt& mutation) {
auto& mp = mutation->partition();
@@ -220,7 +220,7 @@ SEASTAR_TEST_CASE(datafile_generation_12) {
for (auto& rt: mp.row_tombstones()) {
BOOST_REQUIRE(rt.tombstone().tomb == tomb);
}
});
}).get();
});
}

@@ -238,7 +238,7 @@ static future<> sstable_compression_test(compression_parameters::algorithm c) {

tombstone tomb(api::new_timestamp(), gc_clock::now());
m.partition().apply_delete(*s, cp, tomb);
auto mtp = make_memtable(s, {std::move(m)});
auto mtp = make_memtable(s, {std::move(m)}).get();

verify_mutation(env, env.make_sstable(s), mtp, "key1", [&] (mutation_opt& mutation) {
auto& mp = mutation->partition();
@@ -246,7 +246,7 @@ static future<> sstable_compression_test(compression_parameters::algorithm c) {
for (auto& rt: mp.row_tombstones()) {
BOOST_REQUIRE(rt.tombstone().tomb == tomb);
}
});
}).get();
});
}

@@ -278,7 +278,7 @@ future<> test_datafile_generation_16(test_env_config cfg) {
mtp->apply(std::move(m));
}

auto sst = make_sstable_containing(env.make_sstable(s), mtp);
auto sst = make_sstable_containing(env.make_sstable(s), mtp).get();
// Not crashing is enough
BOOST_REQUIRE(sst);
sst->destroy().get();
@@ -318,7 +318,7 @@ SEASTAR_TEST_CASE(datafile_generation_37) {
const column_definition& cl2 = *s->get_column_definition("cl2");

m.set_clustered_cell(c_key, cl2, make_atomic_cell(bytes_type, bytes_type->decompose(data_value(to_bytes("cl2")))));
auto mtp = make_memtable(s, {std::move(m)});
auto mtp = make_memtable(s, {std::move(m)}).get();

verify_mutation(env, env.make_sstable(s), mtp, "key1", [&] (mutation_opt& mutation) {
auto& mp = mutation->partition();
@@ -327,7 +327,7 @@ SEASTAR_TEST_CASE(datafile_generation_37) {

auto& row = mp.clustered_row(*s, clustering);
match_live_cell(row.cells(), *s, "cl2", data_value(to_bytes("cl2")));
});
}).get();
});
}

@@ -342,7 +342,7 @@ SEASTAR_TEST_CASE(datafile_generation_38) {

const column_definition& cl3 = *s->get_column_definition("cl3");
m.set_clustered_cell(c_key, cl3, make_atomic_cell(bytes_type, bytes_type->decompose(data_value(to_bytes("cl3")))));
auto mtp = make_memtable(s, {std::move(m)});
auto mtp = make_memtable(s, {std::move(m)}).get();

verify_mutation(env, env.make_sstable(s), mtp, "key1", [&] (mutation_opt& mutation) {
auto& mp = mutation->partition();
@@ -350,7 +350,7 @@ SEASTAR_TEST_CASE(datafile_generation_38) {

auto& row = mp.clustered_row(*s, clustering);
match_live_cell(row.cells(), *s, "cl3", data_value(to_bytes("cl3")));
});
}).get();
});
}

@@ -367,14 +367,14 @@ SEASTAR_TEST_CASE(datafile_generation_39) {
m.set_clustered_cell(c_key, cl1, make_atomic_cell(bytes_type, bytes_type->decompose(data_value(to_bytes("cl1")))));
const column_definition& cl2 = *s->get_column_definition("cl2");
m.set_clustered_cell(c_key, cl2, make_atomic_cell(bytes_type, bytes_type->decompose(data_value(to_bytes("cl2")))));
auto mtp = make_memtable(s, {std::move(m)});
auto mtp = make_memtable(s, {std::move(m)}).get();

verify_mutation(env, env.make_sstable(s), mtp, "key1", [&] (mutation_opt& mutation) {
auto& mp = mutation->partition();
auto& row = mp.clustered_row(*s, clustering_key::make_empty());
match_live_cell(row.cells(), *s, "cl1", data_value(data_value(to_bytes("cl1"))));
match_live_cell(row.cells(), *s, "cl2", data_value(data_value(to_bytes("cl2"))));
});
}).get();
});
}

@@ -392,14 +392,14 @@ SEASTAR_TEST_CASE(datafile_generation_41) {

tombstone tomb(api::new_timestamp(), gc_clock::now());
m.partition().apply_delete(*s, std::move(c_key), tomb);
auto mt = make_memtable(s, {std::move(m)});
auto mt = make_memtable(s, {std::move(m)}).get();

verify_mutation(env, env.make_sstable(s), mt, "key1", [&] (mutation_opt& mutation) {
auto& mp = mutation->partition();
BOOST_REQUIRE(mp.clustered_rows().calculate_size() == 1);
auto& c_row = *(mp.clustered_rows().begin());
BOOST_REQUIRE(c_row.row().deleted_at().tomb() == tomb);
});
}).get();
});
}

@@ -418,7 +418,7 @@ SEASTAR_TEST_CASE(datafile_generation_47) {
mutation m(s, key);
m.set_clustered_cell(c_key, r1_col, make_atomic_cell(utf8_type, bytes(512*1024, 'a')));

auto sstp = make_sstable_containing(env.make_sstable(s), {std::move(m)});
auto sstp = make_sstable_containing(env.make_sstable(s), {std::move(m)}).get();
auto reader = sstable_mutation_reader(sstp, s, env.make_reader_permit());
auto close_reader = deferred_close(reader);
while (reader().get()) {
@@ -461,7 +461,7 @@ SEASTAR_TEST_CASE(test_counter_write) {

m.set_clustered_cell(c_key2, r1_col, make_dead_atomic_cell(1));

auto sstp = make_sstable_containing(env.make_sstable(s), {m});
auto sstp = make_sstable_containing(env.make_sstable(s), {m}).get();
assert_that(sstable_mutation_reader(sstp, s, env.make_reader_permit()))
.produces(m)
.produces_end_of_stream();
@@ -860,7 +860,7 @@ SEASTAR_TEST_CASE(test_sstable_max_local_deletion_time) {
make_atomic_cell(utf8_type, bytes("a"), 3600 + i, last_expiry));
mt->apply(std::move(m));
}
auto sstp = make_sstable_containing(env.make_sstable(s, version), mt);
auto sstp = make_sstable_containing(env.make_sstable(s, version), mt).get();
BOOST_REQUIRE(last_expiry == sstp->get_stats_metadata().max_local_deletion_time);
}
});
@@ -972,7 +972,7 @@ static void test_min_max_clustering_key(test_env& env, schema_ptr s, std::vector
}
}
}
auto sst = make_sstable_containing(env.make_sstable(s, version), mt);
auto sst = make_sstable_containing(env.make_sstable(s, version), mt).get();
check_min_max_column_names(sst, std::move(min_components), std::move(max_components));
sst->unlink().get();
}
@@ -1103,7 +1103,7 @@ SEASTAR_TEST_CASE(sstable_tombstone_metadata_check) {
mutation m(s, key);
tombstone tomb(api::new_timestamp(), gc_clock::now());
m.partition().apply_delete(*s, c_key, tomb);
auto sst = make_sstable_containing(sst_gen, {std::move(m)});
auto sst = make_sstable_containing(sst_gen, {std::move(m)}).get();
BOOST_REQUIRE(sst->get_stats_metadata().estimated_tombstone_drop_time.bin.size());
check_min_max_column_names(sst, {"c1"}, {"c1"});
}
@@ -1111,7 +1111,7 @@ SEASTAR_TEST_CASE(sstable_tombstone_metadata_check) {
{
mutation m(s, key);
m.set_clustered_cell(c_key, r1_col, make_dead_atomic_cell(3600));
auto sst = make_sstable_containing(sst_gen, {std::move(m)});
auto sst = make_sstable_containing(sst_gen, {std::move(m)}).get();
BOOST_REQUIRE(sst->get_stats_metadata().estimated_tombstone_drop_time.bin.size());
check_min_max_column_names(sst, {"c1"}, {"c1"});
}
@@ -1119,7 +1119,7 @@ SEASTAR_TEST_CASE(sstable_tombstone_metadata_check) {
{
mutation m(s, key);
m.set_clustered_cell(c_key, r1_col, make_atomic_cell(int32_type, int32_type->decompose(1)));
auto sst = make_sstable_containing(sst_gen, {std::move(m)});
auto sst = make_sstable_containing(sst_gen, {std::move(m)}).get();
BOOST_REQUIRE(!sst->get_stats_metadata().estimated_tombstone_drop_time.bin.size());
check_min_max_column_names(sst, {"c1"}, {"c1"});
}
@@ -1133,7 +1133,7 @@ SEASTAR_TEST_CASE(sstable_tombstone_metadata_check) {
mutation m2(s, key2);
m2.set_clustered_cell(c_key, r1_col, make_atomic_cell(int32_type, int32_type->decompose(1)));

auto sst = make_sstable_containing(sst_gen, {std::move(m), std::move(m2)});
auto sst = make_sstable_containing(sst_gen, {std::move(m), std::move(m2)}).get();
BOOST_REQUIRE(sst->get_stats_metadata().estimated_tombstone_drop_time.bin.size());
check_min_max_column_names(sst, {"c1"}, {"c1"});
}
@@ -1142,7 +1142,7 @@ SEASTAR_TEST_CASE(sstable_tombstone_metadata_check) {
mutation m(s, key);
tombstone tomb(api::new_timestamp(), gc_clock::now());
m.partition().apply(tomb);
auto sst = make_sstable_containing(sst_gen, {std::move(m)});
auto sst = make_sstable_containing(sst_gen, {std::move(m)}).get();
BOOST_REQUIRE(sst->get_stats_metadata().estimated_tombstone_drop_time.bin.size());
check_min_max_column_names(sst, {}, {});
}
@@ -1153,7 +1153,7 @@ SEASTAR_TEST_CASE(sstable_tombstone_metadata_check) {
range_tombstone rt(clustering_key_prefix::from_single_value(*s, bytes(
"a")), clustering_key_prefix::from_single_value(*s, bytes("a")), tomb);
m.partition().apply_delete(*s, std::move(rt));
auto sst = make_sstable_containing(sst_gen, {std::move(m)});
auto sst = make_sstable_containing(sst_gen, {std::move(m)}).get();
BOOST_REQUIRE(sst->get_stats_metadata().estimated_tombstone_drop_time.bin.size());
if (version >= sstable_version_types::mc) {
check_min_max_column_names(sst, {"a"}, {"a"});
@@ -1169,7 +1169,7 @@ SEASTAR_TEST_CASE(sstable_tombstone_metadata_check) {
clustering_key_prefix::from_single_value(*s, bytes("a")),
tomb);
m.partition().apply_delete(*s, std::move(rt));
auto sst = make_sstable_containing(sst_gen, {std::move(m)});
auto sst = make_sstable_containing(sst_gen, {std::move(m)}).get();
BOOST_REQUIRE(sst->get_stats_metadata().estimated_tombstone_drop_time.bin.size());
if (version >= sstable_version_types::mc) {
check_min_max_column_names(sst, {"a"}, {"c1"});
@@ -1185,7 +1185,7 @@ SEASTAR_TEST_CASE(sstable_tombstone_metadata_check) {
clustering_key_prefix::from_single_value(*s, bytes("d")),
tomb);
m.partition().apply_delete(*s, std::move(rt));
auto sst = make_sstable_containing(sst_gen, {std::move(m)});
auto sst = make_sstable_containing(sst_gen, {std::move(m)}).get();
BOOST_REQUIRE(sst->get_stats_metadata().estimated_tombstone_drop_time.bin.size());
if (version >= sstable_version_types::mc) {
check_min_max_column_names(sst, {"c"}, {"d"});
@@ -1201,7 +1201,7 @@ SEASTAR_TEST_CASE(sstable_tombstone_metadata_check) {
clustering_key_prefix::from_single_value(*s, bytes("z")),
tomb);
m.partition().apply_delete(*s, std::move(rt));
auto sst = make_sstable_containing(sst_gen, {std::move(m)});
auto sst = make_sstable_containing(sst_gen, {std::move(m)}).get();
BOOST_REQUIRE(sst->get_stats_metadata().estimated_tombstone_drop_time.bin.size());
if (version >= sstable_version_types::mc) {
check_min_max_column_names(sst, {"c1"}, {"z"});
@@ -1218,7 +1218,7 @@ SEASTAR_TEST_CASE(sstable_tombstone_metadata_check) {
bound_view(clustering_key_prefix::from_single_value(*s, bytes("z")), bound_kind::incl_end),
tomb);
m.partition().apply_delete(*s, std::move(rt));
auto sst = make_sstable_containing(sst_gen, {std::move(m)});
auto sst = make_sstable_containing(sst_gen, {std::move(m)}).get();
BOOST_REQUIRE(sst->get_stats_metadata().estimated_tombstone_drop_time.bin.size());
check_min_max_column_names(sst, {}, {"z"});
}
@@ -1232,7 +1232,7 @@ SEASTAR_TEST_CASE(sstable_tombstone_metadata_check) {
bound_view::top(),
tomb);
m.partition().apply_delete(*s, std::move(rt));
auto sst = make_sstable_containing(sst_gen, {std::move(m)});
auto sst = make_sstable_containing(sst_gen, {std::move(m)}).get();
BOOST_REQUIRE(sst->get_stats_metadata().estimated_tombstone_drop_time.bin.size());
check_min_max_column_names(sst, {"a"}, {});
}
@@ -1242,7 +1242,7 @@ SEASTAR_TEST_CASE(sstable_tombstone_metadata_check) {
m.set_clustered_cell(c_key, r1_col, make_atomic_cell(int32_type, int32_type->decompose(1)));
tombstone tomb(api::new_timestamp(), gc_clock::now());
m.partition().apply_delete(*s, clustering_key_prefix::make_empty(), tomb);
auto sst = make_sstable_containing(sst_gen, {std::move(m)});
auto sst = make_sstable_containing(sst_gen, {std::move(m)}).get();
BOOST_REQUIRE(sst->get_stats_metadata().estimated_tombstone_drop_time.bin.size());
check_min_max_column_names(sst, {}, {});
}
@@ -1271,7 +1271,7 @@ SEASTAR_TEST_CASE(sstable_composite_tombstone_metadata_check) {
|
||||
mutation m(s, key);
|
||||
tombstone tomb(api::new_timestamp(), gc_clock::now());
|
||||
m.partition().apply_delete(*s, c_key, tomb);
|
||||
auto sst = make_sstable_containing(sst_gen, {std::move(m)});
|
||||
auto sst = make_sstable_containing(sst_gen, {std::move(m)}).get();
|
||||
BOOST_REQUIRE(sst->get_stats_metadata().estimated_tombstone_drop_time.bin.size());
|
||||
check_min_max_column_names(sst, {"c1", "c2"}, {"c1", "c2"});
|
||||
}
|
||||
@@ -1279,7 +1279,7 @@ SEASTAR_TEST_CASE(sstable_composite_tombstone_metadata_check) {
|
||||
{
|
||||
mutation m(s, key);
|
||||
m.set_clustered_cell(c_key, r1_col, make_dead_atomic_cell(3600));
|
||||
auto sst = make_sstable_containing(sst_gen, {std::move(m)});
|
||||
auto sst = make_sstable_containing(sst_gen, {std::move(m)}).get();
|
||||
BOOST_REQUIRE(sst->get_stats_metadata().estimated_tombstone_drop_time.bin.size());
|
||||
check_min_max_column_names(sst, {"c1", "c2"}, {"c1", "c2"});
|
||||
}
|
||||
@@ -1287,7 +1287,7 @@ SEASTAR_TEST_CASE(sstable_composite_tombstone_metadata_check) {
|
||||
{
|
||||
mutation m(s, key);
|
||||
m.set_clustered_cell(c_key, r1_col, make_atomic_cell(int32_type, int32_type->decompose(1)));
|
||||
auto sst = make_sstable_containing(sst_gen, {std::move(m)});
|
||||
auto sst = make_sstable_containing(sst_gen, {std::move(m)}).get();
|
||||
BOOST_REQUIRE(!sst->get_stats_metadata().estimated_tombstone_drop_time.bin.size());
|
||||
check_min_max_column_names(sst, {"c1", "c2"}, {"c1", "c2"});
|
||||
}
|
||||
@@ -1301,7 +1301,7 @@ SEASTAR_TEST_CASE(sstable_composite_tombstone_metadata_check) {
|
||||
mutation m2(s, key2);
|
||||
m2.set_clustered_cell(c_key, r1_col, make_atomic_cell(int32_type, int32_type->decompose(1)));
|
||||
|
||||
auto sst = make_sstable_containing(sst_gen, {std::move(m), std::move(m2)});
|
||||
auto sst = make_sstable_containing(sst_gen, {std::move(m), std::move(m2)}).get();
|
||||
BOOST_REQUIRE(sst->get_stats_metadata().estimated_tombstone_drop_time.bin.size());
|
||||
check_min_max_column_names(sst, {"c1", "c2"}, {"c1", "c2"});
|
||||
}
|
||||
@@ -1310,7 +1310,7 @@ SEASTAR_TEST_CASE(sstable_composite_tombstone_metadata_check) {
|
||||
mutation m(s, key);
|
||||
tombstone tomb(api::new_timestamp(), gc_clock::now());
|
||||
m.partition().apply(tomb);
|
||||
auto sst = make_sstable_containing(sst_gen, {std::move(m)});
|
||||
auto sst = make_sstable_containing(sst_gen, {std::move(m)}).get();
|
||||
BOOST_REQUIRE(sst->get_stats_metadata().estimated_tombstone_drop_time.bin.size());
|
||||
check_min_max_column_names(sst, {}, {});
|
||||
}
|
||||
@@ -1323,7 +1323,7 @@ SEASTAR_TEST_CASE(sstable_composite_tombstone_metadata_check) {
|
||||
clustering_key_prefix::from_exploded(*s, {to_bytes("z"), to_bytes("zz")}),
|
||||
tomb);
|
||||
m.partition().apply_delete(*s, std::move(rt));
|
||||
auto sst = make_sstable_containing(sst_gen, {std::move(m)});
|
||||
auto sst = make_sstable_containing(sst_gen, {std::move(m)}).get();
|
||||
BOOST_REQUIRE(sst->get_stats_metadata().estimated_tombstone_drop_time.bin.size());
|
||||
if (version >= sstable_version_types::mc) {
|
||||
check_min_max_column_names(sst, {"a", "aa"}, {"z", "zz"});
|
||||
@@ -1339,7 +1339,7 @@ SEASTAR_TEST_CASE(sstable_composite_tombstone_metadata_check) {
|
||||
clustering_key_prefix::from_exploded(*s, {to_bytes("a"), to_bytes("zz")}),
|
||||
tomb);
|
||||
m.partition().apply_delete(*s, std::move(rt));
|
||||
auto sst = make_sstable_containing(sst_gen, {std::move(m)});
|
||||
auto sst = make_sstable_containing(sst_gen, {std::move(m)}).get();
|
||||
BOOST_REQUIRE(sst->get_stats_metadata().estimated_tombstone_drop_time.bin.size());
|
||||
if (version >= sstable_version_types::mc) {
|
||||
check_min_max_column_names(sst, {"a"}, {"c1", "c2"});
|
||||
@@ -1355,7 +1355,7 @@ SEASTAR_TEST_CASE(sstable_composite_tombstone_metadata_check) {
|
||||
clustering_key_prefix::from_exploded(*s, {to_bytes("c1"), to_bytes("zz")}),
|
||||
tomb);
|
||||
m.partition().apply_delete(*s, std::move(rt));
|
||||
auto sst = make_sstable_containing(sst_gen, {std::move(m)});
|
||||
auto sst = make_sstable_containing(sst_gen, {std::move(m)}).get();
|
||||
BOOST_REQUIRE(sst->get_stats_metadata().estimated_tombstone_drop_time.bin.size());
|
||||
if (version >= sstable_version_types::mc) {
|
||||
check_min_max_column_names(sst, {"c1", "aa"}, {"c1", "zz"});
|
||||
@@ -1371,7 +1371,7 @@ SEASTAR_TEST_CASE(sstable_composite_tombstone_metadata_check) {
|
||||
clustering_key_prefix::from_exploded(*s, {to_bytes("z"), to_bytes("zz")}),
|
||||
tomb);
|
||||
m.partition().apply_delete(*s, std::move(rt));
|
||||
auto sst = make_sstable_containing(sst_gen, {std::move(m)});
|
||||
auto sst = make_sstable_containing(sst_gen, {std::move(m)}).get();
|
||||
BOOST_REQUIRE(sst->get_stats_metadata().estimated_tombstone_drop_time.bin.size());
|
||||
if (version >= sstable_version_types::mc) {
|
||||
check_min_max_column_names(sst, {"c1", "c2"}, {"z", "zz"});
|
||||
@@ -1388,7 +1388,7 @@ SEASTAR_TEST_CASE(sstable_composite_tombstone_metadata_check) {
|
||||
bound_view(clustering_key_prefix::from_single_value(*s, bytes("z")), bound_kind::incl_end),
|
||||
tomb);
|
||||
m.partition().apply_delete(*s, std::move(rt));
|
||||
auto sst = make_sstable_containing(sst_gen, {std::move(m)});
|
||||
auto sst = make_sstable_containing(sst_gen, {std::move(m)}).get();
|
||||
BOOST_REQUIRE(sst->get_stats_metadata().estimated_tombstone_drop_time.bin.size());
|
||||
check_min_max_column_names(sst, {}, {"z"});
|
||||
}
|
||||
@@ -1402,7 +1402,7 @@ SEASTAR_TEST_CASE(sstable_composite_tombstone_metadata_check) {
|
||||
bound_view::top(),
|
||||
tomb);
|
||||
m.partition().apply_delete(*s, std::move(rt));
|
||||
auto sst = make_sstable_containing(sst_gen, {std::move(m)});
|
||||
auto sst = make_sstable_containing(sst_gen, {std::move(m)}).get();
|
||||
BOOST_REQUIRE(sst->get_stats_metadata().estimated_tombstone_drop_time.bin.size());
|
||||
check_min_max_column_names(sst, {"a"}, {});
|
||||
}
|
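Every hunk above makes the same mechanical change: `make_sstable_containing()` now returns a `future<sstables::shared_sstable>` instead of a ready sstable, so call sites running inside a seastar thread append `.get()` to block until the future resolves. Below is a minimal sketch of why that is safe in thread-style tests; `make_thing()` is a hypothetical stand-in for the future-returning helper, not Scylla code:

```cpp
#include <chrono>
#include <seastar/core/coroutine.hh>
#include <seastar/core/sleep.hh>
#include <seastar/testing/thread_test_case.hh>

// Hypothetical stand-in for a helper that became future-returning,
// the way make_sstable_containing() does in this series.
static seastar::future<int> make_thing() {
    co_await seastar::sleep(std::chrono::milliseconds(1)); // placeholder async work
    co_return 42;
}

SEASTAR_THREAD_TEST_CASE(call_future_returning_helper) {
    // Inside a seastar thread, .get() suspends only this thread, not the
    // reactor, until the future resolves. This is why the call sites in
    // the hunks above only needed a trailing .get().
    int v = make_thing().get();
    BOOST_REQUIRE_EQUAL(v, 42);
}
```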
@@ -1431,7 +1431,7 @@ SEASTAR_TEST_CASE(sstable_composite_reverse_tombstone_metadata_check) {
         mutation m(s, key);
         tombstone tomb(api::new_timestamp(), gc_clock::now());
         m.partition().apply_delete(*s, c_key, tomb);
-        auto sst = make_sstable_containing(sst_gen, {std::move(m)});
+        auto sst = make_sstable_containing(sst_gen, {std::move(m)}).get();
         BOOST_REQUIRE(sst->get_stats_metadata().estimated_tombstone_drop_time.bin.size());
         check_min_max_column_names(sst, {"c1", "c2"}, {"c1", "c2"});
     }
@@ -1439,7 +1439,7 @@ SEASTAR_TEST_CASE(sstable_composite_reverse_tombstone_metadata_check) {
     {
         mutation m(s, key);
         m.set_clustered_cell(c_key, r1_col, make_dead_atomic_cell(3600));
-        auto sst = make_sstable_containing(sst_gen, {std::move(m)});
+        auto sst = make_sstable_containing(sst_gen, {std::move(m)}).get();
         BOOST_REQUIRE(sst->get_stats_metadata().estimated_tombstone_drop_time.bin.size());
         check_min_max_column_names(sst, {"c1", "c2"}, {"c1", "c2"});
     }
@@ -1447,7 +1447,7 @@ SEASTAR_TEST_CASE(sstable_composite_reverse_tombstone_metadata_check) {
     {
         mutation m(s, key);
         m.set_clustered_cell(c_key, r1_col, make_atomic_cell(int32_type, int32_type->decompose(1)));
-        auto sst = make_sstable_containing(sst_gen, {std::move(m)});
+        auto sst = make_sstable_containing(sst_gen, {std::move(m)}).get();
         BOOST_REQUIRE(!sst->get_stats_metadata().estimated_tombstone_drop_time.bin.size());
         check_min_max_column_names(sst, {"c1", "c2"}, {"c1", "c2"});
     }
@@ -1461,7 +1461,7 @@ SEASTAR_TEST_CASE(sstable_composite_reverse_tombstone_metadata_check) {
         mutation m2(s, key2);
         m2.set_clustered_cell(c_key, r1_col, make_atomic_cell(int32_type, int32_type->decompose(1)));

-        auto sst = make_sstable_containing(sst_gen, {std::move(m), std::move(m2)});
+        auto sst = make_sstable_containing(sst_gen, {std::move(m), std::move(m2)}).get();
         BOOST_REQUIRE(sst->get_stats_metadata().estimated_tombstone_drop_time.bin.size());
         check_min_max_column_names(sst, {"c1", "c2"}, {"c1", "c2"});
     }
@@ -1470,7 +1470,7 @@ SEASTAR_TEST_CASE(sstable_composite_reverse_tombstone_metadata_check) {
         mutation m(s, key);
         tombstone tomb(api::new_timestamp(), gc_clock::now());
         m.partition().apply(tomb);
-        auto sst = make_sstable_containing(sst_gen, {std::move(m)});
+        auto sst = make_sstable_containing(sst_gen, {std::move(m)}).get();
         BOOST_REQUIRE(sst->get_stats_metadata().estimated_tombstone_drop_time.bin.size());
         check_min_max_column_names(sst, {}, {});
     }
@@ -1483,7 +1483,7 @@ SEASTAR_TEST_CASE(sstable_composite_reverse_tombstone_metadata_check) {
                 clustering_key_prefix::from_exploded(*s, {to_bytes("a"), to_bytes("aa")}),
                 tomb);
         m.partition().apply_delete(*s, std::move(rt));
-        auto sst = make_sstable_containing(sst_gen, {std::move(m)});
+        auto sst = make_sstable_containing(sst_gen, {std::move(m)}).get();
         BOOST_REQUIRE(sst->get_stats_metadata().estimated_tombstone_drop_time.bin.size());
         if (version >= sstable_version_types::mc) {
             check_min_max_column_names(sst, {"a", "zz"}, {"a", "aa"});
@@ -1499,7 +1499,7 @@ SEASTAR_TEST_CASE(sstable_composite_reverse_tombstone_metadata_check) {
                 clustering_key_prefix::from_exploded(*s, {to_bytes("a")}),
                 tomb);
         m.partition().apply_delete(*s, std::move(rt));
-        auto sst = make_sstable_containing(sst_gen, {std::move(m)});
+        auto sst = make_sstable_containing(sst_gen, {std::move(m)}).get();
         BOOST_REQUIRE(sst->get_stats_metadata().estimated_tombstone_drop_time.bin.size());
         if (version >= sstable_version_types::mc) {
             check_min_max_column_names(sst, {"a", "zz"}, {"c1", "c2"});
@@ -1515,7 +1515,7 @@ SEASTAR_TEST_CASE(sstable_composite_reverse_tombstone_metadata_check) {
                 clustering_key_prefix::from_exploded(*s, {to_bytes("c1")}),
                 tomb);
         m.partition().apply_delete(*s, std::move(rt));
-        auto sst = make_sstable_containing(sst_gen, {std::move(m)});
+        auto sst = make_sstable_containing(sst_gen, {std::move(m)}).get();
         BOOST_REQUIRE(sst->get_stats_metadata().estimated_tombstone_drop_time.bin.size());
         if (version >= sstable_version_types::mc) {
             check_min_max_column_names(sst, {"c1", "zz"}, {"c1"});
@@ -1531,7 +1531,7 @@ SEASTAR_TEST_CASE(sstable_composite_reverse_tombstone_metadata_check) {
                 clustering_key_prefix::from_exploded(*s, {to_bytes("c1"), to_bytes("d")}),
                 tomb);
         m.partition().apply_delete(*s, std::move(rt));
-        auto sst = make_sstable_containing(sst_gen, {std::move(m)});
+        auto sst = make_sstable_containing(sst_gen, {std::move(m)}).get();
         BOOST_REQUIRE(sst->get_stats_metadata().estimated_tombstone_drop_time.bin.size());
         if (version >= sstable_version_types::mc) {
             check_min_max_column_names(sst, {"c1", "zz"}, {"c1", "c2"});
@@ -1548,7 +1548,7 @@ SEASTAR_TEST_CASE(sstable_composite_reverse_tombstone_metadata_check) {
                 bound_view(clustering_key_prefix::from_single_value(*s, bytes("z")), bound_kind::incl_end),
                 tomb);
         m.partition().apply_delete(*s, std::move(rt));
-        auto sst = make_sstable_containing(sst_gen, {std::move(m)});
+        auto sst = make_sstable_containing(sst_gen, {std::move(m)}).get();
         BOOST_REQUIRE(sst->get_stats_metadata().estimated_tombstone_drop_time.bin.size());
         check_min_max_column_names(sst, {}, {"z"});
     }
@@ -1562,7 +1562,7 @@ SEASTAR_TEST_CASE(sstable_composite_reverse_tombstone_metadata_check) {
                 bound_view::top(),
                 tomb);
         m.partition().apply_delete(*s, std::move(rt));
-        auto sst = make_sstable_containing(sst_gen, {std::move(m)});
+        auto sst = make_sstable_containing(sst_gen, {std::move(m)}).get();
         BOOST_REQUIRE(sst->get_stats_metadata().estimated_tombstone_drop_time.bin.size());
         check_min_max_column_names(sst, {"a"}, {});
     }
@@ -1988,7 +1988,7 @@ SEASTAR_TEST_CASE(sstable_tombstone_histogram_test) {
             mutations.push_back(make_delete(key));
             forward_jump_clocks(std::chrono::seconds(1));
         }
-        auto sst = make_sstable_containing(env.make_sstable(s, version), mutations);
+        auto sst = make_sstable_containing(env.make_sstable(s, version), mutations).get();
         auto histogram = sst->get_stats_metadata().estimated_tombstone_drop_time;
         sst = env.reusable_sst(sst).get();
         auto histogram2 = sst->get_stats_metadata().estimated_tombstone_drop_time;
@@ -2040,7 +2040,7 @@ SEASTAR_TEST_CASE(sstable_owner_shards) {
            auto sst = env.make_sstable(std::move(schema));
            return sst;
        };
-        auto sst = make_sstable_containing(sst_gen, std::move(muts));
+        auto sst = make_sstable_containing(sst_gen, std::move(muts)).get();
        auto schema = schema_builder(s).with_sharder(smp_count, ignore_msb).build();
        sst = env.reusable_sst(std::move(schema), sst).get();
        return sst;
@@ -2097,7 +2097,7 @@ SEASTAR_TEST_CASE(test_summary_entry_spanning_more_keys_than_min_interval) {
        }

        auto version = sstable_version_types::me;
-        auto sst = make_sstable_containing(env.make_sstable(s, version), mutations);
+        auto sst = make_sstable_containing(env.make_sstable(s, version), mutations).get();

        const summary& sum = sst->get_summary();
        BOOST_REQUIRE(sum.entries.size() == 1);
@@ -2304,7 +2304,7 @@ SEASTAR_TEST_CASE(summary_rebuild_sanity) {
        }

        auto version = sstable_version_types::me;
-        auto sst = make_sstable_containing(env.make_sstable(s, version), mutations);
+        auto sst = make_sstable_containing(env.make_sstable(s, version), mutations).get();

        summary s1 = std::move(sstables::test(sst)._summary());
        BOOST_REQUIRE(!(bool)sstables::test(sst)._summary()); // make sure std::move above took place
@@ -2353,7 +2353,7 @@ SEASTAR_TEST_CASE(sstable_partition_estimation_sanity_test) {
                auto key = to_bytes("key" + to_sstring(i));
                mutations.push_back(make_large_partition(partition_key::from_exploded(*s, {std::move(key)})));
            }
-            auto sst = make_sstable_containing(env.make_sstable(s), mutations);
+            auto sst = make_sstable_containing(env.make_sstable(s), mutations).get();

            BOOST_REQUIRE(std::abs(int64_t(total_partitions) - int64_t(sst->get_estimated_key_count())) <= s->min_index_interval());
        }
@@ -2366,7 +2366,7 @@ SEASTAR_TEST_CASE(sstable_partition_estimation_sanity_test) {
                auto key = to_bytes("key" + to_sstring(i));
                mutations.push_back(make_small_partition(partition_key::from_exploded(*s, {std::move(key)})));
            }
-            auto sst = make_sstable_containing(env.make_sstable(s), mutations);
+            auto sst = make_sstable_containing(env.make_sstable(s), mutations).get();

            BOOST_REQUIRE(std::abs(int64_t(total_partitions) - int64_t(sst->get_estimated_key_count())) <= s->min_index_interval());
        }
@@ -2393,7 +2393,7 @@ SEASTAR_TEST_CASE(sstable_timestamp_metadata_correcness_with_negative) {
            auto mut1 = make_insert(alpha, -50);
            auto mut2 = make_insert(beta, 5);

-            auto sst = make_sstable_containing(env.make_sstable(s, version), {mut1, mut2});
+            auto sst = make_sstable_containing(env.make_sstable(s, version), {mut1, mut2}).get();

            BOOST_REQUIRE(sst->get_stats_metadata().min_timestamp == -50);
            BOOST_REQUIRE(sst->get_stats_metadata().max_timestamp == 5);
@@ -2469,7 +2469,7 @@ SEASTAR_TEST_CASE(sstable_run_clustering_disjoint_invariant_test) {
        }
        muts.push_back(std::move(mut));

-        auto sst = make_sstable_containing(env.make_sstable(s), std::move(muts));
+        auto sst = make_sstable_containing(env.make_sstable(s), std::move(muts)).get();

        BOOST_REQUIRE(sst->min_position().key() == first_ckey_prefix);
        BOOST_REQUIRE(sst->max_position().key() == last_ckey_prefix);
@@ -2627,12 +2627,12 @@ SEASTAR_TEST_CASE(test_may_have_partition_tombstones) {
            ss.add_row(mut2, ss.make_ckey(6), "val");

            {
-                auto sst = make_sstable_containing(env.make_sstable(s, version), {mut1, mut2});
+                auto sst = make_sstable_containing(env.make_sstable(s, version), {mut1, mut2}).get();
                BOOST_REQUIRE(!sst->may_have_partition_tombstones());
            }

            mut2.partition().apply(ss.new_tombstone());
-            auto sst = make_sstable_containing(env.make_sstable(s, version), {mut1, mut2});
+            auto sst = make_sstable_containing(env.make_sstable(s, version), {mut1, mut2}).get();
            BOOST_REQUIRE(sst->may_have_partition_tombstones());
        }
    });
@@ -2766,7 +2766,7 @@ SEASTAR_TEST_CASE(sstable_reader_with_timeout) {
        tombstone tomb(api::new_timestamp(), gc_clock::now());
        m.partition().apply_delete(*s, cp, tomb);

-        auto sstp = make_sstable_containing(env.make_sstable(s), {std::move(m)});
+        auto sstp = make_sstable_containing(env.make_sstable(s), {std::move(m)}).get();
        auto pr = dht::partition_range::make_singular(make_dkey(s, "key1"));
        auto timeout = db::timeout_clock::now();
        auto rd = sstp->make_reader(s, env.make_reader_permit(timeout), pr, s->full_slice());
@@ -2948,7 +2948,7 @@ SEASTAR_TEST_CASE(partial_sstable_deletion_test) {

        auto mut1 = mutation(s, pks[0]);
        mut1.partition().apply_insert(*s, ss.make_ckey(0), ss.new_timestamp());
-        auto sst = make_sstable_containing(env.make_sstable(s), {std::move(mut1)});
+        auto sst = make_sstable_containing(env.make_sstable(s), {std::move(mut1)}).get();

        // Rename TOC into TMP toc, to stress deletion path for partial files
        rename_file(test(sst).filename(sstables::component_type::TOC).native(), test(sst).filename(sstables::component_type::TemporaryTOC).native()).get();
@@ -3024,7 +3024,7 @@ SEASTAR_TEST_CASE(test_full_scan_reader_out_of_range_last_range_tombstone_change
        using bound = query::clustering_range::bound;
        table.delete_range(mut, query::clustering_range::make(bound{ckeys[3], true}, bound{clustering_key::make_empty(), true}), tombstone(1, gc_clock::now()));

-        auto sst = make_sstable_containing(env.make_sstable(table.schema()), {mut});
+        auto sst = make_sstable_containing(env.make_sstable(table.schema()), {mut}).get();

        assert_that(sst->make_full_scan_reader(table.schema(), env.make_reader_permit())).has_monotonic_positions();
    });
@@ -3045,7 +3045,7 @@ SEASTAR_TEST_CASE(test_full_scan_reader_random_schema_random_mutations) {

        const auto muts = tests::generate_random_mutations(random_schema, 20).get();

-        auto sst = make_sstable_containing(env.make_sstable(schema), muts);
+        auto sst = make_sstable_containing(env.make_sstable(schema), muts).get();

        {
            auto rd = assert_that(sst->make_full_scan_reader(schema, env.make_reader_permit()));
@@ -3109,7 +3109,7 @@ SEASTAR_TEST_CASE(find_first_position_in_partition_from_sstable_test) {
            }
            muts.push_back(std::move(mut1));
        }
-        auto sst = make_sstable_containing(env.make_sstable(s), std::move(muts));
+        auto sst = make_sstable_containing(env.make_sstable(s), std::move(muts)).get();
        position_in_partition::equal_compare eq(*s);
        if (!with_static_row) {
            BOOST_REQUIRE(sst->min_position().key() == first_position->key());
@@ -3158,7 +3158,7 @@ future<> test_sstable_bytes_correctness(sstring tname, test_env_config cfg) {

        const auto muts = tests::generate_random_mutations(random_schema, 20).get();

-        auto sst = make_sstable_containing(env.make_sstable(schema), muts);
+        auto sst = make_sstable_containing(env.make_sstable(schema), muts).get();

        auto free_space = sst->get_storage().free_space().get();
        BOOST_REQUIRE(free_space > 0);
@@ -3209,7 +3209,7 @@ SEASTAR_TEST_CASE(test_sstable_set_predicate) {

        const auto muts = tests::generate_random_mutations(random_schema, 20).get();

-        auto sst = make_sstable_containing(env.make_sstable(s), muts);
+        auto sst = make_sstable_containing(env.make_sstable(s), muts).get();

        auto cs = compaction::make_compaction_strategy(compaction::compaction_strategy_type::leveled, s->compaction_strategy_options());
        sstable_set set = env.make_sstable_set(cs, s);
@@ -3287,7 +3287,7 @@ SEASTAR_TEST_CASE(sstable_identifier_correctness) {

        auto mut1 = mutation(s, pks[0]);
        mut1.partition().apply_insert(*s, ss.make_ckey(0), ss.new_timestamp());
-        auto sst = make_sstable_containing(env.make_sstable(s), {std::move(mut1)});
+        auto sst = make_sstable_containing(env.make_sstable(s), {std::move(mut1)}).get();

        BOOST_REQUIRE(sst->sstable_identifier());
        BOOST_REQUIRE_EQUAL(sst->sstable_identifier()->uuid(), sst->generation().as_uuid());
@@ -67,7 +67,7 @@ make_sstable_for_this_shard(std::function<sstables::shared_sstable()> sst_factor
    auto key = tests::generate_partition_key(s);
    mutation m(s, key);
    m.set_clustered_cell(clustering_key::make_empty(), bytes("c"), data_value(int32_t(0)), api::timestamp_type(0));
-    return make_sstable_containing(sst_factory, {m});
+    return make_sstable_containing(sst_factory, {m}).get();
 }

 /// Create a shared SSTable belonging to all shards for the following schema: "create table cf (p text PRIMARY KEY, c int)"
@@ -309,7 +309,7 @@ SEASTAR_TEST_CASE(test_inexact_partition_index_range_query) {
        }

        // Generate the sstable.
-        auto sst = make_sstable_containing(env.make_sstable(table.schema(), sstable_version_types::me), muts);
+        auto sst = make_sstable_containing(env.make_sstable(table.schema(), sstable_version_types::me), muts).get();

        // Use the index to find key positions.
        std::vector<uint64_t> partition_positions = get_partition_positions(sst, permit);
@@ -482,7 +482,7 @@ SEASTAR_TEST_CASE(test_inexact_partition_index_singular_query) {
        }

        // Generate the sstable.
-        auto sst = make_sstable_containing(env.make_sstable(table.schema()), muts);
+        auto sst = make_sstable_containing(env.make_sstable(table.schema()), muts).get();

        // Use the index to find key positions.
        std::vector<uint64_t> partition_positions = get_partition_positions(sst, permit);
@@ -155,7 +155,7 @@ SEASTAR_THREAD_TEST_CASE(test_sstable_clone_preserves_staging_state) {
    auto schema = ss.schema();

    // Create an sstable in normal state.
-    auto sst = make_sstable_containing(env.make_sst_factory(schema), {ss.new_mutation("key1")});
+    auto sst = make_sstable_containing(env.make_sst_factory(schema), {ss.new_mutation("key1")}).get();

    // Move it to staging state.
    sst->change_state(sstable_state::staging).get();
@@ -350,7 +350,7 @@ SEASTAR_TEST_CASE(read_partial_range_2) {
 static
 mutation_source make_sstable_mutation_source(sstables::test_env& env, schema_ptr s, utils::chunked_vector<mutation> mutations,
        sstables::sstable::version_types version, db_clock::time_point query_time = db_clock::now()) {
-    return make_sstable_easy(env, make_memtable(s, mutations), env.manager().configure_writer(), version, mutations.size(), query_time)->as_mutation_source();
+    return make_sstable_easy(env, make_memtable(s, mutations).get(), env.manager().configure_writer(), version, mutations.size(), query_time)->as_mutation_source();
 }

 SEASTAR_TEST_CASE(test_sstable_can_write_and_read_range_tombstone) {
@@ -370,7 +370,7 @@ SEASTAR_TEST_CASE(test_sstable_can_write_and_read_range_tombstone) {
    auto ttl = gc_clock::now() + std::chrono::seconds(1);
    m.partition().apply_delete(*s, range_tombstone(c_key_start, bound_kind::excl_start, c_key_end, bound_kind::excl_end, tombstone(9, ttl)));

-    auto mt = make_memtable(s, {std::move(m)});
+    auto mt = make_memtable(s, {std::move(m)}).get();

    verify_mutation(env, env.make_sstable(s), mt, query::full_partition_range, [&] (mutation_opt& mut) {
        BOOST_REQUIRE(bool(mut));
@@ -749,7 +749,7 @@ SEASTAR_TEST_CASE(test_non_compound_table_row_is_not_marked_as_static) {
        auto cell = atomic_cell::make_live(*int32_type, 1, int32_type->decompose(17), { });
        m.set_clustered_cell(ck, *s->get_column_definition("v"), std::move(cell));

-        auto sst = make_sstable_containing(env.make_sstable(s, version), {std::move(m)});
+        auto sst = make_sstable_containing(env.make_sstable(s, version), {std::move(m)}).get();
        auto mut = with_closeable(sst->make_reader(s, env.make_reader_permit(), query::full_partition_range, s->full_slice()), [] (auto& mr) {
            return read_mutation_from_mutation_reader(mr);
        }).get();
@@ -774,7 +774,7 @@ SEASTAR_TEST_CASE(test_has_partition_key) {
        auto cell = atomic_cell::make_live(*int32_type, 1, int32_type->decompose(17), { });
        m.set_clustered_cell(ck, *s->get_column_definition("v"), std::move(cell));

-        auto sst = make_sstable_containing(env.make_sstable(s, version), {std::move(m)});
+        auto sst = make_sstable_containing(env.make_sstable(s, version), {std::move(m)}).get();
        auto hk = sstables::sstable::make_hashed_key(*s, dk.key());
        auto mr = sst->make_reader(s, env.make_reader_permit(), query::full_partition_range, s->full_slice());
        auto close_mr = deferred_close(mr);
@@ -825,7 +825,7 @@ SEASTAR_TEST_CASE(test_promoted_index_blocks_are_monotonic) {
            bound_kind::incl_end,
            {1, gc_clock::now()}));

-    auto mt = make_memtable(s, {std::move(m)});
+    auto mt = make_memtable(s, {std::move(m)}).get();
    sstable_writer_config cfg = env.manager().configure_writer();
    cfg.promoted_index_block_size = 1;
    cfg.promoted_index_auto_scale_threshold = 0; // disable auto-scaling
@@ -860,7 +860,7 @@ SEASTAR_TEST_CASE(test_promoted_index_blocks_are_monotonic_with_auto_scaling) {
            bound_kind::incl_end,
            {1, gc_clock::now()}));

-    auto mt = make_memtable(s, {std::move(m)});
+    auto mt = make_memtable(s, {std::move(m)}).get();
    sstable_writer_config cfg = env.manager().configure_writer();
    cfg.promoted_index_block_size = 1;
    cfg.promoted_index_auto_scale_threshold = 100; // set to a low value to trigger auto-scaling
@@ -903,7 +903,7 @@ SEASTAR_TEST_CASE(test_promoted_index_blocks_are_monotonic_compound_dense) {
            bound_kind::incl_end,
            {1, gc_clock::now()}));

-    auto mt = make_memtable(s, {m});
+    auto mt = make_memtable(s, {m}).get();
    sstable_writer_config cfg = env.manager().configure_writer();
    cfg.promoted_index_block_size = 1;

@@ -952,7 +952,7 @@ SEASTAR_TEST_CASE(test_promoted_index_blocks_are_monotonic_non_compound_dense) {
            bound_kind::incl_end,
            {1, gc_clock::now()}));

-    auto mt = make_memtable(s, {m});
+    auto mt = make_memtable(s, {m}).get();
    sstable_writer_config cfg = env.manager().configure_writer();
    cfg.promoted_index_block_size = 1;

@@ -998,7 +998,7 @@ SEASTAR_TEST_CASE(test_promoted_index_repeats_open_tombstones) {
        auto ck = clustering_key::from_exploded(*s, {bytes_type->decompose(data_value(to_bytes("ck3")))});
        m.set_clustered_cell(ck, *s->get_column_definition("v"), atomic_cell(*int32_type, cell));

-        auto mt = make_memtable(s, {m});
+        auto mt = make_memtable(s, {m}).get();
        sstable_writer_config cfg = env.manager().configure_writer();
        cfg.promoted_index_block_size = 1;

@@ -1034,7 +1034,7 @@ SEASTAR_TEST_CASE(test_range_tombstones_are_correctly_seralized_for_non_compound
            bound_kind::incl_end,
            {1, gc_clock::now()}));

-    auto mt = make_memtable(s, {m});
+    auto mt = make_memtable(s, {m}).get();
    sstable_writer_config cfg = env.manager().configure_writer();

    auto sst = make_sstable_easy(env, mt, cfg, version);
@@ -1063,7 +1063,7 @@ SEASTAR_TEST_CASE(test_promoted_index_is_absent_for_schemas_without_clustering_k
        auto cell = atomic_cell::make_live(*int32_type, 1, int32_type->decompose(v), { });
        m.set_clustered_cell(clustering_key_prefix::make_empty(), *s->get_column_definition("v"), atomic_cell(*int32_type, cell));
    }
-    auto mt = make_memtable(s, {m});
+    auto mt = make_memtable(s, {m}).get();
    sstable_writer_config cfg = env.manager().configure_writer();
    cfg.promoted_index_block_size = 1;

@@ -1097,8 +1097,8 @@ SEASTAR_TEST_CASE(test_writing_combined_stream_with_tombstones_at_the_same_posit
    m2.partition().apply_delete(*s, rt2);
    ss.add_row(m2, ss.make_ckey(4), "v2"); // position inside rt2

-    auto mt1 = make_memtable(s, {m1});
-    auto mt2 = make_memtable(s, {m2});
+    auto mt1 = make_memtable(s, {m1}).get();
+    auto mt2 = make_memtable(s, {m2}).get();
    auto combined_permit = env.make_reader_permit();
    auto mr = make_combined_reader(s, combined_permit,
            mt1->make_mutation_reader(s, combined_permit), mt2->make_mutation_reader(s, combined_permit));
@@ -1323,7 +1323,7 @@ SEASTAR_TEST_CASE(test_reading_serialization_header) {
            tests::data_model::mutation_description::atomic_value(random_int32_value(), tests::data_model::data_timestamp, ttl, expiry_time));
    auto m2 = md2.build(s);

-    auto mt = make_memtable(s, {m1, m2});
+    auto mt = make_memtable(s, {m1, m2}).get();
    auto md1_overwrite = tests::data_model::mutation_description({ to_bytes("pk1") });
    md1_overwrite.add_clustered_row_marker({ to_bytes("ck1") }, 10);
    auto m1ow = md1_overwrite.build(s);
@@ -1422,7 +1422,7 @@ SEASTAR_TEST_CASE(test_counter_header_size) {
    }
    m.set_clustered_cell(ck, col, ccb.build(api::new_timestamp()));

-    auto mt = make_memtable(s, {m});
+    auto mt = make_memtable(s, {m}).get();
    for (const auto version : writable_sstable_versions) {
        auto sst = make_sstable_easy(env, mt, env.manager().configure_writer(), version);
        assert_that(sst->as_mutation_source().make_mutation_reader(s, env.make_reader_permit()))
@@ -74,7 +74,7 @@ void run_sstable_resharding_test(sstables::test_env& env) {
                mt->apply(std::move(m));
            }
        }
-        return make_sstable_containing(env.make_sstable(s, version), mt);
+        return make_sstable_containing(env.make_sstable(s, version), mt).get();
    });

    // FIXME: sstable write has a limitation in which it will generate sharding metadata only
@@ -185,7 +185,7 @@ SEASTAR_TEST_CASE(sstable_is_shared_correctness) {
            muts.push_back(get_mutation(s, k, 0));
        }

-        auto sst = make_sstable_containing(sst_gen, muts);
+        auto sst = make_sstable_containing(sst_gen, muts).get();
        BOOST_REQUIRE(!sst->is_shared());
        assert_sstable_computes_correct_owners(env, sst).get();
    }
@@ -205,7 +205,7 @@ SEASTAR_TEST_CASE(sstable_is_shared_correctness) {
            }
        }

-        auto sst = make_sstable_containing(sst_gen, muts);
+        auto sst = make_sstable_containing(sst_gen, muts).get();
        BOOST_REQUIRE(!sst->is_shared());

        auto all_shards_s = get_schema(smp::count, cfg->murmur3_partitioner_ignore_msb_bits());
@@ -431,7 +431,7 @@ SEASTAR_TEST_CASE(statistics_rewrite) {
        auto schema = random_schema.schema();

        const auto muts = tests::generate_random_mutations(random_schema, 2).get();
-        auto sstp = make_sstable_containing(env.make_sstable(schema, sstable::version_types::me), muts);
+        auto sstp = make_sstable_containing(env.make_sstable(schema, sstable::version_types::me), muts).get();

        auto toc_path = fmt::to_string(sstp->toc_filename());
        auto dir_path = std::filesystem::path(toc_path).parent_path().string();
@@ -920,7 +920,7 @@ static future<> test_component_digest_persistence(component_type component, ssta
    auto schema = random_schema.schema();

    const auto muts = tests::generate_random_mutations(random_schema, 2).get();
-    auto sst_original = make_sstable_containing(env.make_sstable(schema, version), muts);
+    auto sst_original = make_sstable_containing(env.make_sstable(schema, version), muts).get();

    auto& components = sstables::test(sst_original).get_components();
    bool has_component = components.find(component) != components.end();
@@ -1046,7 +1046,7 @@ static future<> test_component_digest_validation(component_type component, sstab
    auto schema = random_schema.schema();

    const auto muts = tests::generate_random_mutations(random_schema, 2).get();
-    auto sst = make_sstable_containing(env.make_sstable(schema, version), muts);
+    auto sst = make_sstable_containing(env.make_sstable(schema, version), muts).get();

    auto digest = sst->get_component_digest(component);
    BOOST_REQUIRE(digest.has_value());
@@ -336,14 +336,14 @@ inline dht::decorated_key make_dkey(schema_ptr s, bytes b)
 }

 // Must be called from a seastar thread.
-shared_sstable verify_mutation(test_env& env, shared_sstable sst, lw_shared_ptr<replica::memtable> mt, bytes key, std::function<void(mutation_opt&)> verify);
-inline shared_sstable verify_mutation(test_env& env, std::function<shared_sstable()> sst_gen, lw_shared_ptr<replica::memtable> mt, bytes key, std::function<void(mutation_opt&)> verify) {
+future<sstables::shared_sstable> verify_mutation(test_env& env, shared_sstable sst, lw_shared_ptr<replica::memtable> mt, bytes key, std::function<void(mutation_opt&)> verify);
+inline future<sstables::shared_sstable> verify_mutation(test_env& env, std::function<shared_sstable()> sst_gen, lw_shared_ptr<replica::memtable> mt, bytes key, std::function<void(mutation_opt&)> verify) {
     return verify_mutation(env, sst_gen(), std::move(mt), std::move(key), std::move(verify));
 }
-shared_sstable verify_mutation(test_env& env, shared_sstable sstp, bytes key, std::function<void(mutation_opt&)> verify);
+future<sstables::shared_sstable> verify_mutation(test_env& env, shared_sstable sstp, bytes key, std::function<void(mutation_opt&)> verify);

-shared_sstable verify_mutation(test_env& env, shared_sstable sst, lw_shared_ptr<replica::memtable> mt, dht::partition_range pr, std::function<stop_iteration(mutation_opt&)> verify);
-inline shared_sstable verify_mutation(test_env& env, std::function<shared_sstable()> sst_gen, lw_shared_ptr<replica::memtable> mt, dht::partition_range pr, std::function<stop_iteration(mutation_opt&)> verify) {
+future<sstables::shared_sstable> verify_mutation(test_env& env, shared_sstable sst, lw_shared_ptr<replica::memtable> mt, dht::partition_range pr, std::function<stop_iteration(mutation_opt&)> verify);
+inline future<sstables::shared_sstable> verify_mutation(test_env& env, std::function<shared_sstable()> sst_gen, lw_shared_ptr<replica::memtable> mt, dht::partition_range pr, std::function<stop_iteration(mutation_opt&)> verify) {
     return verify_mutation(env, sst_gen(), std::move(mt), std::move(pr), std::move(verify));
 }
-shared_sstable verify_mutation(test_env& env, shared_sstable sstp, dht::partition_range pr, std::function<stop_iteration(mutation_opt&)> verify);
+future<sstables::shared_sstable> verify_mutation(test_env& env, shared_sstable sstp, dht::partition_range pr, std::function<stop_iteration(mutation_opt&)> verify);
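The header keeps the factory-taking convenience overloads, but they now forward a future instead of a value. Because the inline overload never needs to co_await anything itself, it can stay a plain function that invokes the factory eagerly and returns the inner future unchanged, avoiding an extra coroutine frame. A generic sketch of that shape, with hypothetical names standing in for verify_mutation():

```cpp
#include <functional>
#include <seastar/core/coroutine.hh>
#include <seastar/core/future.hh>

// Hypothetical stand-ins mirroring the verify_mutation() overload pair.
// The future-returning core does the asynchronous work...
static seastar::future<int> verify_thing(int made) {
    co_return made * 2;
}

// ...while the inline convenience overload invokes the factory eagerly and
// forwards the future as-is: no co_await, so no coroutine frame is created,
// and evaluation order matches the old synchronous version.
inline seastar::future<int> verify_thing(std::function<int()> factory) {
    return verify_thing(factory());
}
```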
@@ -32,7 +32,7 @@ custom_args:
   sstable_datafile_test:
     - '-c1 -m2G'
   sstable_compaction_test:
-    - '-c1 -m2G --logger-log-level compaction=debug --logger-log-level compaction_manager=debug --logger-log-level s3=debug --logger-log-level gcp_storage=debug'
+    - '-c1 -m2G --logger-log-level compaction=debug --logger-log-level compaction_manager=debug --logger-log-level s3=trace --logger-log-level gcp_storage=trace --logger-log-level http=trace --logger-log-level default_http_retry_strategy=trace'
   sstable_3_x_test:
     - '-c1 -m2G'
   cql_query_test:
@@ -830,7 +830,7 @@ SEASTAR_THREAD_TEST_CASE(test_view_update_generator_buffering) {

    auto permit = sem.obtain_permit(schema, get_name(), replica::new_reader_base_cost, db::no_timeout, {}).get();

-    auto mt = make_memtable(schema, muts);
+    auto mt = make_memtable(schema, muts).get();
    auto p = make_manually_paused_evictable_reader(
            mt->as_data_source(),
            schema,
@@ -930,7 +930,7 @@ SEASTAR_THREAD_TEST_CASE(test_view_update_generator_buffering_with_random_mutati
    reader_concurrency_semaphore sem(reader_concurrency_semaphore::for_tests{}, get_name(), 1, replica::new_reader_base_cost);
    auto stop_sem = deferred_stop(sem);
    const abort_source as;
-    auto mt = make_memtable(schema, {mut});
+    auto mt = make_memtable(schema, {mut}).get();
    auto permit = sem.obtain_permit(schema, get_name(), replica::new_reader_base_cost, db::no_timeout, {}).get();
    auto p = make_manually_paused_evictable_reader(
            mt->as_data_source(),
@@ -38,7 +38,7 @@ public:

    virtual future<> execute(reader_permit permit, db::result_collector& rc) override {
        return async([this, permit, &rc] {
-            auto mt = make_memtable(_s, _mutations);
+            auto mt = make_memtable(_s, _mutations).get();
            auto rdr = mt->make_mutation_reader(_s, permit);
            auto close_rdr = deferred_close(rdr);
            rdr.consume_pausable([&rc] (mutation_fragment_v2 mf) {
@@ -0,0 +1,65 @@
+#
+# Copyright (C) 2026-present ScyllaDB
+#
+# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
+#
+
+import pytest
+import logging
+
+from test.pylib.manager_client import ManagerClient
+from test.cluster.auth_cluster import extra_scylla_config_options as auth_config
+
+logger = logging.getLogger(__name__)
+
+
+@pytest.mark.asyncio
+async def test_permissions_removal_and_restart(manager: ManagerClient) -> None:
+    """Test that a node boots successfully when role_permissions contains a
+    ghost row with role and resource set but the permissions column missing.
+
+    The auth v2 migration (now removed) used INSERT to copy permission rows
+    from the legacy table, which created CQL row markers. Normal GRANT uses
+    UPDATE, which only writes collection cells without row markers. When
+    permissions were later revoked, the collection cells were tombstoned but
+    the row marker from the migration INSERT persisted. That leaves a row
+    with role and resource but no permissions column.
+
+    This test simulates that scenario:
+    1. INSERT permissions with row marker (simulating auth v2 migration)
+    2. REVOKE ALL permissions (tombstones the cells, marker survives)
+    3. Restart and verify the node boots successfully
+    """
+    servers = await manager.servers_add(1, config=auth_config)
+    cql, _ = await manager.get_ready_cql(servers)
+    server = servers[0]
+
+    await cql.run_async("CREATE ROLE scylla_admin WITH PASSWORD = 'x' AND LOGIN = true")
+    await cql.run_async("CREATE ROLE scylla_manager WITH PASSWORD = 'x' AND LOGIN = true")
+
+    # Simulate auth v2 migration: INSERT creates a row marker alongside the
+    # permission cells, unlike GRANT which uses UPDATE (no row marker).
+    await cql.run_async(
+        "INSERT INTO system.role_permissions (role, resource, permissions) "
+        "VALUES ('scylla_admin', 'roles/scylla_manager', {'ALTER', 'AUTHORIZE', 'DROP'})")
+
+    # Revoke all permissions — tombstones the collection cells, but the
+    # row marker from the INSERT survives, creating a ghost row.
+    await cql.run_async("REVOKE ALL ON ROLE scylla_manager FROM scylla_admin")
+
+    # Additional check: a row with an explicitly empty permissions set.
+    await cql.run_async("CREATE ROLE test_empty_perms WITH PASSWORD = 'x' AND LOGIN = true")
+    await cql.run_async(
+        "INSERT INTO system.role_permissions (role, resource) "
+        "VALUES ('test_empty_perms', 'roles/scylla_manager')")
+
+    # Restart — the auth cache loads the ghost row and must not crash
+    logger.info("Restarting node")
+    await manager.server_stop_gracefully(server.server_id)
+    await manager.server_start(server.server_id)
+
+    await manager.driver_connect()
+    cql, _ = await manager.get_ready_cql(servers)
+    rows = await cql.run_async("SELECT * FROM system.local")
+    assert len(rows) == 1, "Node should be functional after restart"
+    logger.info("Node restarted successfully")
@@ -398,4 +398,4 @@ async def key_provider(request, tmpdir, scylla_binary):

 @pytest.fixture(scope="function")
 def failure_detector_timeout(build_mode):
-    return 2000 * MODES_TIMEOUT_FACTOR[build_mode]
+    return 5000 * MODES_TIMEOUT_FACTOR[build_mode]
@@ -94,7 +94,7 @@ class TestCommitLog(Tester):
        files = glob.glob(f"{path}/*CommitLog-*.log")
        if not files:
            return 0, "" if include_stdout else 0
-        cmd_args = ["du", "-m"]
+        cmd_args = ["du", "-b"]
        cmd_args.extend(files)
        p = subprocess.Popen(cmd_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout, stderr = p.communicate()
@@ -118,7 +118,7 @@ class TestCommitLog(Tester):
                size += int(a[0])
            else:
                logger.warning(f"Unrecognized du output line: {l}")
-        return size, stdout.decode() if include_stdout else size
+        return size / (1024*1024), stdout.decode() if include_stdout else size

    def _segment_size_test(self, segment_size_in_mb, compressed=False):
        """Execute a basic commitlog test and validate the commitlog files"""
@@ -474,12 +474,12 @@ class TestCommitLog(Tester):
        reach_threshold_cases = []
        # Scylla allows to create one more commitlog file out of the space limit
        # and commitlog segments may be go over commitlog_segment_size_in_mb in 1MB as well.
-        actual_space_limit = (total_space_limit // commitlog_segment_size_in_mb + 1) * (commitlog_segment_size_in_mb + 1)
+        actual_space_limit = total_space_limit + commitlog_segment_size_in_mb + 1

        def check_commitlog_size(allow_errors: bool):
            dir_size, stdout = self._get_commitlog_size(include_stdout=True, allow_errors=allow_errors)
            if dir_size > actual_space_limit and not allow_errors:
-                logger.debug(f"Commitlog file sizes in MB:\n{stdout}")
+                logger.info(f"Commitlog file sizes in MB:\n{stdout}")
                assert dir_size <= actual_space_limit, f"Out of total space limit\n"
            return dir_size
@@ -60,8 +60,8 @@ async def insert_with_concurrency(cql, table, value_count, concurrency):
 @pytest.mark.skip_mode(mode='release', reason="error injections aren't enabled in release mode")
 async def test_delete_partition_rows_from_table_with_mv(manager: ManagerClient) -> None:
     node_count = 2
-    await manager.servers_add(node_count, config={'error_injections_at_startup': ['view_update_limit', 'delay_before_remote_view_update']})
-    cql = manager.get_cql()
+    servers = await manager.servers_add(node_count, config={'error_injections_at_startup': ['view_update_limit', 'delay_before_remote_view_update', 'update_backlog_immediately']})
+    cql, hosts = await manager.get_ready_cql(servers)
     async with new_test_keyspace(manager, "WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 1}") as ks:
         await cql.run_async(f"CREATE TABLE {ks}.tab (key int, c int, PRIMARY KEY (key, c))")
         await insert_with_concurrency(cql, f"{ks}.tab", 200, 100)
@@ -71,8 +71,13 @@ async def test_delete_partition_rows_from_table_with_mv(manager: ManagerClient)

         await wait_for_view(cql, "mv_cf_view", node_count)

+        # The view building process elevates the view update backlog, potentially above the limit.
+        # When the view is built it should drop back down to 0, but this information may not reach
+        # the coordinator before the delete, so we perform an additional write on the same host before
+        # the delete - the current view update backlog will be propagated along with the write response.
+        await cql.run_async(f"INSERT INTO {ks}.tab (key, c) VALUES (0, 999)", host=hosts[0], timeout=300)
         logger.info(f"Deleting all rows from partition with key 0")
-        await cql.run_async(f"DELETE FROM {ks}.tab WHERE key = 0", timeout=300)
+        await cql.run_async(f"DELETE FROM {ks}.tab WHERE key = 0", host=hosts[0], timeout=300)

 # Test deleting a large partition when there is a view with the same partition
 # key, and verify that view updates metrics is increased by exactly 1. Deleting
@@ -1379,7 +1379,7 @@ async def test_alternator_invalid_shard_for_lwt(manager: ManagerClient):
     # The next barrier must be for the write_both_read_new, we need a guarantee
     # that the src_shard observed it
     logger.info("Waiting for the next barrier")
-    await log.wait_for(re.escape(f"[shard {src_shard}: gms] raft_topology - raft_topology_cmd::barrier_and_drain done"),
+    await log.wait_for(f"\\[shard {src_shard}: gms\\] raft_topology - raft_topology_cmd::barrier_and_drain.*done",
                        from_mark=m)

     # Now we have a guarantee that a new barrier succeeded on the src_shard,
@@ -376,7 +376,7 @@ async def test_lwt_fencing_upgrade(manager: ManagerClient, scylla_2025_1: Scylla
                                        },
                                        auto_rack_dc='dc1',
                                        version=scylla_2025_1)
-    (cql, hosts) = await manager.get_ready_cql(servers)
+    (cql, _) = await manager.get_ready_cql(servers)

     logger.info("Create a test keyspace")
     async with new_test_keyspace(manager, "WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 3}") as ks:
@@ -429,13 +429,7 @@ async def test_lwt_fencing_upgrade(manager: ManagerClient, scylla_2025_1: Scylla
         # so the LWT workload doesn’t fail if the driver suddenly sees all nodes as “down”.
         if s == servers[-1]:
             logger.info("Wait all nodes are up")
-            async def all_hosts_are_alive():
-                for h in hosts:
-                    if not h.is_up:
-                        logger.info(f"Host {h} is down, continue waiting")
-                        return None
-                return True
-            await wait_for(all_hosts_are_alive, deadline=time.time() + 60, period=0.1)
+            await wait_for_cql_and_get_hosts(cql, servers, time.time() + 60)
         logger.info(f"Upgrading {s.server_id}")
         await manager.server_change_version(s.server_id, scylla_binary)
         await manager.server_sees_others(s.server_id, 2, interval=60.0)
@@ -961,7 +961,7 @@ async def test_tablets_merge_waits_for_lwt(manager: ManagerClient, scale_timeout
     logger.info("Wait for the global barrier to start draining on shard0")
     await log0.wait_for("\\[shard 0: gms\\] raft_topology - Got raft_topology_cmd::barrier_and_drain", from_mark=m)
     # Just to confirm that the guard still holds the erm
-    matches = await log0.grep("\\[shard 0: gms\\] raft_topology - raft_topology_cmd::barrier_and_drain done", from_mark=m)
+    matches = await log0.grep("\\[shard 0: gms\\] raft_topology - raft_topology_cmd::barrier_and_drain.*done", from_mark=m)
     assert len(matches) == 0

     # Before the fix, the tablet migration global barrier did not wait for the LWT operation.
@@ -378,7 +378,7 @@ async def test_tablet_merge_cross_rack_migrations(manager: ManagerClient, racks)
 @pytest.mark.asyncio
 @pytest.mark.skip_mode(mode='release', reason='error injections are not supported in release mode')
 async def test_tablet_split_merge_with_many_tables(build_mode: str, manager: ManagerClient, racks = 2):
-    cmdline = ['--smp', '4', '-m', '2G', '--target-tablet-size-in-bytes', '30000', '--max-task-backlog', '200',]
+    cmdline = ['--smp', '4', '-m', '2G', '--target-tablet-size-in-bytes', '30000', '--max-task-backlog', '200', '--logger-log-level', 'load_balancer=debug']
     config = {'tablet_load_stats_refresh_interval_in_seconds': 1}

     servers = []
@@ -390,7 +390,7 @@ async def test_tablet_split_merge_with_many_tables(build_mode: str, manager: Man
     cql = manager.get_cql()
     ks = await create_new_test_keyspace(cql, f"WITH replication = {{'class': 'NetworkTopologyStrategy', 'replication_factor': {rf}}} AND tablets = {{'initial': 1}}")
     await cql.run_async(f"CREATE TABLE {ks}.test (pk int PRIMARY KEY, c blob) WITH compression = {{'sstable_compression': ''}};")
-    num_tables = 200 if build_mode != 'debug' else 20
+    num_tables = 200 if build_mode != 'debug' else 5
     await asyncio.gather(*[cql.run_async(f"CREATE TABLE {ks}.test{i} (pk int PRIMARY KEY, c blob);") for i in range(1, num_tables)])

     async def check_logs(when):
@@ -23,20 +23,16 @@
 using namespace sstables;
 using namespace std::chrono_literals;

-lw_shared_ptr<replica::memtable> make_memtable(schema_ptr s, const utils::chunked_vector<mutation>& muts) {
+future<lw_shared_ptr<replica::memtable>> make_memtable(schema_ptr s, const utils::chunked_vector<mutation>& muts) {
     auto mt = make_lw_shared<replica::memtable>(s);

-    std::size_t i{0};
     for (auto&& m : muts) {
         mt->apply(m);
-        // Give the reactor some time to breathe
-        if (++i == 10) {
-            seastar::thread::yield();
-            i = 0;
-        }
+        co_await coroutine::maybe_yield();
     }

-    return mt;
+    co_return mt;
 }

 std::vector<replica::memtable*> active_memtables(replica::table& t) {
@@ -47,27 +43,24 @@ std::vector<replica::memtable*> active_memtables(replica::table& t) {
     return active_memtables;
 }

-sstables::shared_sstable make_sstable_containing(std::function<sstables::shared_sstable()> sst_factory, lw_shared_ptr<replica::memtable> mt) {
+future<sstables::shared_sstable> make_sstable_containing(std::function<sstables::shared_sstable()> sst_factory, lw_shared_ptr<replica::memtable> mt) {
     return make_sstable_containing(sst_factory(), std::move(mt));
 }

-sstables::shared_sstable make_sstable_containing(sstables::shared_sstable sst, lw_shared_ptr<replica::memtable> mt) {
-    write_memtable_to_sstable(*mt, sst).get();
+future<sstables::shared_sstable> make_sstable_containing(sstables::shared_sstable sst, lw_shared_ptr<replica::memtable> mt) {
+    co_await write_memtable_to_sstable(*mt, sst);
     sstable_open_config cfg { .load_first_and_last_position_metadata = true };
-    sst->open_data(cfg).get();
-    return sst;
+    co_await sst->open_data(cfg);
+    co_return sst;
 }

-sstables::shared_sstable make_sstable_containing(std::function<sstables::shared_sstable()> sst_factory, utils::chunked_vector<mutation> muts, validate do_validate) {
-    return make_sstable_containing(sst_factory(), std::move(muts), do_validate);
-}
-
-sstables::shared_sstable make_sstable_containing(sstables::shared_sstable sst, utils::chunked_vector<mutation> muts, validate do_validate) {
+future<sstables::shared_sstable> make_sstable_containing(sstables::shared_sstable sst, utils::chunked_vector<mutation> muts, validate do_validate) {
     schema_ptr s = muts[0].schema();
-    make_sstable_containing(sst, make_memtable(s, muts));
+    co_await make_sstable_containing(sst, co_await make_memtable(s, muts));

     if (do_validate) {
-        tests::reader_concurrency_semaphore_wrapper semaphore;
+        reader_concurrency_semaphore sem(
+                reader_concurrency_semaphore::no_limits{}, "make_sstable_containing", reader_concurrency_semaphore::register_metrics::no);

         std::set<mutation, mutation_decorated_key_less_comparator> merged;
         for (auto&& m : muts) {
@@ -79,16 +72,25 @@ sstables::shared_sstable make_sstable_containing(sstables::shared_sstable sst, u
             old.value().apply(std::move(m));
             merged.insert(std::move(old));
         }
+        co_await coroutine::maybe_yield();
         }

         // validate the sstable
-        auto rd = assert_that(sst->as_mutation_source().make_mutation_reader(s, semaphore.make_permit()));
+        auto rd = sst->as_mutation_source().make_mutation_reader(s, sem.make_tracking_only_permit(nullptr, "test", db::no_timeout, {}));
         for (auto&& m : merged) {
-            rd.produces(m);
+            auto mo = co_await read_mutation_from_mutation_reader(rd);
+            BOOST_REQUIRE(mo);
+            assert_that(*mo).is_equal_to_compacted(m);
+            co_await coroutine::maybe_yield();
         }
-        rd.produces_end_of_stream();
+        co_await rd.close();
+        co_await sem.stop();
     }
-    return sst;
+    co_return sst;
 }

+future<sstables::shared_sstable> make_sstable_containing(std::function<sstables::shared_sstable()> sst_factory, utils::chunked_vector<mutation> muts, validate do_validate) {
+    return make_sstable_containing(sst_factory(), std::move(muts), do_validate);
+}
+
 shared_sstable make_sstable_easy(test_env& env, mutation_reader rd, sstable_writer_config cfg,
@@ -154,34 +156,34 @@ future<> run_compaction_task(test_env& env, sstables::run_id output_run_id, comp
     co_await tcm.perform_compaction(std::move(task));
 }

-shared_sstable verify_mutation(test_env& env, shared_sstable sst, lw_shared_ptr<replica::memtable> mt, bytes key, std::function<void(mutation_opt&)> verify) {
-    auto sstp = make_sstable_containing(std::move(sst), mt);
-    return verify_mutation(env, std::move(sstp), std::move(key), std::move(verify));
+future<sstables::shared_sstable> verify_mutation(test_env& env, shared_sstable sst, lw_shared_ptr<replica::memtable> mt, bytes key, std::function<void(mutation_opt&)> verify) {
+    auto sstp = co_await make_sstable_containing(std::move(sst), mt);
+    co_return co_await verify_mutation(env, std::move(sstp), std::move(key), std::move(verify));
 }

-shared_sstable verify_mutation(test_env& env, shared_sstable sstp, bytes key, std::function<void(mutation_opt&)> verify) {
+future<sstables::shared_sstable> verify_mutation(test_env& env, shared_sstable sstp, bytes key, std::function<void(mutation_opt&)> verify) {
     auto s = sstp->get_schema();
     auto pr = dht::partition_range::make_singular(make_dkey(s, key));
     auto rd = sstp->make_reader(s, env.make_reader_permit(), pr, s->full_slice());
-    auto close_rd = deferred_close(rd);
-    auto mopt = read_mutation_from_mutation_reader(rd).get();
+    auto mopt = co_await read_mutation_from_mutation_reader(rd);
     verify(mopt);
-    return sstp;
+    co_await rd.close();
+    co_return sstp;
 }

-shared_sstable verify_mutation(test_env& env, shared_sstable sst, lw_shared_ptr<replica::memtable> mt, dht::partition_range pr, std::function<stop_iteration(mutation_opt&)> verify) {
-    auto sstp = make_sstable_containing(std::move(sst), mt);
-    return verify_mutation(env, std::move(sstp), std::move(pr), std::move(verify));
+future<sstables::shared_sstable> verify_mutation(test_env& env, shared_sstable sst, lw_shared_ptr<replica::memtable> mt, dht::partition_range pr, std::function<stop_iteration(mutation_opt&)> verify) {
+    auto sstp = co_await make_sstable_containing(std::move(sst), mt);
+    co_return co_await verify_mutation(env, std::move(sstp), std::move(pr), std::move(verify));
 }

-shared_sstable verify_mutation(test_env& env, shared_sstable sstp, dht::partition_range pr, std::function<stop_iteration(mutation_opt&)> verify) {
+future<sstables::shared_sstable> verify_mutation(test_env& env, shared_sstable sstp, dht::partition_range pr, std::function<stop_iteration(mutation_opt&)> verify) {
     auto s = sstp->get_schema();
     auto rd = sstp->make_reader(s, env.make_reader_permit(), std::move(pr), s->full_slice());
-    auto close_rd = deferred_close(rd);
-    while (auto mopt = read_mutation_from_mutation_reader(rd).get()) {
+    while (auto mopt = co_await read_mutation_from_mutation_reader(rd)) {
         if (verify(mopt) == stop_iteration::yes) {
             break;
         }
     }
-    return sstp;
+    co_await rd.close();
+    co_return sstp;
 }
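The conversion above also replaces the hand-rolled yield throttle (count iterations, call seastar::thread::yield() every tenth one, which only works on a seastar thread) with coroutine::maybe_yield(), which works in any coroutine and suspends only when the task quota is actually exhausted. A minimal sketch of the pattern, assuming only public Seastar headers:

```cpp
#include <seastar/core/coroutine.hh>
#include <seastar/coroutine/maybe_yield.hh>
#include <vector>

// Sketch of the loop shape used by make_memtable() above: do one unit of
// work per iteration, then give the reactor a chance to run other tasks.
seastar::future<long> sum_all(std::vector<long> xs) {
    long total = 0;
    for (auto x : xs) {
        total += x;
        // Cheap check; suspends only if this task has used up its quota.
        co_await seastar::coroutine::maybe_yield();
    }
    co_return total;
}
```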
@@ -26,11 +26,10 @@ using namespace sstables;
 using namespace std::chrono_literals;

 using validate = bool_class<struct validate_tag>;
-// Must be called in a seastar thread.
-sstables::shared_sstable make_sstable_containing(std::function<sstables::shared_sstable()> sst_factory, lw_shared_ptr<replica::memtable> mt);
-sstables::shared_sstable make_sstable_containing(sstables::shared_sstable sst, lw_shared_ptr<replica::memtable> mt);
-sstables::shared_sstable make_sstable_containing(std::function<sstables::shared_sstable()> sst_factory, utils::chunked_vector<mutation> muts, validate do_validate = validate::yes);
-sstables::shared_sstable make_sstable_containing(sstables::shared_sstable sst, utils::chunked_vector<mutation> muts, validate do_validate = validate::yes);
+future<sstables::shared_sstable> make_sstable_containing(std::function<sstables::shared_sstable()> sst_factory, lw_shared_ptr<replica::memtable> mt);
+future<sstables::shared_sstable> make_sstable_containing(sstables::shared_sstable sst, lw_shared_ptr<replica::memtable> mt);
+future<sstables::shared_sstable> make_sstable_containing(std::function<sstables::shared_sstable()> sst_factory, utils::chunked_vector<mutation> muts, validate do_validate = validate::yes);
+future<sstables::shared_sstable> make_sstable_containing(sstables::shared_sstable sst, utils::chunked_vector<mutation> muts, validate do_validate = validate::yes);

 namespace sstables {

@@ -270,5 +269,5 @@ inline shared_sstable make_sstable_easy(test_env& env, lw_shared_ptr<replica::me
     return make_sstable_easy(env, std::move(mt), std::move(cfg), env.new_generation(), version, estimated_partitions, query_time);
 }

-lw_shared_ptr<replica::memtable> make_memtable(schema_ptr s, const utils::chunked_vector<mutation>& muts);
+future<lw_shared_ptr<replica::memtable>> make_memtable(schema_ptr s, const utils::chunked_vector<mutation>& muts);
 std::vector<replica::memtable*> active_memtables(replica::table& t);
@@ -197,6 +197,7 @@ std::vector<db::object_storage_endpoint_param> make_storage_options_config(const
             .port = std::stoul(tests::getenv_safe("S3_SERVER_PORT_FOR_TEST")),
             .use_https = ::getenv("AWS_DEFAULT_REGION") != nullptr,
             .region = tests::getenv_or_default("AWS_DEFAULT_REGION", "local"),
+            .max_connections = 32,
         });
     }
     if (os.type == data_dictionary::storage_options::GS_NAME) {
||||
@@ -203,3 +203,183 @@ SEASTAR_THREAD_TEST_CASE(test_wait_for_leader_on_aborted_server) {
SEASTAR_THREAD_TEST_CASE(test_wait_for_state_change_on_aborted_server) {
    test_func_on_aborted_server_aux(&raft::server::wait_for_state_change);
}

// Auxiliary function for testing add_entry behavior when a snapshot that
// includes the entry being added is taken before wait_for_entry runs.
//
// Uses a 1-node cluster with aggressive snapshotting and an error injection
// point that pauses add_entry after the entry is added to the log but before
// wait_for_entry checks its status. During the pause, the entry is committed,
// applied, and a snapshot is taken.
//
// If `advance_snapshot_past_entry` is true, a second entry is added so the
// snapshot moves past the first entry's index, fully truncating it from the
// log (term_for returns nullopt). Otherwise the snapshot is taken at the
// entry's index (term_for returns the snapshot's term).
//
// In both cases, wait_for_entry should succeed for both wait types, since
// the snapshot's term matching the entry's term proves the entry was committed
// and included in the snapshot.
static void test_add_entry_load_snapshot_before_wait_aux(raft::wait_type type, bool advance_snapshot_past_entry) {
#ifndef SCYLLA_ENABLE_ERROR_INJECTION
    std::cerr << "Skipping test as it depends on error injection. Please run in a mode where it's enabled (debug,dev).\n";
    return;
#endif
    const size_t command_size = sizeof(size_t);
    test_case test_config {
        .nodes = 1,
        .config = std::vector<raft::server::configuration>({
            raft::server::configuration {
                // Snapshot after every entry; truncate aggressively.
                .snapshot_threshold = 1,
                .snapshot_threshold_log_size = 1,
                .snapshot_trailing = 0,
                .snapshot_trailing_size = 0,
                .max_log_size = 10 * (command_size + sizeof(raft::log_entry)),
                .enable_forwarding = false,
                .max_command_size = command_size
            }
        })
    };
    // apply_entries must be greater than the number of entries added
    // during the test, otherwise the state machine's done promise fires
    // prematurely.
    auto cluster = raft_cluster<std::chrono::steady_clock>{
        std::move(test_config),
        ::apply_changes,
        100, // apply_entries
        0,
        0, false, tick_delay, rpc_config{}
    };
    cluster.start_all().get();
    auto stop = defer([&cluster] { cluster.stop_all().get(); });

    cluster.add_entries(5, 0).get();

    // one_shot: only the first add_entry is paused; the second one
    // (if used) bypasses the injection.
    utils::get_local_injector().enable("block_raft_add_entry_before_wait_for_entry", true);

    auto& server = cluster.get_server(0);
    auto fut = server.add_entry(create_command(42), type, nullptr);

    // Wait for add_entry(42) to reach the injection point.
    while (utils::get_local_injector().is_enabled("block_raft_add_entry_before_wait_for_entry")) {
        seastar::thread::yield();
    }

    // Wait for the entry to be applied.
    server.read_barrier(nullptr).get();

    if (advance_snapshot_past_entry) {
        // Add another entry so the snapshot moves past the first entry,
        // fully truncating it from the log (term_for returns nullopt).
        // The injection is one-shot and already consumed, so this goes through.
        server.add_entry(create_command(43), raft::wait_type::applied, nullptr).get();
    }

    // Take a snapshot, truncating the entry from the log.
    server.trigger_snapshot(nullptr).get();

    // Unblock wait_for_entry.
    utils::get_local_injector().receive_message("block_raft_add_entry_before_wait_for_entry");

    // Both wait types should succeed: the snapshot's term matches the entry's
    // term, proving the entry was committed and included in the snapshot.
    BOOST_CHECK_NO_THROW(fut.get());
}

// Snapshot at the entry's index: term_for(eid.idx) returns the snapshot's term.
// Tests the wait_for_entry site where the removed `applied` check used to throw
// commit_status_unknown.
SEASTAR_THREAD_TEST_CASE(test_add_entry_applied_load_snapshot_at_entry) {
    test_add_entry_load_snapshot_before_wait_aux(raft::wait_type::applied, false);
}

SEASTAR_THREAD_TEST_CASE(test_add_entry_committed_load_snapshot_at_entry) {
    test_add_entry_load_snapshot_before_wait_aux(raft::wait_type::committed, false);
}

// Snapshot past the entry's index: term_for(eid.idx) returns nullopt.
// Tests the `!term` branch in wait_for_entry where `snap_term == eid.term`
// now succeeds for both wait types.
SEASTAR_THREAD_TEST_CASE(test_add_entry_applied_load_snapshot_past_entry) {
    test_add_entry_load_snapshot_before_wait_aux(raft::wait_type::applied, true);
}

SEASTAR_THREAD_TEST_CASE(test_add_entry_committed_load_snapshot_past_entry) {
    test_add_entry_load_snapshot_before_wait_aux(raft::wait_type::committed, true);
}

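The four test cases above all exercise the same decision point in wait_for_entry. An illustrative reconstruction of that decision follows; this is a sketch based on the comments above, not the actual ScyllaDB code, and log_t, entry_id, waiter_t and their members are hypothetical stand-ins:

// Illustrative reconstruction only; not ScyllaDB's wait_for_entry.
void resolve_waiter(log_t& log, entry_id eid, waiter_t& waiter) {    // hypothetical types
    std::optional<raft::term_t> term = log.term_for(eid.idx);
    if (!term) {
        // The snapshot advanced past the entry; fall back to the snapshot's term.
        if (log.get_snapshot().term == eid.term) {
            waiter.promise.set_value();                  // provably committed and in the snapshot
        } else {
            waiter.promise.set_exception(raft::commit_status_unknown());
        }
    } else if (*term == eid.term) {
        // Entry still in the log (or snapshot taken exactly at its index).
        waiter.promise.set_value();
    } else {
        waiter.promise.set_exception(raft::dropped_entry());
    }
}
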
// Auxiliary function for testing add_entry behavior when a follower receives
// the entry via a snapshot (load_snapshot) instead of applying it locally.
//
// Setup: 3-node cluster. Node 1 (follower) is blocked from receiving
// messages from the leader (node 0), but can still send to it. Node 1
// forwards add_entry to the leader, which commits the entry (with node 2),
// applies it, and takes a snapshot. When node 1 is reconnected, the leader
// sends a snapshot (since the log entries are truncated). Node 1 loads the
// snapshot via load_snapshot(), which calls drop_waiters(). The pending
// waiter for the forwarded entry is resolved successfully because the
// snapshot's term matches the entry's term.
static void test_add_entry_wait_resolved_via_drop_waiters_aux(raft::wait_type type) {
    const size_t command_size = sizeof(size_t);
    raft::server::configuration srv_config {
        .snapshot_threshold = 1,
        .snapshot_threshold_log_size = 1,
        .snapshot_trailing = 0,
        .snapshot_trailing_size = 0,
        .max_log_size = 10 * (command_size + sizeof(raft::log_entry)),
        .max_command_size = command_size
    };
    test_case test_config {
        .nodes = 3,
        .config = std::vector<raft::server::configuration>({srv_config, srv_config, srv_config})
    };
    // apply_entries must be greater than the number of entries added
    // during the test, otherwise the state machine's done promise fires
    // prematurely.
    auto cluster = raft_cluster<std::chrono::steady_clock>{
        std::move(test_config),
        ::apply_changes,
        100, // apply_entries
        0,
        0, false, tick_delay, rpc_config{}
    };
    cluster.start_all().get();
    auto stop = defer([&cluster] { cluster.stop_all().get(); });

    // Add a few entries so all nodes are caught up.
    cluster.add_entries(5, 0).get();

    // Block node 1 from receiving messages from node 0 (leader).
    // Node 1 can still send to node 0 (forwarding works).
    cluster.block_receive(1, 0);

    // Node 1 forwards add_entry to node 0. Node 0 commits (with node 2),
    // applies, and takes a snapshot. Node 1 registers a waiter but never
    // receives the entry via append entries.
    auto& follower = cluster.get_server(1);
    auto fut = follower.add_entry(create_command(42), type, nullptr);

    // Wait for the leader to commit, apply, and snapshot the entry.
    auto& leader = cluster.get_server(0);
    leader.read_barrier(nullptr).get();
    leader.trigger_snapshot(nullptr).get();

    // Reconnect node 1. The leader will send a snapshot since the log
    // entries are truncated (snapshot_trailing = 0).
    cluster.connect_all();

    // drop_waiters resolves the waiter successfully since the snapshot's
    // term matches the entry's term, proving it was committed.
    BOOST_CHECK_NO_THROW(fut.get());
}

SEASTAR_THREAD_TEST_CASE(test_add_entry_applied_wait_resolved_via_drop_waiters) {
    test_add_entry_wait_resolved_via_drop_waiters_aux(raft::wait_type::applied);
}

SEASTAR_THREAD_TEST_CASE(test_add_entry_committed_wait_resolved_via_drop_waiters) {
    test_add_entry_wait_resolved_via_drop_waiters_aux(raft::wait_type::committed);
}

@@ -221,9 +221,25 @@ raft::command make_command(const cmd_id_t& cmd_id, const Input& input) {
    return cmd;
}

+// Indicates that add_entry succeeded but apply() was not called locally
+// for this entry — a snapshot load subsumed it. The entry's effects
+// are included in the state machine's state (via the snapshot).
+struct apply_skipped {};
+
+std::ostream& operator<<(std::ostream& os, const apply_skipped&) {
+    return os << "apply_skipped";
+}
+
+template <>
+struct fmt::formatter<apply_skipped> : fmt::formatter<string_view> {
+    auto format(const apply_skipped&, fmt::format_context& ctx) const {
+        return fmt::format_to(ctx.out(), "apply_skipped");
+    }
+};
+
// TODO: handle other errors?
template <PureStateMachine M>
-using call_result_t = std::variant<typename M::output_t, timed_out_error, raft::not_a_leader, raft::dropped_entry, raft::commit_status_unknown, raft::stopped_error, raft::not_a_member>;
+using call_result_t = std::variant<typename M::output_t, timed_out_error, raft::not_a_leader, raft::dropped_entry, raft::commit_status_unknown, raft::stopped_error, raft::not_a_member, apply_skipped>;

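For illustration, a caller of call() can now distinguish the new outcome roughly like this. This is a hedged sketch, assuming a hypothetical state machine type M and the usual `overloaded` visitor helper, neither of which is part of the patch:

template <PureStateMachine M>
seastar::future<> run_and_classify() {
    call_result_t<M> r = co_await call<M>(/* input, timeout, environment: elided */);
    std::visit(overloaded{
        [] (typename M::output_t&) { /* apply() ran locally and produced an output */ },
        [] (apply_skipped&) { /* committed; the effects arrived via a snapshot */ },
        [] (auto&) { /* not_a_leader, timed_out_error, and the other error cases */ }
    }, r);
}
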
// Wait for a future `f` to finish, but keep the result inside a `future`.
// Works for `future<void>` as well as for `future<T>`.
@@ -319,7 +335,15 @@ future<call_result_t<M>> call(
            std::rethrow_exception(add_entry_f.get_exception());
        }

-       return std::move(output_f);
+       if (output_f.available()) {
+           return std::move(output_f);
+       }
+
+       // add_entry succeeded but apply() hasn't been called locally — a snapshot load subsumed this entry.
+       // The output channel will never be written to. Discard it and signal via output_channel_dropped, which
+       // is converted to apply_skipped in the outer exception handler.
+       (void)output_f.discard_result().handle_exception_type([] (const output_channel_dropped&) {});
+       throw output_channel_dropped{};
    });
}, std::move(input), std::move(f)));
}).then([] (output_t output) {

@@ -327,6 +351,8 @@ future<call_result_t<M>> call(
}).handle_exception([] (std::exception_ptr eptr) {
    try {
        std::rethrow_exception(eptr);
+   } catch (const output_channel_dropped&) {
+       return make_ready_future<call_result_t<M>>(apply_skipped{});
    } catch (raft::not_a_leader& e) {
        return make_ready_future<call_result_t<M>>(e);
    } catch (raft::not_a_member& e) {

@@ -3472,6 +3498,7 @@ SEASTAR_TEST_CASE(basic_generator_test) {
        size_t invocations{0};
        size_t successes{0};
        size_t failures{0};
+       size_t skipped_applies{0};
    };

    class consistency_checker {

@@ -3515,6 +3542,9 @@ SEASTAR_TEST_CASE(basic_generator_test) {
        // SCYLLA_ASSERT(false); TODO debug this
        ++_stats.failures;
    },
+   [this] (apply_skipped&) {
+       ++_stats.skipped_applies;
+   },
    [this] (auto&) {
        ++_stats.failures;
    }

@@ -3561,8 +3591,8 @@ SEASTAR_TEST_CASE(basic_generator_test) {
        SCYLLA_ASSERT(false);
    }

-   tlogger.info("Finished generator run, time: {}, invocations: {}, successes: {}, failures: {}, total: {}",
-       timer.now(), stats.invocations, stats.successes, stats.failures, stats.successes + stats.failures);
+   tlogger.info("Finished generator run, time: {}, invocations: {}, successes: {}, failures: {}, skipped applies: {}, total: {}",
+       timer.now(), stats.invocations, stats.successes, stats.failures, stats.skipped_applies, stats.successes + stats.failures + stats.skipped_applies);

    // Liveness check: we must be able to obtain a final response after all the nemeses have stopped.
    // Due to possible multiple leaders at this point and the cluster stabilizing (for example there

@@ -359,6 +359,9 @@ public:
    future<> stop_all();
    future<> wait_all();
    void disconnect(size_t id, std::optional<raft::server_id> except = std::nullopt);
+   // Block node `to` from receiving messages from node `from`.
+   // Messages in the reverse direction (from `to` to `from`) are unaffected.
+   void block_receive(size_t to, size_t from);
    void connect_all();
    void elapse_elections();
    future<> elect_new_leader(size_t new_leader);

@@ -520,6 +523,10 @@ struct raft_cluster<Clock>::connected {
        disconnected.insert({id1, id2});
        disconnected.insert({id2, id1});
    }
+   // Block `to` from receiving messages from `from` (one-way).
+   void block_receive(raft::server_id to, raft::server_id from) {
+       disconnected.insert({to, from});
+   }
    // Isolate a server
    void disconnect(raft::server_id id, std::optional<raft::server_id> except = std::nullopt) {
        for (size_t other = 0; other < n; ++other) {

@@ -545,8 +552,8 @@ struct raft_cluster<Clock>::connected {
        disconnected.clear();
    }
    bool operator()(raft::server_id id1, raft::server_id id2) {
-       // It's connected if both ways are not disconnected
-       return !disconnected.contains({id1, id2}) && !disconnected.contains({id1, id2});
+       // Can id2 send to id1? (i.e. is the id2->id1 connection not blocked?)
+       return !disconnected.contains({id1, id2});
    }
};

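A quick illustration of the one-way semantics this gives the test harness, derived directly from the operator() logic above (a and b are hypothetical raft::server_id values, c a connected instance):

c.block_receive(a, b);   // drop b -> a traffic only
assert(!c(a, b));        // a no longer receives from b
assert(c(b, a));         // a -> b still works, so forwarding to the leader keeps working
c.connect_all();         // clears the one-way block
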
@@ -927,6 +934,11 @@ void raft_cluster<Clock>::disconnect(size_t id, std::optional<raft::server_id> except) {
    _connected->disconnect(to_raft_id(id), except);
}

+template <typename Clock>
+void raft_cluster<Clock>::block_receive(size_t to, size_t from) {
+    _connected->block_receive(to_raft_id(to), to_raft_id(from));
+}
+
template <typename Clock>
void raft_cluster<Clock>::connect_all() {
    _connected->connect_all();

@@ -176,7 +176,7 @@ SEASTAR_TEST_CASE(vector_store_client_test_dns_resolving_repeated) {
    vs.start_background_tasks();

    // Wait for the DNS resolution to fail
-   BOOST_CHECK(co_await repeat_until(seconds(1), [&vs, &as]() -> future<bool> {
+   BOOST_CHECK(co_await repeat_until([&vs, &as]() -> future<bool> {
        auto addrs = co_await vector_store_client_tester::resolve_hostname(vs, as.reset());
        co_return addrs.empty();
    }));
@@ -184,7 +184,7 @@ SEASTAR_TEST_CASE(vector_store_client_test_dns_resolving_repeated) {
    fail_dns_resolution = false;

    // Wait for the DNS resolution to succeed
-   BOOST_CHECK(co_await repeat_until(seconds(1), [&vs, &as]() -> future<bool> {
+   BOOST_CHECK(co_await repeat_until([&vs, &as]() -> future<bool> {
        auto addrs = co_await vector_store_client_tester::resolve_hostname(vs, as.reset());
        co_return addrs.size() == 1;
    }));
@@ -193,12 +193,11 @@ SEASTAR_TEST_CASE(vector_store_client_test_dns_resolving_repeated) {
    BOOST_CHECK_EQUAL(print_addr(addrs1[0]), "127.0.0.1");

    fail_dns_resolution = true;
-   // Trigger DNS resolver to check for address changes
-   // Resolver will not re-check automatically after successful resolution
-   vector_store_client_tester::trigger_dns_resolver(vs);
-
-   // Wait for the DNS resolution to fail again
-   BOOST_CHECK(co_await repeat_until(seconds(1), [&vs, &as]() -> future<bool> {
+   // Wait for the DNS resolution to fail again.
+   // Trigger is called inside the loop to mitigate SCYLLADB-1794.
+   BOOST_CHECK(co_await repeat_until([&vs, &as]() -> future<bool> {
+       vector_store_client_tester::trigger_dns_resolver(vs);
        auto addrs = co_await vector_store_client_tester::resolve_hostname(vs, as.reset());
        co_return addrs.empty();
    }));
@@ -208,7 +207,7 @@ SEASTAR_TEST_CASE(vector_store_client_test_dns_resolving_repeated) {
    fail_dns_resolution = false;

    // Wait for the DNS resolution to succeed
-   BOOST_CHECK(co_await repeat_until(seconds(1), [&vs, &as]() -> future<bool> {
+   BOOST_CHECK(co_await repeat_until([&vs, &as]() -> future<bool> {
        auto addrs = co_await vector_store_client_tester::resolve_hostname(vs, as.reset());
        co_return addrs.size() == 1;
    }));

@@ -80,6 +80,16 @@ using coordinator_result = exceptions::coordinator_result<T>;

namespace cql_transport {

+// process_fn_return_type constructors/destructor - defined here because
+// response is an incomplete type in the header.
+cql_server::process_fn_return_type::process_fn_return_type(result_with_foreign_response_ptr r, std::optional<service::deferred_latency_mark> lm)
+    : result(std::move(r)), latency_mark(std::move(lm)) {}
+cql_server::process_fn_return_type::process_fn_return_type(result_with_bounce r, std::optional<service::deferred_latency_mark> lm)
+    : result(std::move(r)), latency_mark(std::move(lm)) {}
+cql_server::process_fn_return_type::process_fn_return_type(process_fn_return_type&&) noexcept = default;
+cql_server::process_fn_return_type& cql_server::process_fn_return_type::operator=(process_fn_return_type&&) noexcept = default;
+cql_server::process_fn_return_type::~process_fn_return_type() = default;

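The comment in the hunk above refers to a standard C++ idiom: a class holding a smart pointer to a forward-declared type must define its special member functions in a translation unit where that type is complete, or the implicitly generated destructor fails to compile. A minimal self-contained illustration with hypothetical names:

// widget.hh
#include <memory>
struct widget;                       // forward declaration only

struct holder {
    std::unique_ptr<widget> w;
    holder();
    ~holder();                       // declared here, defined where widget is complete
};

// widget.cc
struct widget { int x = 0; };
holder::holder() = default;          // widget is complete here, so the generated
holder::~holder() = default;         // destructor can legally call delete
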
static logging::logger clogger("cql_server");

/**

@@ -503,7 +513,9 @@ future<forward_cql_execute_response> cql_server::handle_forward_execute(
        std::move(req.cached_fn_calls),
        handling_node_bounce::yes));

-   if (auto* bounce_msg = std::get_if<cql_server::result_with_bounce>(&result)) {
+   if (auto* bounce_msg = std::get_if<cql_server::result_with_bounce>(&result.result)) {
+       // The request needs to be redirected — don't mark latency since
+       // no real work was done on this node.
        auto host = (*bounce_msg)->target_host();
        auto shard = (*bounce_msg)->target_shard();
        co_return forward_cql_execute_response{
@@ -513,7 +525,13 @@ future<forward_cql_execute_response> cql_server::handle_forward_execute(
        };
    }

-   auto& final_result = std::get<cql_server::result_with_foreign_response_ptr>(result);
+   // Mark latency on the target shard since forwarded requests don't
+   // go through the originating shard's transport flush path.
+   if (result.latency_mark && result.latency_mark->histogram) {
+       result.latency_mark->histogram->mark(result.latency_mark->lc.stop().latency());
+   }
+
+   auto& final_result = std::get<cql_server::result_with_foreign_response_ptr>(result.result);

    if (!final_result) {
        co_return co_await coroutine::try_future(final_result.assume_error().as_exception_future<forward_cql_execute_response>());
@@ -934,7 +952,7 @@ std::unique_ptr<cql_server::response> cql_server::handle_exception(int16_t stream,
        return make_error(stream, exceptions::exception_code::SERVER_ERROR, "unknown error", trace_state);
    }
}
-future<foreign_ptr<std::unique_ptr<cql_server::response>>>
+future<cql_server::response_with_latency>
cql_server::connection::process_request_one(fragmented_temporary_buffer::istream fbuf, uint8_t op, uint16_t stream, service::client_state& client_state, tracing_request_type tracing_request, service_permit permit) {
    using auth_state = service::client_state::auth_state;

@@ -1004,18 +1022,26 @@
            _version, get_dialect());
        default: return make_exception_future<process_fn_return_type>(exceptions::protocol_exception(format("Unknown opcode {:d}", int(cqlop))));
        }
-   }).then_wrapped([this, cqlop, &cql_stats, stream, &client_state, linearization_buffer = std::move(linearization_buffer), trace_state] (future<process_fn_return_type> f) {
+   }).then_wrapped([this, cqlop, &cql_stats, stream, &client_state, linearization_buffer = std::move(linearization_buffer), trace_state] (future<process_fn_return_type> f) -> future<response_with_latency> {
        auto stop_trace = defer([&] {
            tracing::stop_foreground(trace_state);
        });
-       return seastar::futurize_invoke([&] () {
+       // Extract latency mark so it survives error paths.
+       // On f.failed(), the mark was destroyed with the coroutine frame,
+       // so latency_mark stays nullopt — acceptable since the statement
+       // layer may not have run far enough to set a histogram.
+       std::optional<service::deferred_latency_mark> latency_mark;
+       std::exception_ptr eptr;
+       try {
            if (f.failed()) {
-               return make_exception_future<foreign_ptr<std::unique_ptr<cql_server::response>>>(std::move(f).get_exception());
+               std::rethrow_exception(std::move(f).get_exception());
            }

-           result_with_foreign_response_ptr res = std::get<result_with_foreign_response_ptr>(f.get());
+           auto ret = f.get();
+           latency_mark = std::move(ret.latency_mark);
+           result_with_foreign_response_ptr res = std::get<result_with_foreign_response_ptr>(std::move(ret.result));
            if (!res) {
-               return std::move(res).assume_error().as_exception_future<foreign_ptr<std::unique_ptr<cql_server::response>>>();
+               std::move(res).assume_error().throw_me();
            }

            auto response = std::move(res).assume_value();
@@ -1035,7 +1061,7 @@
            case auth_state::AUTHENTICATION:
                // Support both SASL auth from protocol v2 and the older style Credentials auth from v1
                if (cqlop != cql_binary_opcode::AUTH_RESPONSE && cqlop != cql_binary_opcode::CREDENTIALS) {
-                   return make_exception_future<foreign_ptr<std::unique_ptr<cql_server::response>>>(exceptions::protocol_exception(format("Unexpected message {:d}, expecting AUTH_RESPONSE or CREDENTIALS", int(cqlop))));
+                   throw exceptions::protocol_exception(format("Unexpected message {:d}, expecting AUTH_RESPONSE or CREDENTIALS", int(cqlop)));
                }
                if (res_op == cql_binary_opcode::READY || res_op == cql_binary_opcode::AUTH_SUCCESS) {
                    client_state.set_auth_state(auth_state::READY);
@@ -1048,15 +1074,17 @@

            tracing::set_response_size(trace_state, response->size());
            cql_stats.response_size.add(response->size());
-           return make_ready_future<foreign_ptr<std::unique_ptr<cql_server::response>>>(std::move(response));
-       }).handle_exception([this, stream, &client_state, trace_state] (std::exception_ptr eptr) {
-           auto response = _server.handle_exception(stream, eptr, trace_state, _version, client_state);
-           if (auto timeout = _server.timeout_for_sleep(eptr)) {
-               // Return read timeout exception, as we wait here until the timeout passes
-               return _server.sleep_until_timeout_passes(*timeout, std::move(response));
-           }
-           return utils::result_into_future<result_with_foreign_response_ptr>(std::move(response));
-       });
+           co_return response_with_latency{std::move(response), std::move(latency_mark)};
+       } catch (...) {
+           eptr = std::current_exception();
+       }
+       // Handle exception outside catch block so co_await is allowed.
+       auto response = _server.handle_exception(stream, eptr, trace_state, _version, client_state);
+       if (auto timeout = _server.timeout_for_sleep(eptr)) {
+           auto resp = co_await _server.sleep_until_timeout_passes(*timeout, make_foreign(std::move(response)));
+           co_return response_with_latency{std::move(resp), std::move(latency_mark)};
+       }
+       co_return response_with_latency{make_foreign(std::move(response)), std::move(latency_mark)};
    });
}

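The `// Handle exception outside catch block` comment above reflects a C++ coroutine rule worth spelling out: co_await is not permitted inside the compound statement of a catch handler. The usual workaround, in minimal form (do_work() and cleanup() are hypothetical):

seastar::future<int> do_work();   // hypothetical async operation
seastar::future<> cleanup();      // hypothetical async cleanup

seastar::future<int> example() {
    std::exception_ptr eptr;
    try {
        co_return co_await do_work();
    } catch (...) {
        eptr = std::current_exception();  // no co_await allowed in here
    }
    co_await cleanup();                   // fine: we are outside the catch block
    std::rethrow_exception(eptr);
}
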
@@ -1263,14 +1291,15 @@ future<> cql_server::connection::process_request() {
        op == uint8_t (cql_binary_opcode::EXECUTE) ||
        op == uint8_t(cql_binary_opcode::BATCH));

-   future<foreign_ptr<std::unique_ptr<cql_server::response>>> request_process_future = should_paralelize ?
+   future<response_with_latency> request_process_future = should_paralelize ?
        _process_request_stage(this, istream, op, stream, seastar::ref(_client_state), tracing_requested, mem_permit) :
        process_request_one(istream, op, stream, seastar::ref(_client_state), tracing_requested, mem_permit);

-   future<> request_response_future = request_process_future.then_wrapped([this, buf = std::move(buf), mem_permit, leave = std::move(leave), stream] (future<foreign_ptr<std::unique_ptr<cql_server::response>>> response_f) mutable {
+   future<> request_response_future = request_process_future.then_wrapped([this, buf = std::move(buf), mem_permit, leave = std::move(leave), stream] (future<response_with_latency> response_f) mutable {
        try {
            auto& sg_stats = _server.get_cql_sg_stats();
            size_t pending_response_size = 0;
+           std::optional<service::deferred_latency_mark> latency_mark;
            if (response_f.failed()) {
                const auto message = format("request processing failed, error [{}]", response_f.get_exception());
                clogger.error("{}: {}", _client_state.get_remote_address(), message);
@@ -1278,7 +1307,9 @@ future<> cql_server::connection::process_request() {
                    message,
                    tracing::trace_state_ptr()));
            } else {
-               auto response = response_f.get();
+               auto result = response_f.get();
+               latency_mark = std::move(result.latency_mark);
+               auto response = std::move(result.response);
                // Account for response body size exceeding the initial estimate.
                auto resp_size = response->size();
                auto permit_size = mem_permit.count();
@@ -1291,8 +1322,13 @@ future<> cql_server::connection::process_request() {
                sg_stats._pending_response_memory += pending_response_size;
                write_response(std::move(response), _compression);
            }
-           _ready_to_respond = _ready_to_respond.finally([leave = std::move(leave), permit = std::move(mem_permit), &sg_stats, pending_response_size] {
+           _ready_to_respond = _ready_to_respond.finally([leave = std::move(leave), permit = std::move(mem_permit), &sg_stats, pending_response_size, latency_mark = std::move(latency_mark)] () mutable {
                sg_stats._pending_response_memory -= pending_response_size;
+               // Stop the latency counter and mark the histogram now that
+               // the response has been flushed to the OS socket.
+               if (latency_mark && latency_mark->histogram) {
+                   latency_mark->histogram->mark(latency_mark->lc.stop().latency());
+               }
            });
        } catch (...) {
            clogger.error("{}: request processing failed: {}",
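The mechanism the hunks above thread through the transport layer can be summarized in isolation: create a mark when real work starts, carry it alongside the response, and record only after the response bytes have been handed to the socket. A self-contained sketch of that idea, using std::chrono in place of ScyllaDB's latency_counter (an assumption made purely for illustration):

#include <chrono>
#include <functional>
#include <optional>

struct deferred_mark {                                     // stand-in for service::deferred_latency_mark
    std::chrono::steady_clock::time_point start;
    std::function<void(std::chrono::nanoseconds)> record;  // e.g. a histogram's mark()
};

std::optional<deferred_mark> start_work() {                // statement layer: work begins
    return deferred_mark{std::chrono::steady_clock::now(),
                         [] (std::chrono::nanoseconds) { /* histogram.mark(...) */ }};
}

void on_response_flushed(std::optional<deferred_mark>& mark) {  // transport layer
    if (mark) {                                            // fire once the bytes hit the socket,
        mark->record(std::chrono::steady_clock::now() - mark->start);  // so flush time is included
        mark.reset();
    }
}
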
@@ -1531,6 +1567,7 @@ process_query_internal(service::client_state& client_state, sharded<cql3::query_
    }
    auto q_state = std::make_unique<cql_query_state>(client_state, trace_state, std::move(permit));
    auto& query_state = q_state->query_state;
+   query_state.start_latency();
    auto o = in.read_options(version, qp.local().get_cql_config());
    if (!o) {
        return make_exception_future<cql_server::process_fn_return_type>(std::move(o).assume_error());
@@ -1552,14 +1589,15 @@ process_query_internal(service::client_state& client_state, sharded<cql3::query_
    }

    return qp.local().execute_direct_without_checking_exception_message(query.assume_value(), query_state, dialect, options).then([q_state = std::move(q_state), stream, skip_metadata, version] (auto msg) {
+       auto latency_mark = q_state->query_state.take_deferred_latency();
        if (msg->as_bounce()) {
-           return cql_server::process_fn_return_type(make_foreign(static_pointer_cast<messages::result_message::bounce>(msg)));
+           return cql_server::process_fn_return_type(make_foreign(static_pointer_cast<messages::result_message::bounce>(msg)), std::move(latency_mark));
        } else if (msg->is_exception()) {
-           return cql_server::process_fn_return_type(convert_error_message_to_coordinator_result(msg.get()));
+           return cql_server::process_fn_return_type(convert_error_message_to_coordinator_result(msg.get()), std::move(latency_mark));
        } else {
            tracing::trace(q_state->query_state.get_trace_state(), "Done processing - preparing a result");

-           return cql_server::process_fn_return_type(make_foreign(make_result(stream, *msg, q_state->query_state.get_trace_state(), version, cql_metadata_id_wrapper{}, skip_metadata)));
+           return cql_server::process_fn_return_type(make_foreign(make_result(stream, *msg, q_state->query_state.get_trace_state(), version, cql_metadata_id_wrapper{}, skip_metadata)), std::move(latency_mark));
        }
    });
}
@@ -1629,6 +1667,7 @@ process_execute_internal(service::client_state& client_state, sharded<cql3::quer

    auto q_state = std::make_unique<cql_query_state>(client_state, trace_state, std::move(permit));
    auto& query_state = q_state->query_state;
+   query_state.start_latency();
    auto o = in.read_options(version, qp.local().get_cql_config());
    if (!o) {
        return make_exception_future<cql_server::process_fn_return_type>(std::move(o).assume_error());
@@ -1670,13 +1709,14 @@ process_execute_internal(service::client_state& client_state, sharded<cql3::quer
    tracing::trace(trace_state, "Processing a statement");
    return qp.local().execute_prepared_without_checking_exception_message(query_state, std::move(stmt), options, std::move(prepared), std::move(cache_key), needs_authorization)
    .then([trace_state = query_state.get_trace_state(), skip_metadata, q_state = std::move(q_state), stream, version, metadata_id = std::move(metadata_id)] (auto msg) mutable {
+       auto latency_mark = q_state->query_state.take_deferred_latency();
        if (msg->as_bounce()) {
-           return cql_server::process_fn_return_type(make_foreign(static_pointer_cast<messages::result_message::bounce>(msg)));
+           return cql_server::process_fn_return_type(make_foreign(static_pointer_cast<messages::result_message::bounce>(msg)), std::move(latency_mark));
        } else if (msg->is_exception()) {
-           return cql_server::process_fn_return_type(convert_error_message_to_coordinator_result(msg.get()));
+           return cql_server::process_fn_return_type(convert_error_message_to_coordinator_result(msg.get()), std::move(latency_mark));
        } else {
            tracing::trace(q_state->query_state.get_trace_state(), "Done processing - preparing a result");
-           return cql_server::process_fn_return_type(make_foreign(make_result(stream, *msg, q_state->query_state.get_trace_state(), version, std::move(metadata_id), skip_metadata)));
+           return cql_server::process_fn_return_type(make_foreign(make_result(stream, *msg, q_state->query_state.get_trace_state(), version, std::move(metadata_id), skip_metadata)), std::move(latency_mark));
        }
    });
}
@@ -1787,6 +1827,7 @@ process_batch_internal(service::client_state& client_state, sharded<cql3::query_

    auto q_state = std::make_unique<cql_query_state>(client_state, trace_state, std::move(permit));
    auto& query_state = q_state->query_state;
+   query_state.start_latency();
    // #563. CQL v2 encodes query_options in v1 format for batch requests.
    auto o = in.read_options(version, qp.local().get_cql_config());
    if (!o) {
@@ -1810,14 +1851,15 @@ process_batch_internal(service::client_state& client_state, sharded<cql3::query_
    batch->set_audit_info(batch->audit_info());
    return qp.local().execute_batch_without_checking_exception_message(batch, query_state, options, std::move(pending_authorization_entries))
    .then([stream, batch, q_state = std::move(q_state), trace_state = query_state.get_trace_state(), version] (auto msg) {
+       auto latency_mark = q_state->query_state.take_deferred_latency();
        if (msg->as_bounce()) {
-           return cql_server::process_fn_return_type(make_foreign(static_pointer_cast<messages::result_message::bounce>(msg)));
+           return cql_server::process_fn_return_type(make_foreign(static_pointer_cast<messages::result_message::bounce>(msg)), std::move(latency_mark));
        } else if (msg->is_exception()) {
-           return cql_server::process_fn_return_type(convert_error_message_to_coordinator_result(msg.get()));
+           return cql_server::process_fn_return_type(convert_error_message_to_coordinator_result(msg.get()), std::move(latency_mark));
        } else {
            tracing::trace(q_state->query_state.get_trace_state(), "Done processing - preparing a result");

-           return cql_server::process_fn_return_type(make_foreign(make_result(stream, *msg, trace_state, version, cql_metadata_id_wrapper{})));
+           return cql_server::process_fn_return_type(make_foreign(make_result(stream, *msg, trace_state, version, cql_metadata_id_wrapper{})), std::move(latency_mark));
        }
    });
}
@@ -1859,10 +1901,10 @@ cql_server::process(uint16_t stream, request_reader in, service::client_state& client_state,

    bool init_trace = (bool)!bounced; // If the request was bounced, we already started the trace in the handler
    auto msg = co_await coroutine::try_future(process_fn(client_state, _query_processor, in, stream,
-       version, permit, trace_state, init_trace, {}, dialect));
-   while (auto* bounce_msg = std::get_if<cql_server::result_with_bounce>(&msg)) {
+       version, permit, trace_state, init_trace, std::move(cached_fn_calls), dialect));
+   while (auto* bounce_msg = std::get_if<cql_server::result_with_bounce>(&msg.result)) {
        auto shard = (*bounce_msg)->target_shard();
-       auto&& cached_vals = (*bounce_msg)->take_cached_pk_function_calls();
+       auto cached_vals = (*bounce_msg)->take_cached_pk_function_calls();
        auto target_host = (*bounce_msg)->target_host();
        auto my_host_id = _query_processor.local().proxy().get_token_metadata_ptr()->get_topology().my_host_id();
        if (target_host == my_host_id) {
@@ -1870,13 +1912,25 @@ cql_server::process(uint16_t stream, request_reader in, service::client_state& client_state,
            auto sg = _config.bounce_request_smp_service_group;
            auto gcs = client_state.move_to_other_shard();
            auto gt = tracing::global_trace_state_ptr(trace_state);
-           msg = co_await container().invoke_on(shard, sg, [&, stream, dialect, version] (cql_server& server) -> future<process_fn_return_type> {
+           msg = co_await container().invoke_on(shard, sg, [&, stream, dialect, version, cached_vals = std::move(cached_vals)] (cql_server& server) mutable -> future<process_fn_return_type> {
                bytes_ostream linearization_buffer;
                request_reader in(is, linearization_buffer);
                auto local_client_state = gcs.get(&server._abort_source);
                auto local_trace_state = gt.get();
-               co_return co_await process_fn(local_client_state, server._query_processor, in, stream, version,
-                   /* FIXME */empty_service_permit(), std::move(local_trace_state), false, cached_vals, dialect);
+               auto ret = co_await process_fn(local_client_state, server._query_processor, in, stream, version,
+                   /* FIXME */empty_service_permit(), std::move(local_trace_state), false, std::move(cached_vals), dialect);
+               // Mark latency on the target shard before returning.
+               // The histogram pointer belongs to this shard's stats and
+               // must not be dereferenced from another shard.
+               // Only mark when the result is not a bounce — a bounce means
+               // the request will be retried elsewhere and no real work was done.
+               if (!std::get_if<cql_server::result_with_bounce>(&ret.result)) {
+                   if (ret.latency_mark && ret.latency_mark->histogram) {
+                       ret.latency_mark->histogram->mark(ret.latency_mark->lc.stop().latency());
+                   }
+               }
+               ret.latency_mark.reset();
+               co_return ret;
            });
        } else {
            // Node bounce
@@ -1897,7 +1951,7 @@ cql_server::process(uint16_t stream, request_reader in, service::client_state& client_state,
            .dialect = dialect,
            .client_state = client_state,
            .trace_info = tracing::make_trace_info(trace_state),
-           .cached_fn_calls = std::move(cached_fn_calls),
+           .cached_fn_calls = std::move(cached_vals),
        };

        auto response = co_await forward_cql(

@@ -250,7 +250,17 @@ public:
    using response = cql_transport::response;
    using result_with_foreign_response_ptr = exceptions::coordinator_result<foreign_ptr<std::unique_ptr<cql_server::response>>>;
    using result_with_bounce = foreign_ptr<seastar::shared_ptr<messages::result_message::bounce>>;
-   using process_fn_return_type = std::variant<result_with_foreign_response_ptr, result_with_bounce>;
+   using process_fn_result = std::variant<result_with_foreign_response_ptr, result_with_bounce>;
+   struct process_fn_return_type {
+       process_fn_result result;
+       std::optional<service::deferred_latency_mark> latency_mark;
+
+       process_fn_return_type(result_with_foreign_response_ptr r, std::optional<service::deferred_latency_mark> lm = std::nullopt);
+       process_fn_return_type(result_with_bounce r, std::optional<service::deferred_latency_mark> lm = std::nullopt);
+       process_fn_return_type(process_fn_return_type&&) noexcept;
+       process_fn_return_type& operator=(process_fn_return_type&&) noexcept;
+       ~process_fn_return_type();
+   };

    service::endpoint_lifecycle_subscriber* get_lifecycle_listener() const noexcept;
    service::migration_listener* get_migration_listener() const noexcept;
@@ -287,6 +297,11 @@ private:
    std::optional<seastar::lowres_clock::time_point> timeout_for_sleep(std::exception_ptr eptr) const;
    future<foreign_ptr<std::unique_ptr<cql_server::response>>> sleep_until_timeout_passes(const seastar::lowres_clock::time_point& timeout, foreign_ptr<std::unique_ptr<cql_server::response>>&& resp);

+   struct response_with_latency {
+       foreign_ptr<std::unique_ptr<cql_server::response>> response;
+       std::optional<service::deferred_latency_mark> latency_mark;
+   };
+
    class connection : public generic_server::connection {
        cql_server& _server;
        socket_address _server_addr;
@@ -308,7 +323,7 @@ private:
    };
private:
    using execution_stage_type = inheriting_concrete_execution_stage<
-       future<foreign_ptr<std::unique_ptr<cql_server::response>>>,
+       future<response_with_latency>,
        cql_server::connection*,
        fragmented_temporary_buffer::istream,
        uint8_t,
@@ -330,7 +345,7 @@ private:
private:
    friend class process_request_executor;

-   future<foreign_ptr<std::unique_ptr<cql_server::response>>> process_request_one(fragmented_temporary_buffer::istream buf, uint8_t op, uint16_t stream, service::client_state& client_state, tracing_request_type tracing_request, service_permit permit);
+   future<response_with_latency> process_request_one(fragmented_temporary_buffer::istream buf, uint8_t op, uint16_t stream, service::client_state& client_state, tracing_request_type tracing_request, service_permit permit);
    unsigned frame_size() const;
    unsigned pick_request_cpu();
    utils::result_with_exception<cql_binary_frame_v3, exceptions::protocol_exception, class cql_frame_error> parse_frame(temporary_buffer<char> buf) const;

@@ -65,8 +65,9 @@ struct do_nothing_loading_cache_stats {
/// The values are going to be evicted from the cache if they are not accessed during the "expiration" period or haven't
/// been reloaded even once during the same period.
///
-/// If "expiration" is set to zero - the caching is going to be disabled and get_XXX(...) is going to call the "loader" callback
-/// every time in order to get the requested value.
+/// If "expiration" is set to zero - the caching is going to be disabled and get(...) is going to call the "loader" callback
+/// every time in order to get the requested value. insert(...) is going to be a no-op in this mode. get_ptr(...) is not
+/// safe to call when caching is disabled (it asserts) since it returns a handle into the cache.
///
/// \note In order to avoid the eviction of cached entries due to "aging" of the contained value the user has to choose
/// the "expiration" to be at least ("refresh" + "max load latency"). This way the value is going to stay in the cache and is going to be
@@ -353,6 +354,24 @@ public:
        return get_ptr(k, _load);
    }

+   /// \brief Insert a value into the cache, loading it via \p load if not already present.
+   ///
+   /// Equivalent to get_ptr(k, load).discard_result() when caching is enabled,
+   /// but is a no-op when caching is disabled (i.e. the cache was constructed
+   /// with expiry == 0). Use this when you only want the side effect of
+   /// populating the cache and don't need a handle to the cached value.
+   ///
+   /// Unlike get_ptr(), it is safe to call this on a cache configured with
+   /// caching disabled.
+   template <typename LoadFunc>
+   requires std::is_invocable_r_v<future<value_type>, LoadFunc, const key_type&>
+   future<> insert(const Key& k, LoadFunc&& load) {
+       if (!caching_enabled()) {
+           return make_ready_future<>();
+       }
+       return get_ptr(k, std::forward<LoadFunc>(load)).discard_result();
+   }
+
    future<Tp> get(const Key& k) {
        static_assert(ReloadEnabled == loading_cache_reload_enabled::yes, "");
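A hypothetical usage of the new insert() overload, inside a coroutine, assuming a loading_cache<sstring, int> named cache. Unlike get_ptr(), this stays safe even if the cache was built with expiry == 0:

co_await cache.insert("answer", [] (const sstring&) {
    // The loader runs only when caching is enabled; with expiry == 0,
    // insert() returns a ready future without invoking it.
    return make_ready_future<int>(42);
});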