Compare commits

..

7 Commits

Author SHA1 Message Date
copilot-swe-agent[bot]
ea26e4b3a5 Use tablet_task_info fields for tablet task timing
For tablet_virtual_task, set creation_time to tablet_task_info::request_time
and start_time to tablet_task_info::sched_time, matching the actual semantics
of when the request was created vs when it was scheduled for execution.

Co-authored-by: Deexie <56607372+Deexie@users.noreply.github.com>
2025-12-08 15:05:50 +00:00
copilot-swe-agent[bot]
3b793ef09f Merge topology_request_tracking_mutation_builder calls for keyspace_rf_change
Combined the two separate topology_request_tracking_mutation_builder calls
into one, setting both start_time and done status in the same mutation builder
to reduce redundancy.

Co-authored-by: Deexie <56607372+Deexie@users.noreply.github.com>
2025-12-08 14:53:23 +00:00
copilot-swe-agent[bot]
df0a59ba03 Fix start_time setting to be together with operation start
Refactored start_time setting for global requests to be included in the same
mutation batch that starts the actual operation, matching the pattern used for
node operations. This avoids an extra update_topology_state call and ensures
start_time is set atomically with the operation start.

Also updated nodetool tasks documentation to include creation_time field in
example outputs for status, list, and tree commands.

Co-authored-by: Deexie <56607372+Deexie@users.noreply.github.com>
2025-12-08 14:40:01 +00:00
copilot-swe-agent[bot]
69024a09b2 Add creation_time field to nodetool tasks subcommands
Extended scylla-nodetool.cc to display creation_time in all task-related outputs:
- tasks_print_status: Added creation_time to time field formatting
- tasks_print_trees: Added creation_time column to task tree display
- tasks_print_stats_list: Added creation_time column to task stats list

Co-authored-by: Deexie <56607372+Deexie@users.noreply.github.com>
2025-12-08 14:28:01 +00:00
copilot-swe-agent[bot]
bb8f28a1ab Fix start_time setting: remove from request creation, add to execution start
Remove incorrect start_time setting from request creation sites for:
- cleanup requests
- new_cdc_generation requests
- truncate_table requests
- keyspace_rf_change requests

Add start_time setting in topology_coordinator::handle_global_request
when execution begins, matching the pattern for node operations.

Co-authored-by: tgrabiec <283695+tgrabiec@users.noreply.github.com>
2025-12-08 14:09:22 +00:00
copilot-swe-agent[bot]
32bc7e3a1c Add creation_time field to task status and stats API
Co-authored-by: tgrabiec <283695+tgrabiec@users.noreply.github.com>
2025-12-08 13:26:09 +00:00
copilot-swe-agent[bot]
fb4e37248d Initial plan 2025-12-08 13:03:16 +00:00
103 changed files with 955 additions and 1972 deletions

8
.github/CODEOWNERS vendored
View File

@@ -1,5 +1,5 @@
# AUTH
auth/* @nuivall
auth/* @nuivall @ptrsmrn
# CACHE
row_cache* @tgrabiec
@@ -25,11 +25,11 @@ compaction/* @raphaelsc
transport/*
# CQL QUERY LANGUAGE
cql3/* @tgrabiec @nuivall
cql3/* @tgrabiec @nuivall @ptrsmrn
# COUNTERS
counters* @nuivall
tests/counter_test* @nuivall
counters* @nuivall @ptrsmrn
tests/counter_test* @nuivall @ptrsmrn
# DOCS
docs/* @annastuchlik @tzach

View File

@@ -62,7 +62,7 @@ def create_pull_request(repo, new_branch_name, base_branch_name, pr, backport_pr
if is_draft:
labels_to_add.append("conflicts")
pr_comment = f"@{pr.user.login} - This PR was marked as draft because it has conflicts\n"
pr_comment += "Please resolve them and remove the 'conflicts' label. The PR will be made ready for review automatically."
pr_comment += "Please resolve them and mark this PR as ready for review"
backport_pr.create_issue_comment(pr_comment)
# Apply all labels at once if we have any

View File

@@ -18,7 +18,7 @@ jobs:
// Regular expression pattern to check for "Fixes" prefix
// Adjusted to dynamically insert the repository full name
const pattern = `Fixes:? ((?:#|${repo.replace('/', '\\/')}#|https://github\\.com/${repo.replace('/', '\\/')}/issues/)(\\d+)|([A-Z]+-\\d+))`;
const pattern = `Fixes:? (?:#|${repo.replace('/', '\\/')}#|https://github\\.com/${repo.replace('/', '\\/')}/issues/)(\\d+)`;
const regex = new RegExp(pattern);
if (!regex.test(body)) {

View File

@@ -7,7 +7,7 @@ on:
- enterprise
paths:
- '**/*.cc'
- 'scripts/metrics-config.yml'
- 'scripts/metrics-config.yml'
- 'scripts/get_description.py'
- 'docs/_ext/scylladb_metrics.py'
@@ -15,20 +15,20 @@ jobs:
validate-metrics:
runs-on: ubuntu-latest
name: Check metrics documentation coverage
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
submodules: true
- name: Set up Python
uses: actions/setup-python@v6
with:
python-version: '3.10'
- name: Install dependencies
run: pip install PyYAML
- name: Validate metrics
run: python3 scripts/get_description.py --validate -c scripts/metrics-config.yml

View File

@@ -3,13 +3,10 @@ name: Trigger Scylla CI Route
on:
issue_comment:
types: [created]
pull_request_target:
types:
- unlabeled
jobs:
trigger-jenkins:
if: (github.event.comment.user.login != 'scylladbbot' && contains(github.event.comment.body, '@scylladbbot') && contains(github.event.comment.body, 'trigger-ci')) || github.event.label.name == 'conflicts'
if: github.event.comment.user.login != 'scylladbbot' && contains(github.event.comment.body, '@scylladbbot') && contains(github.event.comment.body, 'trigger-ci')
runs-on: ubuntu-latest
steps:
- name: Trigger Scylla-CI-Route Jenkins Job

View File

@@ -42,7 +42,7 @@ comparison_operator_type get_comparison_operator(const rjson::value& comparison_
if (!comparison_operator.IsString()) {
throw api_error::validation(fmt::format("Invalid comparison operator definition {}", rjson::print(comparison_operator)));
}
std::string op = rjson::to_string(comparison_operator);
std::string op = comparison_operator.GetString();
auto it = ops.find(op);
if (it == ops.end()) {
throw api_error::validation(fmt::format("Unsupported comparison operator {}", op));
@@ -377,8 +377,8 @@ bool check_compare(const rjson::value* v1, const rjson::value& v2, const Compara
return cmp(unwrap_number(*v1, cmp.diagnostic), unwrap_number(v2, cmp.diagnostic));
}
if (kv1.name == "S") {
return cmp(rjson::to_string_view(kv1.value),
rjson::to_string_view(kv2.value));
return cmp(std::string_view(kv1.value.GetString(), kv1.value.GetStringLength()),
std::string_view(kv2.value.GetString(), kv2.value.GetStringLength()));
}
if (kv1.name == "B") {
auto d_kv1 = unwrap_bytes(kv1.value, v1_from_query);
@@ -470,9 +470,9 @@ static bool check_BETWEEN(const rjson::value* v, const rjson::value& lb, const r
return check_BETWEEN(unwrap_number(*v, diag), unwrap_number(lb, diag), unwrap_number(ub, diag), bounds_from_query);
}
if (kv_v.name == "S") {
return check_BETWEEN(rjson::to_string_view(kv_v.value),
rjson::to_string_view(kv_lb.value),
rjson::to_string_view(kv_ub.value),
return check_BETWEEN(std::string_view(kv_v.value.GetString(), kv_v.value.GetStringLength()),
std::string_view(kv_lb.value.GetString(), kv_lb.value.GetStringLength()),
std::string_view(kv_ub.value.GetString(), kv_ub.value.GetStringLength()),
bounds_from_query);
}
if (kv_v.name == "B") {

View File

@@ -8,8 +8,6 @@
#include "consumed_capacity.hh"
#include "error.hh"
#include "utils/rjson.hh"
#include <fmt/format.h>
namespace alternator {
@@ -34,12 +32,12 @@ bool consumed_capacity_counter::should_add_capacity(const rjson::value& request)
if (!return_consumed->IsString()) {
throw api_error::validation("Non-string ReturnConsumedCapacity field in request");
}
std::string_view consumed = rjson::to_string_view(*return_consumed);
std::string consumed = return_consumed->GetString();
if (consumed == "INDEXES") {
throw api_error::validation("INDEXES consumed capacity is not supported");
}
if (consumed != "TOTAL") {
throw api_error::validation(fmt::format("Unknown consumed capacity {}", consumed));
throw api_error::validation("Unknown consumed capacity "+ consumed);
}
return true;
}

View File

@@ -419,7 +419,7 @@ static std::optional<std::string> find_table_name(const rjson::value& request) {
if (!table_name_value->IsString()) {
throw api_error::validation("Non-string TableName field in request");
}
std::string table_name = rjson::to_string(*table_name_value);
std::string table_name = table_name_value->GetString();
return table_name;
}
@@ -546,7 +546,7 @@ get_table_or_view(service::storage_proxy& proxy, const rjson::value& request) {
// does exist but the index does not (ValidationException).
if (proxy.data_dictionary().has_schema(keyspace_name, orig_table_name)) {
throw api_error::validation(
fmt::format("Requested resource not found: Index '{}' for table '{}'", rjson::to_string_view(*index_name), orig_table_name));
fmt::format("Requested resource not found: Index '{}' for table '{}'", index_name->GetString(), orig_table_name));
} else {
throw api_error::resource_not_found(
fmt::format("Requested resource not found: Table: {} not found", orig_table_name));
@@ -587,7 +587,7 @@ static std::string get_string_attribute(const rjson::value& value, std::string_v
throw api_error::validation(fmt::format("Expected string value for attribute {}, got: {}",
attribute_name, value));
}
return rjson::to_string(*attribute_value);
return std::string(attribute_value->GetString(), attribute_value->GetStringLength());
}
// Convenience function for getting the value of a boolean attribute, or a
@@ -1080,8 +1080,8 @@ static void add_column(schema_builder& builder, const std::string& name, const r
}
for (auto it = attribute_definitions.Begin(); it != attribute_definitions.End(); ++it) {
const rjson::value& attribute_info = *it;
if (rjson::to_string_view(attribute_info["AttributeName"]) == name) {
std::string_view type = rjson::to_string_view(attribute_info["AttributeType"]);
if (attribute_info["AttributeName"].GetString() == name) {
auto type = attribute_info["AttributeType"].GetString();
data_type dt = parse_key_type(type);
if (computed_column) {
// Computed column for GSI (doesn't choose a real column as-is
@@ -1116,7 +1116,7 @@ static std::pair<std::string, std::string> parse_key_schema(const rjson::value&
throw api_error::validation("First element of KeySchema must be an object");
}
const rjson::value *v = rjson::find((*key_schema)[0], "KeyType");
if (!v || !v->IsString() || rjson::to_string_view(*v) != "HASH") {
if (!v || !v->IsString() || v->GetString() != std::string("HASH")) {
throw api_error::validation("First key in KeySchema must be a HASH key");
}
v = rjson::find((*key_schema)[0], "AttributeName");
@@ -1124,14 +1124,14 @@ static std::pair<std::string, std::string> parse_key_schema(const rjson::value&
throw api_error::validation("First key in KeySchema must have string AttributeName");
}
validate_attr_name_length(supplementary_context, v->GetStringLength(), true, "HASH key in KeySchema - ");
std::string hash_key = rjson::to_string(*v);
std::string hash_key = v->GetString();
std::string range_key;
if (key_schema->Size() == 2) {
if (!(*key_schema)[1].IsObject()) {
throw api_error::validation("Second element of KeySchema must be an object");
}
v = rjson::find((*key_schema)[1], "KeyType");
if (!v || !v->IsString() || rjson::to_string_view(*v) != "RANGE") {
if (!v || !v->IsString() || v->GetString() != std::string("RANGE")) {
throw api_error::validation("Second key in KeySchema must be a RANGE key");
}
v = rjson::find((*key_schema)[1], "AttributeName");
@@ -1887,8 +1887,8 @@ future<executor::request_return_type> executor::create_table(client_state& clien
std::string def_type = type_to_string(def.type);
for (auto it = attribute_definitions.Begin(); it != attribute_definitions.End(); ++it) {
const rjson::value& attribute_info = *it;
if (rjson::to_string_view(attribute_info["AttributeName"]) == def.name_as_text()) {
std::string_view type = rjson::to_string_view(attribute_info["AttributeType"]);
if (attribute_info["AttributeName"].GetString() == def.name_as_text()) {
auto type = attribute_info["AttributeType"].GetString();
if (type != def_type) {
throw api_error::validation(fmt::format("AttributeDefinitions redefined {} to {} already a key attribute of type {} in this table", def.name_as_text(), type, def_type));
}
@@ -2362,7 +2362,7 @@ put_or_delete_item::put_or_delete_item(const rjson::value& item, schema_ptr sche
_cells = std::vector<cell>();
_cells->reserve(item.MemberCount());
for (auto it = item.MemberBegin(); it != item.MemberEnd(); ++it) {
bytes column_name = to_bytes(rjson::to_string_view(it->name));
bytes column_name = to_bytes(it->name.GetString());
validate_value(it->value, "PutItem");
const column_definition* cdef = find_attribute(*schema, column_name);
validate_attr_name_length("", column_name.size(), cdef && cdef->is_primary_key());
@@ -2783,10 +2783,10 @@ static void verify_all_are_used(const rjson::value* field,
return;
}
for (auto it = field->MemberBegin(); it != field->MemberEnd(); ++it) {
if (!used.contains(rjson::to_string(it->name))) {
if (!used.contains(it->name.GetString())) {
throw api_error::validation(
format("{} has spurious '{}', not used in {}",
field_name, rjson::to_string_view(it->name), operation));
field_name, it->name.GetString(), operation));
}
}
}
@@ -3000,7 +3000,7 @@ future<executor::request_return_type> executor::delete_item(client_state& client
}
static schema_ptr get_table_from_batch_request(const service::storage_proxy& proxy, const rjson::value::ConstMemberIterator& batch_request) {
sstring table_name = rjson::to_sstring(batch_request->name); // JSON keys are always strings
sstring table_name = batch_request->name.GetString(); // JSON keys are always strings
try {
return proxy.data_dictionary().find_schema(sstring(executor::KEYSPACE_NAME_PREFIX) + table_name, table_name);
} catch(data_dictionary::no_such_column_family&) {
@@ -3055,44 +3055,17 @@ public:
}
};
future<> executor::cas_write(schema_ptr schema, service::cas_shard cas_shard, const dht::decorated_key& dk,
const std::vector<put_or_delete_item>& mutation_builders, service::client_state& client_state,
tracing::trace_state_ptr trace_state, service_permit permit)
{
if (!cas_shard.this_shard()) {
_stats.shard_bounce_for_lwt++;
return container().invoke_on(cas_shard.shard(), _ssg,
[cs = client_state.move_to_other_shard(),
&mb = mutation_builders,
&dk,
ks = schema->ks_name(),
cf = schema->cf_name(),
gt = tracing::global_trace_state_ptr(trace_state),
permit = std::move(permit)]
(executor& self) mutable {
return do_with(cs.get(), [&mb, &dk, ks = std::move(ks), cf = std::move(cf),
trace_state = tracing::trace_state_ptr(gt), &self]
(service::client_state& client_state) mutable {
auto schema = self._proxy.data_dictionary().find_schema(ks, cf);
service::cas_shard cas_shard(*schema, dk.token());
//FIXME: Instead of passing empty_service_permit() to the background operation,
// the current permit's lifetime should be prolonged, so that it's destructed
// only after all background operations are finished as well.
return self.cas_write(schema, std::move(cas_shard), dk, mb, client_state, std::move(trace_state), empty_service_permit());
});
});
}
static future<> cas_write(service::storage_proxy& proxy, schema_ptr schema, service::cas_shard cas_shard, const dht::decorated_key& dk, const std::vector<put_or_delete_item>& mutation_builders,
service::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit) {
auto timeout = executor::default_timeout();
auto op = std::make_unique<put_or_delete_item_cas_request>(schema, mutation_builders);
auto* op_ptr = op.get();
auto cdc_opts = cdc::per_request_options{
.alternator = true,
.alternator_streams_increased_compatibility =
schema->cdc_options().enabled() && _proxy.data_dictionary().get_config().alternator_streams_increased_compatibility(),
schema->cdc_options().enabled() && proxy.data_dictionary().get_config().alternator_streams_increased_compatibility(),
};
return _proxy.cas(schema, std::move(cas_shard), *op_ptr, nullptr, to_partition_ranges(dk),
return proxy.cas(schema, std::move(cas_shard), *op_ptr, nullptr, to_partition_ranges(dk),
{timeout, std::move(permit), client_state, trace_state},
db::consistency_level::LOCAL_SERIAL, db::consistency_level::LOCAL_QUORUM,
timeout, timeout, true, std::move(cdc_opts)).finally([op = std::move(op)]{}).discard_result();
@@ -3118,11 +3091,13 @@ struct schema_decorated_key_equal {
// FIXME: if we failed writing some of the mutations, need to return a list
// of these failed mutations rather than fail the whole write (issue #5650).
future<> executor::do_batch_write(
static future<> do_batch_write(service::storage_proxy& proxy,
smp_service_group ssg,
std::vector<std::pair<schema_ptr, put_or_delete_item>> mutation_builders,
service::client_state& client_state,
tracing::trace_state_ptr trace_state,
service_permit permit) {
service_permit permit,
stats& stats) {
if (mutation_builders.empty()) {
return make_ready_future<>();
}
@@ -3144,7 +3119,7 @@ future<> executor::do_batch_write(
mutations.push_back(b.second.build(b.first, now));
any_cdc_enabled |= b.first->cdc_options().enabled();
}
return _proxy.mutate(std::move(mutations),
return proxy.mutate(std::move(mutations),
db::consistency_level::LOCAL_QUORUM,
executor::default_timeout(),
trace_state,
@@ -3153,7 +3128,7 @@ future<> executor::do_batch_write(
false,
cdc::per_request_options{
.alternator = true,
.alternator_streams_increased_compatibility = any_cdc_enabled && _proxy.data_dictionary().get_config().alternator_streams_increased_compatibility(),
.alternator_streams_increased_compatibility = any_cdc_enabled && proxy.data_dictionary().get_config().alternator_streams_increased_compatibility(),
});
} else {
// Do the write via LWT:
@@ -3165,35 +3140,46 @@ future<> executor::do_batch_write(
schema_decorated_key_hash,
schema_decorated_key_equal>;
auto key_builders = std::make_unique<map_type>(1, schema_decorated_key_hash{}, schema_decorated_key_equal{});
for (auto&& b : std::move(mutation_builders)) {
auto [it, added] = key_builders->try_emplace(schema_decorated_key {
.schema = b.first,
.dk = dht::decorate_key(*b.first, b.second.pk())
});
for (auto& b : mutation_builders) {
auto dk = dht::decorate_key(*b.first, b.second.pk());
auto [it, added] = key_builders->try_emplace(schema_decorated_key{b.first, dk});
it->second.push_back(std::move(b.second));
}
auto* key_builders_ptr = key_builders.get();
return parallel_for_each(*key_builders_ptr, [this, &client_state, trace_state, permit = std::move(permit)] (const auto& e) {
_stats.write_using_lwt++;
return parallel_for_each(*key_builders_ptr, [&proxy, &client_state, &stats, trace_state, ssg, permit = std::move(permit)] (const auto& e) {
stats.write_using_lwt++;
auto desired_shard = service::cas_shard(*e.first.schema, e.first.dk.token());
auto s = e.first.schema;
if (desired_shard.this_shard()) {
return cas_write(proxy, e.first.schema, std::move(desired_shard), e.first.dk, e.second, client_state, trace_state, permit);
} else {
stats.shard_bounce_for_lwt++;
return proxy.container().invoke_on(desired_shard.shard(), ssg,
[cs = client_state.move_to_other_shard(),
&mb = e.second,
&dk = e.first.dk,
ks = e.first.schema->ks_name(),
cf = e.first.schema->cf_name(),
gt = tracing::global_trace_state_ptr(trace_state),
permit = std::move(permit)]
(service::storage_proxy& proxy) mutable {
return do_with(cs.get(), [&proxy, &mb, &dk, ks = std::move(ks), cf = std::move(cf),
trace_state = tracing::trace_state_ptr(gt)]
(service::client_state& client_state) mutable {
auto schema = proxy.data_dictionary().find_schema(ks, cf);
static const auto* injection_name = "alternator_executor_batch_write_wait";
return utils::get_local_injector().inject(injection_name, [s = std::move(s)] (auto& handler) -> future<> {
const auto ks = handler.get("keyspace");
const auto cf = handler.get("table");
const auto shard = std::atoll(handler.get("shard")->data());
if (ks == s->ks_name() && cf == s->cf_name() && shard == this_shard_id()) {
elogger.info("{}: hit", injection_name);
co_await handler.wait_for_message(std::chrono::steady_clock::now() + std::chrono::minutes{5});
elogger.info("{}: continue", injection_name);
}
}).then([&e, desired_shard = std::move(desired_shard),
&client_state, trace_state = std::move(trace_state), permit = std::move(permit), this]() mutable
{
return cas_write(e.first.schema, std::move(desired_shard), e.first.dk,
std::move(e.second), client_state, std::move(trace_state), std::move(permit));
});
// The desired_shard on the original shard remains alive for the duration
// of cas_write on this shard and prevents any tablet operations.
// However, we need a local instance of cas_shard on this shard
// to pass it to sp::cas, so we just create a new one.
service::cas_shard cas_shard(*schema, dk.token());
//FIXME: Instead of passing empty_service_permit() to the background operation,
// the current permit's lifetime should be prolonged, so that it's destructed
// only after all background operations are finished as well.
return cas_write(proxy, schema, std::move(cas_shard), dk, mb, client_state, std::move(trace_state), empty_service_permit());
});
}).finally([desired_shard = std::move(desired_shard)]{});
}
}).finally([key_builders = std::move(key_builders)]{});
}
}
@@ -3341,7 +3327,7 @@ future<executor::request_return_type> executor::batch_write_item(client_state& c
_stats.wcu_total[stats::DELETE_ITEM] += wcu_delete_units;
_stats.api_operations.batch_write_item_batch_total += total_items;
_stats.api_operations.batch_write_item_histogram.add(total_items);
co_await do_batch_write(std::move(mutation_builders), client_state, trace_state, std::move(permit));
co_await do_batch_write(_proxy, _ssg, std::move(mutation_builders), client_state, trace_state, std::move(permit), _stats);
// FIXME: Issue #5650: If we failed writing some of the updates,
// need to return a list of these failed updates in UnprocessedItems
// rather than fail the whole write (issue #5650).
@@ -3386,7 +3372,7 @@ static bool hierarchy_filter(rjson::value& val, const attribute_path_map_node<T>
}
rjson::value newv = rjson::empty_object();
for (auto it = v.MemberBegin(); it != v.MemberEnd(); ++it) {
std::string attr = rjson::to_string(it->name);
std::string attr = it->name.GetString();
auto x = members.find(attr);
if (x != members.end()) {
if (x->second) {
@@ -3606,7 +3592,7 @@ static std::optional<attrs_to_get> calculate_attrs_to_get(const rjson::value& re
const rjson::value& attributes_to_get = req["AttributesToGet"];
attrs_to_get ret;
for (auto it = attributes_to_get.Begin(); it != attributes_to_get.End(); ++it) {
attribute_path_map_add("AttributesToGet", ret, rjson::to_string(*it));
attribute_path_map_add("AttributesToGet", ret, it->GetString());
validate_attr_name_length("AttributesToGet", it->GetStringLength(), false);
}
if (ret.empty()) {
@@ -4272,12 +4258,12 @@ inline void update_item_operation::apply_attribute_updates(const std::unique_ptr
attribute_collector& modified_attrs, bool& any_updates, bool& any_deletes) const {
for (auto it = _attribute_updates->MemberBegin(); it != _attribute_updates->MemberEnd(); ++it) {
// Note that it.key() is the name of the column, *it is the operation
bytes column_name = to_bytes(rjson::to_string_view(it->name));
bytes column_name = to_bytes(it->name.GetString());
const column_definition* cdef = _schema->get_column_definition(column_name);
if (cdef && cdef->is_primary_key()) {
throw api_error::validation(format("UpdateItem cannot update key column {}", rjson::to_string_view(it->name)));
throw api_error::validation(format("UpdateItem cannot update key column {}", it->name.GetString()));
}
std::string action = rjson::to_string((it->value)["Action"]);
std::string action = (it->value)["Action"].GetString();
if (action == "DELETE") {
// The DELETE operation can do two unrelated tasks. Without a
// "Value" option, it is used to delete an attribute. With a
@@ -5474,7 +5460,7 @@ calculate_bounds_conditions(schema_ptr schema, const rjson::value& conditions) {
std::vector<query::clustering_range> ck_bounds;
for (auto it = conditions.MemberBegin(); it != conditions.MemberEnd(); ++it) {
sstring key = rjson::to_sstring(it->name);
std::string key = it->name.GetString();
const rjson::value& condition = it->value;
const rjson::value& comp_definition = rjson::get(condition, "ComparisonOperator");
@@ -5482,13 +5468,13 @@ calculate_bounds_conditions(schema_ptr schema, const rjson::value& conditions) {
const column_definition& pk_cdef = schema->partition_key_columns().front();
const column_definition* ck_cdef = schema->clustering_key_size() > 0 ? &schema->clustering_key_columns().front() : nullptr;
if (key == pk_cdef.name_as_text()) {
if (sstring(key) == pk_cdef.name_as_text()) {
if (!partition_ranges.empty()) {
throw api_error::validation("Currently only a single restriction per key is allowed");
}
partition_ranges.push_back(calculate_pk_bound(schema, pk_cdef, comp_definition, attr_list));
}
if (ck_cdef && key == ck_cdef->name_as_text()) {
if (ck_cdef && sstring(key) == ck_cdef->name_as_text()) {
if (!ck_bounds.empty()) {
throw api_error::validation("Currently only a single restriction per key is allowed");
}
@@ -5889,7 +5875,7 @@ future<executor::request_return_type> executor::list_tables(client_state& client
rjson::value* exclusive_start_json = rjson::find(request, "ExclusiveStartTableName");
rjson::value* limit_json = rjson::find(request, "Limit");
std::string exclusive_start = exclusive_start_json ? rjson::to_string(*exclusive_start_json) : "";
std::string exclusive_start = exclusive_start_json ? exclusive_start_json->GetString() : "";
int limit = limit_json ? limit_json->GetInt() : 100;
if (limit < 1 || limit > 100) {
co_return api_error::validation("Limit must be greater than 0 and no greater than 100");

View File

@@ -40,7 +40,6 @@ namespace cql3::selection {
namespace service {
class storage_proxy;
class cas_shard;
}
namespace cdc {
@@ -58,7 +57,6 @@ class schema_builder;
namespace alternator {
class rmw_operation;
class put_or_delete_item;
schema_ptr get_table(service::storage_proxy& proxy, const rjson::value& request);
bool is_alternator_keyspace(const sstring& ks_name);
@@ -221,16 +219,6 @@ private:
static void describe_key_schema(rjson::value& parent, const schema&, std::unordered_map<std::string,std::string> * = nullptr, const std::map<sstring, sstring> *tags = nullptr);
future<> do_batch_write(
std::vector<std::pair<schema_ptr, put_or_delete_item>> mutation_builders,
service::client_state& client_state,
tracing::trace_state_ptr trace_state,
service_permit permit);
future<> cas_write(schema_ptr schema, service::cas_shard cas_shard, const dht::decorated_key& dk,
const std::vector<put_or_delete_item>& mutation_builders, service::client_state& client_state,
tracing::trace_state_ptr trace_state, service_permit permit);
public:
static void describe_key_schema(rjson::value& parent, const schema& schema, std::unordered_map<std::string,std::string>&, const std::map<sstring, sstring> *tags = nullptr);

View File

@@ -496,7 +496,7 @@ const std::pair<std::string, const rjson::value*> unwrap_set(const rjson::value&
return {"", nullptr};
}
auto it = v.MemberBegin();
const std::string it_key = rjson::to_string(it->name);
const std::string it_key = it->name.GetString();
if (it_key != "SS" && it_key != "BS" && it_key != "NS") {
return {std::move(it_key), nullptr};
}

View File

@@ -93,7 +93,7 @@ future<executor::request_return_type> executor::update_time_to_live(client_state
if (v->GetStringLength() < 1 || v->GetStringLength() > 255) {
co_return api_error::validation("The length of AttributeName must be between 1 and 255");
}
sstring attribute_name = rjson::to_sstring(*v);
sstring attribute_name(v->GetString(), v->GetStringLength());
co_await verify_permission(_enforce_authorization, _warn_authorization, client_state, schema, auth::permission::ALTER, _stats);
co_await db::modify_tags(_mm, schema->ks_name(), schema->cf_name(), [&](std::map<sstring, sstring>& tags_map) {

View File

@@ -3051,7 +3051,7 @@
},
{
"name":"incremental_mode",
"description":"Set the incremental repair mode. Can be 'disabled', 'incremental', or 'full'. 'incremental': The incremental repair logic is enabled. Unrepaired sstables will be included for repair. Repaired sstables will be skipped. The incremental repair states will be updated after repair. 'full': The incremental repair logic is enabled. Both repaired and unrepaired sstables will be included for repair. The incremental repair states will be updated after repair. 'disabled': The incremental repair logic is disabled completely. The incremental repair states, e.g., repaired_at in sstables and sstables_repaired_at in the system.tablets table, will not be updated after repair. When the option is not provided, it defaults to 'disabled' mode.",
"description":"Set the incremental repair mode. Can be 'disabled', 'incremental', or 'full'. 'incremental': The incremental repair logic is enabled. Unrepaired sstables will be included for repair. Repaired sstables will be skipped. The incremental repair states will be updated after repair. 'full': The incremental repair logic is enabled. Both repaired and unrepaired sstables will be included for repair. The incremental repair states will be updated after repair. 'disabled': The incremental repair logic is disabled completely. The incremental repair states, e.g., repaired_at in sstables and sstables_repaired_at in the system.tablets table, will not be updated after repair. When the option is not provided, it defaults to incremental mode.",
"required":false,
"allowMultiple":false,
"type":"string",

View File

@@ -349,9 +349,13 @@
"type":"long",
"description":"The shard the task is running on"
},
"creation_time":{
"type":"datetime",
"description":"The creation time of the task (when it was queued); extracted from the task_id UUID"
},
"start_time":{
"type":"datetime",
"description":"The start time of the task; unspecified (equal to epoch) when state == created"
"description":"The start time of the task (when execution began); unspecified (equal to epoch) when state == created"
},
"end_time":{
"type":"datetime",
@@ -398,13 +402,17 @@
"type":"boolean",
"description":"Boolean flag indicating whether the task can be aborted"
},
"creation_time":{
"type":"datetime",
"description":"The creation time of the task (when it was queued); extracted from the task_id UUID"
},
"start_time":{
"type":"datetime",
"description":"The start time of the task"
"description":"The start time of the task (when execution began); unspecified (equal to epoch) when state == created"
},
"end_time":{
"type":"datetime",
"description":"The end time of the task (unspecified when the task is not completed)"
"description":"The end time of the task (when execution completed); unspecified (equal to epoch) when the task is not completed"
},
"error":{
"type":"string",

View File

@@ -55,6 +55,7 @@ tm::task_status make_status(tasks::task_status status, sharded<gms::gossiper>& g
res.scope = status.scope;
res.state = status.state;
res.is_abortable = bool(status.is_abortable);
res.creation_time = get_time(status.creation_time);
res.start_time = get_time(status.start_time);
res.end_time = get_time(status.end_time);
res.error = status.error;
@@ -83,6 +84,7 @@ tm::task_stats make_stats(tasks::task_stats stats) {
res.table = stats.table;
res.entity = stats.entity;
res.shard = stats.shard;
res.creation_time = get_time(stats.creation_time);
res.start_time = get_time(stats.start_time);
res.end_time = get_time(stats.end_time);;
return res;

View File

@@ -9,6 +9,7 @@
#include "auth/allow_all_authenticator.hh"
#include "service/migration_manager.hh"
#include "utils/alien_worker.hh"
#include "utils/class_registrator.hh"
namespace auth {
@@ -22,6 +23,7 @@ static const class_registrator<
cql3::query_processor&,
::service::raft_group0_client&,
::service::migration_manager&,
cache&> registration("org.apache.cassandra.auth.AllowAllAuthenticator");
cache&,
utils::alien_worker&> registration("org.apache.cassandra.auth.AllowAllAuthenticator");
}

View File

@@ -14,6 +14,7 @@
#include "auth/authenticator.hh"
#include "auth/cache.hh"
#include "auth/common.hh"
#include "utils/alien_worker.hh"
namespace cql3 {
class query_processor;
@@ -29,7 +30,7 @@ extern const std::string_view allow_all_authenticator_name;
class allow_all_authenticator final : public authenticator {
public:
allow_all_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&) {
allow_all_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&, utils::alien_worker&) {
}
virtual future<> start() override {

View File

@@ -35,13 +35,14 @@ static const class_registrator<auth::authenticator
, cql3::query_processor&
, ::service::raft_group0_client&
, ::service::migration_manager&
, auth::cache&> cert_auth_reg(CERT_AUTH_NAME);
, auth::cache&
, utils::alien_worker&> cert_auth_reg(CERT_AUTH_NAME);
enum class auth::certificate_authenticator::query_source {
subject, altname
};
auth::certificate_authenticator::certificate_authenticator(cql3::query_processor& qp, ::service::raft_group0_client&, ::service::migration_manager&, auth::cache&)
auth::certificate_authenticator::certificate_authenticator(cql3::query_processor& qp, ::service::raft_group0_client&, ::service::migration_manager&, auth::cache&, utils::alien_worker&)
: _queries([&] {
auto& conf = qp.db().get_config();
auto queries = conf.auth_certificate_role_queries();
@@ -76,9 +77,9 @@ auth::certificate_authenticator::certificate_authenticator(cql3::query_processor
throw std::invalid_argument(fmt::format("Invalid source: {}", map.at(cfg_source_attr)));
}
continue;
} catch (const std::out_of_range&) {
} catch (std::out_of_range&) {
// just fallthrough
} catch (const boost::regex_error&) {
} catch (boost::regex_error&) {
std::throw_with_nested(std::invalid_argument(fmt::format("Invalid query expression: {}", map.at(cfg_query_attr))));
}
}

View File

@@ -10,6 +10,7 @@
#pragma once
#include "auth/authenticator.hh"
#include "utils/alien_worker.hh"
#include <boost/regex_fwd.hpp> // IWYU pragma: keep
namespace cql3 {
@@ -33,7 +34,7 @@ class certificate_authenticator : public authenticator {
enum class query_source;
std::vector<std::pair<query_source, boost::regex>> _queries;
public:
certificate_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&);
certificate_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&, utils::alien_worker&);
~certificate_authenticator();
future<> start() override;

View File

@@ -94,7 +94,7 @@ static future<> create_legacy_metadata_table_if_missing_impl(
try {
co_return co_await mm.announce(co_await ::service::prepare_new_column_family_announcement(qp.proxy(), table, ts),
std::move(group0_guard), format("auth: create {} metadata table", table->cf_name()));
} catch (const exceptions::already_exists_exception&) {}
} catch (exceptions::already_exists_exception&) {}
}
}

View File

@@ -256,7 +256,7 @@ future<> default_authorizer::revoke_all(std::string_view role_name, ::service::g
} else {
co_await collect_mutations(_qp, mc, query, {sstring(role_name)});
}
} catch (const exceptions::request_execution_exception& e) {
} catch (exceptions::request_execution_exception& e) {
alogger.warn("CassandraAuthorizer failed to revoke all permissions of {}: {}", role_name, e);
}
}
@@ -293,13 +293,13 @@ future<> default_authorizer::revoke_all_legacy(const resource& resource) {
[resource](auto ep) {
try {
std::rethrow_exception(ep);
} catch (const exceptions::request_execution_exception& e) {
} catch (exceptions::request_execution_exception& e) {
alogger.warn("CassandraAuthorizer failed to revoke all permissions on {}: {}", resource, e);
}
});
});
} catch (const exceptions::request_execution_exception& e) {
} catch (exceptions::request_execution_exception& e) {
alogger.warn("CassandraAuthorizer failed to revoke all permissions on {}: {}", resource, e);
return make_ready_future();
}

View File

@@ -49,7 +49,8 @@ static const class_registrator<
cql3::query_processor&,
::service::raft_group0_client&,
::service::migration_manager&,
cache&> password_auth_reg("org.apache.cassandra.auth.PasswordAuthenticator");
cache&,
utils::alien_worker&> password_auth_reg("org.apache.cassandra.auth.PasswordAuthenticator");
static thread_local auto rng_for_salt = std::default_random_engine(std::random_device{}());
@@ -63,13 +64,14 @@ std::string password_authenticator::default_superuser(const db::config& cfg) {
password_authenticator::~password_authenticator() {
}
password_authenticator::password_authenticator(cql3::query_processor& qp, ::service::raft_group0_client& g0, ::service::migration_manager& mm, cache& cache)
password_authenticator::password_authenticator(cql3::query_processor& qp, ::service::raft_group0_client& g0, ::service::migration_manager& mm, cache& cache, utils::alien_worker& hashing_worker)
: _qp(qp)
, _group0_client(g0)
, _migration_manager(mm)
, _cache(cache)
, _stopped(make_ready_future<>())
, _superuser(default_superuser(qp.db().get_config()))
, _hashing_worker(hashing_worker)
{}
static bool has_salted_hash(const cql3::untyped_result_set_row& row) {
@@ -328,18 +330,20 @@ future<authenticated_user> password_authenticator::authenticate(
}
salted_hash = role->salted_hash;
}
const bool password_match = co_await passwords::check(password, *salted_hash);
const bool password_match = co_await _hashing_worker.submit<bool>([password = std::move(password), salted_hash] {
return passwords::check(password, *salted_hash);
});
if (!password_match) {
throw exceptions::authentication_exception("Username and/or password are incorrect");
}
co_return username;
} catch (const std::system_error &) {
} catch (std::system_error &) {
std::throw_with_nested(exceptions::authentication_exception("Could not verify password"));
} catch (const exceptions::request_execution_exception& e) {
} catch (exceptions::request_execution_exception& e) {
std::throw_with_nested(exceptions::authentication_exception(e.what()));
} catch (const exceptions::authentication_exception& e) {
} catch (exceptions::authentication_exception& e) {
std::throw_with_nested(e);
} catch (const exceptions::unavailable_exception& e) {
} catch (exceptions::unavailable_exception& e) {
std::throw_with_nested(exceptions::authentication_exception(e.get_message()));
} catch (...) {
std::throw_with_nested(exceptions::authentication_exception("authentication failed"));

View File

@@ -18,6 +18,7 @@
#include "auth/passwords.hh"
#include "auth/cache.hh"
#include "service/raft/raft_group0_client.hh"
#include "utils/alien_worker.hh"
namespace db {
class config;
@@ -48,12 +49,13 @@ class password_authenticator : public authenticator {
shared_promise<> _superuser_created_promise;
// We used to also support bcrypt, SHA-256, and MD5 (ref. scylladb#24524).
constexpr static auth::passwords::scheme _scheme = passwords::scheme::sha_512;
utils::alien_worker& _hashing_worker;
public:
static db::consistency_level consistency_for_user(std::string_view role_name);
static std::string default_superuser(const db::config&);
password_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&);
password_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&, utils::alien_worker&);
~password_authenticator();

View File

@@ -7,8 +7,6 @@
*/
#include "auth/passwords.hh"
#include "utils/crypt_sha512.hh"
#include <seastar/core/coroutine.hh>
#include <cerrno>
@@ -23,46 +21,25 @@ static thread_local crypt_data tlcrypt = {};
namespace detail {
void verify_hashing_output(const char * res) {
if (!res || (res[0] == '*')) {
throw std::system_error(errno, std::system_category());
}
}
void verify_scheme(scheme scheme) {
const sstring random_part_of_salt = "aaaabbbbccccdddd";
const sstring salt = sstring(prefix_for_scheme(scheme)) + random_part_of_salt;
const char* e = crypt_r("fisk", salt.c_str(), &tlcrypt);
try {
verify_hashing_output(e);
} catch (const std::system_error& ex) {
throw no_supported_schemes();
if (e && (e[0] != '*')) {
return;
}
throw no_supported_schemes();
}
sstring hash_with_salt(const sstring& pass, const sstring& salt) {
auto res = crypt_r(pass.c_str(), salt.c_str(), &tlcrypt);
verify_hashing_output(res);
return res;
}
seastar::future<sstring> hash_with_salt_async(const sstring& pass, const sstring& salt) {
sstring res;
// Only SHA-512 hashes for passphrases shorter than 256 bytes can be computed using
// the __crypt_sha512 method. For other computations, we fall back to the
// crypt_r implementation from `<crypt.h>`, which can stall.
if (salt.starts_with(prefix_for_scheme(scheme::sha_512)) && pass.size() <= 255) {
char buf[128];
const char * output_ptr = co_await __crypt_sha512(pass.c_str(), salt.c_str(), buf);
verify_hashing_output(output_ptr);
res = output_ptr;
} else {
const char * output_ptr = crypt_r(pass.c_str(), salt.c_str(), &tlcrypt);
verify_hashing_output(output_ptr);
res = output_ptr;
if (!res || (res[0] == '*')) {
throw std::system_error(errno, std::system_category());
}
co_return res;
return res;
}
std::string_view prefix_for_scheme(scheme c) noexcept {
@@ -81,9 +58,8 @@ no_supported_schemes::no_supported_schemes()
: std::runtime_error("No allowed hashing schemes are supported on this system") {
}
seastar::future<bool> check(const sstring& pass, const sstring& salted_hash) {
const auto pwd_hash = co_await detail::hash_with_salt_async(pass, salted_hash);
co_return pwd_hash == salted_hash;
bool check(const sstring& pass, const sstring& salted_hash) {
return detail::hash_with_salt(pass, salted_hash) == salted_hash;
}
} // namespace auth::passwords

View File

@@ -11,7 +11,6 @@
#include <random>
#include <stdexcept>
#include <seastar/core/future.hh>
#include <seastar/core/sstring.hh>
#include "seastarx.hh"
@@ -76,19 +75,10 @@ sstring generate_salt(RandomNumberEngine& g, scheme scheme) {
///
/// Hash a password combined with an implementation-specific salt string.
/// Deprecated in favor of `hash_with_salt_async`.
///
/// \throws \ref std::system_error when an unexpected implementation-specific error occurs.
///
[[deprecated("Use hash_with_salt_async instead")]] sstring hash_with_salt(const sstring& pass, const sstring& salt);
///
/// Async version of `hash_with_salt` that returns a future.
/// If possible, hashing uses `coroutine::maybe_yield` to prevent reactor stalls.
///
/// \throws \ref std::system_error when an unexpected implementation-specific error occurs.
///
seastar::future<sstring> hash_with_salt_async(const sstring& pass, const sstring& salt);
sstring hash_with_salt(const sstring& pass, const sstring& salt);
} // namespace detail
@@ -117,6 +107,6 @@ sstring hash(const sstring& pass, RandomNumberEngine& g, scheme scheme) {
///
/// \throws \ref std::system_error when an unexpected implementation-specific error occurs.
///
seastar::future<bool> check(const sstring& pass, const sstring& salted_hash);
bool check(const sstring& pass, const sstring& salted_hash);
} // namespace auth::passwords

View File

@@ -35,9 +35,10 @@ static const class_registrator<
cql3::query_processor&,
::service::raft_group0_client&,
::service::migration_manager&,
cache&> saslauthd_auth_reg("com.scylladb.auth.SaslauthdAuthenticator");
cache&,
utils::alien_worker&> saslauthd_auth_reg("com.scylladb.auth.SaslauthdAuthenticator");
saslauthd_authenticator::saslauthd_authenticator(cql3::query_processor& qp, ::service::raft_group0_client&, ::service::migration_manager&, cache&)
saslauthd_authenticator::saslauthd_authenticator(cql3::query_processor& qp, ::service::raft_group0_client&, ::service::migration_manager&, cache&, utils::alien_worker&)
: _socket_path(qp.db().get_config().saslauthd_socket_path())
{}

View File

@@ -12,6 +12,7 @@
#include "auth/authenticator.hh"
#include "auth/cache.hh"
#include "utils/alien_worker.hh"
namespace cql3 {
class query_processor;
@@ -29,7 +30,7 @@ namespace auth {
class saslauthd_authenticator : public authenticator {
sstring _socket_path; ///< Path to the domain socket on which saslauthd is listening.
public:
saslauthd_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&);
saslauthd_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&,utils::alien_worker&);
future<> start() override;

View File

@@ -191,7 +191,8 @@ service::service(
::service::migration_manager& mm,
const service_config& sc,
maintenance_socket_enabled used_by_maintenance_socket,
cache& cache)
cache& cache,
utils::alien_worker& hashing_worker)
: service(
std::move(c),
cache,
@@ -199,7 +200,7 @@ service::service(
g0,
mn,
create_object<authorizer>(sc.authorizer_java_name, qp, g0, mm),
create_object<authenticator>(sc.authenticator_java_name, qp, g0, mm, cache),
create_object<authenticator>(sc.authenticator_java_name, qp, g0, mm, cache, hashing_worker),
create_object<role_manager>(sc.role_manager_java_name, qp, g0, mm, cache),
used_by_maintenance_socket) {
}
@@ -225,7 +226,7 @@ future<> service::create_legacy_keyspace_if_missing(::service::migration_manager
try {
co_return co_await mm.announce(::service::prepare_new_keyspace_announcement(db.real_database(), ksm, ts),
std::move(group0_guard), seastar::format("auth_service: create {} keyspace", meta::legacy::AUTH_KS));
} catch (const ::service::group0_concurrent_modification&) {
} catch (::service::group0_concurrent_modification&) {
log.info("Concurrent operation is detected while creating {} keyspace, retrying.", meta::legacy::AUTH_KS);
}
}

View File

@@ -27,6 +27,7 @@
#include "cql3/description.hh"
#include "seastarx.hh"
#include "service/raft/raft_group0_client.hh"
#include "utils/alien_worker.hh"
#include "utils/observable.hh"
#include "utils/serialized_action.hh"
#include "service/maintenance_mode.hh"
@@ -130,7 +131,8 @@ public:
::service::migration_manager&,
const service_config&,
maintenance_socket_enabled,
cache&);
cache&,
utils::alien_worker&);
future<> start(::service::migration_manager&, db::system_keyspace&);

View File

@@ -192,7 +192,7 @@ future<> standard_role_manager::legacy_create_default_role_if_missing() {
{_superuser},
cql3::query_processor::cache_internal::no).discard_result();
log.info("Created default superuser role '{}'.", _superuser);
} catch (const exceptions::unavailable_exception& e) {
} catch(const exceptions::unavailable_exception& e) {
log.warn("Skipped default role setup: some nodes were not ready; will retry");
throw e;
}

View File

@@ -38,8 +38,8 @@ class transitional_authenticator : public authenticator {
public:
static const sstring PASSWORD_AUTHENTICATOR_NAME;
transitional_authenticator(cql3::query_processor& qp, ::service::raft_group0_client& g0, ::service::migration_manager& mm, cache& cache)
: transitional_authenticator(std::make_unique<password_authenticator>(qp, g0, mm, cache)) {
transitional_authenticator(cql3::query_processor& qp, ::service::raft_group0_client& g0, ::service::migration_manager& mm, cache& cache, utils::alien_worker& hashing_worker)
: transitional_authenticator(std::make_unique<password_authenticator>(qp, g0, mm, cache, hashing_worker)) {
}
transitional_authenticator(std::unique_ptr<authenticator> a)
: _authenticator(std::move(a)) {
@@ -81,7 +81,7 @@ public:
}).handle_exception([](auto ep) {
try {
std::rethrow_exception(ep);
} catch (const exceptions::authentication_exception&) {
} catch (exceptions::authentication_exception&) {
// return anon user
return make_ready_future<authenticated_user>(anonymous_user());
}
@@ -126,7 +126,7 @@ public:
virtual bytes evaluate_response(bytes_view client_response) override {
try {
return _sasl->evaluate_response(client_response);
} catch (const exceptions::authentication_exception&) {
} catch (exceptions::authentication_exception&) {
_complete = true;
return {};
}
@@ -141,7 +141,7 @@ public:
return _sasl->get_authenticated_user().handle_exception([](auto ep) {
try {
std::rethrow_exception(ep);
} catch (const exceptions::authentication_exception&) {
} catch (exceptions::authentication_exception&) {
// return anon user
return make_ready_future<authenticated_user>(anonymous_user());
}
@@ -241,7 +241,8 @@ static const class_registrator<
cql3::query_processor&,
::service::raft_group0_client&,
::service::migration_manager&,
auth::cache&> transitional_authenticator_reg(auth::PACKAGE_NAME + "TransitionalAuthenticator");
auth::cache&,
utils::alien_worker&> transitional_authenticator_reg(auth::PACKAGE_NAME + "TransitionalAuthenticator");
static const class_registrator<
auth::authorizer,

View File

@@ -859,7 +859,6 @@ scylla_core = (['message/messaging_service.cc',
'utils/alien_worker.cc',
'utils/array-search.cc',
'utils/base64.cc',
'utils/crypt_sha512.cc',
'utils/logalloc.cc',
'utils/large_bitset.cc',
'utils/buffer_input_stream.cc',
@@ -1480,6 +1479,7 @@ deps = {
pure_boost_tests = set([
'test/boost/anchorless_list_test',
'test/boost/auth_passwords_test',
'test/boost/auth_resource_test',
'test/boost/big_decimal_test',
'test/boost/caching_options_test',

View File

@@ -165,8 +165,7 @@ cql3::statements::alter_keyspace_statement::prepare_schema_mutations(query_proce
service::topology_mutation_builder builder(ts);
service::topology_request_tracking_mutation_builder rtbuilder{global_request_id, qp.proxy().features().topology_requests_type_column};
rtbuilder.set("done", false)
.set("start_time", db_clock::now());
rtbuilder.set("done", false);
if (!qp.proxy().features().topology_global_request_queue) {
builder.set_global_topology_request(service::global_topology_request::keyspace_rf_change);
builder.set_global_topology_request_id(global_request_id);

View File

@@ -1744,115 +1744,6 @@ bool should_generate_view_updates_on_this_shard(const schema_ptr& base, const lo
&& std::ranges::contains(shards, this_shard_id());
}
static endpoints_to_update get_view_natural_endpoint_vnodes(
locator::host_id me,
std::vector<std::reference_wrapper<const locator::node>> base_nodes,
std::vector<std::reference_wrapper<const locator::node>> view_nodes,
locator::endpoint_dc_rack my_location,
const locator::network_topology_strategy* network_topology,
replica::cf_stats& cf_stats) {
using node_vector = std::vector<std::reference_wrapper<const locator::node>>;
node_vector base_endpoints, view_endpoints;
auto& my_datacenter = my_location.dc;
auto process_candidate = [&] (node_vector& nodes, std::reference_wrapper<const locator::node> node) {
if (!network_topology || node.get().dc() == my_datacenter) {
nodes.emplace_back(node);
}
};
for (auto&& base_node : base_nodes) {
process_candidate(base_endpoints, base_node);
}
for (auto&& view_node : view_nodes) {
auto it = std::ranges::find(base_endpoints, view_node.get().host_id(), std::mem_fn(&locator::node::host_id));
// If this base replica is also one of the view replicas, we use
// ourselves as the view replica.
// We don't return an extra endpoint, as it's only needed when
// using tablets (so !use_legacy_self_pairing)
if (view_node.get().host_id() == me && it != base_endpoints.end()) {
return {.natural_endpoint = me};
}
// We have to remove any endpoint which is shared between the base
// and the view, as it will select itself and throw off the counts
// otherwise.
if (it != base_endpoints.end()) {
base_endpoints.erase(it);
} else if (!network_topology || view_node.get().dc() == my_datacenter) {
view_endpoints.push_back(view_node);
}
}
auto base_it = std::ranges::find(base_endpoints, me, std::mem_fn(&locator::node::host_id));
if (base_it == base_endpoints.end()) {
// This node is not a base replica of this key, so we return empty
// FIXME: This case shouldn't happen, and if it happens, a view update
// would be lost.
++cf_stats.total_view_updates_on_wrong_node;
vlogger.warn("Could not find {} in base_endpoints={}", me,
base_endpoints | std::views::transform(std::mem_fn(&locator::node::host_id)));
return {};
}
size_t idx = base_it - base_endpoints.begin();
return {.natural_endpoint = view_endpoints[idx].get().host_id()};
}
static std::optional<locator::host_id> get_unpaired_view_endpoint(
std::vector<std::reference_wrapper<const locator::node>> base_nodes,
std::vector<std::reference_wrapper<const locator::node>> view_nodes,
replica::cf_stats& cf_stats) {
std::unordered_set<locator::endpoint_dc_rack> base_dc_racks;
for (auto&& base_node : base_nodes) {
if (base_dc_racks.contains(base_node.get().dc_rack())) {
// We can't do rack-aware pairing if there are multiple replicas in the same rack.
++cf_stats.total_view_updates_failed_pairing;
vlogger.warn("Can't perform base-view pairing in this topology. There are multiple base table replicas in the same dc/rack({}/{}):",
base_node.get().dc(), base_node.get().rack());
return std::nullopt;
}
base_dc_racks.insert(base_node.get().dc_rack());
}
std::unordered_set<locator::endpoint_dc_rack> paired_view_dc_racks;
std::unordered_map<locator::endpoint_dc_rack, locator::host_id> unpaired_view_dc_rack_replicas;
for (auto&& view_node : view_nodes) {
if (paired_view_dc_racks.contains(view_node.get().dc_rack()) || unpaired_view_dc_rack_replicas.contains(view_node.get().dc_rack())) {
// We can't do rack-aware pairing if there are multiple replicas in the same rack.
++cf_stats.total_view_updates_failed_pairing;
vlogger.warn("Can't perform base-view pairing in this topology. There are multiple view table replicas in the same dc/rack({}/{}):",
view_node.get().dc(), view_node.get().rack());
return std::nullopt;
}
// Track unpaired replicas in both sets
if (base_dc_racks.contains(view_node.get().dc_rack())) {
paired_view_dc_racks.insert(view_node.get().dc_rack());
} else {
unpaired_view_dc_rack_replicas.insert({view_node.get().dc_rack(), view_node.get().host_id()});
}
}
if (unpaired_view_dc_rack_replicas.size() > 0) {
// There are view replicas that can't be paired with any base replica
// This can happen as a result of an RF change when the view replica finishes streaming
// before the base replica.
// Because of this, a view replica might not get paired with any base replica, so we need
// to send an additional update to it.
++cf_stats.total_view_updates_due_to_replica_count_mismatch;
auto extra_replica = unpaired_view_dc_rack_replicas.begin()->second;
unpaired_view_dc_rack_replicas.erase(unpaired_view_dc_rack_replicas.begin());
if (unpaired_view_dc_rack_replicas.size() > 0) {
// We only expect one extra replica to appear due to an RF change. If there's more, that's an error,
// but we'll still perform updates to the paired and last replicas to minimize degradation.
vlogger.warn("There are too many view endpoints for base-view pairing. View updates may get lost on view_endpoints={}",
unpaired_view_dc_rack_replicas | std::views::values);
}
return extra_replica;
}
return std::nullopt;
}
// Calculate the node ("natural endpoint") to which this node should send
// a view update.
//
@@ -1865,19 +1756,29 @@ static std::optional<locator::host_id> get_unpaired_view_endpoint(
// of this function is to find, assuming that this node is one of the base
// replicas for a given partition, the paired view replica.
//
// When using vnodes, we have an optimization called "self-pairing" - if a single
// node is both a base replica and a view replica for a write, the pairing is
// modified so that this node sends the update to itself and this node is removed
// from the lists of nodes paired by index. This self-pairing optimization can
// cause the pairing to change after view ranges are moved between nodes.
// In the past, we used an optimization called "self-pairing" that if a single
// node was both a base replica and a view replica for a write, the pairing is
// modified so that this node would send the update to itself. This self-
// pairing optimization could cause the pairing to change after view ranges
// are moved between nodes, so currently we only use it if
// use_legacy_self_pairing is set to true. When using tablets - where range
// movements are common - it is strongly recommended to set it to false.
//
// If the keyspace's replication strategy is a NetworkTopologyStrategy,
// we pair only nodes in the same datacenter.
//
// If the table uses tablets, then pairing is rack-aware. In this case, in each
// rack where we have a base replica there is also one replica of each view tablet.
// Therefore, the base replicas are naturally paired with the view replicas that
// are in the same rack.
// When use_legacy_self_pairing is enabled, if one of the base replicas
// also happens to be a view replica, it is paired with itself
// (with the other nodes paired by order in the list
// after taking this node out).
//
// If the table uses tablets and the replication strategy is NetworkTopologyStrategy
// and the replication factor in the node's datacenter is a multiple of the number
// of racks in the datacenter, then pairing is rack-aware. In this case,
// all racks have the same number of replicas, and those are never migrated
// outside their racks. Therefore, the base replicas are naturally paired with the
// view replicas that are in the same rack, based on the ordinal position.
// Note that typically, there is a single replica per rack and pairing is trivial.
//
// If the assumption that the given base token belongs to this replica
// does not hold, we return an empty optional.
@@ -1905,12 +1806,19 @@ endpoints_to_update get_view_natural_endpoint(
const locator::abstract_replication_strategy& replication_strategy,
const dht::token& base_token,
const dht::token& view_token,
bool use_tablets,
bool use_legacy_self_pairing,
bool use_tablets_rack_aware_view_pairing,
replica::cf_stats& cf_stats) {
auto& topology = base_erm->get_token_metadata_ptr()->get_topology();
auto& view_topology = view_erm->get_token_metadata_ptr()->get_topology();
auto& my_location = topology.get_location(me);
auto& my_datacenter = my_location.dc;
auto* network_topology = dynamic_cast<const locator::network_topology_strategy*>(&replication_strategy);
auto rack_aware_pairing = use_tablets_rack_aware_view_pairing && network_topology;
bool simple_rack_aware_pairing = false;
using node_vector = std::vector<std::reference_wrapper<const locator::node>>;
node_vector orig_base_endpoints, orig_view_endpoints;
node_vector base_endpoints, view_endpoints;
auto resolve = [&] (const locator::topology& topology, const locator::host_id& ep, bool is_view) -> const locator::node& {
if (auto* np = topology.find_node(ep)) {
@@ -1921,7 +1829,6 @@ endpoints_to_update get_view_natural_endpoint(
// We need to use get_replicas() for pairing to be stable in case base or view tablet
// is rebuilding a replica which has left the ring. get_natural_endpoints() filters such replicas.
using node_vector = std::vector<std::reference_wrapper<const locator::node>>;
auto base_nodes = base_erm->get_replicas(base_token) | std::views::transform([&] (const locator::host_id& ep) -> const locator::node& {
return resolve(topology, ep, false);
}) | std::ranges::to<node_vector>();
@@ -1945,43 +1852,231 @@ endpoints_to_update get_view_natural_endpoint(
// note that the recursive call will not recurse again because leaving_base is in base_nodes.
auto leaving_base = it->get().host_id();
return get_view_natural_endpoint(leaving_base, base_erm, view_erm, replication_strategy, base_token,
view_token, use_tablets, cf_stats);
view_token, use_legacy_self_pairing, use_tablets_rack_aware_view_pairing, cf_stats);
}
}
}
if (!use_tablets) {
return get_view_natural_endpoint_vnodes(
me,
base_nodes,
view_nodes,
my_location,
network_topology,
cf_stats);
std::function<bool(const locator::node&)> is_candidate;
if (network_topology) {
is_candidate = [&] (const locator::node& node) { return node.dc() == my_datacenter; };
} else {
is_candidate = [&] (const locator::node&) { return true; };
}
auto process_candidate = [&] (node_vector& nodes, std::reference_wrapper<const locator::node> node) {
if (is_candidate(node)) {
nodes.emplace_back(node);
}
};
for (auto&& base_node : base_nodes) {
process_candidate(base_endpoints, base_node);
}
std::optional<locator::host_id> paired_replica;
for (auto&& view_node : view_nodes) {
if (view_node.get().dc_rack() == my_location) {
paired_replica = view_node.get().host_id();
break;
if (use_legacy_self_pairing) {
for (auto&& view_node : view_nodes) {
auto it = std::ranges::find(base_endpoints, view_node.get().host_id(), std::mem_fn(&locator::node::host_id));
// If this base replica is also one of the view replicas, we use
// ourselves as the view replica.
// We don't return an extra endpoint, as it's only needed when
// using tablets (so !use_legacy_self_pairing)
if (view_node.get().host_id() == me && it != base_endpoints.end()) {
return {.natural_endpoint = me};
}
// We have to remove any endpoint which is shared between the base
// and the view, as it will select itself and throw off the counts
// otherwise.
if (it != base_endpoints.end()) {
base_endpoints.erase(it);
} else if (is_candidate(view_node)) {
view_endpoints.push_back(view_node);
}
}
} else {
for (auto&& view_node : view_nodes) {
process_candidate(view_endpoints, view_node);
}
}
if (paired_replica && base_nodes.size() == view_nodes.size()) {
// We don't need to find any extra replicas, so we can return early
return {.natural_endpoint = paired_replica};
// Try optimizing for simple rack-aware pairing
// If the numbers of base and view replica differ, that means an RF change is taking place
// and we can't use simple rack-aware pairing.
if (rack_aware_pairing && base_endpoints.size() == view_endpoints.size()) {
auto dc_rf = network_topology->get_replication_factor(my_datacenter);
const auto& racks = topology.get_datacenter_rack_nodes().at(my_datacenter);
// Simple rack-aware pairing is possible when the datacenter replication factor
// is a multiple of the number of racks in the datacenter.
if (dc_rf % racks.size() == 0) {
simple_rack_aware_pairing = true;
size_t rack_rf = dc_rf / racks.size();
// If any rack doesn't have enough nodes to satisfy the per-rack rf
// simple rack-aware pairing is disabled.
for (const auto& [rack, nodes] : racks) {
if (nodes.size() < rack_rf) {
simple_rack_aware_pairing = false;
break;
}
}
}
if (dc_rf != base_endpoints.size()) {
// If the datacenter replication factor is not equal to the number of base replicas,
// we're in progress of a RF change and we can't use simple rack-aware pairing.
simple_rack_aware_pairing = false;
}
if (simple_rack_aware_pairing) {
std::erase_if(base_endpoints, [&] (const locator::node& node) { return node.dc_rack() != my_location; });
std::erase_if(view_endpoints, [&] (const locator::node& node) { return node.dc_rack() != my_location; });
}
}
if (!paired_replica) {
// We couldn't find any view replica in our rack
orig_base_endpoints = base_endpoints;
orig_view_endpoints = view_endpoints;
// For the complex rack_aware_pairing case, nodes are already filtered by datacenter
// Use best-match, for the minimum number of base and view replicas in each rack,
// and ordinal match for the rest.
std::optional<std::reference_wrapper<const locator::node>> paired_replica;
if (rack_aware_pairing && !simple_rack_aware_pairing) {
struct indexed_replica {
size_t idx;
std::reference_wrapper<const locator::node> node;
};
std::unordered_map<sstring, std::vector<indexed_replica>> base_racks, view_racks;
// First, index all replicas by rack
auto index_replica_set = [] (std::unordered_map<sstring, std::vector<indexed_replica>>& racks, const node_vector& replicas) {
size_t idx = 0;
for (const auto& r: replicas) {
racks[r.get().rack()].emplace_back(idx++, r);
}
};
index_replica_set(base_racks, base_endpoints);
index_replica_set(view_racks, view_endpoints);
// Try optimistically pairing `me` first
const auto& my_base_replicas = base_racks[my_location.rack];
auto base_it = std::ranges::find(my_base_replicas, me, [] (const indexed_replica& ir) { return ir.node.get().host_id(); });
if (base_it == my_base_replicas.end()) {
return {};
}
const auto& my_view_replicas = view_racks[my_location.rack];
size_t idx = base_it - my_base_replicas.begin();
if (idx < my_view_replicas.size()) {
if (orig_view_endpoints.size() <= orig_base_endpoints.size()) {
return {.natural_endpoint = my_view_replicas[idx].node.get().host_id()};
} else {
// If the number of view replicas is larger than the number of base replicas,
// we need to find the unpaired view replica, so we can't return yet.
paired_replica = my_view_replicas[idx].node;
}
}
// Collect all unpaired base and view replicas,
// where the number of replicas in the base rack is different than the respective view rack
std::vector<indexed_replica> unpaired_base_replicas, unpaired_view_replicas;
for (const auto& [rack, base_replicas] : base_racks) {
const auto& view_replicas = view_racks[rack];
for (auto i = view_replicas.size(); i < base_replicas.size(); ++i) {
unpaired_base_replicas.emplace_back(base_replicas[i]);
}
}
for (const auto& [rack, view_replicas] : view_racks) {
const auto& base_replicas = base_racks[rack];
for (auto i = base_replicas.size(); i < view_replicas.size(); ++i) {
unpaired_view_replicas.emplace_back(view_replicas[i]);
}
}
// Sort by the original ordinality, and copy the sorted results
// back into {base,view}_endpoints, for backward compatible processing below.
std::ranges::sort(unpaired_base_replicas, std::less(), std::mem_fn(&indexed_replica::idx));
base_endpoints.clear();
std::ranges::transform(unpaired_base_replicas, std::back_inserter(base_endpoints), std::mem_fn(&indexed_replica::node));
std::ranges::sort(unpaired_view_replicas, std::less(), std::mem_fn(&indexed_replica::idx));
view_endpoints.clear();
std::ranges::transform(unpaired_view_replicas, std::back_inserter(view_endpoints), std::mem_fn(&indexed_replica::node));
}
auto base_it = std::ranges::find(base_endpoints, me, std::mem_fn(&locator::node::host_id));
if (!paired_replica && base_it == base_endpoints.end()) {
// This node is not a base replica of this key, so we return empty
// FIXME: This case shouldn't happen, and if it happens, a view update
// would be lost.
++cf_stats.total_view_updates_on_wrong_node;
vlogger.warn("Could not find {} in base_endpoints={}", me,
orig_base_endpoints | std::views::transform(std::mem_fn(&locator::node::host_id)));
return {};
}
size_t idx = base_it - base_endpoints.begin();
std::optional<std::reference_wrapper<const locator::node>> no_pairing_replica;
if (!paired_replica && idx >= view_endpoints.size()) {
// There are fewer view replicas than base replicas
// FIXME: This might still happen when reducing replication factor with tablets,
// see https://github.com/scylladb/scylladb/issues/21492
++cf_stats.total_view_updates_failed_pairing;
vlogger.warn("Could not find a view replica in the same rack as base replica {} for base_endpoints={} view_endpoints={}",
me,
base_nodes | std::views::transform(std::mem_fn(&locator::node::host_id)),
view_nodes | std::views::transform(std::mem_fn(&locator::node::host_id)));
vlogger.warn("Could not pair {}: rack_aware={} base_endpoints={} view_endpoints={}", me,
rack_aware_pairing ? (simple_rack_aware_pairing ? "simple" : "complex") : "none",
orig_base_endpoints | std::views::transform(std::mem_fn(&locator::node::host_id)),
orig_view_endpoints | std::views::transform(std::mem_fn(&locator::node::host_id)));
return {};
} else if (base_endpoints.size() < view_endpoints.size()) {
// There are fewer base replicas than view replicas.
// This can happen as a result of an RF change when the view replica finishes streaming
// before the base replica.
// Because of this, a view replica might not get paired with any base replica, so we need
// to send an additional update to it.
++cf_stats.total_view_updates_due_to_replica_count_mismatch;
no_pairing_replica = view_endpoints.back();
if (base_endpoints.size() < view_endpoints.size() - 1) {
// We only expect one extra replica to appear due to an RF change. If there's more, that's an error,
// but we'll still perform updates to the paired and last replicas to minimize degradation.
vlogger.warn("There are too many view endpoints for base-view pairing. View updates may get lost on view_endpoints={}",
std::span(view_endpoints.begin() + base_endpoints.size(), view_endpoints.end() - 1) | std::views::transform(std::mem_fn(&locator::node::host_id)));
}
}
std::optional<locator::host_id> no_pairing_replica = get_unpaired_view_endpoint(base_nodes, view_nodes, cf_stats);
return {.natural_endpoint = paired_replica,
.endpoint_with_no_pairing = no_pairing_replica};
if (!paired_replica) {
paired_replica = view_endpoints[idx];
}
if (!no_pairing_replica && base_nodes.size() < view_nodes.size()) {
// This can happen when the view replica with no pairing is in another DC.
// We need to send an update to it if there are no base replicas in that DC yet,
// as it won't receive updates otherwise.
std::unordered_set<sstring> dcs_with_base_replicas;
for (const auto& base_node : base_nodes) {
dcs_with_base_replicas.insert(base_node.get().dc());
}
for (const auto& view_node : view_nodes) {
if (!dcs_with_base_replicas.contains(view_node.get().dc())) {
++cf_stats.total_view_updates_due_to_replica_count_mismatch;
no_pairing_replica = view_node;
break;
}
}
}
// https://github.com/scylladb/scylladb/issues/19439
// With tablets, a node being replaced might transition to "left" state
// but still be kept as a replica.
// As of writing this hints are not prepared to handle nodes that are left
// but are still replicas. Therefore, there is no other sensible option
// right now but to give up attempt to send the update or write a hint
// to the paired, permanently down replica.
// We use the same workaround for the extra replica.
auto return_host_id_if_not_left = [] (const auto& replica) -> std::optional<locator::host_id> {
if (!replica) {
return std::nullopt;
}
const auto& node = replica->get();
if (!node.left()) {
return node.host_id();
} else {
return std::nullopt;
}
};
return {.natural_endpoint = return_host_id_if_not_left(paired_replica),
.endpoint_with_no_pairing = return_host_id_if_not_left(no_pairing_replica)};
}
static future<> apply_to_remote_endpoints(service::storage_proxy& proxy, locator::effective_replication_map_ptr ermp,
@@ -2041,6 +2136,12 @@ future<> view_update_generator::mutate_MV(
{
auto& ks = _db.find_keyspace(base->ks_name());
auto& replication = ks.get_replication_strategy();
// We set legacy self-pairing for old vnode-based tables (for backward
// compatibility), and unset it for tablets - where range movements
// are more frequent and backward compatibility is less important.
// TODO: Maybe allow users to set use_legacy_self_pairing explicitly
// on a view, like we have the synchronous_updates_flag.
bool use_legacy_self_pairing = !ks.uses_tablets();
std::unordered_map<table_id, locator::effective_replication_map_ptr> erms;
auto get_erm = [&] (table_id id) {
auto it = erms.find(id);
@@ -2053,6 +2154,10 @@ future<> view_update_generator::mutate_MV(
for (const auto& mut : view_updates) {
(void)get_erm(mut.s->id());
}
// Enable rack-aware view updates pairing for tablets
// when the cluster feature is enabled so that all replicas agree
// on the pairing algorithm.
bool use_tablets_rack_aware_view_pairing = _db.features().tablet_rack_aware_view_pairing && ks.uses_tablets();
auto me = base_ermp->get_topology().my_host_id();
static constexpr size_t max_concurrent_updates = 128;
co_await utils::get_local_injector().inject("delay_before_get_view_natural_endpoint", 8000ms);
@@ -2060,7 +2165,7 @@ future<> view_update_generator::mutate_MV(
auto view_token = dht::get_token(*mut.s, mut.fm.key());
auto view_ermp = erms.at(mut.s->id());
auto [target_endpoint, no_pairing_endpoint] = get_view_natural_endpoint(me, base_ermp, view_ermp, replication, base_token, view_token,
ks.uses_tablets(), cf_stats);
use_legacy_self_pairing, use_tablets_rack_aware_view_pairing, cf_stats);
auto remote_endpoints = view_ermp->get_pending_replicas(view_token);
auto memory_units = seastar::make_lw_shared<db::timeout_semaphore_units>(pending_view_update_memory_units.split(memory_usage_of(mut)));
if (no_pairing_endpoint) {

View File

@@ -305,7 +305,8 @@ endpoints_to_update get_view_natural_endpoint(
const locator::abstract_replication_strategy& replication_strategy,
const dht::token& base_token,
const dht::token& view_token,
bool use_tablets,
bool use_legacy_self_pairing,
bool use_tablets_basic_rack_aware_view_pairing,
replica::cf_stats& cf_stats);
/// Verify that the provided keyspace is eligible for storing materialized views.

View File

@@ -1 +1 @@
SCYLLA_NODE_EXPORTER_ARGS="--collector.interrupts --collector.ethtool.metrics-include='(bw_in_allowance_exceeded|bw_out_allowance_exceeded|conntrack_allowance_exceeded|conntrack_allowance_available|linklocal_allowance_exceeded)' --collector.ethtool --no-collector.hwmon --no-collector.bcache --no-collector.btrfs --no-collector.fibrechannel --no-collector.infiniband --no-collector.ipvs --no-collector.nfs --no-collector.nfsd --no-collector.powersupplyclass --no-collector.rapl --no-collector.tapestats --no-collector.thermal_zone --no-collector.udp_queues --no-collector.zfs"
SCYLLA_NODE_EXPORTER_ARGS="--collector.interrupts --no-collector.hwmon --no-collector.bcache --no-collector.btrfs --no-collector.fibrechannel --no-collector.infiniband --no-collector.ipvs --no-collector.nfs --no-collector.nfsd --no-collector.powersupplyclass --no-collector.rapl --no-collector.tapestats --no-collector.thermal_zone --no-collector.udp_queues --no-collector.zfs"

View File

@@ -41,8 +41,6 @@ class MetricsProcessor:
# Get metrics from the file
try:
metrics_file = metrics.get_metrics_from_file(relative_path, "scylla_", metrics_info, strict=strict)
except SystemExit:
pass
finally:
os.chdir(old_cwd)
if metrics_file:

View File

@@ -45,6 +45,22 @@ immediately after it's finished.
A flag which determines if a task can be aborted through API.
# Task timing fields
Tasks have three timing fields that track different stages of their lifecycle:
- `creation_time` - When the task was created/queued. This is extracted from the task's
UUID (which is a timeuuid) and represents the moment the task request was submitted.
- `start_time` - When the task actually began executing. For tasks that are queued, this
will be unspecified (equal to epoch) until execution starts. For node operations
like decommission, this is set when the request is picked up for execution by the
topology coordinator.
- `end_time` - When the task completed (successfully or with an error). This is
unspecified (equal to epoch) until the task finishes.
The difference between `creation_time` and `start_time` represents the time a task
spent waiting in the queue before execution began.
# Type vs scope vs kind
`type` of a task describes what operation is covered by a task,

View File

@@ -28,8 +28,7 @@ Incremental Repair is only supported for tables that use the tablets architectur
Incremental Repair Modes
------------------------
Incremental is currently disabled by default. You can control its behavior for a given repair operation using the ``incremental_mode`` parameter.
This is useful for enabling incremental repair, or in situations where you might need to force a full data validation.
While incremental repair is the default and recommended mode, you can control its behavior for a given repair operation using the ``incremental_mode`` parameter. This is useful for situations where you might need to force a full data validation.
The available modes are:

View File

@@ -53,13 +53,13 @@ ScyllaDB nodetool cluster repair command supports the following options:
nodetool cluster repair --tablet-tokens 1,10474535988
- ``--incremental-mode`` specifies the incremental repair mode. Can be 'disabled', 'incremental', or 'full'. 'incremental': The incremental repair logic is enabled. Unrepaired sstables will be included for repair. Repaired sstables will be skipped. The incremental repair states will be updated after repair. 'full': The incremental repair logic is enabled. Both repaired and unrepaired sstables will be included for repair. The incremental repair states will be updated after repair. 'disabled': The incremental repair logic is disabled completely. The incremental repair states, e.g., repaired_at in sstables and sstables_repaired_at in the system.tablets table, will not be updated after repair. When the option is not provided, it defaults to 'disabled'.
- ``--incremental-mode`` specifies the incremental repair mode. Can be 'disabled', 'incremental', or 'full'. 'incremental': The incremental repair logic is enabled. Unrepaired sstables will be included for repair. Repaired sstables will be skipped. The incremental repair states will be updated after repair. 'full': The incremental repair logic is enabled. Both repaired and unrepaired sstables will be included for repair. The incremental repair states will be updated after repair. 'disabled': The incremental repair logic is disabled completely. The incremental repair states, e.g., repaired_at in sstables and sstables_repaired_at in the system.tablets table, will not be updated after repair. When the option is not provided, it defaults to incremental.
For example:
::
nodetool cluster repair --incremental-mode disabled
nodetool cluster repair --incremental-mode regular
- ``keyspace`` executes a repair on a specific keyspace. The default is all keyspaces.

View File

@@ -42,21 +42,21 @@ For single list:
.. code-block:: shell
task_id type kind scope state sequence_number keyspace table entity shard start_time end_time
5116ddb6-85b5-4c3e-94fb-72128f15d7b4 repair node keyspace done 3 abc 0 2025-01-16T16:12:11Z 2025-01-16T16:12:13Z
task_id type kind scope state sequence_number keyspace table entity shard creation_time start_time end_time
5116ddb6-85b5-4c3e-94fb-72128f15d7b4 repair node keyspace done 3 abc 0 2025-01-16T16:12:08Z 2025-01-16T16:12:11Z 2025-01-16T16:12:13Z
With repetition:
.. code-block:: shell
task_id type kind scope state sequence_number keyspace table entity shard start_time end_time
d8926ee7-0faf-47b7-bfeb-82477e0c7b33 repair node keyspace running 5 abc 0 2025-01-16T16:12:57Z
1e028cb8-31a3-45ed-8728-af7a1ab586f6 repair node keyspace done 4 abc 0 2025-01-16T16:12:45Z 2025-01-16T16:12:47Z
task_id type kind scope state sequence_number keyspace table entity shard creation_time start_time end_time
d8926ee7-0faf-47b7-bfeb-82477e0c7b33 repair node keyspace running 5 abc 0 2025-01-16T16:12:54Z 2025-01-16T16:12:57Z
1e028cb8-31a3-45ed-8728-af7a1ab586f6 repair node keyspace done 4 abc 0 2025-01-16T16:12:42Z 2025-01-16T16:12:45Z 2025-01-16T16:12:47Z
task_id type kind scope state sequence_number keyspace table entity shard start_time end_time
1e535f9b-97fa-4788-a956-8f3216a6ea8d repair node keyspace created 6 abc 0
d8926ee7-0faf-47b7-bfeb-82477e0c7b33 repair node keyspace running 5 abc 0 2025-01-16T16:12:57Z
1e028cb8-31a3-45ed-8728-af7a1ab586f6 repair node keyspace done 4 abc 0 2025-01-16T16:12:45Z 2025-01-16T16:12:47Z
task_id type kind scope state sequence_number keyspace table entity shard creation_time start_time end_time
1e535f9b-97fa-4788-a956-8f3216a6ea8d repair node keyspace created 6 abc 0 2025-01-16T16:13:02Z
d8926ee7-0faf-47b7-bfeb-82477e0c7b33 repair node keyspace running 5 abc 0 2025-01-16T16:12:54Z 2025-01-16T16:12:57Z
1e028cb8-31a3-45ed-8728-af7a1ab586f6 repair node keyspace done 4 abc 0 2025-01-16T16:12:42Z 2025-01-16T16:12:45Z 2025-01-16T16:12:47Z
See also
--------

View File

@@ -25,6 +25,7 @@ Example output
scope: keyspace
state: running
is_abortable: true
creation_time: 2024-07-29T15:48:50Z
start_time: 2024-07-29T15:48:55Z
end_time:
error:

View File

@@ -26,22 +26,22 @@ For single task:
.. code-block:: shell
id type kind scope state is_abortable start_time end_time error parent_id sequence_number shard keyspace table entity progress_units total completed children_ids
be5559ea-bc5a-428c-b8ce-d14eac7a1765 repair node keyspace done true 2024-07-29T16:06:46Z 2024-07-29T16:06:46Z none 1 0 abc ranges 4 4 [{task_id: 542e38cb-9ad4-40aa-9010-de2630004e55, node: 127.0.0.1 }, {task_id: 8974ebcc-1e87-4040-88fe-f2438261f7fb, node: 127.0.0.1 }]
542e38cb-9ad4-40aa-9010-de2630004e55 repair node shard done false 2024-07-29T16:06:46Z 2024-07-29T16:06:46Z be5559ea-bc5a-428c-b8ce-d14eac7a1765 1 0 abc ranges 2 2 []
8974ebcc-1e87-4040-88fe-f2438261f7fb repair node shard done false 2024-07-29T16:06:46Z 2024-07-29T16:06:46Z be5559ea-bc5a-428c-b8ce-d14eac7a1765 1 1 abc ranges 2 2 []
id type kind scope state is_abortable creation_time start_time end_time error parent_id sequence_number shard keyspace table entity progress_units total completed children_ids
be5559ea-bc5a-428c-b8ce-d14eac7a1765 repair node keyspace done true 2024-07-29T16:06:43Z 2024-07-29T16:06:46Z 2024-07-29T16:06:46Z none 1 0 abc ranges 4 4 [{task_id: 542e38cb-9ad4-40aa-9010-de2630004e55, node: 127.0.0.1 }, {task_id: 8974ebcc-1e87-4040-88fe-f2438261f7fb, node: 127.0.0.1 }]
542e38cb-9ad4-40aa-9010-de2630004e55 repair node shard done false 2024-07-29T16:06:43Z 2024-07-29T16:06:46Z 2024-07-29T16:06:46Z be5559ea-bc5a-428c-b8ce-d14eac7a1765 1 0 abc ranges 2 2 []
8974ebcc-1e87-4040-88fe-f2438261f7fb repair node shard done false 2024-07-29T16:06:43Z 2024-07-29T16:06:46Z 2024-07-29T16:06:46Z be5559ea-bc5a-428c-b8ce-d14eac7a1765 1 1 abc ranges 2 2 []
For all tasks:
.. code-block:: shell
id type kind scope state is_abortable start_time end_time error parent_id sequence_number shard keyspace table entity progress_units total completed children_ids
16eafb1e-8b2e-48e6-bd7a-432ca3d8b9fc repair node keyspace done true 2024-07-29T16:34:46Z 2024-07-29T16:34:46Z none 1 0 abc ranges 4 4 [{task_id: e0aa1aa4-58ca-4bfb-b3e6-74e5f3a0f6ee, node: 127.0.0.1 }, {task_id: 49eb5797-b67e-46b0-9365-4460f7cf988a, node: 127.0.0.1 }]
e0aa1aa4-58ca-4bfb-b3e6-74e5f3a0f6ee repair node shard done false 2024-07-29T16:34:46Z 2024-07-29T16:34:46Z 16eafb1e-8b2e-48e6-bd7a-432ca3d8b9fc 1 0 abc ranges 2 2 []
49eb5797-b67e-46b0-9365-4460f7cf988a repair node shard done false 2024-07-29T16:34:46Z 2024-07-29T16:34:46Z 16eafb1e-8b2e-48e6-bd7a-432ca3d8b9fc 1 1 abc ranges 2 2 []
82d7b2a4-146e-4a72-ba93-c66d5b4e9867 offstrategy compaction node keyspace done true 2024-07-29T16:34:16Z 2024-07-29T16:34:16Z none 954 0 abc 1 1 [{task_id: 9818277b-238d-4298-a56b-c0d2153bf140, node: 127.0.0.1 }, {task_id: c1eb0701-ad7a-45ff-956f-7b8d671fc5db, node: 127.0.0.1 }
9818277b-238d-4298-a56b-c0d2153bf140 offstrategy compaction node shard done false 2024-07-29T16:34:16Z 2024-07-29T16:34:16Z 82d7b2a4-146e-4a72-ba93-c66d5b4e9867 954 0 abc 1 1 []
c1eb0701-ad7a-45ff-956f-7b8d671fc5db offstrategy compaction node shard done false 2024-07-29T16:34:16Z 2024-07-29T16:34:16Z 82d7b2a4-146e-4a72-ba93-c66d5b4e9867 954 1 abc 1 1 []
id type kind scope state is_abortable creation_time start_time end_time error parent_id sequence_number shard keyspace table entity progress_units total completed children_ids
16eafb1e-8b2e-48e6-bd7a-432ca3d8b9fc repair node keyspace done true 2024-07-29T16:34:43Z 2024-07-29T16:34:46Z 2024-07-29T16:34:46Z none 1 0 abc ranges 4 4 [{task_id: e0aa1aa4-58ca-4bfb-b3e6-74e5f3a0f6ee, node: 127.0.0.1 }, {task_id: 49eb5797-b67e-46b0-9365-4460f7cf988a, node: 127.0.0.1 }]
e0aa1aa4-58ca-4bfb-b3e6-74e5f3a0f6ee repair node shard done false 2024-07-29T16:34:43Z 2024-07-29T16:34:46Z 2024-07-29T16:34:46Z 16eafb1e-8b2e-48e6-bd7a-432ca3d8b9fc 1 0 abc ranges 2 2 []
49eb5797-b67e-46b0-9365-4460f7cf988a repair node shard done false 2024-07-29T16:34:43Z 2024-07-29T16:34:46Z 2024-07-29T16:34:46Z 16eafb1e-8b2e-48e6-bd7a-432ca3d8b9fc 1 1 abc ranges 2 2 []
82d7b2a4-146e-4a72-ba93-c66d5b4e9867 offstrategy compaction node keyspace done true 2024-07-29T16:34:13Z 2024-07-29T16:34:16Z 2024-07-29T16:34:16Z none 954 0 abc 1 1 [{task_id: 9818277b-238d-4298-a56b-c0d2153bf140, node: 127.0.0.1 }, {task_id: c1eb0701-ad7a-45ff-956f-7b8d671fc5db, node: 127.0.0.1 }
9818277b-238d-4298-a56b-c0d2153bf140 offstrategy compaction node shard done false 2024-07-29T16:34:13Z 2024-07-29T16:34:16Z 2024-07-29T16:34:16Z 82d7b2a4-146e-4a72-ba93-c66d5b4e9867 954 0 abc 1 1 []
c1eb0701-ad7a-45ff-956f-7b8d671fc5db offstrategy compaction node shard done false 2024-07-29T16:34:13Z 2024-07-29T16:34:16Z 2024-07-29T16:34:16Z 82d7b2a4-146e-4a72-ba93-c66d5b4e9867 954 1 abc 1 1 []
See also
--------

8
docs/poetry.lock generated
View File

@@ -1018,14 +1018,14 @@ sphinx-markdown-tables = "0.0.17"
[[package]]
name = "sphinx-scylladb-theme"
version = "1.8.10"
version = "1.8.9"
description = "A Sphinx Theme for ScyllaDB documentation projects"
optional = false
python-versions = "<4.0,>=3.10"
groups = ["main"]
files = [
{file = "sphinx_scylladb_theme-1.8.10-py3-none-any.whl", hash = "sha256:8b930f33bec7308ccaa92698ebb5ad85059bcbf93a463f92917aeaf473fce632"},
{file = "sphinx_scylladb_theme-1.8.10.tar.gz", hash = "sha256:8a78a9b692d9a946be2c4a64aa472fd82204cc8ea0b1ee7f60de6db35b356326"},
{file = "sphinx_scylladb_theme-1.8.9-py3-none-any.whl", hash = "sha256:f8649a7753a29494fd2b417d1cb855035dddb9ebd498ea033fd73f5f9338271e"},
{file = "sphinx_scylladb_theme-1.8.9.tar.gz", hash = "sha256:ab7cda4c10a0d067c5c3a45f7b1f68cb8ebefe135a0be0738bfa282a344769b6"},
]
[package.dependencies]
@@ -1603,4 +1603,4 @@ files = [
[metadata]
lock-version = "2.1"
python-versions = "^3.10"
content-hash = "0ae673106f45d3465cbdabbf511e165ca44feadd34d7753f2e68093afaa95c79"
content-hash = "74912627a3f424290ed7889451c0bdb1a862ab85b1d07c85f4f3b8c34f32a020"

View File

@@ -9,7 +9,7 @@ package-mode = false
python = "^3.10"
pygments = "^2.18.0"
redirects_cli ="^0.1.3"
sphinx-scylladb-theme = "^1.8.10"
sphinx-scylladb-theme = "^1.8.9"
sphinx-sitemap = "^2.6.0"
sphinx-autobuild = "^2024.4.19"
Sphinx = "^7.3.7"

View File

@@ -38,7 +38,6 @@ debian_base_packages=(
python3-aiohttp
python3-pyparsing
python3-colorama
python3-dev
python3-tabulate
python3-pytest
python3-pytest-asyncio
@@ -66,7 +65,6 @@ debian_base_packages=(
git-lfs
e2fsprogs
fuse3
libev-dev # for python driver
)
fedora_packages=(
@@ -92,7 +90,6 @@ fedora_packages=(
patchelf
python3
python3-aiohttp
python3-devel
python3-pip
python3-file-magic
python3-colorama
@@ -157,8 +154,6 @@ fedora_packages=(
https://github.com/scylladb/cassandra-stress/releases/download/v3.18.1/cassandra-stress-java21-3.18.1-1.noarch.rpm
elfutils
jq
libev-devel # for python driver
)
fedora_python3_packages=(

View File

@@ -15,22 +15,3 @@ with the Apache License (version 2) and ScyllaDB-Source-Available-1.0.
They contain the following tag:
SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.0 and Apache-2.0)
### `musl libc` files
`licenses/musl-license.txt` is obtained from:
https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT
`utils/crypt_sha512.cc` is obtained from:
https://git.musl-libc.org/cgit/musl/tree/src/crypt/crypt_sha512.c
Both files are obtained from git.musl-libc.org.
Import commit:
commit 1b76ff0767d01df72f692806ee5adee13c67ef88
Author: Alex Rønne Petersen <alex@alexrp.com>
Date: Sun Oct 12 05:35:19 2025 +0200
s390x: shuffle register usage in __tls_get_offset to avoid r0 as address
musl as a whole is licensed under the standard MIT license included in
`licenses/musl-license.txt`.

View File

@@ -1,193 +0,0 @@
musl as a whole is licensed under the following standard MIT license:
----------------------------------------------------------------------
Copyright © 2005-2020 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
----------------------------------------------------------------------
Authors/contributors include:
A. Wilcox
Ada Worcester
Alex Dowad
Alex Suykov
Alexander Monakov
Andre McCurdy
Andrew Kelley
Anthony G. Basile
Aric Belsito
Arvid Picciani
Bartosz Brachaczek
Benjamin Peterson
Bobby Bingham
Boris Brezillon
Brent Cook
Chris Spiegel
Clément Vasseur
Daniel Micay
Daniel Sabogal
Daurnimator
David Carlier
David Edelsohn
Denys Vlasenko
Dmitry Ivanov
Dmitry V. Levin
Drew DeVault
Emil Renner Berthing
Fangrui Song
Felix Fietkau
Felix Janda
Gianluca Anzolin
Hauke Mehrtens
He X
Hiltjo Posthuma
Isaac Dunham
Jaydeep Patil
Jens Gustedt
Jeremy Huntwork
Jo-Philipp Wich
Joakim Sindholt
John Spencer
Julien Ramseier
Justin Cormack
Kaarle Ritvanen
Khem Raj
Kylie McClain
Leah Neukirchen
Luca Barbato
Luka Perkov
Lynn Ochs
M Farkas-Dyck (Strake)
Mahesh Bodapati
Markus Wichmann
Masanori Ogino
Michael Clark
Michael Forney
Mikhail Kremnyov
Natanael Copa
Nicholas J. Kain
orc
Pascal Cuoq
Patrick Oppenlander
Petr Hosek
Petr Skocik
Pierre Carrier
Reini Urban
Rich Felker
Richard Pennington
Ryan Fairfax
Samuel Holland
Segev Finer
Shiz
sin
Solar Designer
Stefan Kristiansson
Stefan O'Rear
Szabolcs Nagy
Timo Teräs
Trutz Behn
Will Dietz
William Haddon
William Pitcock
Portions of this software are derived from third-party works licensed
under terms compatible with the above MIT license:
The TRE regular expression implementation (src/regex/reg* and
src/regex/tre*) is Copyright © 2001-2008 Ville Laurikari and licensed
under a 2-clause BSD license (license text in the source files). The
included version has been heavily modified by Rich Felker in 2012, in
the interests of size, simplicity, and namespace cleanliness.
Much of the math library code (src/math/* and src/complex/*) is
Copyright © 1993,2004 Sun Microsystems or
Copyright © 2003-2011 David Schultz or
Copyright © 2003-2009 Steven G. Kargl or
Copyright © 2003-2009 Bruce D. Evans or
Copyright © 2008 Stephen L. Moshier or
Copyright © 2017-2018 Arm Limited
and labelled as such in comments in the individual source files. All
have been licensed under extremely permissive terms.
The ARM memcpy code (src/string/arm/memcpy.S) is Copyright © 2008
The Android Open Source Project and is licensed under a two-clause BSD
license. It was taken from Bionic libc, used on Android.
The AArch64 memcpy and memset code (src/string/aarch64/*) are
Copyright © 1999-2019, Arm Limited.
The implementation of DES for crypt (src/crypt/crypt_des.c) is
Copyright © 1994 David Burren. It is licensed under a BSD license.
The implementation of blowfish crypt (src/crypt/crypt_blowfish.c) was
originally written by Solar Designer and placed into the public
domain. The code also comes with a fallback permissive license for use
in jurisdictions that may not recognize the public domain.
The smoothsort implementation (src/stdlib/qsort.c) is Copyright © 2011
Lynn Ochs and is licensed under an MIT-style license.
The x86_64 port was written by Nicholas J. Kain and is licensed under
the standard MIT terms.
The mips and microblaze ports were originally written by Richard
Pennington for use in the ellcc project. The original code was adapted
by Rich Felker for build system and code conventions during upstream
integration. It is licensed under the standard MIT terms.
The mips64 port was contributed by Imagination Technologies and is
licensed under the standard MIT terms.
The powerpc port was also originally written by Richard Pennington,
and later supplemented and integrated by John Spencer. It is licensed
under the standard MIT terms.
All other files which have no copyright comments are original works
produced specifically for use as part of this library, written either
by Rich Felker, the main author of the library, or by one or more
contibutors listed above. Details on authorship of individual files
can be found in the git version control history of the project. The
omission of copyright and license comments in each file is in the
interest of source tree size.
In addition, permission is hereby granted for all public header files
(include/* and arch/*/bits/*) and crt files intended to be linked into
applications (crt/*, ldso/dlstart.c, and arch/*/crt_arch.h) to omit
the copyright notice and permission notice otherwise required by the
license, and to use these files without any requirement of
attribution. These files include substantial contributions from:
Bobby Bingham
John Spencer
Nicholas J. Kain
Rich Felker
Richard Pennington
Stefan Kristiansson
Szabolcs Nagy
all of whom have explicitly granted such permission.
This file previously contained text expressing a belief that most of
the files covered by the above exception were sufficiently trivial not
to be subject to copyright, resulting in confusion over whether it
negated the permissions granted in the license. In the spirit of
permissive licensing, and of not having licensing issues being an
obstacle to adoption, that text has been removed.

View File

@@ -200,10 +200,7 @@ enum class tablet_repair_incremental_mode : uint8_t {
disabled,
};
// FIXME: Incremental repair is disabled by default due to
// https://github.com/scylladb/scylladb/issues/26041 and
// https://github.com/scylladb/scylladb/issues/27414
constexpr tablet_repair_incremental_mode default_tablet_repair_incremental_mode{tablet_repair_incremental_mode::disabled};
constexpr tablet_repair_incremental_mode default_tablet_repair_incremental_mode{tablet_repair_incremental_mode::incremental};
sstring tablet_repair_incremental_mode_to_string(tablet_repair_incremental_mode);
tablet_repair_incremental_mode tablet_repair_incremental_mode_from_string(const sstring&);

View File

@@ -748,6 +748,8 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
// inherit Seastar's CPU affinity masks. We want this thread to be free
// to migrate between CPUs; we think that's what makes the most sense.
auto rpc_dict_training_worker = utils::alien_worker(startlog, 19, "rpc-dict");
// niceness=10 is ~10% of normal process time
auto hashing_worker = utils::alien_worker(startlog, 10, "pwd-hash");
return app.run(ac, av, [&] () -> future<int> {
@@ -777,7 +779,8 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
return seastar::async([&app, cfg, ext, &disk_space_monitor_shard0, &cm, &sstm, &db, &qp, &bm, &proxy, &mapreduce_service, &mm, &mm_notifier, &ctx, &opts, &dirs,
&prometheus_server, &cf_cache_hitrate_calculator, &load_meter, &feature_service, &gossiper, &snitch,
&token_metadata, &erm_factory, &snapshot_ctl, &messaging, &sst_dir_semaphore, &raft_gr, &service_memory_limiter,
&repair, &sst_loader, &auth_cache, &ss, &lifecycle_notifier, &stream_manager, &task_manager, &rpc_dict_training_worker, &vector_store_client] {
&repair, &sst_loader, &auth_cache, &ss, &lifecycle_notifier, &stream_manager, &task_manager, &rpc_dict_training_worker,
&hashing_worker, &vector_store_client] {
try {
if (opts.contains("relabel-config-file") && !opts["relabel-config-file"].as<sstring>().empty()) {
// calling update_relabel_config_from_file can cause an exception that would stop startup
@@ -2057,7 +2060,7 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
maintenance_auth_config.authenticator_java_name = sstring{auth::allow_all_authenticator_name};
maintenance_auth_config.role_manager_java_name = sstring{auth::maintenance_socket_role_manager_name};
maintenance_auth_service.start(perm_cache_config, std::ref(qp), std::ref(group0_client), std::ref(mm_notifier), std::ref(mm), maintenance_auth_config, maintenance_socket_enabled::yes, std::ref(auth_cache)).get();
maintenance_auth_service.start(perm_cache_config, std::ref(qp), std::ref(group0_client), std::ref(mm_notifier), std::ref(mm), maintenance_auth_config, maintenance_socket_enabled::yes, std::ref(auth_cache), std::ref(hashing_worker)).get();
cql_maintenance_server_ctl.emplace(maintenance_auth_service, mm_notifier, gossiper, qp, service_memory_limiter, sl_controller, lifecycle_notifier, *cfg, maintenance_cql_sg_stats_key, maintenance_socket_enabled::yes, dbcfg.statement_scheduling_group);
@@ -2333,7 +2336,7 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
auth_config.authenticator_java_name = qualified_authenticator_name;
auth_config.role_manager_java_name = qualified_role_manager_name;
auth_service.start(std::move(perm_cache_config), std::ref(qp), std::ref(group0_client), std::ref(mm_notifier), std::ref(mm), auth_config, maintenance_socket_enabled::no, std::ref(auth_cache)).get();
auth_service.start(std::move(perm_cache_config), std::ref(qp), std::ref(group0_client), std::ref(mm_notifier), std::ref(mm), auth_config, maintenance_socket_enabled::no, std::ref(auth_cache), std::ref(hashing_worker)).get();
std::any stop_auth_service;
// Has to be called after node joined the cluster (join_cluster())

View File

@@ -14,6 +14,7 @@
#include "tasks/task_handler.hh"
#include "tasks/virtual_task_hint.hh"
#include "utils/error_injection.hh"
#include "utils/UUID_gen.hh"
#include <variant>
#include "utils/overloaded_functor.hh"
@@ -90,6 +91,7 @@ future<std::optional<tasks::task_status>> node_ops_virtual_task::get_status_help
.scope = "cluster",
.state = get_state(entry),
.is_abortable = co_await is_abortable(std::move(hint)),
.creation_time = db_clock::time_point(utils::UUID_gen::unix_timestamp(id.uuid())),
.start_time = entry.start_time,
.end_time = entry.end_time,
.error = entry.error,
@@ -167,6 +169,7 @@ future<std::vector<tasks::task_stats>> node_ops_virtual_task::get_stats() {
.table = "",
.entity = "",
.shard = 0,
.creation_time = db_clock::time_point(utils::UUID_gen::unix_timestamp(id)),
.start_time = entry.start_time,
.end_time = entry.end_time
};

View File

@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3cbe2dd05945f8fb76ebce2ea70864063d2b282c4d5080af1f290ead43321ab3
size 6444732
oid sha256:80a47fe93866989aaf7e949168fcd308e95841e78c976a61f9eac20bfdd34d96
size 6448960

View File

@@ -297,17 +297,17 @@ public:
const dht::token_range& token_range() const noexcept;
size_t memtable_count() const;
size_t memtable_count() const noexcept;
const compaction_group_ptr& main_compaction_group() const noexcept;
const std::vector<compaction_group_ptr>& split_ready_compaction_groups() const;
compaction_group_ptr& select_compaction_group(locator::tablet_range_side) noexcept;
uint64_t live_disk_space_used() const;
uint64_t live_disk_space_used() const noexcept;
void for_each_compaction_group(std::function<void(const compaction_group_ptr&)> action) const;
utils::small_vector<compaction_group_ptr, 3> compaction_groups();
utils::small_vector<const_compaction_group_ptr, 3> compaction_groups() const;
void for_each_compaction_group(std::function<void(const compaction_group_ptr&)> action) const noexcept;
utils::small_vector<compaction_group_ptr, 3> compaction_groups() noexcept;
utils::small_vector<const_compaction_group_ptr, 3> compaction_groups() const noexcept;
utils::small_vector<compaction_group_ptr, 3> split_unready_groups() const;
bool split_unready_groups_are_empty() const;
@@ -430,7 +430,7 @@ public:
virtual storage_group& storage_group_for_token(dht::token) const = 0;
virtual utils::chunked_vector<storage_group_ptr> storage_groups_for_token_range(dht::token_range tr) const = 0;
virtual locator::combined_load_stats table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)> tablet_filter) const = 0;
virtual locator::combined_load_stats table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)> tablet_filter) const noexcept = 0;
virtual bool all_storage_groups_split() = 0;
virtual future<> split_all_storage_groups(tasks::task_info tablet_split_task_info) = 0;
virtual future<> maybe_split_compaction_group_of(size_t idx) = 0;

View File

@@ -1133,7 +1133,7 @@ public:
// The tablet filter is used to not double account migrating tablets, so it's important that
// only one of pending or leaving replica is accounted based on current migration stage.
locator::combined_load_stats table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)> tablet_filter) const;
locator::combined_load_stats table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)> tablet_filter) const noexcept;
const db::view::stats& get_view_stats() const {
return _view_stats;

View File

@@ -234,12 +234,18 @@ distributed_loader::get_sstables_from_upload_dir(sharded<replica::database>& db,
}
future<std::tuple<table_id, std::vector<std::vector<sstables::shared_sstable>>>>
distributed_loader::get_sstables_from_object_store(sharded<replica::database>& db, sstring ks, sstring cf, std::vector<sstring> sstables, sstring endpoint, sstring type, sstring bucket, sstring prefix, sstables::sstable_open_config cfg, std::function<seastar::abort_source*()> get_abort_src) {
return get_sstables_from(db, ks, cf, cfg, [bucket, endpoint, type, prefix, sstables=std::move(sstables), &get_abort_src] (auto& global_table, auto& directory) {
distributed_loader::get_sstables_from_object_store(sharded<replica::database>& db, sstring ks, sstring cf, std::vector<sstring> sstables, sstring endpoint, sstring bucket, sstring prefix, sstables::sstable_open_config cfg, std::function<seastar::abort_source*()> get_abort_src) {
return get_sstables_from(db, ks, cf, cfg, [bucket, endpoint, prefix, sstables=std::move(sstables), &get_abort_src, &db] (auto& global_table, auto& directory) {
return directory.start(global_table.as_sharded_parameter(),
sharded_parameter([bucket, endpoint, type, prefix, &get_abort_src] {
sharded_parameter([bucket, endpoint, prefix, &get_abort_src, &db] {
auto eps = db.local().get_config().object_storage_endpoints()
| std::views::filter([&endpoint](auto& ep) { return ep.key() == endpoint; })
;
if (eps.empty()) {
throw std::invalid_argument(fmt::format("Undefined endpoint {}", endpoint));
}
seastar::abort_source* as = get_abort_src ? get_abort_src() : nullptr;
auto opts = data_dictionary::make_object_storage_options(endpoint, type, bucket, prefix, as);
auto opts = data_dictionary::make_object_storage_options(endpoint, eps.front().type(), bucket, prefix, as);
return make_lw_shared<const data_dictionary::storage_options>(std::move(opts));
}),
sstables,

View File

@@ -92,7 +92,7 @@ public:
static future<std::tuple<table_id, std::vector<std::vector<sstables::shared_sstable>>>>
get_sstables_from_upload_dir(sharded<replica::database>& db, sstring ks, sstring cf, sstables::sstable_open_config cfg);
static future<std::tuple<table_id, std::vector<std::vector<sstables::shared_sstable>>>>
get_sstables_from_object_store(sharded<replica::database>& db, sstring ks, sstring cf, std::vector<sstring> sstables, sstring endpoint, sstring type, sstring bucket, sstring prefix, sstables::sstable_open_config cfg, std::function<seastar::abort_source*()> = {});
get_sstables_from_object_store(sharded<replica::database>& db, sstring ks, sstring cf, std::vector<sstring> sstables, sstring endpoint, sstring bucket, sstring prefix, sstables::sstable_open_config cfg, std::function<seastar::abort_source*()> = {});
static future<> process_upload_dir(sharded<replica::database>& db, sharded<db::view::view_builder>& vb, sharded<db::view::view_building_worker>& vbw, sstring ks_name, sstring cf_name, bool skip_cleanup, bool skip_reshape);
};

View File

@@ -708,7 +708,7 @@ public:
return *_single_sg;
}
locator::combined_load_stats table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)>) const override {
locator::combined_load_stats table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)>) const noexcept override {
return locator::combined_load_stats{
.table_ls = locator::table_load_stats{
.size_in_bytes = _single_sg->live_disk_space_used(),
@@ -874,7 +874,7 @@ public:
return storage_group_for_id(storage_group_of(token).first);
}
locator::combined_load_stats table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)> tablet_filter) const override;
locator::combined_load_stats table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)> tablet_filter) const noexcept override;
bool all_storage_groups_split() override;
future<> split_all_storage_groups(tasks::task_info tablet_split_task_info) override;
future<> maybe_split_compaction_group_of(size_t idx) override;
@@ -922,7 +922,7 @@ compaction_group_ptr& storage_group::select_compaction_group(locator::tablet_ran
return _main_cg;
}
void storage_group::for_each_compaction_group(std::function<void(const compaction_group_ptr&)> action) const {
void storage_group::for_each_compaction_group(std::function<void(const compaction_group_ptr&)> action) const noexcept {
action(_main_cg);
for (auto& cg : _merging_groups) {
action(cg);
@@ -932,7 +932,7 @@ void storage_group::for_each_compaction_group(std::function<void(const compactio
}
}
utils::small_vector<compaction_group_ptr, 3> storage_group::compaction_groups() {
utils::small_vector<compaction_group_ptr, 3> storage_group::compaction_groups() noexcept {
utils::small_vector<compaction_group_ptr, 3> cgs;
for_each_compaction_group([&cgs] (const compaction_group_ptr& cg) {
cgs.push_back(cg);
@@ -940,7 +940,7 @@ utils::small_vector<compaction_group_ptr, 3> storage_group::compaction_groups()
return cgs;
}
utils::small_vector<const_compaction_group_ptr, 3> storage_group::compaction_groups() const {
utils::small_vector<const_compaction_group_ptr, 3> storage_group::compaction_groups() const noexcept {
utils::small_vector<const_compaction_group_ptr, 3> cgs;
for_each_compaction_group([&cgs] (const compaction_group_ptr& cg) {
cgs.push_back(cg);
@@ -1890,7 +1890,7 @@ sstables::file_size_stats compaction_group::live_disk_space_used_full_stats() co
return _main_sstables->get_file_size_stats() + _maintenance_sstables->get_file_size_stats();
}
uint64_t storage_group::live_disk_space_used() const {
uint64_t storage_group::live_disk_space_used() const noexcept {
auto cgs = const_cast<storage_group&>(*this).compaction_groups();
return std::ranges::fold_left(cgs | std::views::transform(std::mem_fn(&compaction_group::live_disk_space_used)), uint64_t(0), std::plus{});
}
@@ -2813,7 +2813,7 @@ void table::on_flush_timer() {
});
}
locator::combined_load_stats tablet_storage_group_manager::table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)> tablet_filter) const {
locator::combined_load_stats tablet_storage_group_manager::table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)> tablet_filter) const noexcept {
locator::table_load_stats table_stats;
table_stats.split_ready_seq_number = _split_ready_seq_number;
@@ -2836,7 +2836,7 @@ locator::combined_load_stats tablet_storage_group_manager::table_load_stats(std:
};
}
locator::combined_load_stats table::table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)> tablet_filter) const {
locator::combined_load_stats table::table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)> tablet_filter) const noexcept {
return _sg_manager->table_load_stats(std::move(tablet_filter));
}
@@ -3453,7 +3453,7 @@ size_t compaction_group::memtable_count() const noexcept {
return _memtables->size();
}
size_t storage_group::memtable_count() const {
size_t storage_group::memtable_count() const noexcept {
return std::ranges::fold_left(compaction_groups() | std::views::transform(std::mem_fn(&compaction_group::memtable_count)), size_t(0), std::plus{});
}

View File

@@ -22,7 +22,7 @@ format_match = re.compile(r'\s*(?:seastar::)?format\(\s*"([^"]+)"\s*,\s*(.*)\s*'
def handle_error(message, strict=True, verbose_mode=False):
if strict:
print(f"[ERROR] {message}")
exit(1)
exit(-1)
elif verbose_mode:
print(f"[WARNING] {message}")
@@ -180,11 +180,12 @@ def get_metrics_from_file(file_name, prefix, metrics_information, verb=None, str
groups = {}
if clean_name in metrics_information:
if (isinstance(metrics_information[clean_name], str) and metrics_information[clean_name] == "skip") or "skip" in metrics_information[clean_name]:
return {}
exit(0)
param_mapping = metrics_information[clean_name]["params"] if clean_name in metrics_information and "params" in metrics_information[clean_name] else {}
groups = metrics_information[clean_name]["groups"] if clean_name in metrics_information and "groups" in metrics_information[clean_name] else {}
metrics = {}
multi_line = False
names = undefined
typ = undefined
line_number = 0;

View File

@@ -1,6 +1,6 @@
"cdc/log.cc":
params:
cdc_group_name: "cdc"
cdc_group_name: cdc
part_name;suffix: [["static_row", "total"],["clustering_row", "total"], ["map", "total"], ["set", "total"], ["list", "total"], ["udt", "total"], ["range_tombstone", "total"],["partition_delete", "total"],["row_delete", "total"], ["static_row", "failed"],["clustering_row", "failed"], ["map", "failed"], ["set", "failed"], ["list", "failed"], ["udt", "failed"], ["range_tombstone", "failed"],["partition_delete", "failed"],["row_delete", "failed"]]
kind: ["total", "failed"]
"db/commitlog/commitlog.cc":
@@ -9,7 +9,7 @@
"cfg.max_active_flushes": "cfg.max_active_flushes"
"cql3/query_processor.cc":
groups:
"80": "query_processor"
"80": query_processor
"replica/dirty_memory_manager.cc":
params:
namestr: ["regular", "system"]
@@ -19,11 +19,10 @@
"replica/database.cc":
params:
"_dirty_memory_manager.throttle_threshold()": "throttle threshold"
"seastar/apps/metrics_tester/metrics_tester.cc": "skip"
"seastar/tests/unit/metrics_test.cc": "skip"
"seastar/tests/unit/metrics_tester.cc": "skip"
"seastar/tests/unit/prometheus_http_test.cc": "skip"
"seastar/tests/unit/prometheus_text_test.cc": "skip"
"seastar/apps/metrics_tester/metrics_tester.cc": skip
"seastar/tests/unit/metrics_test.cc": skip
"seastar/tests/unit/metrics_tester.cc": skip
"seastar/tests/unit/prometheus_http_test.cc": skip
"service/storage_proxy.cc":
params:
COORDINATOR_STATS_CATEGORY: "storage_proxy_coordinator"
@@ -33,25 +32,25 @@
_short_description_prefix: ["total_write_attempts", "write_errors", "background_replica_writes_failed", "read_repair_write_attempts"]
_long_description_prefix: ["total number of write requests", "number of write requests that failed", "background_replica_writes_failed", "number of write operations in a read repair context"]
_category: "storage_proxy_coordinator"
"thrift/server.cc": "skip"
"thrift/server.cc": skip
"tracing/tracing.cc":
params:
"max_pending_trace_records + write_event_records_threshold": "max_pending_trace_records + write_event_records_threshold"
"transport/server.cc":
groups:
"200": "transport"
"200": transport
params:
"_config.max_request_size": "max_request_size"
"seastar/src/net/dpdk.cc": "skip"
"seastar/src/net/dpdk.cc": skip
"db/hints/manager.cc":
params:
"group_name": ["hints_for_views_manager", "hints_manager"]
"seastar/src/core/execution_stage.cc":
groups:
"100": "execution_stages"
"100": execution_stages
"seastar/src/core/fair_queue.cc":
groups:
"300": "io_queue"
"300": io_queue
"seastar/src/net/net.cc":
params:
_stats_plugin_name: ["stats_plugin_name"]

View File

@@ -38,9 +38,8 @@ for required in jq curl; do
fi
done
FORCE=0
ALLOW_SUBMODULE=0
ALLOW_UNSTABLE=0
ALLOW_ANY_BRANCH=0
function print_usage {
cat << EOF
@@ -61,18 +60,12 @@ Options:
-h
Print this help message and exit.
--allow-submodule
Allow a PR to update a submudule
--allow-unstable
--force
Do not check current branch to be next*
Do not check jenkins job status
--allow-any-branch
Merge PR even if target branch is not next
--force
Sets all above --allow-* options
--allow-submodule
Allow a PR to update a submudule
EOF
}
@@ -80,23 +73,13 @@ while [[ $# -gt 0 ]]
do
case $1 in
"--force"|"-f")
ALLOW_UNSTABLE=1
ALLOW_SUBMODULE=1
ALLOW_ANY_BRANCH=1
FORCE=1
shift 1
;;
--allow-submodule)
ALLOW_SUBMODULE=1
shift
;;
--allow-unstable)
ALLOW_UNSTABLE=1
shift
;;
--allow-any-branch)
ALLOW_ANY_BRANCH=1
shift
;;
+([0-9]))
PR_NUM=$1
shift 1
@@ -164,7 +147,7 @@ check_jenkins_job_status() {
fi
}
if [[ $ALLOW_UNSTABLE -eq 0 ]]; then
if [[ $FORCE -eq 0 ]]; then
check_jenkins_job_status
fi
@@ -196,19 +179,17 @@ echo -n "Fetching full name of author $PR_LOGIN... "
USER_NAME=$(curl -s "https://api.github.com/users/$PR_LOGIN" | jq -r .name)
echo "$USER_NAME"
if [[ $ALLOW_ANY_BRANCH -eq 0 ]]; then
BASE_BRANCH=$(jq -r .base.ref <<< $PR_DATA)
CURRENT_BRANCH=$(git rev-parse --abbrev-ref HEAD)
TARGET_BASE="unknown"
if [[ ${BASE_BRANCH} == master ]]; then
TARGET_BASE="next"
elif [[ ${BASE_BRANCH} == branch-* ]]; then
TARGET_BASE=${BASE_BRANCH//branch/next}
fi
if [[ "${CURRENT_BRANCH}" != "${TARGET_BASE}" ]]; then
echo "Merging into wrong next, want ${TARGET_BASE}, have ${CURRENT_BRANCH}. Use --allow-any-branch or --force to skip this check"
exit 1
fi
BASE_BRANCH=$(jq -r .base.ref <<< $PR_DATA)
CURRENT_BRANCH=$(git rev-parse --abbrev-ref HEAD)
TARGET_BASE="unknown"
if [[ ${BASE_BRANCH} == master ]]; then
TARGET_BASE="next"
elif [[ ${BASE_BRANCH} == branch-* ]]; then
TARGET_BASE=${BASE_BRANCH//branch/next}
fi
if [[ "${CURRENT_BRANCH}" != "${TARGET_BASE}" ]]; then
echo "Merging into wrong next, want ${TARGET_BASE}, have ${CURRENT_BRANCH}"
exit 1
fi
git fetch "$REMOTE" pull/$PR_NUM/head

View File

@@ -1138,8 +1138,7 @@ private:
topology_mutation_builder builder(guard.write_timestamp());
topology_request_tracking_mutation_builder trbuilder(global_request_id, _sp._features.topology_requests_type_column);
trbuilder.set_truncate_table_data(table_id)
.set("done", false)
.set("start_time", db_clock::now());
.set("done", false);
if (!_sp._features.topology_global_request_queue) {
builder.set_global_topology_request(global_topology_request::truncate_table)

View File

@@ -4940,7 +4940,6 @@ future<> storage_service::do_clusterwide_vnodes_cleanup() {
builder.queue_global_topology_request_id(request_id);
topology_request_tracking_mutation_builder rtbuilder(request_id, _feature_service.topology_requests_type_column);
rtbuilder.set("done", false)
.set("start_time", db_clock::now())
.set("request_type", global_topology_request::cleanup);
muts.push_back(rtbuilder.build());
} else {
@@ -5265,7 +5264,6 @@ future<> storage_service::raft_check_and_repair_cdc_streams() {
topology_request_tracking_mutation_builder rtbuilder(request_id, _feature_service.topology_requests_type_column);
builder.queue_global_topology_request_id(request_id);
rtbuilder.set("done", false)
.set("start_time", db_clock::now())
.set("request_type", global_topology_request::new_cdc_generation);
muts.push_back(rtbuilder.build());
} else {

View File

@@ -13,6 +13,7 @@
#include "service/task_manager_module.hh"
#include "tasks/task_handler.hh"
#include "tasks/virtual_task_hint.hh"
#include "utils/UUID_gen.hh"
#include <seastar/coroutine/maybe_yield.hh>
namespace service {
@@ -57,9 +58,14 @@ static std::optional<tasks::task_stats> maybe_make_task_stats(const locator::tab
.kind = tasks::task_kind::cluster,
.scope = get_scope(task_info.request_type),
.state = tasks::task_manager::task_state::running,
.sequence_number = 0,
.keyspace = schema->ks_name(),
.table = schema->cf_name(),
.start_time = task_info.request_time
.entity = "",
.shard = 0,
.creation_time = task_info.request_time,
.start_time = task_info.sched_time,
.end_time = db_clock::time_point{}
};
}
@@ -225,7 +231,8 @@ static void update_status(const locator::tablet_task_info& task_info, tasks::tas
sched_nr += task_info.sched_nr;
status.type = locator::tablet_task_type_to_string(task_info.request_type);
status.scope = get_scope(task_info.request_type);
status.start_time = task_info.request_time;
status.creation_time = task_info.request_time;
status.start_time = task_info.sched_time;
}
future<std::optional<status_helper>> tablet_virtual_task::get_status_helper(tasks::task_id id, tasks::virtual_task_hint hint) {

View File

@@ -956,6 +956,7 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
req_entry = co_await _sys_ks.get_topology_request_entry(req_id, true);
req = std::get<global_topology_request>(req_entry.request_type);
}
switch (req) {
case global_topology_request::new_cdc_generation: {
rtlogger.info("new CDC generation requested");
@@ -975,9 +976,14 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
.set_global_topology_request(req)
.set_global_topology_request_id(req_id)
.drop_first_global_topology_request_id(_topo_sm._topology.global_requests_queue, req_id);
// Set start_time when we begin executing the request
topology_request_tracking_mutation_builder rtbuilder(req_id);
rtbuilder.set("start_time", db_clock::now());
auto reason = ::format(
"insert CDC generation data (UUID: {})", gen_uuid);
co_await update_topology_state(std::move(guard), {std::move(mutation), builder.build()}, reason);
co_await update_topology_state(std::move(guard), {std::move(mutation), builder.build(), rtbuilder.build()}, reason);
}
break;
case global_topology_request::cleanup:
@@ -1068,7 +1074,9 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
.del_global_topology_request_id()
.drop_first_global_topology_request_id(_topo_sm._topology.global_requests_queue, req_id)
.build()));
// Set start_time when we begin executing the request and mark as done
updates.push_back(canonical_mutation(topology_request_tracking_mutation_builder(req_id)
.set("start_time", db_clock::now())
.done(error)
.build()));
@@ -1088,7 +1096,12 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
.set_global_topology_request_id(req_id)
.drop_first_global_topology_request_id(_topo_sm._topology.global_requests_queue, req_id)
.set_session(session_id(req_id));
co_await update_topology_state(std::move(guard), {builder.build()}, "TRUNCATE TABLE requested");
// Set start_time when we begin executing the request
topology_request_tracking_mutation_builder rtbuilder(req_id);
rtbuilder.set("start_time", db_clock::now());
co_await update_topology_state(std::move(guard), {builder.build(), rtbuilder.build()}, "TRUNCATE TABLE requested");
}
break;
}
@@ -3279,6 +3292,11 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
topology_mutation_builder builder(guard.write_timestamp());
builder.del_global_topology_request();
if (_feature_service.topology_global_request_queue) {
// Set start_time when we begin executing the request
topology_request_tracking_mutation_builder start_rtbuilder(*global_request_id);
start_rtbuilder.set("start_time", db_clock::now());
muts.emplace_back(start_rtbuilder.build());
topology_request_tracking_mutation_builder rtbuilder(*global_request_id);
builder.del_global_topology_request_id()
.drop_first_global_topology_request_id(_topo_sm._topology.global_requests_queue, *global_request_id);

View File

@@ -2485,6 +2485,11 @@ void sstable::validate_originating_host_id() const {
}
return;
}
if (*originating_host_id != local_host_id) {
// FIXME refrain from throwing an exception because of #10148
sstlog.warn("Host id {} does not match local host id {} while validating SSTable: {}. Load foreign SSTables via the upload dir instead.", *originating_host_id, local_host_id, get_filename());
}
}
sstring sstable::component_basename(const sstring& ks, const sstring& cf, version_types version, generation_type generation,

View File

@@ -135,17 +135,13 @@ future<> storage_manager::update_config(const db::config& cfg) {
co_return;
}
auto storage_manager::get_endpoint(const sstring& endpoint) -> object_storage_endpoint& {
shared_ptr<sstables::object_storage_client> storage_manager::get_endpoint_client(sstring endpoint) {
auto found = _object_storage_endpoints.find(endpoint);
if (found == _object_storage_endpoints.end()) {
smlogger.error("unable to find {} in configured object-storage endpoints", endpoint);
throw std::invalid_argument(format("endpoint {} not found", endpoint));
}
return found->second;
}
shared_ptr<sstables::object_storage_client> storage_manager::get_endpoint_client(sstring endpoint) {
auto& ep = get_endpoint(endpoint);
auto& ep = found->second;
if (ep.client == nullptr) {
ep.client = make_object_storage_client(ep.cfg, _object_storage_clients_memory, [&ct = container()] (std::string ep) {
return ct.local().get_endpoint_client(ep);
@@ -154,10 +150,6 @@ shared_ptr<sstables::object_storage_client> storage_manager::get_endpoint_client
return ep.client;
}
sstring storage_manager::get_endpoint_type(sstring endpoint) {
return get_endpoint(endpoint).cfg.type();
}
bool storage_manager::is_known_endpoint(sstring endpoint) const {
return _object_storage_endpoints.contains(endpoint);
}

View File

@@ -70,7 +70,6 @@ class storage_manager : public peering_sharded_service<storage_manager> {
seastar::metrics::metric_groups metrics;
future<> update_config(const db::config&);
object_storage_endpoint& get_endpoint(const sstring& ep);
public:
struct config {
@@ -81,7 +80,6 @@ public:
storage_manager(const db::config&, config cfg);
shared_ptr<object_storage_client> get_endpoint_client(sstring endpoint);
bool is_known_endpoint(sstring endpoint) const;
sstring get_endpoint_type(sstring endpoint);
future<> stop();
std::vector<sstring> endpoints(sstring type = "") const noexcept;
};

View File

@@ -205,13 +205,6 @@ private:
}
bool tablet_in_scope(locator::tablet_id) const;
friend future<std::vector<tablet_sstable_collection>> get_sstables_for_tablets_for_tests(const std::vector<sstables::shared_sstable>& sstables,
std::vector<dht::token_range>&& tablets_ranges);
// Pay attention, while working with tablet ranges, the `erm` must be held alive as long as we retrieve (and use here) tablet ranges from
// the tablet map. This is already done when using `tablet_sstable_streamer` class but tread carefully if you plan to use this method somewhere else.
static future<std::vector<tablet_sstable_collection>> get_sstables_for_tablets(const std::vector<sstables::shared_sstable>& sstables,
std::vector<dht::token_range>&& tablets_ranges);
};
host_id_vector_replica_set sstable_streamer::get_endpoints(const dht::token& token) const {
@@ -350,52 +343,55 @@ public:
}
};
future<std::vector<tablet_sstable_collection>> tablet_sstable_streamer::get_sstables_for_tablets(const std::vector<sstables::shared_sstable>& sstables,
std::vector<dht::token_range>&& tablets_ranges) {
auto tablets_sstables =
tablets_ranges | std::views::transform([](auto range) { return tablet_sstable_collection{.tablet_range = range}; }) | std::ranges::to<std::vector>();
if (sstables.empty() || tablets_sstables.empty()) {
co_return std::move(tablets_sstables);
}
// sstables are sorted by first key in reverse order.
auto reversed_sstables = sstables | std::views::reverse;
for (auto& [tablet_range, sstables_fully_contained, sstables_partially_contained] : tablets_sstables) {
for (const auto& sst : reversed_sstables) {
auto sst_first = sst->get_first_decorated_key().token();
auto sst_last = sst->get_last_decorated_key().token();
// SSTable entirely after tablet -> no further SSTables (larger keys) can overlap
if (tablet_range.after(sst_first, dht::token_comparator{})) {
break;
}
// SSTable entirely before tablet -> skip and continue scanning later (larger keys)
if (tablet_range.before(sst_last, dht::token_comparator{})) {
continue;
}
if (tablet_range.contains(dht::token_range{sst_first, sst_last}, dht::token_comparator{})) {
sstables_fully_contained.push_back(sst);
} else {
sstables_partially_contained.push_back(sst);
}
co_await coroutine::maybe_yield();
}
}
co_return std::move(tablets_sstables);
}
future<> tablet_sstable_streamer::stream(shared_ptr<stream_progress> progress) {
if (progress) {
progress->start(_tablet_map.tablet_count());
}
auto classified_sstables = co_await get_sstables_for_tablets(
_sstables, _tablet_map.tablet_ids() | std::views::filter([this](auto tid) { return tablet_in_scope(tid); }) | std::views::transform([this](auto tid) {
return _tablet_map.get_token_range(tid);
}) | std::ranges::to<std::vector>());
// sstables are sorted by first key in reverse order.
auto sstable_it = _sstables.rbegin();
for (auto tablet_id : _tablet_map.tablet_ids() | std::views::filter([this] (auto tid) { return tablet_in_scope(tid); })) {
auto tablet_range = _tablet_map.get_token_range(tablet_id);
auto sstable_token_range = [] (const sstables::shared_sstable& sst) {
return dht::token_range(sst->get_first_decorated_key().token(),
sst->get_last_decorated_key().token());
};
std::vector<sstables::shared_sstable> sstables_fully_contained;
std::vector<sstables::shared_sstable> sstables_partially_contained;
// sstable is exhausted if its last key is before the current tablet range
auto exhausted = [&tablet_range] (const sstables::shared_sstable& sst) {
return tablet_range.before(sst->get_last_decorated_key().token(), dht::token_comparator{});
};
while (sstable_it != _sstables.rend() && exhausted(*sstable_it)) {
sstable_it++;
}
for (auto sst_it = sstable_it; sst_it != _sstables.rend(); sst_it++) {
auto sst_token_range = sstable_token_range(*sst_it);
// sstables are sorted by first key, so should skip this SSTable since it
// doesn't overlap with the current tablet range.
if (!tablet_range.overlaps(sst_token_range, dht::token_comparator{})) {
// If the start of the next SSTable's token range lies beyond the current tablet's token
// range, we can safely conclude that no more relevant SSTables remain for this tablet.
if (tablet_range.after(sst_token_range.start()->value(), dht::token_comparator{})) {
break;
}
continue;
}
if (tablet_range.contains(sst_token_range, dht::token_comparator{})) {
sstables_fully_contained.push_back(*sst_it);
} else {
sstables_partially_contained.push_back(*sst_it);
}
co_await coroutine::maybe_yield();
}
for (auto& [tablet_range, sstables_fully_contained, sstables_partially_contained] : classified_sstables) {
auto per_tablet_progress = make_shared<per_tablet_stream_progress>(
progress,
sstables_fully_contained.size() + sstables_partially_contained.size());
@@ -755,9 +751,8 @@ future<> sstables_loader::download_task_impl::run() {
};
llog.debug("Loading sstables from {}({}/{})", _endpoint, _bucket, _prefix);
auto ep_type = _loader.local()._storage_manager.get_endpoint_type(_endpoint);
std::vector<seastar::abort_source> shard_aborts(smp::count);
auto [ table_id, sstables_on_shards ] = co_await replica::distributed_loader::get_sstables_from_object_store(_loader.local()._db, _ks, _cf, _sstables, _endpoint, ep_type, _bucket, _prefix, cfg, [&] {
auto [ table_id, sstables_on_shards ] = co_await replica::distributed_loader::get_sstables_from_object_store(_loader.local()._db, _ks, _cf, _sstables, _endpoint, _bucket, _prefix, cfg, [&] {
return &shard_aborts[this_shard_id()];
});
llog.debug("Streaming sstables from {}({}/{})", _endpoint, _bucket, _prefix);
@@ -837,7 +832,3 @@ future<tasks::task_id> sstables_loader::download_new_sstables(sstring ks_name, s
std::move(prefix), std::move(sstables), scope, primary_replica_only(primary_replica));
co_return task->id();
}
future<std::vector<tablet_sstable_collection>> get_sstables_for_tablets_for_tests(const std::vector<sstables::shared_sstable>& sstables,
std::vector<dht::token_range>&& tablets_ranges) {
return tablet_sstable_streamer::get_sstables_for_tablets(sstables, std::move(tablets_ranges));
}

View File

@@ -10,8 +10,6 @@
#include <vector>
#include <seastar/core/sharded.hh>
#include "dht/i_partitioner_fwd.hh"
#include "dht/token.hh"
#include "schema/schema_fwd.hh"
#include "sstables/shared_sstable.hh"
#include "tasks/task_manager.hh"
@@ -154,18 +152,3 @@ struct fmt::formatter<sstables_loader::stream_scope> : fmt::formatter<string_vie
}
}
};
struct tablet_sstable_collection {
dht::token_range tablet_range;
std::vector<sstables::shared_sstable> sstables_fully_contained;
std::vector<sstables::shared_sstable> sstables_partially_contained;
};
// This function is intended for test purposes only.
// It assigns the given sstables to the given tablet ranges based on token containment.
// It returns a vector of tablet_sstable_collection, each containing the tablet range
// and the sstables that are fully or partially contained within that range.
// The prerequisite is the tablet ranges are sorted by the range in ascending order and non-overlapping.
// Another prerequisite is that the sstables' token ranges are sorted by its `start` in descending order.
future<std::vector<tablet_sstable_collection>> get_sstables_for_tablets_for_tests(const std::vector<sstables::shared_sstable>& sstables,
std::vector<dht::token_range>&& tablets_ranges);

View File

@@ -10,6 +10,7 @@
#include "tasks/task_handler.hh"
#include "tasks/virtual_task_hint.hh"
#include "utils/overloaded_functor.hh"
#include "utils/UUID_gen.hh"
#include <seastar/core/with_timeout.hh>
@@ -19,6 +20,11 @@ namespace tasks {
using task_status_variant = std::variant<tasks::task_manager::foreign_task_ptr, tasks::task_manager::task::task_essentials>;
static db_clock::time_point get_creation_time_from_task_id(task_id id) {
// Task IDs are timeuuids (version 1 UUIDs), so we can extract the timestamp from them
return db_clock::time_point(utils::UUID_gen::unix_timestamp(id.uuid()));
}
static future<task_status> get_task_status(task_manager::task_ptr task) {
auto host_id = task->get_module()->get_task_manager().get_host_id();
auto local_task_status = task->get_status();
@@ -29,6 +35,7 @@ static future<task_status> get_task_status(task_manager::task_ptr task) {
.scope = local_task_status.scope,
.state = local_task_status.state,
.is_abortable = task->is_abortable(),
.creation_time = get_creation_time_from_task_id(local_task_status.id),
.start_time = local_task_status.start_time,
.end_time = local_task_status.end_time,
.error = local_task_status.error,
@@ -173,6 +180,7 @@ future<utils::chunked_vector<task_status>> task_handler::get_status_recursively(
.scope = task.task_status.scope,
.state = task.task_status.state,
.is_abortable = task.abortable,
.creation_time = get_creation_time_from_task_id(task.task_status.id),
.start_time = task.task_status.start_time,
.end_time = task.task_status.end_time,
.error = task.task_status.error,

View File

@@ -26,6 +26,7 @@ struct task_status {
std::string scope;
task_manager::task_state state;
is_abortable is_abortable;
db_clock::time_point creation_time;
db_clock::time_point start_time;
db_clock::time_point end_time;
std::string error;
@@ -51,6 +52,7 @@ struct task_stats {
std::string table;
std::string entity;
unsigned shard;
db_clock::time_point creation_time;
db_clock::time_point start_time;
db_clock::time_point end_time;
};

View File

@@ -21,6 +21,7 @@
#include "utils/assert.hh"
#include "utils/chunked_vector.hh"
#include "utils/overloaded_functor.hh"
#include "utils/UUID_gen.hh"
#include "service/storage_service.hh"
#include "tasks/task_handler.hh"
#include "task_manager.hh"
@@ -559,6 +560,7 @@ future<utils::chunked_vector<task_stats>> task_manager::module::get_stats(is_int
.table = task->get_status().table,
.entity = task->get_status().entity,
.shard = task->get_status().shard,
.creation_time = db_clock::time_point(utils::UUID_gen::unix_timestamp(task->id().uuid())),
.start_time = task->get_status().start_time,
.end_time = task->get_status().end_time,
});

View File

@@ -12,7 +12,7 @@ add_scylla_test(alternator_unit_test
add_scylla_test(anchorless_list_test
KIND BOOST)
add_scylla_test(auth_passwords_test
KIND SEASTAR
KIND BOOST
LIBRARIES auth)
add_scylla_test(auth_resource_test
KIND BOOST)
@@ -370,7 +370,6 @@ add_scylla_test(combined_tests
sstable_compression_config_test.cc
sstable_directory_test.cc
sstable_set_test.cc
sstable_tablet_streaming.cc
statement_restrictions_test.cc
storage_proxy_test.cc
tablets_test.cc

View File

@@ -6,7 +6,7 @@
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
*/
#include <seastar/testing/test_case.hh>
#define BOOST_TEST_MODULE core
#include <array>
#include <random>
@@ -16,21 +16,15 @@
#include <boost/test/unit_test.hpp>
#include <seastar/core/sstring.hh>
#include <seastar/core/coroutine.hh>
#include "seastarx.hh"
extern "C" {
#include <crypt.h>
#include <unistd.h>
}
static auto rng_for_salt = std::default_random_engine(std::random_device{}());
//
// The same password hashed multiple times will result in different strings because the salt will be different.
//
SEASTAR_TEST_CASE(passwords_are_salted) {
BOOST_AUTO_TEST_CASE(passwords_are_salted) {
const char* const cleartext = "my_excellent_password";
std::unordered_set<sstring> observed_passwords{};
@@ -39,13 +33,12 @@ SEASTAR_TEST_CASE(passwords_are_salted) {
BOOST_REQUIRE(!observed_passwords.contains(e));
observed_passwords.insert(e);
}
co_return;
}
//
// A hashed password will authenticate against the same password in cleartext.
//
SEASTAR_TEST_CASE(correct_passwords_authenticate) {
BOOST_AUTO_TEST_CASE(correct_passwords_authenticate) {
// Common passwords.
std::array<const char*, 3> passwords{
"12345",
@@ -54,85 +47,14 @@ SEASTAR_TEST_CASE(correct_passwords_authenticate) {
};
for (const char* p : passwords) {
BOOST_REQUIRE(co_await auth::passwords::check(p, auth::passwords::hash(p, rng_for_salt, auth::passwords::scheme::sha_512)));
BOOST_REQUIRE(auth::passwords::check(p, auth::passwords::hash(p, rng_for_salt, auth::passwords::scheme::sha_512)));
}
}
std::string long_password(uint32_t len) {
std::string out;
auto pattern = "0123456789";
for (uint32_t i = 0; i < len; ++i) {
out.push_back(pattern[i % strlen(pattern)]);
}
return out;
}
SEASTAR_TEST_CASE(same_hashes_as_crypt_h) {
std::string long_pwd_254 = long_password(254);
std::string long_pwd_255 = long_password(255);
std::string long_pwd_511 = long_password(511);
std::array<const char*, 8> passwords{
"12345",
"1_am_the_greatest!",
"password1",
// Some special characters
"!@#$%^&*()_+-=[]{}|\n;:'\",.<>/?",
// UTF-8 characters
"こんにちは、世界!",
// Passwords close to __crypt_sha512 length limit
long_pwd_254.c_str(),
long_pwd_255.c_str(),
// Password of maximal accepted length
long_pwd_511.c_str(),
};
auto salt = "$6$aaaabbbbccccdddd";
for (const char* p : passwords) {
auto res = co_await auth::passwords::detail::hash_with_salt_async(p, salt);
BOOST_REQUIRE(res == auth::passwords::detail::hash_with_salt(p, salt));
}
}
SEASTAR_TEST_CASE(too_long_password) {
auto p1 = long_password(71);
auto p2 = long_password(72);
auto p3 = long_password(73);
auto too_long_password = long_password(512);
auto salt_bcrypt = "$2a$05$mAyzaIeJu41dWUkxEbn8hO";
auto h1_bcrypt = co_await auth::passwords::detail::hash_with_salt_async(p1, salt_bcrypt);
auto h2_bcrypt = co_await auth::passwords::detail::hash_with_salt_async(p2, salt_bcrypt);
auto h3_bcrypt = co_await auth::passwords::detail::hash_with_salt_async(p3, salt_bcrypt);
BOOST_REQUIRE(h1_bcrypt != h2_bcrypt);
// The check below documents the behavior of the current bcrypt
// implementation that compares only the first 72 bytes of the password.
// Although we don't typically use bcrypt for password hashing, it is
// possible to insert such a hash using`CREATE ROLE ... WITH HASHED PASSWORD ...`.
// Refs: scylladb/scylladb#26842
BOOST_REQUIRE(h2_bcrypt == h3_bcrypt);
// The current implementation of bcrypt password hasing fails with passwords of length 512 and above
BOOST_CHECK_THROW(co_await auth::passwords::detail::hash_with_salt_async(too_long_password, salt_bcrypt), std::system_error);
auto salt_sha512 = "$6$aaaabbbbccccdddd";
auto h1_sha512 = co_await auth::passwords::detail::hash_with_salt_async(p1, salt_sha512);
auto h2_sha512 = co_await auth::passwords::detail::hash_with_salt_async(p2, salt_sha512);
auto h3_sha512 = co_await auth::passwords::detail::hash_with_salt_async(p3, salt_sha512);
BOOST_REQUIRE(h1_sha512 != h2_sha512);
BOOST_REQUIRE(h2_sha512 != h3_sha512);
// The current implementation of SHA-512 password hasing fails with passwords of length 512 and above
BOOST_CHECK_THROW(co_await auth::passwords::detail::hash_with_salt_async(too_long_password, salt_sha512), std::system_error);
}
//
// A hashed password that does not match the password in cleartext does not authenticate.
//
SEASTAR_TEST_CASE(incorrect_passwords_do_not_authenticate) {
BOOST_AUTO_TEST_CASE(incorrect_passwords_do_not_authenticate) {
const sstring hashed_password = auth::passwords::hash("actual_password", rng_for_salt,auth::passwords::scheme::sha_512);
BOOST_REQUIRE(!co_await auth::passwords::check("password_guess", hashed_password));
BOOST_REQUIRE(!auth::passwords::check("password_guess", hashed_password));
}

View File

@@ -1450,7 +1450,8 @@ SEASTAR_THREAD_TEST_CASE(tablets_simple_rack_aware_view_pairing_test) {
std::map<sstring, replication_strategy_config_option> options;
for (const auto& dc : option_dcs) {
auto num_racks = node_count_per_rack.at(dc).size();
auto rf = num_racks;
auto max_rf_factor = std::ranges::min(std::ranges::views::transform(node_count_per_rack.at(dc), [] (auto& x) { return x.second; }));
auto rf = num_racks * tests::random::get_int(1UL, max_rf_factor);
options.emplace(dc, fmt::to_string(rf));
}
return options;
@@ -1486,7 +1487,8 @@ SEASTAR_THREAD_TEST_CASE(tablets_simple_rack_aware_view_pairing_test) {
// Test tablets rack-aware base-view pairing
auto base_token = dht::token::get_random_token();
auto view_token = dht::token::get_random_token();
bool use_tablets = true;
bool use_legacy_self_pairing = false;
bool use_tablets_basic_rack_aware_view_pairing = true;
const auto& base_replicas = base_tmap.get_tablet_info(base_tmap.get_tablet_id(base_token)).replicas;
replica::cf_stats cf_stats;
std::unordered_map<locator::host_id, locator::host_id> base_to_view_pairing;
@@ -1500,7 +1502,8 @@ SEASTAR_THREAD_TEST_CASE(tablets_simple_rack_aware_view_pairing_test) {
*ars_ptr,
base_token,
view_token,
use_tablets,
use_legacy_self_pairing,
use_tablets_basic_rack_aware_view_pairing,
cf_stats).natural_endpoint;
// view pair must be found
@@ -1522,6 +1525,181 @@ SEASTAR_THREAD_TEST_CASE(tablets_simple_rack_aware_view_pairing_test) {
}
}
// Called in a seastar thread
void test_complex_rack_aware_view_pairing_test(bool more_or_less) {
auto my_address = gms::inet_address("localhost");
// Create the RackInferringSnitch
snitch_config cfg;
cfg.listen_address = my_address;
cfg.broadcast_address = my_address;
cfg.name = "RackInferringSnitch";
sharded<snitch_ptr> snitch;
snitch.start(cfg).get();
auto stop_snitch = defer([&snitch] { snitch.stop().get(); });
snitch.invoke_on_all(&snitch_ptr::start).get();
locator::token_metadata::config tm_cfg;
tm_cfg.topo_cfg.this_endpoint = my_address;
tm_cfg.topo_cfg.local_dc_rack = { snitch.local()->get_datacenter(), snitch.local()->get_rack() };
std::map<sstring, size_t> node_count_per_dc;
std::map<sstring, std::map<sstring, size_t>> node_count_per_rack;
std::vector<ring_point> ring_points;
auto& random_engine = seastar::testing::local_random_engine;
unsigned shard_count = 2;
size_t num_dcs = 1 + tests::random::get_int(3);
// Generate a random cluster
double point = 1;
for (size_t dc = 0; dc < num_dcs; ++dc) {
sstring dc_name = fmt::format("{}", 100 + dc);
size_t num_racks = 2 + tests::random::get_int(4);
for (size_t rack = 0; rack < num_racks; ++rack) {
sstring rack_name = fmt::format("{}", 10 + rack);
size_t rack_nodes = 1 + tests::random::get_int(2);
for (size_t i = 1; i <= rack_nodes; ++i) {
ring_points.emplace_back(point, inet_address(format("192.{}.{}.{}", dc_name, rack_name, i)));
node_count_per_dc[dc_name]++;
node_count_per_rack[dc_name][rack_name]++;
point++;
}
}
}
testlog.debug("node_count_per_rack={}", node_count_per_rack);
// Initialize the token_metadata
locator::shared_token_metadata stm([] () noexcept { return db::schema_tables::hold_merge_lock(); }, tm_cfg);
auto stop_stm = deferred_stop(stm);
stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> {
auto& topo = tm.get_topology();
for (const auto& [ring_point, endpoint, id] : ring_points) {
std::unordered_set<token> tokens;
tokens.insert(token{tests::d2t(ring_point / ring_points.size())});
topo.add_node(id, make_endpoint_dc_rack(endpoint), locator::node::state::normal, shard_count);
co_await tm.update_normal_tokens(std::move(tokens), id);
}
}).get();
auto base_schema = schema_builder("ks", "base")
.with_column("k", utf8_type, column_kind::partition_key)
.with_column("v", utf8_type)
.build();
auto view_schema = schema_builder("ks", "view")
.with_column("v", utf8_type, column_kind::partition_key)
.with_column("k", utf8_type)
.build();
auto tmptr = stm.get();
// Create the replication strategy
auto make_random_options = [&] () {
auto option_dcs = node_count_per_dc | std::views::keys | std::ranges::to<std::vector>();
std::shuffle(option_dcs.begin(), option_dcs.end(), random_engine);
std::map<sstring, replication_strategy_config_option> options;
for (const auto& dc : option_dcs) {
auto num_racks = node_count_per_rack.at(dc).size();
auto rf = more_or_less ?
tests::random::get_int(num_racks, node_count_per_dc[dc]) :
tests::random::get_int(1UL, num_racks);
options.emplace(dc, fmt::to_string(rf));
}
return options;
};
auto options = make_random_options();
size_t tablet_count = 1 + tests::random::get_int(99);
testlog.debug("tablet_count={} rf_options={}", tablet_count, options);
locator::replication_strategy_params params(options, tablet_count, std::nullopt);
auto ars_ptr = abstract_replication_strategy::create_replication_strategy(
"NetworkTopologyStrategy", params, tmptr->get_topology());
auto tab_awr_ptr = ars_ptr->maybe_as_tablet_aware();
BOOST_REQUIRE(tab_awr_ptr);
auto base_tmap = tab_awr_ptr->allocate_tablets_for_new_table(base_schema, tmptr, 1).get();
auto base_table_id = base_schema->id();
testlog.debug("base_table_id={}", base_table_id);
auto view_table_id = view_schema->id();
auto view_tmap = tab_awr_ptr->allocate_tablets_for_new_table(view_schema, tmptr, 1).get();
testlog.debug("view_table_id={}", view_table_id);
stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> {
tm.tablets().set_tablet_map(base_table_id, co_await base_tmap.clone_gently());
tm.tablets().set_tablet_map(view_table_id, co_await view_tmap.clone_gently());
}).get();
tmptr = stm.get();
auto base_erm = tab_awr_ptr->make_replication_map(base_table_id, tmptr);
auto view_erm = tab_awr_ptr->make_replication_map(view_table_id, tmptr);
auto& topology = tmptr->get_topology();
testlog.debug("topology: {}", topology.get_datacenter_racks());
// Test tablets rack-aware base-view pairing
auto base_token = dht::token::get_random_token();
auto view_token = dht::token::get_random_token();
bool use_legacy_self_pairing = false;
bool use_tablets_basic_rack_aware_view_pairing = true;
const auto& base_replicas = base_tmap.get_tablet_info(base_tmap.get_tablet_id(base_token)).replicas;
replica::cf_stats cf_stats;
std::unordered_map<locator::host_id, locator::host_id> base_to_view_pairing;
std::unordered_map<locator::host_id, locator::host_id> view_to_base_pairing;
std::unordered_map<sstring, size_t> same_rack_pairs;
std::unordered_map<sstring, size_t> cross_rack_pairs;
for (const auto& base_replica : base_replicas) {
auto& base_host = base_replica.host;
auto view_ep_opt = db::view::get_view_natural_endpoint(
base_host,
base_erm,
view_erm,
*ars_ptr,
base_token,
view_token,
use_legacy_self_pairing,
use_tablets_basic_rack_aware_view_pairing,
cf_stats).natural_endpoint;
// view pair must be found
if (!view_ep_opt) {
BOOST_FAIL(format("Could not pair base_host={} base_token={} view_token={}", base_host, base_token, view_token));
}
BOOST_REQUIRE(view_ep_opt);
auto& view_ep = *view_ep_opt;
// Assert pairing uniqueness
auto [base_it, inserted_base_pair] = base_to_view_pairing.emplace(base_host, view_ep);
BOOST_REQUIRE(inserted_base_pair);
auto [view_it, inserted_view_pair] = view_to_base_pairing.emplace(view_ep, base_host);
BOOST_REQUIRE(inserted_view_pair);
auto& base_location = topology.find_node(base_host)->dc_rack();
auto& view_location = topology.find_node(view_ep)->dc_rack();
// Assert dc- and rack- aware pairing
BOOST_REQUIRE_EQUAL(base_location.dc, view_location.dc);
if (base_location.rack == view_location.rack) {
same_rack_pairs[base_location.dc]++;
} else {
cross_rack_pairs[base_location.dc]++;
}
}
for (const auto& [dc, rf_opt] : options) {
auto rf = locator::get_replication_factor(rf_opt);
BOOST_REQUIRE_EQUAL(same_rack_pairs[dc] + cross_rack_pairs[dc], rf);
}
}
SEASTAR_THREAD_TEST_CASE(tablets_complex_rack_aware_view_pairing_test_rf_lt_racks) {
test_complex_rack_aware_view_pairing_test(false);
}
SEASTAR_THREAD_TEST_CASE(tablets_complex_rack_aware_view_pairing_test_rf_gt_racks) {
test_complex_rack_aware_view_pairing_test(true);
}
SEASTAR_THREAD_TEST_CASE(test_rack_diff) {
BOOST_REQUIRE(diff_racks({}, {}).empty());

View File

@@ -1,367 +0,0 @@
/*
* Copyright (C) 2025-present ScyllaDB
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
*/
#undef SEASTAR_TESTING_MAIN
#include <seastar/testing/test_case.hh>
#include "dht/token.hh"
#include "sstable_test.hh"
#include "sstables_loader.hh"
#include "test/lib/sstable_test_env.hh"
BOOST_AUTO_TEST_SUITE(sstable_tablet_streaming_test)
using namespace sstables;
std::vector<shared_sstable> make_sstables_with_ranges(test_env& env, const std::vector<std::pair<int64_t, int64_t>>& ranges) {
std::vector<shared_sstable> ssts;
for (const auto& [first, last] : ranges) {
auto sst = env.make_sstable(uncompressed_schema(), uncompressed_dir());
test(sst).set_first_and_last_keys(dht::decorated_key(dht::token{first}, partition_key(std::vector<bytes>{"1"})),
dht::decorated_key(dht::token{last}, partition_key(std::vector<bytes>{"1"})));
ssts.push_back(std::move(sst));
}
// By sorting SSTables by their primary key, we enable runs to be
// streamed incrementally. Overlapping fragments can be deduplicated,
// reducing the amount of data sent over the wire. Elements are
// popped from the back of the vector, so we sort in descending
// order to begin with the smaller tokens.
// See sstable_streamer constructor for more details.
std::ranges::sort(ssts, [](const shared_sstable& x, const shared_sstable& y) { return x->compare_by_first_key(*y) > 0; });
return ssts;
}
std::vector<dht::token_range> get_tablet_sstable_collection(auto&&... tablet_ranges) {
// tablet ranges are left-non-inclusive, see `tablet_map::get_token_range` for details
std::vector<dht::token_range> collections{dht::token_range::make({tablet_ranges.start()->value(), false}, {tablet_ranges.end()->value(), true})...};
std::sort(collections.begin(), collections.end(), [](auto const& a, auto const& b) { return a.start()->value() < b.start()->value(); });
return collections;
}
#define REQUIRE_WITH_CONTEXT(sstables, expected_size) \
BOOST_TEST_CONTEXT("Testing with ranges: " << [&] { \
std::stringstream ss; \
for (const auto& sst : (sstables)) { \
ss << dht::token_range(sst->get_first_decorated_key().token(), sst->get_last_decorated_key().token()) << ", "; \
} \
return ss.str(); \
}()) \
BOOST_REQUIRE_EQUAL(sstables.size(), expected_size)
SEASTAR_TEST_CASE(test_streaming_ranges_distribution) {
return test_env::do_with_async([](test_env& env) {
// 1) Exact boundary equality: SSTable == tablet
{
auto collection = get_tablet_sstable_collection(dht::token_range{dht::token{5}, dht::token{10}});
auto ssts = make_sstables_with_ranges(env,
{
{5, 10},
});
auto res = get_sstables_for_tablets_for_tests(ssts, std::move(collection)).get();
REQUIRE_WITH_CONTEXT(res[0].sstables_fully_contained, 0);
REQUIRE_WITH_CONTEXT(res[0].sstables_partially_contained, 1);
}
// 2) Single-point overlaps at start/end
{
auto collection = get_tablet_sstable_collection(dht::token_range{dht::token{5}, dht::token{10}});
auto ssts = make_sstables_with_ranges(env,
{
{4, 5}, // touches start, non-inclusive, skip
{10, 11}, // touches end
});
auto res = get_sstables_for_tablets_for_tests(ssts, std::move(collection)).get();
REQUIRE_WITH_CONTEXT(res[0].sstables_fully_contained, 0);
REQUIRE_WITH_CONTEXT(res[0].sstables_partially_contained, 1);
}
// 3) Tablet fully inside a large SSTable
{
auto collection = get_tablet_sstable_collection(dht::token_range{dht::token{5}, dht::token{10}});
auto ssts = make_sstables_with_ranges(env,
{
{0, 20},
});
auto res = get_sstables_for_tablets_for_tests(ssts, std::move(collection)).get();
REQUIRE_WITH_CONTEXT(res[0].sstables_fully_contained, 0);
REQUIRE_WITH_CONTEXT(res[0].sstables_partially_contained, 1);
}
// 4) Multiple SSTables fully contained in tablet
{
auto collection = get_tablet_sstable_collection(dht::token_range{dht::token{5}, dht::token{10}});
auto ssts = make_sstables_with_ranges(env,
{
{6, 7},
{7, 8},
{8, 9},
});
auto res = get_sstables_for_tablets_for_tests(ssts, std::move(collection)).get();
REQUIRE_WITH_CONTEXT(res[0].sstables_fully_contained, 3);
REQUIRE_WITH_CONTEXT(res[0].sstables_partially_contained, 0);
}
// 5) Two overlapping but not fully contained SSTables
{
auto collection = get_tablet_sstable_collection(dht::token_range{dht::token{5}, dht::token{10}});
auto ssts = make_sstables_with_ranges(env,
{
{0, 6}, // overlaps at left
{9, 15}, // overlaps at right
});
auto res = get_sstables_for_tablets_for_tests(ssts, std::move(collection)).get();
REQUIRE_WITH_CONTEXT(res[0].sstables_fully_contained, 0);
REQUIRE_WITH_CONTEXT(res[0].sstables_partially_contained, 2);
}
// 6) Unsorted input (helper sorts) + mixed overlaps
{
auto collection = get_tablet_sstable_collection(dht::token_range{dht::token{50}, dht::token{100}});
// Intentionally unsorted by first token
auto ssts = make_sstables_with_ranges(env,
{
{120, 130},
{0, 10},
{60, 70}, // fully contained
{40, 55}, // partial
{95, 105}, // partial
{80, 90}, // fully contained
});
auto res = get_sstables_for_tablets_for_tests(ssts, std::move(collection)).get();
REQUIRE_WITH_CONTEXT(res[0].sstables_fully_contained, 2);
REQUIRE_WITH_CONTEXT(res[0].sstables_partially_contained, 2);
}
// 7) Empty SSTable list
{
auto collection = get_tablet_sstable_collection(dht::token_range{dht::token{5}, dht::token{10}});
std::vector<shared_sstable> ssts;
auto res = get_sstables_for_tablets_for_tests(ssts, std::move(collection)).get();
REQUIRE_WITH_CONTEXT(res[0].sstables_fully_contained, 0);
REQUIRE_WITH_CONTEXT(res[0].sstables_partially_contained, 0);
}
// 8) Tablet outside all SSTables
{
auto collection = get_tablet_sstable_collection(dht::token_range{dht::token{100}, dht::token{200}});
auto ssts = make_sstables_with_ranges(env,
{
{1, 2},
{3, 4},
{10, 20},
{300, 400},
});
auto res = get_sstables_for_tablets_for_tests(ssts, std::move(collection)).get();
REQUIRE_WITH_CONTEXT(res[0].sstables_fully_contained, 0);
REQUIRE_WITH_CONTEXT(res[0].sstables_partially_contained, 0);
}
// 9) Boundary adjacency with multiple fragments
{
auto collection = get_tablet_sstable_collection(dht::token_range{dht::token{100}, dht::token{200}});
auto ssts = make_sstables_with_ranges(env,
{
{50, 100}, // touches start -> non-inclusive, skip
{100, 120}, // starts at start -> partially contained
{180, 200}, // ends at end -> fully contained
{200, 220}, // touches end -> partial
});
auto res = get_sstables_for_tablets_for_tests(ssts, std::move(collection)).get();
REQUIRE_WITH_CONTEXT(res[0].sstables_fully_contained, 1);
REQUIRE_WITH_CONTEXT(res[0].sstables_partially_contained, 2);
}
// 10) Large SSTable set where early break should occur
{
auto collection = get_tablet_sstable_collection(dht::token_range{dht::token{1000}, dht::token{2000}});
auto ssts = make_sstables_with_ranges(env,
{
{100, 200},
{300, 400},
{900, 950},
{1001, 1100}, // fully contained
{1500, 1600}, // fully contained
{2101, 2200}, // entirely after -> should trigger early break in ascending scan
{1999, 2100}, // overlap, partially contained
{3000, 3100},
});
auto res = get_sstables_for_tablets_for_tests(ssts, std::move(collection)).get();
REQUIRE_WITH_CONTEXT(res[0].sstables_fully_contained, 2);
REQUIRE_WITH_CONTEXT(res[0].sstables_partially_contained, 1);
}
// 10) https://github.com/scylladb/scylladb/pull/26980 example, tested
{
auto collection = get_tablet_sstable_collection(dht::token_range{dht::token{4}, dht::token{5}});
auto ssts = make_sstables_with_ranges(env,
{
{0, 5},
{0, 3},
{2, 5},
});
auto res = get_sstables_for_tablets_for_tests(ssts, std::move(collection)).get();
// None fully contained; three partial overlaps
REQUIRE_WITH_CONTEXT(res[0].sstables_fully_contained, 0);
REQUIRE_WITH_CONTEXT(res[0].sstables_partially_contained, 2);
}
});
}
SEASTAR_TEST_CASE(test_streaming_ranges_distribution_in_tablets) {
return test_env::do_with_async([](test_env& env) {
{
auto collection = get_tablet_sstable_collection(dht::token_range{dht::token{5}, dht::token{10}}, dht::token_range{dht::token{11}, dht::token{15}});
auto ssts = make_sstables_with_ranges(env,
{
{5, 10},
});
auto res = get_sstables_for_tablets_for_tests(ssts, std::move(collection)).get();
REQUIRE_WITH_CONTEXT(res[0].sstables_fully_contained, 0);
REQUIRE_WITH_CONTEXT(res[0].sstables_partially_contained, 1);
REQUIRE_WITH_CONTEXT(res[1].sstables_fully_contained, 0);
REQUIRE_WITH_CONTEXT(res[1].sstables_partially_contained, 0);
}
{
// Multiple tablets with a hole between [10,11]
auto collection = get_tablet_sstable_collection(dht::token_range{dht::token{0}, dht::token{4}},
dht::token_range{dht::token{5}, dht::token{9}},
dht::token_range{dht::token{12}, dht::token{15}});
auto ssts = make_sstables_with_ranges(env,
{
{0, 4}, // T.start==S.start, but non-inclusive -> partial
{5, 9}, // same as above
{6, 8}, // fully in second tablet
{10, 11}, // falls in the hole, should be rejected
{8, 13}, // overlaps second and third tablets (partial in both)
});
auto res = get_sstables_for_tablets_for_tests(ssts, std::move(collection)).get();
REQUIRE_WITH_CONTEXT(res[0].sstables_fully_contained, 0);
REQUIRE_WITH_CONTEXT(res[0].sstables_partially_contained, 1);
REQUIRE_WITH_CONTEXT(res[1].sstables_fully_contained, 1);
REQUIRE_WITH_CONTEXT(res[1].sstables_partially_contained, 2);
REQUIRE_WITH_CONTEXT(res[2].sstables_fully_contained, 0);
REQUIRE_WITH_CONTEXT(res[2].sstables_partially_contained, 1);
}
{
// SSTables outside any tablet range
auto collection = get_tablet_sstable_collection(dht::token_range{dht::token{20}, dht::token{25}});
auto ssts = make_sstables_with_ranges(env,
{
{0, 5}, // before
{30, 35}, // after
});
auto res = get_sstables_for_tablets_for_tests(ssts, std::move(collection)).get();
REQUIRE_WITH_CONTEXT(res[0].sstables_fully_contained, 0);
REQUIRE_WITH_CONTEXT(res[0].sstables_partially_contained, 0);
}
{
// Edge case: SSTable touching tablet boundary
auto collection = get_tablet_sstable_collection(dht::token_range{dht::token{5}, dht::token{10}});
auto ssts = make_sstables_with_ranges(env,
{
{4, 5}, // touches start, non-inclusive, skip
{10, 11}, // touches end
});
auto res = get_sstables_for_tablets_for_tests(ssts, std::move(collection)).get();
REQUIRE_WITH_CONTEXT(res[0].sstables_fully_contained, 0);
REQUIRE_WITH_CONTEXT(res[0].sstables_partially_contained, 1);
}
{
// No tablets, but some SSTables
auto collection = get_tablet_sstable_collection();
auto ssts = make_sstables_with_ranges(env,
{
{0, 5},
{10, 15},
});
auto res = get_sstables_for_tablets_for_tests(ssts, std::move(collection)).get();
BOOST_REQUIRE_EQUAL(res.size(), 0); // no tablets → nothing to classify
}
{
// No SSTables, but some tablets
auto collection = get_tablet_sstable_collection(dht::token_range{dht::token{0}, dht::token{5}}, dht::token_range{dht::token{10}, dht::token{15}});
std::vector<shared_sstable> ssts; // empty
auto res = get_sstables_for_tablets_for_tests(ssts, std::move(collection)).get();
REQUIRE_WITH_CONTEXT(res[0].sstables_fully_contained, 0);
REQUIRE_WITH_CONTEXT(res[0].sstables_partially_contained, 0);
REQUIRE_WITH_CONTEXT(res[1].sstables_fully_contained, 0);
REQUIRE_WITH_CONTEXT(res[1].sstables_partially_contained, 0);
}
{
// No tablets and no SSTables
auto collection = get_tablet_sstable_collection();
std::vector<shared_sstable> ssts; // empty
auto res = get_sstables_for_tablets_for_tests(ssts, std::move(collection)).get();
BOOST_REQUIRE_EQUAL(res.size(), 0);
}
{
// SSTable spanning two tablets
auto collection = get_tablet_sstable_collection(dht::token_range{dht::token{0}, dht::token{4}}, dht::token_range{dht::token{5}, dht::token{9}});
auto ssts = make_sstables_with_ranges(env,
{
{2, 7}, // spans both tablets
});
auto res = get_sstables_for_tablets_for_tests(ssts, std::move(collection)).get();
// Tablet [0,4] sees partial overlap
REQUIRE_WITH_CONTEXT(res[0].sstables_fully_contained, 0);
REQUIRE_WITH_CONTEXT(res[0].sstables_partially_contained, 1);
// Tablet [5,9] sees partial overlap
REQUIRE_WITH_CONTEXT(res[1].sstables_fully_contained, 0);
REQUIRE_WITH_CONTEXT(res[1].sstables_partially_contained, 1);
}
{
// SSTable spanning three tablets with a hole in between
auto collection = get_tablet_sstable_collection(dht::token_range{dht::token{0}, dht::token{3}},
dht::token_range{dht::token{4}, dht::token{6}},
dht::token_range{dht::token{8}, dht::token{10}});
auto ssts = make_sstables_with_ranges(env,
{
{2, 9}, // spans across tablets 1,2,3 and hole [7]
});
auto res = get_sstables_for_tablets_for_tests(ssts, std::move(collection)).get();
REQUIRE_WITH_CONTEXT(res[0].sstables_partially_contained, 1);
REQUIRE_WITH_CONTEXT(res[1].sstables_partially_contained, 1);
REQUIRE_WITH_CONTEXT(res[2].sstables_partially_contained, 1);
}
{
// SSTable fully covering one tablet and partially overlapping another
auto collection = get_tablet_sstable_collection(dht::token_range{dht::token{0}, dht::token{5}}, dht::token_range{dht::token{6}, dht::token{10}});
auto ssts = make_sstables_with_ranges(env,
{
{0, 7}, // fully covers first tablet, partial in second
});
auto res = get_sstables_for_tablets_for_tests(ssts, std::move(collection)).get();
REQUIRE_WITH_CONTEXT(res[0].sstables_fully_contained, 0);
REQUIRE_WITH_CONTEXT(res[0].sstables_partially_contained, 1);
REQUIRE_WITH_CONTEXT(res[1].sstables_fully_contained, 0);
REQUIRE_WITH_CONTEXT(res[1].sstables_partially_contained, 1);
}
});
}
BOOST_AUTO_TEST_SUITE_END()

View File

@@ -395,7 +395,7 @@ SEASTAR_TEST_CASE(test_builder_with_concurrent_drop) {
assert_that(msg).is_rows().is_empty();
msg = e.execute_cql("select * from system_distributed.view_build_status").get();
assert_that(msg).is_rows().is_empty();
}, 30);
});
});
}

View File

@@ -25,7 +25,7 @@ from typing import Any, Optional, override
import pytest
import requests
from cassandra import AlreadyExists, AuthenticationFailed, ConsistencyLevel, InvalidRequest, Unauthorized, Unavailable, WriteFailure
from cassandra.cluster import NoHostAvailable, Session, EXEC_PROFILE_DEFAULT
from cassandra.cluster import NoHostAvailable, Session
from cassandra.query import SimpleStatement, named_tuple_factory
from ccmlib.scylla_node import ScyllaNode, NodeError
@@ -1135,14 +1135,6 @@ class TestCQLAudit(AuditTester):
session.execute("DROP TABLE test1")
def _get_attempt_count(self, session: Session, *, execution_profile=EXEC_PROFILE_DEFAULT, consistency_level: ConsistencyLevel = ConsistencyLevel.ONE) -> int:
# dtest env is using FlakyRetryPolicy which has `max_retries` attribute
cl_profile = session.execution_profile_clone_update(execution_profile, consistency_level=consistency_level)
policy = cl_profile.retry_policy
retries = getattr(policy, "max_retries", None)
assert retries is not None
return 1 + retries
def _test_insert_failure_doesnt_report_success_assign_nodes(self, session: Session = None):
all_nodes: set[ScyllaNode] = set(self.cluster.nodelist())
assert len(all_nodes) == 7
@@ -1162,7 +1154,6 @@ class TestCQLAudit(AuditTester):
for i in range(256):
stmt = SimpleStatement(f"INSERT INTO ks.test1 (k, v1) VALUES ({i}, 1337)", consistency_level=ConsistencyLevel.THREE)
session.execute(stmt)
attempt_count = self._get_attempt_count(session, consistency_level=ConsistencyLevel.THREE)
token = rows_to_list(session.execute(f"SELECT token(k) FROM ks.test1 WHERE k = {i}"))[0][0]
@@ -1177,9 +1168,9 @@ class TestCQLAudit(AuditTester):
audit_partition_nodes = [address_to_node[address] for address in audit_nodes]
insert_node = address_to_node[insert_node.pop()]
kill_node = address_to_node[partitions.pop()]
return audit_partition_nodes, insert_node, kill_node, stmt.query_string, attempt_count
return audit_partition_nodes, insert_node, kill_node, stmt.query_string
return [], [], None, None, None
return [], [], None, None
@pytest.mark.exclude_errors("audit - Unexpected exception when writing log with: node_ip")
def test_insert_failure_doesnt_report_success(self):
@@ -1201,7 +1192,7 @@ class TestCQLAudit(AuditTester):
with self.assert_exactly_n_audit_entries_were_added(session, 1):
conn.execute(stmt)
audit_paritition_nodes, insert_node, node_to_stop, query_to_fail, query_fail_count = self._test_insert_failure_doesnt_report_success_assign_nodes(session=session)
audit_paritition_nodes, insert_node, node_to_stop, query_to_fail = self._test_insert_failure_doesnt_report_success_assign_nodes(session=session)
# TODO: remove the loop when scylladb#24473 is fixed
# We call get_host_id only to cache host_id
@@ -1240,8 +1231,8 @@ class TestCQLAudit(AuditTester):
# If any audit mode is not done yet, continue polling.
all_modes_done = True
for mode, rows in rows_dict.items():
rows_with_error = [row for row in rows if row.error and row.operation == query_to_fail]
if len(rows_with_error) == query_fail_count:
rows_with_error = list(filter(lambda r: r.error, rows))
if len(rows_with_error) == 6:
logger.info(f"audit mode {mode} log updated after {i} iterations ({i / 10}s)")
assert rows_with_error[0].error is True
assert rows_with_error[0].consistency == "THREE"

View File

@@ -16,26 +16,16 @@ from test.cluster.util import get_topology_coordinator, new_test_keyspace, recon
logger = logging.getLogger(__name__)
# This test makes sure that view building is done mainly in the streaming
# scheduling group. We check that by grepping all relevant logs in TRACE mode
# and verifying that they come from the streaming scheduling group.
#
# For more context, see: https://github.com/scylladb/scylladb/issues/21232.
# This test reproduces the issue in non-tablet mode.
# This test makes sure that view building is done mainly in the streaming scheduling group
# and not the gossip scheduling group. We do that by measuring the time each group was
# busy during the view building process and confirming that the gossip group was busy
# much less than the streaming group.
# Reproduces https://github.com/scylladb/scylladb/issues/21232
@pytest.mark.asyncio
@skip_mode('debug', 'the test needs to do some work which takes too much time in debug mode')
async def test_view_building_scheduling_group(manager: ManagerClient):
# Note: The view building coordinator works in the gossiping scheduling group,
# and we intentionally omit it here.
# Note: We include "view" for keyspaces that don't use the view building coordinator
# and will follow the legacy path instead.
loggers = ["view_building_worker", "view_consumer", "view_update_generator", "view"]
# Flatten the list of lists.
cmdline = sum([["--logger-log-level", f"{logger}=trace"] for logger in loggers], [])
server = await manager.server_add(cmdline=cmdline)
server = await manager.server_add()
cql = manager.get_cql()
async with new_test_keyspace(manager, "WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 1}") as ks:
await cql.run_async(f"CREATE TABLE {ks}.tab (p int, c int, PRIMARY KEY (p, c))")
@@ -45,30 +35,21 @@ async def test_view_building_scheduling_group(manager: ManagerClient):
batch = "BEGIN UNLOGGED BATCH\n" + "\n".join(inserts) + "\nAPPLY BATCH\n"
await manager.cql.run_async(batch)
log = await manager.server_open_log(server.server_id)
mark = await log.mark()
metrics_before = await manager.metrics.query(server.ip_addr)
ms_gossip_before = metrics_before.get('scylla_scheduler_runtime_ms', {'group': 'gossip'})
ms_streaming_before = metrics_before.get('scylla_scheduler_runtime_ms', {'group': 'streaming'})
await cql.run_async(f"CREATE MATERIALIZED VIEW {ks}.mv AS SELECT p, c FROM {ks}.tab WHERE p IS NOT NULL AND c IS NOT NULL PRIMARY KEY (c, p)")
await wait_for_view(cql, 'mv', 1)
logger_alternative = "|".join(loggers)
pattern = rf"\[shard [0-9]+:(.+)\] ({logger_alternative}) - "
results = await log.grep(pattern, from_mark=mark)
# Sanity check. If there are no logs, something's wrong.
assert len(results) > 0
# In case of non-tablet keyspaces, we won't use the view building coordinator.
# Instead, view updates will follow the legacy path. Along the way, we'll observe
# this message, which will be printed using another scheduling group, so let's
# filter it out.
predicate = lambda result: f"Building view {ks}.mv, starting at token" not in result[0]
results = list(filter(predicate, results))
# Take the first parenthesized match for each result, i.e. the scheduling group.
sched_groups = [matches[1] for _, matches in results]
assert all(sched_group == "strm" for sched_group in sched_groups)
metrics_after = await manager.metrics.query(server.ip_addr)
ms_gossip_after = metrics_after.get('scylla_scheduler_runtime_ms', {'group': 'gossip'})
ms_streaming_after = metrics_after.get('scylla_scheduler_runtime_ms', {'group': 'streaming'})
ms_streaming = ms_streaming_after - ms_streaming_before
ms_statement = ms_gossip_after - ms_gossip_before
ratio = ms_statement / ms_streaming
print(f"ms_streaming: {ms_streaming}, ms_statement: {ms_statement}, ratio: {ratio}")
assert ratio < 0.1
# A sanity check test ensures that starting and shutting down Scylla when view building is
# disabled is conducted properly and we don't run into any issues.

View File

@@ -38,6 +38,7 @@ class TaskStats(NamedTuple):
entity: str
sequence_number: SequenceNum
shard: int
creation_time: str
start_time: str
end_time: str
@@ -54,6 +55,7 @@ class TaskStatus(NamedTuple):
entity: str
sequence_number: SequenceNum
is_abortable: bool
creation_time: str
start_time: str
end_time: str
error: str

View File

@@ -25,14 +25,12 @@ import json
from cassandra.auth import PlainTextAuthProvider
import threading
import random
import re
from test.cluster.util import get_replication
from test.pylib.manager_client import ManagerClient
from test.pylib.util import wait_for
from test.pylib.tablets import get_all_tablet_replicas
from test.cluster.conftest import skip_mode
from test.pylib.tablets import get_tablet_replica
logger = logging.getLogger(__name__)
@@ -971,118 +969,3 @@ async def test_alternator_concurrent_rmw_same_partition_different_server(manager
t.join()
finally:
table.delete()
@pytest.mark.asyncio
@skip_mode('release', 'error injections are not supported in release mode')
async def test_alternator_invalid_shard_for_lwt(manager: ManagerClient):
"""
Reproducer for issue #27353.
LWT requires that storage_proxy::cas() is invoked on a valid shard — the one
returned by sharder.try_get_shard_for_reads() for a tablets-based table.
The bug: if the current shard is invalid and we jump to the valid shard, that
new shard may become invalid again by the time we attempt to capture the ERM.
This leads to a failure of the CAS path.
The fix: retry the validity check and jump again if the current shard is already
invalid. We should exit the loop once the shard is valid *and* we hold a strong pointer
to the ERM — which prevents further tablet movements until the ERM is released.
This problem is specific to BatchWriteItem; other commands are already handled
correctly.
"""
config = alternator_config.copy()
config['alternator_write_isolation'] = 'always_use_lwt'
cmdline = [
'--logger-log-level', 'alternator-executor=trace',
'--logger-log-level', 'alternator_controller=trace',
'--logger-log-level', 'paxos=trace'
]
server = await manager.server_add(config=config, cmdline=cmdline)
alternator = get_alternator(server.ip_addr)
logger.info("Creating alternator test table")
table = alternator.create_table(TableName=unique_table_name(),
Tags=[{'Key': 'system:initial_tablets', 'Value': '1'}],
BillingMode='PAY_PER_REQUEST',
KeySchema=[{'AttributeName': 'p', 'KeyType': 'HASH'}],
AttributeDefinitions=[{'AttributeName': 'p', 'AttributeType': 'N'}])
table_name = table.name
ks_name = 'alternator_' + table_name
last_token = 7 # Any token works since we have only one tablet
(src_host_id, src_shard) = await get_tablet_replica(manager, server, ks_name, table_name, last_token)
dst_shard = 0 if src_shard == 1 else 1
logger.info("Inject 'intranode_migration_streaming_wait'")
await manager.api.enable_injection(server.ip_addr,
"intranode_migration_streaming_wait",
one_shot=False)
logger.info("Start tablet migration")
intranode_migration_task = asyncio.create_task(
manager.api.move_tablet(server.ip_addr, ks_name, table_name,
src_host_id, src_shard,
src_host_id, dst_shard, last_token))
logger.info("Open server logs")
log = await manager.server_open_log(server.server_id)
logger.info("Wait for intranode_migration_streaming_wait")
await log.wait_for("intranode_migration_streaming: waiting")
logger.info("Inject 'alternator_executor_batch_write_wait'")
await manager.api.enable_injection(server.ip_addr,
"alternator_executor_batch_write_wait",
one_shot=False,
parameters={
'table': table_name,
'keyspace': ks_name,
'shard': dst_shard
})
m = await log.mark()
# Start a background thread, which tries to hit the alternator_executor_batch_write_wait
# injection on the destination shard.
logger.info("Start a batch_write thread")
stop_event = threading.Event()
def run_batch():
alternator = get_alternator(server.ip_addr)
table = alternator.Table(table_name)
while not stop_event.is_set():
with table.batch_writer() as batch:
batch.put_item(Item={'p': 1, 'x': 'hellow world'})
t = ThreadWrapper(target=run_batch)
t.start()
logger.info("Waiting for 'alternator_executor_batch_write_wait: hit'")
await log.wait_for("alternator_executor_batch_write_wait: hit", from_mark=m)
# We have a batch request with "streaming" cas_shard on the destination shard.
# This means we have already made a decision to jump to the src_shard.
# Now we're releasing the tablet migration so that it reaches write_both_read_new and
# and invaldiates this decision.
m = await log.mark()
await manager.api.message_injection(server.ip_addr, "intranode_migration_streaming_wait")
# The next barrier must be for the write_both_read_new, we need a guarantee
# that the src_shard observed it
logger.info("Waiting for the next barrier")
await log.wait_for(re.escape(f"[shard {src_shard}: gms] raft_topology - raft_topology_cmd::barrier_and_drain done"),
from_mark=m)
# Now we have a guarantee that a new barrier succeeded on the src_shard,
# this means the src_shard has already transitioned to write_both_read_new,
# and our batch write will have to jump back to the destination shard.
logger.info("Release the 'alternator_executor_batch_write_wait'")
await manager.api.message_injection(server.ip_addr, "alternator_executor_batch_write_wait")
logger.info("Waiting for migratino task to finish")
await intranode_migration_task
stop_event.set()
t.join()

View File

@@ -220,14 +220,14 @@ async def test_tablet_repair_sstable_skipped_read_metrics(manager: ManagerClient
await insert_keys(cql, ks, 0, 100)
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token, incremental_mode='incremental')
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token)
skipped_bytes = get_incremental_repair_sst_skipped_bytes(servers[0])
read_bytes = get_incremental_repair_sst_read_bytes(servers[0])
# Nothing to skip. Repair all data.
assert skipped_bytes == 0
assert read_bytes > 0
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token, incremental_mode='incremental')
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token)
skipped_bytes2 = get_incremental_repair_sst_skipped_bytes(servers[0])
read_bytes2 = get_incremental_repair_sst_read_bytes(servers[0])
# Skip all. Nothing to repair
@@ -236,7 +236,7 @@ async def test_tablet_repair_sstable_skipped_read_metrics(manager: ManagerClient
await insert_keys(cql, ks, 200, 300)
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token, incremental_mode='incremental')
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token)
skipped_bytes3 = get_incremental_repair_sst_skipped_bytes(servers[0])
read_bytes3 = get_incremental_repair_sst_read_bytes(servers[0])
# Both skipped and read bytes should grow
@@ -272,7 +272,7 @@ async def test_tablet_incremental_repair(manager: ManagerClient):
assert get_sstables_repaired_at(map0, token) == sstables_repaired_at
# First repair
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token, incremental_mode='incremental')
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token)
map1 = await load_tablet_sstables_repaired_at(manager, cql, servers[0], hosts[0], table_id)
logging.info(f'map1={map1}')
# Check sstables_repaired_at is increased by 1
@@ -288,7 +288,7 @@ async def test_tablet_incremental_repair(manager: ManagerClient):
assert len(enable) == 1
# Second repair
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token, incremental_mode='incremental')
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token)
map2 = await load_tablet_sstables_repaired_at(manager, cql, servers[0], hosts[0], table_id)
logging.info(f'map2={map2}')
# Check sstables_repaired_at is increased by 1
@@ -313,7 +313,7 @@ async def test_tablet_incremental_repair_error(manager: ManagerClient):
# Repair should not finish with error
await inject_error_on(manager, "repair_tablet_fail_on_rpc_call", servers)
try:
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token, incremental_mode='incremental', timeout=10)
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token, timeout=10)
assert False # Check the tablet repair is not supposed to finish
except TimeoutError:
logger.info("Repair timeout as expected")
@@ -329,7 +329,7 @@ async def do_tablet_incremental_repair_and_ops(manager: ManagerClient, ops: str)
servers, cql, hosts, ks, table_id, logs, repaired_keys, unrepaired_keys, current_key, token = await preapre_cluster_for_incremental_repair(manager, nr_keys)
token = -1
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token, incremental_mode='incremental')
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token)
# 1 add 0 skip 1 mark
for log in logs:
sst_add, sst_skip, sst_mark = await get_sst_status("First", log)
@@ -355,7 +355,7 @@ async def do_tablet_incremental_repair_and_ops(manager: ManagerClient, ops: str)
else:
assert False # Wrong ops
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token, incremental_mode='incremental')
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token)
# 1 add 1 skip 1 mark
for log in logs:
@@ -394,7 +394,7 @@ async def test_tablet_incremental_repair_and_minor(manager: ManagerClient):
await manager.api.disable_autocompaction(server.ip_addr, ks, 'test')
# First repair
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token, incremental_mode='incremental')
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token)
# Insert more keys
await insert_keys(cql, ks, current_key, current_key + nr_keys)
@@ -402,7 +402,7 @@ async def test_tablet_incremental_repair_and_minor(manager: ManagerClient):
current_key += nr_keys
# Second repair
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token, incremental_mode='incremental')
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token)
# Insert more keys and flush to get 2 more sstables
for _ in range(2):
@@ -436,7 +436,7 @@ async def do_test_tablet_incremental_repair_with_split_and_merge(manager, do_spl
servers, cql, hosts, ks, table_id, logs, repaired_keys, unrepaired_keys, current_key, token = await preapre_cluster_for_incremental_repair(manager, nr_keys)
# First repair
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token, incremental_mode='incremental') # sstables_repaired_at 1
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token) # sstables_repaired_at 1
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token) # sstables_repaired_at 2
# Insert more keys
@@ -445,7 +445,7 @@ async def do_test_tablet_incremental_repair_with_split_and_merge(manager, do_spl
current_key += nr_keys
# Second repair
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token, incremental_mode='incremental') # sstables_repaired_at 3
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token) # sstables_repaired_at 3
# Insert more keys and flush to get 2 more sstables
for _ in range(2):
@@ -505,7 +505,7 @@ async def test_tablet_incremental_repair_existing_and_repair_produced_sstable(ma
await manager.server_start(servers[1].server_id)
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token, incremental_mode='incremental')
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token)
scylla_path = get_scylla_path(cql)
@@ -521,8 +521,8 @@ async def test_tablet_incremental_repair_merge_higher_repaired_at_number(manager
servers, cql, hosts, ks, table_id, logs, repaired_keys, unrepaired_keys, current_key, token = await preapre_cluster_for_incremental_repair(manager, nr_keys)
# First repair
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token, incremental_mode='incremental') # sstables_repaired_at 1
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token, incremental_mode='incremental') # sstables_repaired_at 2
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token) # sstables_repaired_at 1
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token) # sstables_repaired_at 2
# Insert more keys
await insert_keys(cql, ks, current_key, current_key + nr_keys)
@@ -532,7 +532,7 @@ async def test_tablet_incremental_repair_merge_higher_repaired_at_number(manager
# Second repair
await inject_error_on(manager, "repair_tablet_no_update_sstables_repair_at", servers)
# some sstable repaired_at = 3, but sstables_repaired_at = 2
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token, incremental_mode='incremental') # sstables_repaired_at 2
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token) # sstables_repaired_at 2
await inject_error_off(manager, "repair_tablet_no_update_sstables_repair_at", servers)
scylla_path = get_scylla_path(cql)
@@ -561,8 +561,8 @@ async def test_tablet_incremental_repair_merge_correct_repaired_at_number_after_
servers, cql, hosts, ks, table_id, logs, repaired_keys, unrepaired_keys, current_key, token = await preapre_cluster_for_incremental_repair(manager, nr_keys)
# First repair
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token, incremental_mode='incremental') # sstables_repaired_at 1
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token, incremental_mode='incremental') # sstables_repaired_at 2
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token) # sstables_repaired_at 1
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token) # sstables_repaired_at 2
# Insert more keys
await insert_keys(cql, ks, current_key, current_key + nr_keys)
@@ -574,7 +574,7 @@ async def test_tablet_incremental_repair_merge_correct_repaired_at_number_after_
last_tokens = [t.last_token for t in replicas]
for t in last_tokens[0::2]:
logging.info(f"Start repair for token={t}");
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", t, incremental_mode='incremental') # sstables_repaired_at 3
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", t) # sstables_repaired_at 3
scylla_path = get_scylla_path(cql)
@@ -595,7 +595,7 @@ async def do_test_tablet_incremental_repair_merge_error(manager, error):
servers, cql, hosts, ks, table_id, logs, repaired_keys, unrepaired_keys, current_key, token = await preapre_cluster_for_incremental_repair(manager, nr_keys, cmdline)
# First repair
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token, incremental_mode='incremental') # sstables_repaired_at 1
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token) # sstables_repaired_at 1
# Insert more keys
await insert_keys(cql, ks, current_key, current_key + nr_keys)
@@ -659,18 +659,13 @@ async def test_tablet_repair_with_incremental_option(manager: ManagerClient):
assert read1 == 0
assert skip2 == 0
assert read2 > 0
await do_repair_and_check('incremental', 1, rf'Starting tablet repair by API .* incremental_mode=incremental.*', check1)
await do_repair_and_check(None, 1, rf'Starting tablet repair by API .* incremental_mode=incremental.*', check1)
def check2(skip1, read1, skip2, read2):
assert skip1 == skip2
assert read1 == read2
await do_repair_and_check('disabled', 0, rf'Starting tablet repair by API .* incremental_mode=disabled.*', check2)
# FIXME: Incremental repair is disabled by default due to
# https://github.com/scylladb/scylladb/issues/26041 and
# https://github.com/scylladb/scylladb/issues/27414
await do_repair_and_check(None, 0, rf'Starting tablet repair by API .* incremental_mode=disabled.*', check2)
def check3(skip1, read1, skip2, read2):
assert skip1 < skip2
assert read1 == read2
@@ -682,14 +677,14 @@ async def test_tablet_repair_with_incremental_option(manager: ManagerClient):
await do_repair_and_check('full', 1, rf'Starting tablet repair by API .* incremental_mode=full.*', check4)
@pytest.mark.asyncio
async def test_incremental_repair_tablet_time_metrics(manager: ManagerClient):
async def test_tablet_repair_tablet_time_metrics(manager: ManagerClient):
servers, _, _, ks, _, _, _, _, _, token = await preapre_cluster_for_incremental_repair(manager)
time1 = 0
time2 = 0
for s in servers:
time1 += get_repair_tablet_time_ms(s)
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token, incremental_mode='incremental')
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token)
for s in servers:
time2 += get_repair_tablet_time_ms(s)
@@ -699,7 +694,7 @@ async def test_incremental_repair_tablet_time_metrics(manager: ManagerClient):
# Reproducer for https://github.com/scylladb/scylladb/issues/26346
@pytest.mark.asyncio
@skip_mode('release', 'error injections are not supported in release mode')
async def test_incremental_repair_finishes_when_tablet_skips_end_repair_stage(manager):
async def test_repair_finishes_when_tablet_skips_end_repair_stage(manager):
servers = await manager.servers_add(3, auto_rack_dc="dc1")
async with new_test_keyspace(manager, "WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 3} AND tablets = {'initial': 1}") as ks:
@@ -724,7 +719,7 @@ async def test_incremental_repair_finishes_when_tablet_skips_end_repair_stage(ma
@pytest.mark.asyncio
@skip_mode('release', 'error injections are not supported in release mode')
async def test_incremental_repair_rejoin_do_tablet_operation(manager):
async def test_repair_rejoin_do_tablet_operation(manager):
cmdline = ['--logger-log-level', 'raft_topology=debug']
servers = await manager.servers_add(3, auto_rack_dc="dc1", cmdline=cmdline)

View File

@@ -10,7 +10,6 @@ import logging
from test.pylib.rest_client import inject_error_one_shot
from test.cluster.util import new_test_keyspace
from test.pylib.util import gather_safely
logger = logging.getLogger(__name__)
@@ -34,12 +33,25 @@ async def test_broken_bootstrap(manager: ManagerClient):
except Exception:
pass
await gather_safely(*(manager.server_stop(srv.server_id) for srv in [server_a, server_b]))
await manager.server_stop(server_b.server_id)
await manager.server_stop(server_a.server_id)
stop_event = asyncio.Event()
async def worker():
logger.info("Worker started")
while not stop_event.is_set():
for i in range(100):
await manager.cql.run_async(f"INSERT INTO {table} (a, b) VALUES ({i}, {i})")
response = await manager.cql.run_async(f"SELECT * FROM {table} WHERE a = {i}")
assert response[0].b == i
await asyncio.sleep(0.1)
logger.info("Worker stopped")
await manager.server_start(server_a.server_id)
await manager.driver_connect()
for i in range(100):
await manager.cql.run_async(f"INSERT INTO {table} (a, b) VALUES ({i}, {i})")
response = await manager.cql.run_async(f"SELECT * FROM {table} WHERE a = {i}")
assert response[0].b == i
worker_task = asyncio.create_task(worker())
await asyncio.sleep(20)
stop_event.set()
await worker_task

View File

@@ -4,8 +4,7 @@
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
#
from typing import Any
from cassandra.query import ConsistencyLevel, SimpleStatement
from cassandra.policies import FallthroughRetryPolicy
from cassandra.query import ConsistencyLevel
from test.pylib.internal_types import HostID, ServerInfo, ServerNum
from test.pylib.manager_client import ManagerClient
@@ -1597,7 +1596,7 @@ async def test_truncate_during_topology_change(manager: ManagerClient):
async def truncate_table():
await asyncio.sleep(10)
logger.info("Executing truncate during bootstrap")
await cql.run_async(SimpleStatement(f"TRUNCATE {ks}.test USING TIMEOUT 4m", retry_policy=FallthroughRetryPolicy()))
await cql.run_async(f"TRUNCATE {ks}.test USING TIMEOUT 1m")
truncate_task = asyncio.create_task(truncate_table())
logger.info("Adding fourth node")

View File

@@ -67,11 +67,11 @@ nodetool_cmd.conf = False
# Run the external "nodetool" executable (can be overridden by the NODETOOL
# environment variable). Only call this if the REST API doesn't work.
def run_nodetool(cql, *args, **subprocess_kwargs):
def run_nodetool(cql, *args):
# TODO: We may need to change this function or its callers to add proper
# support for testing on multi-node clusters.
host = cql.cluster.contact_points[0]
return subprocess.run([nodetool_cmd(), '-h', host, *args], **subprocess_kwargs)
subprocess.run([nodetool_cmd(), '-h', host, *args])
def flush(cql, table):
ks, cf = table.split('.')
@@ -157,28 +157,6 @@ def disablebinary(cql):
else:
run_nodetool(cql, "disablebinary")
def getlogginglevel(cql, logger):
if has_rest_api(cql):
resp = requests.get(f'{rest_api_url(cql)}/system/logger/{logger}')
if resp.ok:
return resp.text.strip()
raise RuntimeError(f"failed to fetch logging level for {logger}: {resp.status_code} {resp.text}")
result = run_nodetool(
cql,
"getlogginglevels",
capture_output=True,
text=True,
check=True,
)
for line in result.stdout.splitlines():
stripped = line.strip()
parts = stripped.split()
if len(parts) >= 2 and parts[0] == logger:
return parts[-1]
raise RuntimeError(f"logger {logger} not found in getlogginglevels output")
def setlogginglevel(cql, logger, level):
if has_rest_api(cql):
requests.post(f'{rest_api_url(cql)}/system/logger/{logger}', params={'level': level})

View File

@@ -182,6 +182,7 @@ def cleanup_all():
sys.stdout.flush()
shutil.copyfileobj(f, sys.stdout.buffer)
f.close()
scylla_set = set()
print(summary)
# We run the cleanup_all() function on exit for any reason - successful finish

View File

@@ -10,7 +10,6 @@ import re
import requests
import socket
import struct
from test.cqlpy import nodetool
from test.cqlpy.util import cql_session
def get_protocol_error_metrics(host) -> int:
@@ -59,50 +58,11 @@ def try_connect(host, port, creds, protocol_version):
with cql_with_protocol(host, port, creds, protocol_version) as session:
return 1 if session else 0
@pytest.fixture
def debug_exceptions_logging(request, cql):
def _read_level() -> str | None:
try:
level = nodetool.getlogginglevel(cql, "exception")
if level:
level = level.strip().strip('"').lower()
return level
except Exception as exc:
print(f"Failed to read exception logger level: {exc}")
return None
def _set_and_verify(level: str) -> bool:
try:
nodetool.setlogginglevel(cql, "exception", level)
except Exception as exc:
print(f"Failed to set exception logger level to '{level}': {exc}")
return False
observed = _read_level()
if observed == level:
return True
print(f"Exception logger level observed as '{observed}' while expecting '{level}'")
return False
def _restore_logging():
if not enabled and previous_level is None:
return
target_level = previous_level or "info"
_set_and_verify(target_level)
previous_level = _read_level()
enabled = _set_and_verify("debug")
yield
_restore_logging()
# If there is a protocol version mismatch, the server should
# raise a protocol error, which is counted in the metrics.
def test_protocol_version_mismatch(scylla_only, debug_exceptions_logging, request, host):
run_count = 200
cpp_exception_threshold = 20
def test_protocol_version_mismatch(scylla_only, request, host):
run_count = 100
cpp_exception_threshold = 10
cpp_exception_metrics_before = get_cpp_exceptions_metrics(host)
protocol_exception_metrics_before = get_protocol_error_metrics(host)
@@ -284,8 +244,8 @@ def _protocol_error_impl(
s.close()
def _test_impl(host, flag):
run_count = 200
cpp_exception_threshold = 20
run_count = 100
cpp_exception_threshold = 10
cpp_exception_metrics_before = get_cpp_exceptions_metrics(host)
protocol_exception_metrics_before = get_protocol_error_metrics(host)
@@ -307,47 +267,47 @@ def no_ssl(request):
yield
# Malformed BATCH with an invalid kind triggers a protocol error.
def test_invalid_kind_in_batch_message(scylla_only, no_ssl, debug_exceptions_logging, host):
def test_invalid_kind_in_batch_message(scylla_only, no_ssl, host):
_test_impl(host, "trigger_bad_batch")
# Send OPTIONS during AUTHENTICATE to trigger auth-state error.
def test_unexpected_message_during_auth(scylla_only, no_ssl, debug_exceptions_logging, host):
def test_unexpected_message_during_auth(scylla_only, no_ssl, host):
_test_impl(host, "trigger_unexpected_auth")
# STARTUP with an invalid/missing string-map entry should produce a protocol error.
def test_process_startup_invalid_string_map(scylla_only, no_ssl, debug_exceptions_logging, host):
def test_process_startup_invalid_string_map(scylla_only, no_ssl, host):
_test_impl(host, "trigger_process_startup_invalid_string_map")
# STARTUP with unknown COMPRESSION option should produce a protocol error.
def test_unknown_compression_algorithm(scylla_only, no_ssl, debug_exceptions_logging, host):
def test_unknown_compression_algorithm(scylla_only, no_ssl, host):
_test_impl(host, "trigger_unknown_compression")
# QUERY long-string truncation: declared length > provided bytes triggers protocol error.
def test_process_query_internal_malformed_query(scylla_only, no_ssl, debug_exceptions_logging, host):
def test_process_query_internal_malformed_query(scylla_only, no_ssl, host):
_test_impl(host, "trigger_process_query_internal_malformed_query")
# QUERY options malformed: PAGE_SIZE flag set but page_size truncated triggers protocol error.
def test_process_query_internal_fail_read_options(scylla_only, no_ssl, debug_exceptions_logging, host):
def test_process_query_internal_fail_read_options(scylla_only, no_ssl, host):
_test_impl(host, "trigger_process_query_internal_fail_read_options")
# PREPARE long-string truncation: declared length > provided bytes triggers protocol error.
def test_process_prepare_malformed_query(scylla_only, no_ssl, debug_exceptions_logging, host):
def test_process_prepare_malformed_query(scylla_only, no_ssl, host):
_test_impl(host, "trigger_process_prepare_malformed_query")
# EXECUTE cache-key malformed: short-bytes length > provided bytes triggers protocol error.
def test_process_execute_internal_malformed_cache_key(scylla_only, no_ssl, debug_exceptions_logging, host):
def test_process_execute_internal_malformed_cache_key(scylla_only, no_ssl, host):
_test_impl(host, "trigger_process_execute_internal_malformed_cache_key")
# REGISTER malformed string list: declared string length > provided bytes triggers protocol error.
def test_process_register_malformed_string_list(scylla_only, no_ssl, debug_exceptions_logging, host):
def test_process_register_malformed_string_list(scylla_only, no_ssl, host):
_test_impl(host, "trigger_process_register_malformed_string_list")
# Test if the protocol exceptions do not decrease after running the test happy path.
# This is to ensure that the protocol exceptions are not cleared or reset
# during the test execution.
def test_no_protocol_exceptions(scylla_only, no_ssl, debug_exceptions_logging, host):
run_count = 200
cpp_exception_threshold = 20
def test_no_protocol_exceptions(scylla_only, no_ssl, host):
run_count = 100
cpp_exception_threshold = 10
cpp_exception_metrics_before = get_cpp_exceptions_metrics(host)
protocol_exception_metrics_before = get_protocol_error_metrics(host)

View File

@@ -1128,8 +1128,11 @@ private:
auth_config.authenticator_java_name = qualified_authenticator_name;
auth_config.role_manager_java_name = qualified_role_manager_name;
_auth_service.start(perm_cache_config, std::ref(_qp), std::ref(group0_client), std::ref(_mnotifier), std::ref(_mm), auth_config, maintenance_socket_enabled::no, std::ref(_auth_cache)).get();
const uint64_t niceness = 19;
auto hashing_worker = utils::alien_worker(startlog, niceness, "pwd-hash");
_auth_service.start(perm_cache_config, std::ref(_qp), std::ref(group0_client), std::ref(_mnotifier), std::ref(_mm), auth_config, maintenance_socket_enabled::no, std::ref(_auth_cache), std::ref(hashing_worker)).get();
_auth_service.invoke_on_all([this] (auth::service& auth) {
return auth.start(_mm.local(), _sys_ks.local());
}).get();

View File

@@ -163,11 +163,6 @@ public:
_sst->_shards.push_back(this_shard_id());
}
void set_first_and_last_keys(const dht::decorated_key& first_key, const dht::decorated_key& last_key) {
_sst->_first = first_key;
_sst->_last = last_key;
}
void rewrite_toc_without_component(component_type component) {
SCYLLA_ASSERT(component != component_type::TOC);
_sst->_recognized_components.erase(component);

View File

@@ -109,7 +109,6 @@ class ResourceGather(ABC):
except subprocess.TimeoutExpired:
logger.critical(f"Process {args} timed out")
p.kill()
p.communicate()
except KeyboardInterrupt:
p.kill()
raise

View File

@@ -789,7 +789,7 @@ class ScyllaServer:
while time.time() < self.start_time + self.TOPOLOGY_TIMEOUT and not self.stop_event.is_set():
assert self.cmd is not None
if self.cmd.returncode is not None:
if self.cmd.returncode:
self.cmd = None
if expected_error is not None:
with self.log_filename.open("r", encoding="utf-8") as log_file:

View File

@@ -654,7 +654,7 @@ void cluster_repair_operation(scylla_rest_client& client, const bpo::variables_m
for (const auto& table : tables.empty() ? ks_to_cfs[keyspace] : tables) {
repair_params["table"] = table;
try {
sstring task_id = rjson::to_sstring(client.post("/storage_service/tablets/repair", repair_params).GetObject()["tablet_task_id"]);
sstring task_id = client.post("/storage_service/tablets/repair", repair_params).GetObject()["tablet_task_id"].GetString();
log("Starting repair with task_id={} keyspace={} table={}", task_id, keyspace, table);
@@ -3174,7 +3174,7 @@ void tasks_print_status(const rjson::value& res) {
auto status = res.GetObject();
for (const auto& x: status) {
if (x.value.IsString()) {
if (strcmp(x.name.GetString(), "start_time") == 0 || strcmp(x.name.GetString(), "end_time") == 0) {
if (strcmp(x.name.GetString(), "creation_time") == 0 || strcmp(x.name.GetString(), "start_time") == 0 || strcmp(x.name.GetString(), "end_time") == 0) {
fmt::print("{}: {}\n", x.name.GetString(), get_time(x.value.GetString()));
} else {
fmt::print("{}: {}\n", x.name.GetString(), x.value.GetString());
@@ -3226,6 +3226,7 @@ void tasks_add_tree_to_statuses_lists(Tabulate& table, const rjson::value& res)
rjson::to_string_view(status["scope"]),
rjson::to_string_view(status["state"]),
status["is_abortable"].GetBool(),
get_time(rjson::to_string_view(status["creation_time"])),
get_time(rjson::to_string_view(status["start_time"])),
get_time(rjson::to_string_view(status["end_time"])),
rjson::to_string_view(status["error"]),
@@ -3245,7 +3246,7 @@ void tasks_add_tree_to_statuses_lists(Tabulate& table, const rjson::value& res)
void tasks_print_trees(const std::vector<rjson::value>& res) {
Tabulate table;
table.add("id", "type", "kind", "scope", "state",
"is_abortable", "start_time", "end_time", "error", "parent_id",
"is_abortable", "creation_time", "start_time", "end_time", "error", "parent_id",
"sequence_number", "shard", "keyspace", "table", "entity",
"progress_units", "total", "completed", "children_ids");
@@ -3259,7 +3260,7 @@ void tasks_print_trees(const std::vector<rjson::value>& res) {
void tasks_print_stats_list(const rjson::value& res) {
auto stats = res.GetArray();
Tabulate table;
table.add("task_id", "type", "kind", "scope", "state", "sequence_number", "keyspace", "table", "entity", "shard", "start_time", "end_time");
table.add("task_id", "type", "kind", "scope", "state", "sequence_number", "keyspace", "table", "entity", "shard", "creation_time", "start_time", "end_time");
for (auto& element : stats) {
const auto& s = element.GetObject();
@@ -3273,6 +3274,7 @@ void tasks_print_stats_list(const rjson::value& res) {
rjson::to_string_view(s["table"]),
rjson::to_string_view(s["entity"]),
s["shard"].GetUint(),
get_time(rjson::to_string_view(s["creation_time"])),
get_time(rjson::to_string_view(s["start_time"])),
get_time(rjson::to_string_view(s["end_time"])));
}

View File

@@ -1,4 +1,4 @@
FROM registry.fedoraproject.org/fedora:43
FROM docker.io/fedora:42
ARG CLANG_BUILD="SKIP"
ARG CLANG_ARCHIVES

View File

@@ -1 +1 @@
docker.io/scylladb/scylla-toolchain:fedora-43-20251208
docker.io/scylladb/scylla-toolchain:fedora-42-20251122

View File

@@ -65,7 +65,7 @@ SCYLLA_BUILD_DIR_FULLPATH="${SCYLLA_DIR}"/"${SCYLLA_BUILD_DIR}"
SCYLLA_NINJA_FILE_FULLPATH="${SCYLLA_DIR}"/"${SCYLLA_NINJA_FILE}"
# Which LLVM release to build in order to compile Scylla
LLVM_CLANG_TAG=21.1.6
LLVM_CLANG_TAG=20.1.8
CLANG_ARCHIVE=$(cd "${SCYLLA_DIR}" && realpath -m "${CLANG_ARCHIVE}")
@@ -186,3 +186,7 @@ if [[ $? -ne 0 ]]; then
fi
set -e
tar -C / -xpzf "${CLANG_ARCHIVE}"
dnf remove -y clang clang-libs
# above package removal might have removed those symbolic links, which will cause ccache not to work later on. Manually restore them.
ln -sf /usr/bin/ccache /usr/lib64/ccache/clang
ln -sf /usr/bin/ccache /usr/lib64/ccache/clang++

View File

@@ -29,8 +29,11 @@ class counted_data_source_impl : public data_source_impl {
if (_cpu_concurrency.stopped) {
return fun();
}
_cpu_concurrency.units.return_all();
return fun().finally([this] () {
return futurize_invoke([this] () {
_cpu_concurrency.units.return_all();
}).then([fun = std::move(fun)] () {
return fun();
}).finally([this] () {
_cpu_concurrency.units.adopt(consume_units(_cpu_concurrency.semaphore, 1));
});
};
@@ -57,8 +60,11 @@ class counted_data_sink_impl : public data_sink_impl {
if (_cpu_concurrency.stopped) {
return fun();
}
_cpu_concurrency.units.return_all();
return fun().finally([this] () {
return futurize_invoke([this] () {
_cpu_concurrency.units.return_all();
}).then([fun = std::move(fun)] () mutable {
return fun();
}).finally([this] () {
_cpu_concurrency.units.adopt(consume_units(_cpu_concurrency.semaphore, 1));
});
};

View File

@@ -832,12 +832,6 @@ to_bytes(bytes_view x) {
return bytes(x.begin(), x.size());
}
inline
bytes
to_bytes(std::string_view x) {
return to_bytes(to_bytes_view(x));
}
inline
bytes_opt
to_bytes_opt(bytes_view_opt bv) {

View File

@@ -15,7 +15,6 @@ target_sources(utils
buffer_input_stream.cc
build_id.cc
config_file.cc
crypt_sha512.cc
directories.cc
disk-error-handler.cc
disk_space_monitor.cc

View File

@@ -1,381 +0,0 @@
/*
* This file originates from musl libc (git.musl-libc.org).
* Modifications have been made and are licensed under the following terms:
* Copyright (C) 2025-present ScyllaDB
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
*
* public domain sha512 crypt implementation
*
* original sha crypt design: http://people.redhat.com/drepper/SHA-crypt.txt
* in this implementation at least 32bit int is assumed,
* key length is limited, the $6$ prefix is mandatory, '\n' and ':' is rejected
* in the salt and rounds= setting must contain a valid iteration count,
* on error "*" is returned.
*/
#include <ctype.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include "crypt_sha512.hh"
#include <seastar/core/coroutine.hh>
#include <seastar/coroutine/maybe_yield.hh>
/* public domain sha512 implementation based on fips180-3 */
/* >=2^64 bits messages are not supported (about 2000 peta bytes) */
struct sha512 {
uint64_t len; /* processed message length */
uint64_t h[8]; /* hash state */
uint8_t buf[128]; /* message block buffer */
};
static uint64_t ror(uint64_t n, int k) { return (n >> k) | (n << (64-k)); }
#define Ch(x,y,z) (z ^ (x & (y ^ z)))
#define Maj(x,y,z) ((x & y) | (z & (x | y)))
#define S0(x) (ror(x,28) ^ ror(x,34) ^ ror(x,39))
#define S1(x) (ror(x,14) ^ ror(x,18) ^ ror(x,41))
#define R0(x) (ror(x,1) ^ ror(x,8) ^ (x>>7))
#define R1(x) (ror(x,19) ^ ror(x,61) ^ (x>>6))
static const uint64_t K[80] = {
0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL, 0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL, 0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,
0xd807aa98a3030242ULL, 0x12835b0145706fbeULL, 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL, 0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,
0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL, 0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,
0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL, 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL, 0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,
0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL, 0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,
0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL, 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL, 0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,
0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL, 0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,
0xd192e819d6ef5218ULL, 0xd69906245565a910ULL, 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL, 0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,
0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL, 0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,
0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL, 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL, 0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,
0xca273eceea26619cULL, 0xd186b8c721c0c207ULL, 0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,
0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL, 0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
0x28db77f523047d84ULL, 0x32caab7b40c72493ULL, 0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,
0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL, 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL
};
static void processblock(struct sha512 *s, const uint8_t *buf)
{
uint64_t W[80], t1, t2, a, b, c, d, e, f, g, h;
int i;
for (i = 0; i < 16; i++) {
W[i] = (uint64_t)buf[8*i]<<56;
W[i] |= (uint64_t)buf[8*i+1]<<48;
W[i] |= (uint64_t)buf[8*i+2]<<40;
W[i] |= (uint64_t)buf[8*i+3]<<32;
W[i] |= (uint64_t)buf[8*i+4]<<24;
W[i] |= (uint64_t)buf[8*i+5]<<16;
W[i] |= (uint64_t)buf[8*i+6]<<8;
W[i] |= buf[8*i+7];
}
for (; i < 80; i++)
W[i] = R1(W[i-2]) + W[i-7] + R0(W[i-15]) + W[i-16];
a = s->h[0];
b = s->h[1];
c = s->h[2];
d = s->h[3];
e = s->h[4];
f = s->h[5];
g = s->h[6];
h = s->h[7];
for (i = 0; i < 80; i++) {
t1 = h + S1(e) + Ch(e,f,g) + K[i] + W[i];
t2 = S0(a) + Maj(a,b,c);
h = g;
g = f;
f = e;
e = d + t1;
d = c;
c = b;
b = a;
a = t1 + t2;
}
s->h[0] += a;
s->h[1] += b;
s->h[2] += c;
s->h[3] += d;
s->h[4] += e;
s->h[5] += f;
s->h[6] += g;
s->h[7] += h;
}
static void pad(struct sha512 *s)
{
unsigned r = s->len % 128;
s->buf[r++] = 0x80;
if (r > 112) {
memset(s->buf + r, 0, 128 - r);
r = 0;
processblock(s, s->buf);
}
memset(s->buf + r, 0, 120 - r);
s->len *= 8;
s->buf[120] = s->len >> 56;
s->buf[121] = s->len >> 48;
s->buf[122] = s->len >> 40;
s->buf[123] = s->len >> 32;
s->buf[124] = s->len >> 24;
s->buf[125] = s->len >> 16;
s->buf[126] = s->len >> 8;
s->buf[127] = s->len;
processblock(s, s->buf);
}
static void sha512_init(struct sha512 *s)
{
s->len = 0;
s->h[0] = 0x6a09e667f3bcc908ULL;
s->h[1] = 0xbb67ae8584caa73bULL;
s->h[2] = 0x3c6ef372fe94f82bULL;
s->h[3] = 0xa54ff53a5f1d36f1ULL;
s->h[4] = 0x510e527fade682d1ULL;
s->h[5] = 0x9b05688c2b3e6c1fULL;
s->h[6] = 0x1f83d9abfb41bd6bULL;
s->h[7] = 0x5be0cd19137e2179ULL;
}
static void sha512_sum(struct sha512 *s, uint8_t *md)
{
int i;
pad(s);
for (i = 0; i < 8; i++) {
md[8*i] = s->h[i] >> 56;
md[8*i+1] = s->h[i] >> 48;
md[8*i+2] = s->h[i] >> 40;
md[8*i+3] = s->h[i] >> 32;
md[8*i+4] = s->h[i] >> 24;
md[8*i+5] = s->h[i] >> 16;
md[8*i+6] = s->h[i] >> 8;
md[8*i+7] = s->h[i];
}
}
static void sha512_update(struct sha512 *s, const void *m, unsigned long len)
{
const uint8_t *p = (const uint8_t *)m;
unsigned r = s->len % 128;
s->len += len;
if (r) {
if (len < 128 - r) {
memcpy(s->buf + r, p, len);
return;
}
memcpy(s->buf + r, p, 128 - r);
len -= 128 - r;
p += 128 - r;
processblock(s, s->buf);
}
for (; len >= 128; len -= 128, p += 128)
processblock(s, p);
memcpy(s->buf, p, len);
}
static const unsigned char b64[] =
"./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
static char *to64(char *s, unsigned int u, int n)
{
while (--n >= 0) {
*s++ = b64[u % 64];
u /= 64;
}
return s;
}
/* key limit is not part of the original design, added for DoS protection.
* rounds limit has been lowered (versus the reference/spec), also for DoS
* protection. runtime is O(klen^2 + klen*rounds) */
#define KEY_MAX 256
#define SALT_MAX 16
#define ROUNDS_DEFAULT 5000
#define ROUNDS_MIN 1000
#define ROUNDS_MAX 9999999
/* hash n bytes of the repeated md message digest */
static void hashmd(struct sha512 *s, unsigned int n, const void *md)
{
unsigned int i;
for (i = n; i > 64; i -= 64)
sha512_update(s, md, 64);
sha512_update(s, md, i);
}
static seastar::future<char *> sha512crypt(const char *key, const char *setting, char *output)
{
struct sha512 ctx;
unsigned char md[64], kmd[64], smd[64];
unsigned int i, r, klen, slen;
char rounds[20] = "";
const char *salt;
char *p;
/* reject large keys */
for (i = 0; i <= KEY_MAX && key[i]; i++);
if (i > KEY_MAX)
co_return nullptr;
klen = i;
/* setting: $6$rounds=n$salt$ (rounds=n$ and closing $ are optional) */
if (strncmp(setting, "$6$", 3) != 0)
co_return nullptr;
salt = setting + 3;
r = ROUNDS_DEFAULT;
if (strncmp(salt, "rounds=", sizeof "rounds=" - 1) == 0) {
unsigned long u;
char *end;
/*
* this is a deviation from the reference:
* bad rounds setting is rejected if it is
* - empty
* - unterminated (missing '$')
* - begins with anything but a decimal digit
* the reference implementation treats these bad
* rounds as part of the salt or parse them with
* strtoul semantics which may cause problems
* including non-portable hashes that depend on
* the host's value of ULONG_MAX.
*/
salt += sizeof "rounds=" - 1;
if (!isdigit(*salt))
co_return nullptr;
u = strtoul(salt, &end, 10);
if (*end != '$')
co_return nullptr;
salt = end+1;
if (u < ROUNDS_MIN)
r = ROUNDS_MIN;
else if (u > ROUNDS_MAX)
co_return nullptr;
else
r = u;
/* needed when rounds is zero prefixed or out of bounds */
sprintf(rounds, "rounds=%u$", r);
}
for (i = 0; i < SALT_MAX && salt[i] && salt[i] != '$'; i++)
/* reject characters that interfere with /etc/shadow parsing */
if (salt[i] == '\n' || salt[i] == ':')
co_return nullptr;
slen = i;
/* B = sha(key salt key) */
sha512_init(&ctx);
sha512_update(&ctx, key, klen);
sha512_update(&ctx, salt, slen);
sha512_update(&ctx, key, klen);
sha512_sum(&ctx, md);
/* A = sha(key salt repeat-B alternate-B-key) */
sha512_init(&ctx);
sha512_update(&ctx, key, klen);
sha512_update(&ctx, salt, slen);
hashmd(&ctx, klen, md);
for (i = klen; i > 0; i >>= 1)
if (i & 1)
sha512_update(&ctx, md, sizeof md);
else
sha512_update(&ctx, key, klen);
sha512_sum(&ctx, md);
/* DP = sha(repeat-key), this step takes O(klen^2) time */
sha512_init(&ctx);
for (i = 0; i < klen; i++)
sha512_update(&ctx, key, klen);
sha512_sum(&ctx, kmd);
/* DS = sha(repeat-salt) */
sha512_init(&ctx);
for (i = 0; i < 16 + md[0]; i++)
sha512_update(&ctx, salt, slen);
sha512_sum(&ctx, smd);
/* iterate A = f(A,DP,DS), this step takes O(rounds*klen) time */
for (i = 0; i < r; i++) {
sha512_init(&ctx);
if (i % 2)
hashmd(&ctx, klen, kmd);
else
sha512_update(&ctx, md, sizeof md);
if (i % 3)
sha512_update(&ctx, smd, slen);
if (i % 7)
hashmd(&ctx, klen, kmd);
if (i % 2)
sha512_update(&ctx, md, sizeof md);
else
hashmd(&ctx, klen, kmd);
sha512_sum(&ctx, md);
co_await seastar::coroutine::maybe_yield();
}
/* output is $6$rounds=n$salt$hash */
p = output;
p += sprintf(p, "$6$%s%.*s$", rounds, slen, salt);
#if 1
static const unsigned char perm[][3] = {
{0,21,42},{22,43,1},{44,2,23},{3,24,45},{25,46,4},
{47,5,26},{6,27,48},{28,49,7},{50,8,29},{9,30,51},
{31,52,10},{53,11,32},{12,33,54},{34,55,13},{56,14,35},
{15,36,57},{37,58,16},{59,17,38},{18,39,60},{40,61,19},
{62,20,41} };
for (i=0; i<21; i++) p = to64(p,
(md[perm[i][0]]<<16)|(md[perm[i][1]]<<8)|md[perm[i][2]], 4);
#else
p = to64(p, (md[0]<<16)|(md[21]<<8)|md[42], 4);
p = to64(p, (md[22]<<16)|(md[43]<<8)|md[1], 4);
p = to64(p, (md[44]<<16)|(md[2]<<8)|md[23], 4);
p = to64(p, (md[3]<<16)|(md[24]<<8)|md[45], 4);
p = to64(p, (md[25]<<16)|(md[46]<<8)|md[4], 4);
p = to64(p, (md[47]<<16)|(md[5]<<8)|md[26], 4);
p = to64(p, (md[6]<<16)|(md[27]<<8)|md[48], 4);
p = to64(p, (md[28]<<16)|(md[49]<<8)|md[7], 4);
p = to64(p, (md[50]<<16)|(md[8]<<8)|md[29], 4);
p = to64(p, (md[9]<<16)|(md[30]<<8)|md[51], 4);
p = to64(p, (md[31]<<16)|(md[52]<<8)|md[10], 4);
p = to64(p, (md[53]<<16)|(md[11]<<8)|md[32], 4);
p = to64(p, (md[12]<<16)|(md[33]<<8)|md[54], 4);
p = to64(p, (md[34]<<16)|(md[55]<<8)|md[13], 4);
p = to64(p, (md[56]<<16)|(md[14]<<8)|md[35], 4);
p = to64(p, (md[15]<<16)|(md[36]<<8)|md[57], 4);
p = to64(p, (md[37]<<16)|(md[58]<<8)|md[16], 4);
p = to64(p, (md[59]<<16)|(md[17]<<8)|md[38], 4);
p = to64(p, (md[18]<<16)|(md[39]<<8)|md[60], 4);
p = to64(p, (md[40]<<16)|(md[61]<<8)|md[19], 4);
p = to64(p, (md[62]<<16)|(md[20]<<8)|md[41], 4);
#endif
p = to64(p, md[63], 2);
*p = 0;
co_return output;
}
seastar::future<const char *> __crypt_sha512(const char *key, const char *setting, char *output)
{
static const char testkey[] = "Xy01@#\x01\x02\x80\x7f\xff\r\n\x81\t !";
static const char testsetting[] = "$6$rounds=1234$abc0123456789$";
static const char testhash[] = "$6$rounds=1234$abc0123456789$BCpt8zLrc/RcyuXmCDOE1ALqMXB2MH6n1g891HhFj8.w7LxGv.FTkqq6Vxc/km3Y0jE0j24jY5PIv/oOu6reg1";
char testbuf[128];
char *p, *q;
p = co_await sha512crypt(key, setting, output);
/* self test and stack cleanup */
q = co_await sha512crypt(testkey, testsetting, testbuf);
if (!p || q != testbuf || memcmp(testbuf, testhash, sizeof testhash))
co_return "*";
co_return p;
}

View File

@@ -1,13 +0,0 @@
/*
* Copyright (C) 2025-present ScyllaDB
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
*/
#pragma once
#include <seastar/core/future.hh>
seastar::future<const char *> __crypt_sha512(const char *key, const char *setting, char *output);

View File

@@ -204,7 +204,7 @@ public:
public:
template <typename Clock, typename Duration>
future<> wait_for_message(std::chrono::time_point<Clock, Duration> timeout, abort_source* as = nullptr, std::source_location loc = std::source_location::current()) {
future<> wait_for_message(std::chrono::time_point<Clock, Duration> timeout, abort_source* as = nullptr) {
if (!_shared_data) {
on_internal_error(errinj_logger, "injection_shared_data is not initialized");
}
@@ -234,8 +234,7 @@ public:
throw;
}
catch (const std::exception& e) {
on_internal_error(errinj_logger, fmt::format("Error injection [{}] wait_for_message timeout: Called from `{}` @ {}:{}:{:d}: {}",
_shared_data->injection_name, loc.function_name(), loc.file_name(), loc.line(), loc.column(), e.what()));
on_internal_error(errinj_logger, "Error injection wait_for_message timeout: " + std::string(e.what()));
}
++_read_messages_counter;
}

Some files were not shown because too many files have changed in this diff Show More