Compare commits

..

6 Commits

Author SHA1 Message Date
copilot-swe-agent[bot]
a578bcc653 Remove temporary PR tracking file
Co-authored-by: mykaul <4655593+mykaul@users.noreply.github.com>
2025-12-10 13:20:31 +00:00
copilot-swe-agent[bot]
4573bb4f63 Add PR tracking file
Co-authored-by: mykaul <4655593+mykaul@users.noreply.github.com>
2025-12-10 13:19:45 +00:00
copilot-swe-agent[bot]
04fc2fb0c1 Remove test file
Co-authored-by: mykaul <4655593+mykaul@users.noreply.github.com>
2025-12-10 11:58:14 +00:00
copilot-swe-agent[bot]
0bf6da38ee Test commit
Co-authored-by: mykaul <4655593+mykaul@users.noreply.github.com>
2025-12-10 11:57:45 +00:00
copilot-swe-agent[bot]
c330232426 Update auto-backport PR message to mention removing 'conflicts' label
Co-authored-by: mykaul <4655593+mykaul@users.noreply.github.com>
2025-12-10 11:41:39 +00:00
copilot-swe-agent[bot]
44c1ebdfe2 Initial plan 2025-12-10 11:39:35 +00:00
265 changed files with 2196 additions and 7059 deletions

8
.github/CODEOWNERS vendored
View File

@@ -1,5 +1,5 @@
# AUTH
auth/* @nuivall
auth/* @nuivall @ptrsmrn
# CACHE
row_cache* @tgrabiec
@@ -25,11 +25,11 @@ compaction/* @raphaelsc
transport/*
# CQL QUERY LANGUAGE
cql3/* @tgrabiec @nuivall
cql3/* @tgrabiec @nuivall @ptrsmrn
# COUNTERS
counters* @nuivall
tests/counter_test* @nuivall
counters* @nuivall @ptrsmrn
tests/counter_test* @nuivall @ptrsmrn
# DOCS
docs/* @annastuchlik @tzach

View File

@@ -62,7 +62,7 @@ def create_pull_request(repo, new_branch_name, base_branch_name, pr, backport_pr
if is_draft:
labels_to_add.append("conflicts")
pr_comment = f"@{pr.user.login} - This PR was marked as draft because it has conflicts\n"
pr_comment += "Please resolve them and remove the 'conflicts' label. The PR will be made ready for review automatically."
pr_comment += "Please resolve them and remove the 'conflicts' label so this PR will be ready for review"
backport_pr.create_issue_comment(pr_comment)
# Apply all labels at once if we have any

View File

@@ -18,7 +18,7 @@ jobs:
// Regular expression pattern to check for "Fixes" prefix
// Adjusted to dynamically insert the repository full name
const pattern = `Fixes:? ((?:#|${repo.replace('/', '\\/')}#|https://github\\.com/${repo.replace('/', '\\/')}/issues/)(\\d+)|([A-Z]+-\\d+))`;
const pattern = `Fixes:? (?:#|${repo.replace('/', '\\/')}#|https://github\\.com/${repo.replace('/', '\\/')}/issues/)(\\d+)`;
const regex = new RegExp(pattern);
if (!regex.test(body)) {

View File

@@ -1,14 +0,0 @@
name: Call Jira release creation for new milestone
on:
milestone:
types: [created]
jobs:
sync-milestone-to-jira:
uses: scylladb/github-automation/.github/workflows/main_sync_milestone_to_jira_release.yml@main
with:
# Comma-separated list of Jira project keys
jira_project_keys: "SCYLLADB,CUSTOMER"
secrets:
caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}

View File

@@ -7,7 +7,7 @@ on:
- enterprise
paths:
- '**/*.cc'
- 'scripts/metrics-config.yml'
- 'scripts/metrics-config.yml'
- 'scripts/get_description.py'
- 'docs/_ext/scylladb_metrics.py'
@@ -15,20 +15,20 @@ jobs:
validate-metrics:
runs-on: ubuntu-latest
name: Check metrics documentation coverage
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
submodules: true
- name: Set up Python
uses: actions/setup-python@v6
with:
python-version: '3.10'
- name: Install dependencies
run: pip install PyYAML
- name: Validate metrics
run: python3 scripts/get_description.py --validate -c scripts/metrics-config.yml

View File

@@ -3,13 +3,10 @@ name: Trigger Scylla CI Route
on:
issue_comment:
types: [created]
pull_request_target:
types:
- unlabeled
jobs:
trigger-jenkins:
if: (github.event.comment.user.login != 'scylladbbot' && contains(github.event.comment.body, '@scylladbbot') && contains(github.event.comment.body, 'trigger-ci')) || github.event.label.name == 'conflicts'
if: github.event.comment.user.login != 'scylladbbot' && contains(github.event.comment.body, '@scylladbbot') && contains(github.event.comment.body, 'trigger-ci')
runs-on: ubuntu-latest
steps:
- name: Trigger Scylla-CI-Route Jenkins Job

View File

@@ -42,7 +42,7 @@ comparison_operator_type get_comparison_operator(const rjson::value& comparison_
if (!comparison_operator.IsString()) {
throw api_error::validation(fmt::format("Invalid comparison operator definition {}", rjson::print(comparison_operator)));
}
std::string op = rjson::to_string(comparison_operator);
std::string op = comparison_operator.GetString();
auto it = ops.find(op);
if (it == ops.end()) {
throw api_error::validation(fmt::format("Unsupported comparison operator {}", op));
@@ -377,8 +377,8 @@ bool check_compare(const rjson::value* v1, const rjson::value& v2, const Compara
return cmp(unwrap_number(*v1, cmp.diagnostic), unwrap_number(v2, cmp.diagnostic));
}
if (kv1.name == "S") {
return cmp(rjson::to_string_view(kv1.value),
rjson::to_string_view(kv2.value));
return cmp(std::string_view(kv1.value.GetString(), kv1.value.GetStringLength()),
std::string_view(kv2.value.GetString(), kv2.value.GetStringLength()));
}
if (kv1.name == "B") {
auto d_kv1 = unwrap_bytes(kv1.value, v1_from_query);
@@ -470,9 +470,9 @@ static bool check_BETWEEN(const rjson::value* v, const rjson::value& lb, const r
return check_BETWEEN(unwrap_number(*v, diag), unwrap_number(lb, diag), unwrap_number(ub, diag), bounds_from_query);
}
if (kv_v.name == "S") {
return check_BETWEEN(rjson::to_string_view(kv_v.value),
rjson::to_string_view(kv_lb.value),
rjson::to_string_view(kv_ub.value),
return check_BETWEEN(std::string_view(kv_v.value.GetString(), kv_v.value.GetStringLength()),
std::string_view(kv_lb.value.GetString(), kv_lb.value.GetStringLength()),
std::string_view(kv_ub.value.GetString(), kv_ub.value.GetStringLength()),
bounds_from_query);
}
if (kv_v.name == "B") {

View File

@@ -8,8 +8,6 @@
#include "consumed_capacity.hh"
#include "error.hh"
#include "utils/rjson.hh"
#include <fmt/format.h>
namespace alternator {
@@ -34,12 +32,12 @@ bool consumed_capacity_counter::should_add_capacity(const rjson::value& request)
if (!return_consumed->IsString()) {
throw api_error::validation("Non-string ReturnConsumedCapacity field in request");
}
std::string_view consumed = rjson::to_string_view(*return_consumed);
std::string consumed = return_consumed->GetString();
if (consumed == "INDEXES") {
throw api_error::validation("INDEXES consumed capacity is not supported");
}
if (consumed != "TOTAL") {
throw api_error::validation(fmt::format("Unknown consumed capacity {}", consumed));
throw api_error::validation("Unknown consumed capacity "+ consumed);
}
return true;
}

View File

@@ -169,7 +169,7 @@ future<> controller::request_stop_server() {
});
}
future<utils::chunked_vector<foreign_ptr<std::unique_ptr<client_data>>>> controller::get_client_data() {
future<utils::chunked_vector<client_data>> controller::get_client_data() {
return _server.local().get_client_data();
}

View File

@@ -93,7 +93,7 @@ public:
// This virtual function is called (on each shard separately) when the
// virtual table "system.clients" is read. It is expected to generate a
// list of clients connected to this server (on this shard).
virtual future<utils::chunked_vector<foreign_ptr<std::unique_ptr<client_data>>>> get_client_data() override;
virtual future<utils::chunked_vector<client_data>> get_client_data() override;
};
}

View File

@@ -419,7 +419,7 @@ static std::optional<std::string> find_table_name(const rjson::value& request) {
if (!table_name_value->IsString()) {
throw api_error::validation("Non-string TableName field in request");
}
std::string table_name = rjson::to_string(*table_name_value);
std::string table_name = table_name_value->GetString();
return table_name;
}
@@ -546,7 +546,7 @@ get_table_or_view(service::storage_proxy& proxy, const rjson::value& request) {
// does exist but the index does not (ValidationException).
if (proxy.data_dictionary().has_schema(keyspace_name, orig_table_name)) {
throw api_error::validation(
fmt::format("Requested resource not found: Index '{}' for table '{}'", rjson::to_string_view(*index_name), orig_table_name));
fmt::format("Requested resource not found: Index '{}' for table '{}'", index_name->GetString(), orig_table_name));
} else {
throw api_error::resource_not_found(
fmt::format("Requested resource not found: Table: {} not found", orig_table_name));
@@ -587,7 +587,7 @@ static std::string get_string_attribute(const rjson::value& value, std::string_v
throw api_error::validation(fmt::format("Expected string value for attribute {}, got: {}",
attribute_name, value));
}
return rjson::to_string(*attribute_value);
return std::string(attribute_value->GetString(), attribute_value->GetStringLength());
}
// Convenience function for getting the value of a boolean attribute, or a
@@ -1080,8 +1080,8 @@ static void add_column(schema_builder& builder, const std::string& name, const r
}
for (auto it = attribute_definitions.Begin(); it != attribute_definitions.End(); ++it) {
const rjson::value& attribute_info = *it;
if (rjson::to_string_view(attribute_info["AttributeName"]) == name) {
std::string_view type = rjson::to_string_view(attribute_info["AttributeType"]);
if (attribute_info["AttributeName"].GetString() == name) {
auto type = attribute_info["AttributeType"].GetString();
data_type dt = parse_key_type(type);
if (computed_column) {
// Computed column for GSI (doesn't choose a real column as-is
@@ -1116,7 +1116,7 @@ static std::pair<std::string, std::string> parse_key_schema(const rjson::value&
throw api_error::validation("First element of KeySchema must be an object");
}
const rjson::value *v = rjson::find((*key_schema)[0], "KeyType");
if (!v || !v->IsString() || rjson::to_string_view(*v) != "HASH") {
if (!v || !v->IsString() || v->GetString() != std::string("HASH")) {
throw api_error::validation("First key in KeySchema must be a HASH key");
}
v = rjson::find((*key_schema)[0], "AttributeName");
@@ -1124,14 +1124,14 @@ static std::pair<std::string, std::string> parse_key_schema(const rjson::value&
throw api_error::validation("First key in KeySchema must have string AttributeName");
}
validate_attr_name_length(supplementary_context, v->GetStringLength(), true, "HASH key in KeySchema - ");
std::string hash_key = rjson::to_string(*v);
std::string hash_key = v->GetString();
std::string range_key;
if (key_schema->Size() == 2) {
if (!(*key_schema)[1].IsObject()) {
throw api_error::validation("Second element of KeySchema must be an object");
}
v = rjson::find((*key_schema)[1], "KeyType");
if (!v || !v->IsString() || rjson::to_string_view(*v) != "RANGE") {
if (!v || !v->IsString() || v->GetString() != std::string("RANGE")) {
throw api_error::validation("Second key in KeySchema must be a RANGE key");
}
v = rjson::find((*key_schema)[1], "AttributeName");
@@ -1799,11 +1799,6 @@ static future<executor::request_return_type> create_table_on_shard0(service::cli
}
}
}
// Creating an index in tablets mode requires the rf_rack_valid_keyspaces option to be enabled.
// GSI and LSI indexes are based on materialized views which require this option to avoid consistency issues.
if (!view_builders.empty() && ksm->uses_tablets() && !sp.data_dictionary().get_config().rf_rack_valid_keyspaces()) {
co_return api_error::validation("GlobalSecondaryIndexes and LocalSecondaryIndexes with tablets require the rf_rack_valid_keyspaces option to be enabled.");
}
try {
schema_mutations = service::prepare_new_keyspace_announcement(sp.local_db(), ksm, ts);
} catch (exceptions::already_exists_exception&) {
@@ -1892,8 +1887,8 @@ future<executor::request_return_type> executor::create_table(client_state& clien
std::string def_type = type_to_string(def.type);
for (auto it = attribute_definitions.Begin(); it != attribute_definitions.End(); ++it) {
const rjson::value& attribute_info = *it;
if (rjson::to_string_view(attribute_info["AttributeName"]) == def.name_as_text()) {
std::string_view type = rjson::to_string_view(attribute_info["AttributeType"]);
if (attribute_info["AttributeName"].GetString() == def.name_as_text()) {
auto type = attribute_info["AttributeType"].GetString();
if (type != def_type) {
throw api_error::validation(fmt::format("AttributeDefinitions redefined {} to {} already a key attribute of type {} in this table", def.name_as_text(), type, def_type));
}
@@ -2024,10 +2019,6 @@ future<executor::request_return_type> executor::update_table(client_state& clien
co_return api_error::validation(fmt::format(
"LSI {} already exists in table {}, can't use same name for GSI", index_name, table_name));
}
if (p.local().local_db().find_keyspace(keyspace_name).get_replication_strategy().uses_tablets() &&
!p.local().data_dictionary().get_config().rf_rack_valid_keyspaces()) {
co_return api_error::validation("GlobalSecondaryIndexes with tablets require the rf_rack_valid_keyspaces option to be enabled.");
}
elogger.trace("Adding GSI {}", index_name);
// FIXME: read and handle "Projection" parameter. This will
@@ -2371,7 +2362,7 @@ put_or_delete_item::put_or_delete_item(const rjson::value& item, schema_ptr sche
_cells = std::vector<cell>();
_cells->reserve(item.MemberCount());
for (auto it = item.MemberBegin(); it != item.MemberEnd(); ++it) {
bytes column_name = to_bytes(rjson::to_string_view(it->name));
bytes column_name = to_bytes(it->name.GetString());
validate_value(it->value, "PutItem");
const column_definition* cdef = find_attribute(*schema, column_name);
validate_attr_name_length("", column_name.size(), cdef && cdef->is_primary_key());
@@ -2792,10 +2783,10 @@ static void verify_all_are_used(const rjson::value* field,
return;
}
for (auto it = field->MemberBegin(); it != field->MemberEnd(); ++it) {
if (!used.contains(rjson::to_string(it->name))) {
if (!used.contains(it->name.GetString())) {
throw api_error::validation(
format("{} has spurious '{}', not used in {}",
field_name, rjson::to_string_view(it->name), operation));
field_name, it->name.GetString(), operation));
}
}
}
@@ -3009,7 +3000,7 @@ future<executor::request_return_type> executor::delete_item(client_state& client
}
static schema_ptr get_table_from_batch_request(const service::storage_proxy& proxy, const rjson::value::ConstMemberIterator& batch_request) {
sstring table_name = rjson::to_sstring(batch_request->name); // JSON keys are always strings
sstring table_name = batch_request->name.GetString(); // JSON keys are always strings
try {
return proxy.data_dictionary().find_schema(sstring(executor::KEYSPACE_NAME_PREFIX) + table_name, table_name);
} catch(data_dictionary::no_such_column_family&) {
@@ -3395,7 +3386,7 @@ static bool hierarchy_filter(rjson::value& val, const attribute_path_map_node<T>
}
rjson::value newv = rjson::empty_object();
for (auto it = v.MemberBegin(); it != v.MemberEnd(); ++it) {
std::string attr = rjson::to_string(it->name);
std::string attr = it->name.GetString();
auto x = members.find(attr);
if (x != members.end()) {
if (x->second) {
@@ -3615,7 +3606,7 @@ static std::optional<attrs_to_get> calculate_attrs_to_get(const rjson::value& re
const rjson::value& attributes_to_get = req["AttributesToGet"];
attrs_to_get ret;
for (auto it = attributes_to_get.Begin(); it != attributes_to_get.End(); ++it) {
attribute_path_map_add("AttributesToGet", ret, rjson::to_string(*it));
attribute_path_map_add("AttributesToGet", ret, it->GetString());
validate_attr_name_length("AttributesToGet", it->GetStringLength(), false);
}
if (ret.empty()) {
@@ -4281,12 +4272,12 @@ inline void update_item_operation::apply_attribute_updates(const std::unique_ptr
attribute_collector& modified_attrs, bool& any_updates, bool& any_deletes) const {
for (auto it = _attribute_updates->MemberBegin(); it != _attribute_updates->MemberEnd(); ++it) {
// Note that it.key() is the name of the column, *it is the operation
bytes column_name = to_bytes(rjson::to_string_view(it->name));
bytes column_name = to_bytes(it->name.GetString());
const column_definition* cdef = _schema->get_column_definition(column_name);
if (cdef && cdef->is_primary_key()) {
throw api_error::validation(format("UpdateItem cannot update key column {}", rjson::to_string_view(it->name)));
throw api_error::validation(format("UpdateItem cannot update key column {}", it->name.GetString()));
}
std::string action = rjson::to_string((it->value)["Action"]);
std::string action = (it->value)["Action"].GetString();
if (action == "DELETE") {
// The DELETE operation can do two unrelated tasks. Without a
// "Value" option, it is used to delete an attribute. With a
@@ -5483,7 +5474,7 @@ calculate_bounds_conditions(schema_ptr schema, const rjson::value& conditions) {
std::vector<query::clustering_range> ck_bounds;
for (auto it = conditions.MemberBegin(); it != conditions.MemberEnd(); ++it) {
sstring key = rjson::to_sstring(it->name);
std::string key = it->name.GetString();
const rjson::value& condition = it->value;
const rjson::value& comp_definition = rjson::get(condition, "ComparisonOperator");
@@ -5491,13 +5482,13 @@ calculate_bounds_conditions(schema_ptr schema, const rjson::value& conditions) {
const column_definition& pk_cdef = schema->partition_key_columns().front();
const column_definition* ck_cdef = schema->clustering_key_size() > 0 ? &schema->clustering_key_columns().front() : nullptr;
if (key == pk_cdef.name_as_text()) {
if (sstring(key) == pk_cdef.name_as_text()) {
if (!partition_ranges.empty()) {
throw api_error::validation("Currently only a single restriction per key is allowed");
}
partition_ranges.push_back(calculate_pk_bound(schema, pk_cdef, comp_definition, attr_list));
}
if (ck_cdef && key == ck_cdef->name_as_text()) {
if (ck_cdef && sstring(key) == ck_cdef->name_as_text()) {
if (!ck_bounds.empty()) {
throw api_error::validation("Currently only a single restriction per key is allowed");
}
@@ -5898,7 +5889,7 @@ future<executor::request_return_type> executor::list_tables(client_state& client
rjson::value* exclusive_start_json = rjson::find(request, "ExclusiveStartTableName");
rjson::value* limit_json = rjson::find(request, "Limit");
std::string exclusive_start = exclusive_start_json ? rjson::to_string(*exclusive_start_json) : "";
std::string exclusive_start = exclusive_start_json ? exclusive_start_json->GetString() : "";
int limit = limit_json ? limit_json->GetInt() : 100;
if (limit < 1 || limit > 100) {
co_return api_error::validation("Limit must be greater than 0 and no greater than 100");

View File

@@ -496,7 +496,7 @@ const std::pair<std::string, const rjson::value*> unwrap_set(const rjson::value&
return {"", nullptr};
}
auto it = v.MemberBegin();
const std::string it_key = rjson::to_string(it->name);
const std::string it_key = it->name.GetString();
if (it_key != "SS" && it_key != "BS" && it_key != "NS") {
return {std::move(it_key), nullptr};
}

View File

@@ -708,12 +708,8 @@ future<executor::request_return_type> server::handle_api_request(std::unique_ptr
// As long as the system_clients_entry object is alive, this request will
// be visible in the "system.clients" virtual table. When requested, this
// entry will be formatted by server::ongoing_request::make_client_data().
auto user_agent_header = co_await _connection_options_keys_and_values.get_or_load(req->get_header("User-Agent"), [] (const client_options_cache_key_type&) {
return make_ready_future<options_cache_value_type>(options_cache_value_type{});
});
auto system_clients_entry = _ongoing_requests.emplace(
req->get_client_address(), std::move(user_agent_header),
req->get_client_address(), req->get_header("User-Agent"),
username, current_scheduling_group(),
req->get_protocol_name() == "https");
@@ -989,10 +985,10 @@ client_data server::ongoing_request::make_client_data() const {
return cd;
}
future<utils::chunked_vector<foreign_ptr<std::unique_ptr<client_data>>>> server::get_client_data() {
utils::chunked_vector<foreign_ptr<std::unique_ptr<client_data>>> ret;
future<utils::chunked_vector<client_data>> server::get_client_data() {
utils::chunked_vector<client_data> ret;
co_await _ongoing_requests.for_each_gently([&ret] (const ongoing_request& r) {
ret.emplace_back(make_foreign(std::make_unique<client_data>(r.make_client_data())));
ret.emplace_back(r.make_client_data());
});
co_return ret;
}

View File

@@ -55,7 +55,6 @@ class server : public peering_sharded_service<server> {
// though it isn't really relevant for Alternator which defines its own
// timeouts separately. We can create this object only once.
updateable_timeout_config _timeout_config;
client_options_cache_type _connection_options_keys_and_values;
alternator_callbacks_map _callbacks;
@@ -89,7 +88,7 @@ class server : public peering_sharded_service<server> {
// is called when reading the "system.clients" virtual table.
struct ongoing_request {
socket_address _client_address;
client_options_cache_entry_type _user_agent;
sstring _user_agent;
sstring _username;
scheduling_group _scheduling_group;
bool _is_https;
@@ -108,7 +107,7 @@ public:
// table "system.clients" is read. It is expected to generate a list of
// clients connected to this server (on this shard). This function is
// called by alternator::controller::get_client_data().
future<utils::chunked_vector<foreign_ptr<std::unique_ptr<client_data>>>> get_client_data();
future<utils::chunked_vector<client_data>> get_client_data();
private:
void set_routes(seastar::httpd::routes& r);
// If verification succeeds, returns the authenticated user's username

View File

@@ -93,7 +93,7 @@ future<executor::request_return_type> executor::update_time_to_live(client_state
if (v->GetStringLength() < 1 || v->GetStringLength() > 255) {
co_return api_error::validation("The length of AttributeName must be between 1 and 255");
}
sstring attribute_name = rjson::to_sstring(*v);
sstring attribute_name(v->GetString(), v->GetStringLength());
co_await verify_permission(_enforce_authorization, _warn_authorization, client_state, schema, auth::permission::ALTER, _stats);
co_await db::modify_tags(_mm, schema->ks_name(), schema->cf_name(), [&](std::map<sstring, sstring>& tags_map) {

View File

@@ -31,7 +31,6 @@ set(swagger_files
api-doc/column_family.json
api-doc/commitlog.json
api-doc/compaction_manager.json
api-doc/client_routes.json
api-doc/config.json
api-doc/cql_server_test.json
api-doc/endpoint_snitch_info.json
@@ -69,7 +68,6 @@ target_sources(api
PRIVATE
api.cc
cache_service.cc
client_routes.cc
collectd.cc
column_family.cc
commitlog.cc

View File

@@ -1,23 +0,0 @@
, "client_routes_entry": {
"id": "client_routes_entry",
"summary": "An entry storing client routes",
"properties": {
"connection_id": {"type": "string"},
"host_id": {"type": "string", "format": "uuid"},
"address": {"type": "string"},
"port": {"type": "integer"},
"tls_port": {"type": "integer"},
"alternator_port": {"type": "integer"},
"alternator_https_port": {"type": "integer"}
},
"required": ["connection_id", "host_id", "address"]
}
, "client_routes_key": {
"id": "client_routes_key",
"summary": "A key of client_routes_entry",
"properties": {
"connection_id": {"type": "string"},
"host_id": {"type": "string", "format": "uuid"}
}
}

View File

@@ -1,74 +0,0 @@
, "/v2/client-routes":{
"get": {
"description":"List all client route entries",
"operationId":"get_client_routes",
"tags":["client_routes"],
"produces":[
"application/json"
],
"parameters":[],
"responses":{
"200":{
"schema":{
"type":"array",
"items":{ "$ref":"#/definitions/client_routes_entry" }
}
},
"default":{
"description":"unexpected error",
"schema":{"$ref":"#/definitions/ErrorModel"}
}
}
},
"post": {
"description":"Upsert one or more client route entries",
"operationId":"set_client_routes",
"tags":["client_routes"],
"parameters":[
{
"name":"body",
"in":"body",
"required":true,
"schema":{
"type":"array",
"items":{ "$ref":"#/definitions/client_routes_entry" }
}
}
],
"responses":{
"200":{ "description": "OK" },
"default":{
"description":"unexpected error",
"schema":{ "$ref":"#/definitions/ErrorModel" }
}
}
},
"delete": {
"description":"Delete one or more client route entries",
"operationId":"delete_client_routes",
"tags":["client_routes"],
"parameters":[
{
"name":"body",
"in":"body",
"required":true,
"schema":{
"type":"array",
"items":{ "$ref":"#/definitions/client_routes_key" }
}
}
],
"responses":{
"200":{
"description": "OK"
},
"default":{
"description":"unexpected error",
"schema":{
"$ref":"#/definitions/ErrorModel"
}
}
}
}
}

View File

@@ -729,6 +729,14 @@
"allowMultiple":false,
"type":"boolean",
"paramType":"query"
},
{
"name":"use_sstable_identifier",
"description":"Use the sstable identifier UUID, if available, rather than the sstable generation.",
"required":false,
"allowMultiple":false,
"type":"boolean",
"paramType":"query"
}
]
},
@@ -3051,7 +3059,7 @@
},
{
"name":"incremental_mode",
"description":"Set the incremental repair mode. Can be 'disabled', 'incremental', or 'full'. 'incremental': The incremental repair logic is enabled. Unrepaired sstables will be included for repair. Repaired sstables will be skipped. The incremental repair states will be updated after repair. 'full': The incremental repair logic is enabled. Both repaired and unrepaired sstables will be included for repair. The incremental repair states will be updated after repair. 'disabled': The incremental repair logic is disabled completely. The incremental repair states, e.g., repaired_at in sstables and sstables_repaired_at in the system.tablets table, will not be updated after repair. When the option is not provided, it defaults to 'disabled' mode.",
"description":"Set the incremental repair mode. Can be 'disabled', 'incremental', or 'full'. 'incremental': The incremental repair logic is enabled. Unrepaired sstables will be included for repair. Repaired sstables will be skipped. The incremental repair states will be updated after repair. 'full': The incremental repair logic is enabled. Both repaired and unrepaired sstables will be included for repair. The incremental repair states will be updated after repair. 'disabled': The incremental repair logic is disabled completely. The incremental repair states, e.g., repaired_at in sstables and sstables_repaired_at in the system.tablets table, will not be updated after repair. When the option is not provided, it defaults to incremental mode.",
"required":false,
"allowMultiple":false,
"type":"string",

View File

@@ -37,7 +37,6 @@
#include "raft.hh"
#include "gms/gossip_address_map.hh"
#include "service_levels.hh"
#include "client_routes.hh"
logging::logger apilog("api");
@@ -68,11 +67,9 @@ future<> set_server_init(http_context& ctx) {
rb02->set_api_doc(r);
rb02->register_api_file(r, "swagger20_header");
rb02->register_api_file(r, "metrics");
rb02->register_api_file(r, "client_routes");
rb->register_function(r, "system",
"The system related API");
rb02->add_definitions_file(r, "metrics");
rb02->add_definitions_file(r, "client_routes");
set_system(ctx, r);
rb->register_function(r, "error_injection",
"The error injection API");
@@ -132,16 +129,6 @@ future<> unset_server_storage_service(http_context& ctx) {
return ctx.http_server.set_routes([&ctx] (routes& r) { unset_storage_service(ctx, r); });
}
future<> set_server_client_routes(http_context& ctx, sharded<service::client_routes_service>& cr) {
return ctx.http_server.set_routes([&ctx, &cr] (routes& r) {
set_client_routes(ctx, r, cr);
});
}
future<> unset_server_client_routes(http_context& ctx) {
return ctx.http_server.set_routes([&ctx] (routes& r) { unset_client_routes(ctx, r); });
}
future<> set_load_meter(http_context& ctx, service::load_meter& lm) {
return ctx.http_server.set_routes([&ctx, &lm] (routes& r) { set_load_meter(ctx, r, lm); });
}

View File

@@ -29,7 +29,6 @@ class storage_proxy;
class storage_service;
class raft_group0_client;
class raft_group_registry;
class client_routes_service;
} // namespace service
@@ -100,8 +99,6 @@ future<> set_server_snitch(http_context& ctx, sharded<locator::snitch_ptr>& snit
future<> unset_server_snitch(http_context& ctx);
future<> set_server_storage_service(http_context& ctx, sharded<service::storage_service>& ss, service::raft_group0_client&);
future<> unset_server_storage_service(http_context& ctx);
future<> set_server_client_routes(http_context& ctx, sharded<service::client_routes_service>& cr);
future<> unset_server_client_routes(http_context& ctx);
future<> set_server_sstables_loader(http_context& ctx, sharded<sstables_loader>& sst_loader);
future<> unset_server_sstables_loader(http_context& ctx);
future<> set_server_view_builder(http_context& ctx, sharded<db::view::view_builder>& vb, sharded<gms::gossiper>& g);

View File

@@ -1,176 +0,0 @@
/*
* Copyright (C) 2025-present ScyllaDB
*
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
*/
#include <seastar/http/short_streams.hh>
#include "client_routes.hh"
#include "api/api.hh"
#include "service/storage_service.hh"
#include "service/client_routes.hh"
#include "utils/rjson.hh"
#include "api/api-doc/client_routes.json.hh"
using namespace seastar::httpd;
using namespace std::chrono_literals;
using namespace json;
extern logging::logger apilog;
namespace api {
static void validate_client_routes_endpoint(sharded<service::client_routes_service>& cr, sstring endpoint_name) {
if (!cr.local().get_feature_service().client_routes) {
apilog.warn("{}: called before the cluster feature was enabled", endpoint_name);
throw std::runtime_error(fmt::format("{} requires all nodes to support the CLIENT_ROUTES cluster feature", endpoint_name));
}
}
static sstring parse_string(const char* name, rapidjson::Value const& v) {
const auto it = v.FindMember(name);
if (it == v.MemberEnd()) {
throw bad_param_exception(fmt::format("Missing '{}'", name));
}
if (!it->value.IsString()) {
throw bad_param_exception(fmt::format("'{}' must be a string", name));
}
return {it->value.GetString(), it->value.GetStringLength()};
}
static std::optional<uint32_t> parse_port(const char* name, rapidjson::Value const& v) {
const auto it = v.FindMember(name);
if (it == v.MemberEnd()) {
return std::nullopt;
}
if (!it->value.IsInt()) {
throw bad_param_exception(fmt::format("'{}' must be an integer", name));
}
auto port = it->value.GetInt();
if (port < 1 || port > 65535) {
throw bad_param_exception(fmt::format("'{}' value={} is outside the allowed port range", name, port));
}
return port;
}
static std::vector<service::client_routes_service::client_route_entry> parse_set_client_array(const rapidjson::Document& root) {
if (!root.IsArray()) {
throw bad_param_exception("Body must be a JSON array");
}
std::vector<service::client_routes_service::client_route_entry> v;
v.reserve(root.GetArray().Size());
for (const auto& element : root.GetArray()) {
if (!element.IsObject()) { throw bad_param_exception("Each element must be object"); }
const auto port = parse_port("port", element);
const auto tls_port = parse_port("tls_port", element);
const auto alternator_port = parse_port("alternator_port", element);
const auto alternator_https_port = parse_port("alternator_https_port", element);
if (!port.has_value() && !tls_port.has_value() && !alternator_port.has_value() && !alternator_https_port.has_value()) {
throw bad_param_exception("At least one port field ('port', 'tls_port', 'alternator_port', 'alternator_https_port') must be specified");
}
v.emplace_back(
parse_string("connection_id", element),
utils::UUID{parse_string("host_id", element)},
parse_string("address", element),
port,
tls_port,
alternator_port,
alternator_https_port
);
}
return v;
}
static
future<json::json_return_type>
rest_set_client_routes(http_context& ctx, sharded<service::client_routes_service>& cr, std::unique_ptr<http::request> req) {
validate_client_routes_endpoint(cr, "rest_set_client_routes");
rapidjson::Document root;
auto content = co_await util::read_entire_stream_contiguous(*req->content_stream);
root.Parse(content.c_str());
co_await cr.local().set_client_routes(parse_set_client_array(root));
co_return seastar::json::json_void();
}
static std::vector<service::client_routes_service::client_route_key> parse_delete_client_array(const rapidjson::Document& root) {
if (!root.IsArray()) {
throw bad_param_exception("Body must be a JSON array");
}
std::vector<service::client_routes_service::client_route_key> v;
v.reserve(root.GetArray().Size());
for (const auto& element : root.GetArray()) {
v.emplace_back(
parse_string("connection_id", element),
utils::UUID{parse_string("host_id", element)}
);
}
return v;
}
static
future<json::json_return_type>
rest_delete_client_routes(http_context& ctx, sharded<service::client_routes_service>& cr, std::unique_ptr<http::request> req) {
validate_client_routes_endpoint(cr, "delete_client_routes");
rapidjson::Document root;
auto content = co_await util::read_entire_stream_contiguous(*req->content_stream);
root.Parse(content.c_str());
co_await cr.local().delete_client_routes(parse_delete_client_array(root));
co_return seastar::json::json_void();
}
static
future<json::json_return_type>
rest_get_client_routes(http_context& ctx, sharded<service::client_routes_service>& cr, std::unique_ptr<http::request> req) {
validate_client_routes_endpoint(cr, "get_client_routes");
co_return co_await cr.invoke_on(0, [] (service::client_routes_service& cr) -> future<json::json_return_type> {
co_return json::json_return_type(stream_range_as_array(co_await cr.get_client_routes(), [](const service::client_routes_service::client_route_entry & entry) {
seastar::httpd::client_routes_json::client_routes_entry obj;
obj.connection_id = entry.connection_id;
obj.host_id = fmt::to_string(entry.host_id);
obj.address = entry.address;
if (entry.port.has_value()) { obj.port = entry.port.value(); }
if (entry.tls_port.has_value()) { obj.tls_port = entry.tls_port.value(); }
if (entry.alternator_port.has_value()) { obj.alternator_port = entry.alternator_port.value(); }
if (entry.alternator_https_port.has_value()) { obj.alternator_https_port = entry.alternator_https_port.value(); }
return obj;
}));
});
}
void set_client_routes(http_context& ctx, routes& r, sharded<service::client_routes_service>& cr) {
seastar::httpd::client_routes_json::set_client_routes.set(r, [&ctx, &cr] (std::unique_ptr<seastar::http::request> req) {
return rest_set_client_routes(ctx, cr, std::move(req));
});
seastar::httpd::client_routes_json::delete_client_routes.set(r, [&ctx, &cr] (std::unique_ptr<seastar::http::request> req) {
return rest_delete_client_routes(ctx, cr, std::move(req));
});
seastar::httpd::client_routes_json::get_client_routes.set(r, [&ctx, &cr] (std::unique_ptr<seastar::http::request> req) {
return rest_get_client_routes(ctx, cr, std::move(req));
});
}
void unset_client_routes(http_context& ctx, routes& r) {
seastar::httpd::client_routes_json::set_client_routes.unset(r);
seastar::httpd::client_routes_json::delete_client_routes.unset(r);
seastar::httpd::client_routes_json::get_client_routes.unset(r);
}
}

View File

@@ -1,20 +0,0 @@
/*
* Copyright (C) 2025-present ScyllaDB
*
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
*/
#pragma once
#include <seastar/core/sharded.hh>
#include <seastar/json/json_elements.hh>
#include "api/api_init.hh"
namespace api {
void set_client_routes(http_context& ctx, httpd::routes& r, sharded<service::client_routes_service>& cr);
void unset_client_routes(http_context& ctx, httpd::routes& r);
}

View File

@@ -2020,12 +2020,16 @@ void set_snapshot(http_context& ctx, routes& r, sharded<db::snapshot_ctl>& snap_
auto tag = req->get_query_param("tag");
auto column_families = split(req->get_query_param("cf"), ",");
auto sfopt = req->get_query_param("sf");
auto sf = db::snapshot_ctl::skip_flush(strcasecmp(sfopt.c_str(), "true") == 0);
auto usiopt = req->get_query_param("use_sstable_identifier");
db::snapshot_options opts = {
.skip_flush = strcasecmp(sfopt.c_str(), "true") == 0,
.use_sstable_identifier = strcasecmp(usiopt.c_str(), "true") == 0
};
std::vector<sstring> keynames = split(req->get_query_param("kn"), ",");
try {
if (column_families.empty()) {
co_await snap_ctl.local().take_snapshot(tag, keynames, sf);
co_await snap_ctl.local().take_snapshot(tag, keynames, opts);
} else {
if (keynames.empty()) {
throw httpd::bad_param_exception("The keyspace of column families must be specified");
@@ -2033,7 +2037,7 @@ void set_snapshot(http_context& ctx, routes& r, sharded<db::snapshot_ctl>& snap_
if (keynames.size() > 1) {
throw httpd::bad_param_exception("Only one keyspace allowed when specifying a column family");
}
co_await snap_ctl.local().take_column_family_snapshot(keynames[0], column_families, tag, sf);
co_await snap_ctl.local().take_column_family_snapshot(keynames[0], column_families, tag, opts);
}
co_return json_void();
} catch (...) {
@@ -2068,7 +2072,8 @@ void set_snapshot(http_context& ctx, routes& r, sharded<db::snapshot_ctl>& snap_
auto info = parse_scrub_options(ctx, std::move(req));
if (!info.snapshot_tag.empty()) {
co_await snap_ctl.local().take_column_family_snapshot(info.keyspace, info.column_families, info.snapshot_tag, db::snapshot_ctl::skip_flush::no);
db::snapshot_options opts = {.skip_flush = false, .use_sstable_identifier = false};
co_await snap_ctl.local().take_column_family_snapshot(info.keyspace, info.column_families, info.snapshot_tag, opts);
}
compaction::compaction_stats stats;

View File

@@ -146,7 +146,8 @@ void set_tasks_compaction_module(http_context& ctx, routes& r, sharded<service::
auto info = parse_scrub_options(ctx, std::move(req));
if (!info.snapshot_tag.empty()) {
co_await snap_ctl.local().take_column_family_snapshot(info.keyspace, info.column_families, info.snapshot_tag, db::snapshot_ctl::skip_flush::no);
db::snapshot_options opts = {.skip_flush = false, .use_sstable_identifier = false};
co_await snap_ctl.local().take_column_family_snapshot(info.keyspace, info.column_families, info.snapshot_tag, opts);
}
auto& compaction_module = db.local().get_compaction_manager().get_task_manager_module();

View File

@@ -9,6 +9,7 @@
#include "auth/allow_all_authenticator.hh"
#include "service/migration_manager.hh"
#include "utils/alien_worker.hh"
#include "utils/class_registrator.hh"
namespace auth {
@@ -22,6 +23,7 @@ static const class_registrator<
cql3::query_processor&,
::service::raft_group0_client&,
::service::migration_manager&,
cache&> registration("org.apache.cassandra.auth.AllowAllAuthenticator");
cache&,
utils::alien_worker&> registration("org.apache.cassandra.auth.AllowAllAuthenticator");
}

View File

@@ -14,6 +14,7 @@
#include "auth/authenticator.hh"
#include "auth/cache.hh"
#include "auth/common.hh"
#include "utils/alien_worker.hh"
namespace cql3 {
class query_processor;
@@ -29,7 +30,7 @@ extern const std::string_view allow_all_authenticator_name;
class allow_all_authenticator final : public authenticator {
public:
allow_all_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&) {
allow_all_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&, utils::alien_worker&) {
}
virtual future<> start() override {

View File

@@ -35,13 +35,14 @@ static const class_registrator<auth::authenticator
, cql3::query_processor&
, ::service::raft_group0_client&
, ::service::migration_manager&
, auth::cache&> cert_auth_reg(CERT_AUTH_NAME);
, auth::cache&
, utils::alien_worker&> cert_auth_reg(CERT_AUTH_NAME);
enum class auth::certificate_authenticator::query_source {
subject, altname
};
auth::certificate_authenticator::certificate_authenticator(cql3::query_processor& qp, ::service::raft_group0_client&, ::service::migration_manager&, auth::cache&)
auth::certificate_authenticator::certificate_authenticator(cql3::query_processor& qp, ::service::raft_group0_client&, ::service::migration_manager&, auth::cache&, utils::alien_worker&)
: _queries([&] {
auto& conf = qp.db().get_config();
auto queries = conf.auth_certificate_role_queries();
@@ -76,9 +77,9 @@ auth::certificate_authenticator::certificate_authenticator(cql3::query_processor
throw std::invalid_argument(fmt::format("Invalid source: {}", map.at(cfg_source_attr)));
}
continue;
} catch (const std::out_of_range&) {
} catch (std::out_of_range&) {
// just fallthrough
} catch (const boost::regex_error&) {
} catch (boost::regex_error&) {
std::throw_with_nested(std::invalid_argument(fmt::format("Invalid query expression: {}", map.at(cfg_query_attr))));
}
}

View File

@@ -10,6 +10,7 @@
#pragma once
#include "auth/authenticator.hh"
#include "utils/alien_worker.hh"
#include <boost/regex_fwd.hpp> // IWYU pragma: keep
namespace cql3 {
@@ -33,7 +34,7 @@ class certificate_authenticator : public authenticator {
enum class query_source;
std::vector<std::pair<query_source, boost::regex>> _queries;
public:
certificate_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&);
certificate_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&, utils::alien_worker&);
~certificate_authenticator();
future<> start() override;

View File

@@ -94,7 +94,7 @@ static future<> create_legacy_metadata_table_if_missing_impl(
try {
co_return co_await mm.announce(co_await ::service::prepare_new_column_family_announcement(qp.proxy(), table, ts),
std::move(group0_guard), format("auth: create {} metadata table", table->cf_name()));
} catch (const exceptions::already_exists_exception&) {}
} catch (exceptions::already_exists_exception&) {}
}
}

View File

@@ -256,7 +256,7 @@ future<> default_authorizer::revoke_all(std::string_view role_name, ::service::g
} else {
co_await collect_mutations(_qp, mc, query, {sstring(role_name)});
}
} catch (const exceptions::request_execution_exception& e) {
} catch (exceptions::request_execution_exception& e) {
alogger.warn("CassandraAuthorizer failed to revoke all permissions of {}: {}", role_name, e);
}
}
@@ -293,13 +293,13 @@ future<> default_authorizer::revoke_all_legacy(const resource& resource) {
[resource](auto ep) {
try {
std::rethrow_exception(ep);
} catch (const exceptions::request_execution_exception& e) {
} catch (exceptions::request_execution_exception& e) {
alogger.warn("CassandraAuthorizer failed to revoke all permissions on {}: {}", resource, e);
}
});
});
} catch (const exceptions::request_execution_exception& e) {
} catch (exceptions::request_execution_exception& e) {
alogger.warn("CassandraAuthorizer failed to revoke all permissions on {}: {}", resource, e);
return make_ready_future();
}

View File

@@ -49,7 +49,8 @@ static const class_registrator<
cql3::query_processor&,
::service::raft_group0_client&,
::service::migration_manager&,
cache&> password_auth_reg("org.apache.cassandra.auth.PasswordAuthenticator");
cache&,
utils::alien_worker&> password_auth_reg("org.apache.cassandra.auth.PasswordAuthenticator");
static thread_local auto rng_for_salt = std::default_random_engine(std::random_device{}());
@@ -63,13 +64,14 @@ std::string password_authenticator::default_superuser(const db::config& cfg) {
password_authenticator::~password_authenticator() {
}
password_authenticator::password_authenticator(cql3::query_processor& qp, ::service::raft_group0_client& g0, ::service::migration_manager& mm, cache& cache)
password_authenticator::password_authenticator(cql3::query_processor& qp, ::service::raft_group0_client& g0, ::service::migration_manager& mm, cache& cache, utils::alien_worker& hashing_worker)
: _qp(qp)
, _group0_client(g0)
, _migration_manager(mm)
, _cache(cache)
, _stopped(make_ready_future<>())
, _superuser(default_superuser(qp.db().get_config()))
, _hashing_worker(hashing_worker)
{}
static bool has_salted_hash(const cql3::untyped_result_set_row& row) {
@@ -328,18 +330,20 @@ future<authenticated_user> password_authenticator::authenticate(
}
salted_hash = role->salted_hash;
}
const bool password_match = co_await passwords::check(password, *salted_hash);
const bool password_match = co_await _hashing_worker.submit<bool>([password = std::move(password), salted_hash] {
return passwords::check(password, *salted_hash);
});
if (!password_match) {
throw exceptions::authentication_exception("Username and/or password are incorrect");
}
co_return username;
} catch (const std::system_error &) {
} catch (std::system_error &) {
std::throw_with_nested(exceptions::authentication_exception("Could not verify password"));
} catch (const exceptions::request_execution_exception& e) {
} catch (exceptions::request_execution_exception& e) {
std::throw_with_nested(exceptions::authentication_exception(e.what()));
} catch (const exceptions::authentication_exception& e) {
} catch (exceptions::authentication_exception& e) {
std::throw_with_nested(e);
} catch (const exceptions::unavailable_exception& e) {
} catch (exceptions::unavailable_exception& e) {
std::throw_with_nested(exceptions::authentication_exception(e.get_message()));
} catch (...) {
std::throw_with_nested(exceptions::authentication_exception("authentication failed"));

View File

@@ -18,6 +18,7 @@
#include "auth/passwords.hh"
#include "auth/cache.hh"
#include "service/raft/raft_group0_client.hh"
#include "utils/alien_worker.hh"
namespace db {
class config;
@@ -48,12 +49,13 @@ class password_authenticator : public authenticator {
shared_promise<> _superuser_created_promise;
// We used to also support bcrypt, SHA-256, and MD5 (ref. scylladb#24524).
constexpr static auth::passwords::scheme _scheme = passwords::scheme::sha_512;
utils::alien_worker& _hashing_worker;
public:
static db::consistency_level consistency_for_user(std::string_view role_name);
static std::string default_superuser(const db::config&);
password_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&);
password_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&, utils::alien_worker&);
~password_authenticator();

View File

@@ -7,8 +7,6 @@
*/
#include "auth/passwords.hh"
#include "utils/crypt_sha512.hh"
#include <seastar/core/coroutine.hh>
#include <cerrno>
@@ -23,46 +21,25 @@ static thread_local crypt_data tlcrypt = {};
namespace detail {
void verify_hashing_output(const char * res) {
if (!res || (res[0] == '*')) {
throw std::system_error(errno, std::system_category());
}
}
void verify_scheme(scheme scheme) {
const sstring random_part_of_salt = "aaaabbbbccccdddd";
const sstring salt = sstring(prefix_for_scheme(scheme)) + random_part_of_salt;
const char* e = crypt_r("fisk", salt.c_str(), &tlcrypt);
try {
verify_hashing_output(e);
} catch (const std::system_error& ex) {
throw no_supported_schemes();
if (e && (e[0] != '*')) {
return;
}
throw no_supported_schemes();
}
sstring hash_with_salt(const sstring& pass, const sstring& salt) {
auto res = crypt_r(pass.c_str(), salt.c_str(), &tlcrypt);
verify_hashing_output(res);
return res;
}
seastar::future<sstring> hash_with_salt_async(const sstring& pass, const sstring& salt) {
sstring res;
// Only SHA-512 hashes for passphrases shorter than 256 bytes can be computed using
// the __crypt_sha512 method. For other computations, we fall back to the
// crypt_r implementation from `<crypt.h>`, which can stall.
if (salt.starts_with(prefix_for_scheme(scheme::sha_512)) && pass.size() <= 255) {
char buf[128];
const char * output_ptr = co_await __crypt_sha512(pass.c_str(), salt.c_str(), buf);
verify_hashing_output(output_ptr);
res = output_ptr;
} else {
const char * output_ptr = crypt_r(pass.c_str(), salt.c_str(), &tlcrypt);
verify_hashing_output(output_ptr);
res = output_ptr;
if (!res || (res[0] == '*')) {
throw std::system_error(errno, std::system_category());
}
co_return res;
return res;
}
std::string_view prefix_for_scheme(scheme c) noexcept {
@@ -81,9 +58,8 @@ no_supported_schemes::no_supported_schemes()
: std::runtime_error("No allowed hashing schemes are supported on this system") {
}
seastar::future<bool> check(const sstring& pass, const sstring& salted_hash) {
const auto pwd_hash = co_await detail::hash_with_salt_async(pass, salted_hash);
co_return pwd_hash == salted_hash;
bool check(const sstring& pass, const sstring& salted_hash) {
return detail::hash_with_salt(pass, salted_hash) == salted_hash;
}
} // namespace auth::passwords

View File

@@ -11,7 +11,6 @@
#include <random>
#include <stdexcept>
#include <seastar/core/future.hh>
#include <seastar/core/sstring.hh>
#include "seastarx.hh"
@@ -76,19 +75,10 @@ sstring generate_salt(RandomNumberEngine& g, scheme scheme) {
///
/// Hash a password combined with an implementation-specific salt string.
/// Deprecated in favor of `hash_with_salt_async`.
///
/// \throws \ref std::system_error when an unexpected implementation-specific error occurs.
///
[[deprecated("Use hash_with_salt_async instead")]] sstring hash_with_salt(const sstring& pass, const sstring& salt);
///
/// Async version of `hash_with_salt` that returns a future.
/// If possible, hashing uses `coroutine::maybe_yield` to prevent reactor stalls.
///
/// \throws \ref std::system_error when an unexpected implementation-specific error occurs.
///
seastar::future<sstring> hash_with_salt_async(const sstring& pass, const sstring& salt);
sstring hash_with_salt(const sstring& pass, const sstring& salt);
} // namespace detail
@@ -117,6 +107,6 @@ sstring hash(const sstring& pass, RandomNumberEngine& g, scheme scheme) {
///
/// \throws \ref std::system_error when an unexpected implementation-specific error occurs.
///
seastar::future<bool> check(const sstring& pass, const sstring& salted_hash);
bool check(const sstring& pass, const sstring& salted_hash);
} // namespace auth::passwords

View File

@@ -35,9 +35,10 @@ static const class_registrator<
cql3::query_processor&,
::service::raft_group0_client&,
::service::migration_manager&,
cache&> saslauthd_auth_reg("com.scylladb.auth.SaslauthdAuthenticator");
cache&,
utils::alien_worker&> saslauthd_auth_reg("com.scylladb.auth.SaslauthdAuthenticator");
saslauthd_authenticator::saslauthd_authenticator(cql3::query_processor& qp, ::service::raft_group0_client&, ::service::migration_manager&, cache&)
saslauthd_authenticator::saslauthd_authenticator(cql3::query_processor& qp, ::service::raft_group0_client&, ::service::migration_manager&, cache&, utils::alien_worker&)
: _socket_path(qp.db().get_config().saslauthd_socket_path())
{}

View File

@@ -12,6 +12,7 @@
#include "auth/authenticator.hh"
#include "auth/cache.hh"
#include "utils/alien_worker.hh"
namespace cql3 {
class query_processor;
@@ -29,7 +30,7 @@ namespace auth {
class saslauthd_authenticator : public authenticator {
sstring _socket_path; ///< Path to the domain socket on which saslauthd is listening.
public:
saslauthd_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&);
saslauthd_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&,utils::alien_worker&);
future<> start() override;

View File

@@ -191,7 +191,8 @@ service::service(
::service::migration_manager& mm,
const service_config& sc,
maintenance_socket_enabled used_by_maintenance_socket,
cache& cache)
cache& cache,
utils::alien_worker& hashing_worker)
: service(
std::move(c),
cache,
@@ -199,7 +200,7 @@ service::service(
g0,
mn,
create_object<authorizer>(sc.authorizer_java_name, qp, g0, mm),
create_object<authenticator>(sc.authenticator_java_name, qp, g0, mm, cache),
create_object<authenticator>(sc.authenticator_java_name, qp, g0, mm, cache, hashing_worker),
create_object<role_manager>(sc.role_manager_java_name, qp, g0, mm, cache),
used_by_maintenance_socket) {
}
@@ -225,7 +226,7 @@ future<> service::create_legacy_keyspace_if_missing(::service::migration_manager
try {
co_return co_await mm.announce(::service::prepare_new_keyspace_announcement(db.real_database(), ksm, ts),
std::move(group0_guard), seastar::format("auth_service: create {} keyspace", meta::legacy::AUTH_KS));
} catch (const ::service::group0_concurrent_modification&) {
} catch (::service::group0_concurrent_modification&) {
log.info("Concurrent operation is detected while creating {} keyspace, retrying.", meta::legacy::AUTH_KS);
}
}

View File

@@ -27,6 +27,7 @@
#include "cql3/description.hh"
#include "seastarx.hh"
#include "service/raft/raft_group0_client.hh"
#include "utils/alien_worker.hh"
#include "utils/observable.hh"
#include "utils/serialized_action.hh"
#include "service/maintenance_mode.hh"
@@ -130,7 +131,8 @@ public:
::service::migration_manager&,
const service_config&,
maintenance_socket_enabled,
cache&);
cache&,
utils::alien_worker&);
future<> start(::service::migration_manager&, db::system_keyspace&);

View File

@@ -192,7 +192,7 @@ future<> standard_role_manager::legacy_create_default_role_if_missing() {
{_superuser},
cql3::query_processor::cache_internal::no).discard_result();
log.info("Created default superuser role '{}'.", _superuser);
} catch (const exceptions::unavailable_exception& e) {
} catch(const exceptions::unavailable_exception& e) {
log.warn("Skipped default role setup: some nodes were not ready; will retry");
throw e;
}

View File

@@ -38,8 +38,8 @@ class transitional_authenticator : public authenticator {
public:
static const sstring PASSWORD_AUTHENTICATOR_NAME;
transitional_authenticator(cql3::query_processor& qp, ::service::raft_group0_client& g0, ::service::migration_manager& mm, cache& cache)
: transitional_authenticator(std::make_unique<password_authenticator>(qp, g0, mm, cache)) {
transitional_authenticator(cql3::query_processor& qp, ::service::raft_group0_client& g0, ::service::migration_manager& mm, cache& cache, utils::alien_worker& hashing_worker)
: transitional_authenticator(std::make_unique<password_authenticator>(qp, g0, mm, cache, hashing_worker)) {
}
transitional_authenticator(std::unique_ptr<authenticator> a)
: _authenticator(std::move(a)) {
@@ -81,7 +81,7 @@ public:
}).handle_exception([](auto ep) {
try {
std::rethrow_exception(ep);
} catch (const exceptions::authentication_exception&) {
} catch (exceptions::authentication_exception&) {
// return anon user
return make_ready_future<authenticated_user>(anonymous_user());
}
@@ -126,7 +126,7 @@ public:
virtual bytes evaluate_response(bytes_view client_response) override {
try {
return _sasl->evaluate_response(client_response);
} catch (const exceptions::authentication_exception&) {
} catch (exceptions::authentication_exception&) {
_complete = true;
return {};
}
@@ -141,7 +141,7 @@ public:
return _sasl->get_authenticated_user().handle_exception([](auto ep) {
try {
std::rethrow_exception(ep);
} catch (const exceptions::authentication_exception&) {
} catch (exceptions::authentication_exception&) {
// return anon user
return make_ready_future<authenticated_user>(anonymous_user());
}
@@ -241,7 +241,8 @@ static const class_registrator<
cql3::query_processor&,
::service::raft_group0_client&,
::service::migration_manager&,
auth::cache&> transitional_authenticator_reg(auth::PACKAGE_NAME + "TransitionalAuthenticator");
auth::cache&,
utils::alien_worker&> transitional_authenticator_reg(auth::PACKAGE_NAME + "TransitionalAuthenticator");
static const class_registrator<
auth::authorizer,

View File

@@ -10,9 +10,7 @@
#include <seastar/net/inet_address.hh>
#include <seastar/core/sstring.hh>
#include "seastarx.hh"
#include "utils/loading_shared_values.hh"
#include <list>
#include <optional>
enum class client_type {
@@ -29,20 +27,6 @@ enum class client_connection_stage {
ready,
};
// We implement a keys cache using a map-like utils::loading_shared_values container by storing empty values.
struct options_cache_value_type {};
using client_options_cache_type = utils::loading_shared_values<sstring, options_cache_value_type>;
using client_options_cache_entry_type = client_options_cache_type::entry_ptr;
using client_options_cache_key_type = client_options_cache_type::key_type;
// This struct represents a single OPTION key-value pair from the client's connection options.
// Both key and value are represented by corresponding "references" to their cached values.
// Each "reference" is effectively a lw_shared_ptr value.
struct client_option_key_value_cached_entry {
client_options_cache_entry_type key;
client_options_cache_entry_type value;
};
sstring to_string(client_connection_stage ct);
// Representation of a row in `system.clients'. std::optionals are for nullable cells.
@@ -53,8 +37,8 @@ struct client_data {
client_connection_stage connection_stage = client_connection_stage::established;
int32_t shard_id; /// ID of server-side shard which is processing the connection.
std::optional<client_options_cache_entry_type> driver_name;
std::optional<client_options_cache_entry_type> driver_version;
std::optional<sstring> driver_name;
std::optional<sstring> driver_version;
std::optional<sstring> hostname;
std::optional<int32_t> protocol_version;
std::optional<sstring> ssl_cipher_suite;
@@ -62,7 +46,6 @@ struct client_data {
std::optional<sstring> ssl_protocol;
std::optional<sstring> username;
std::optional<sstring> scheduling_group_name;
std::list<client_option_key_value_cached_entry> client_options;
sstring stage_str() const { return to_string(connection_stage); }
sstring client_type_str() const { return to_string(ct); }

View File

@@ -125,6 +125,10 @@ if(target_arch)
add_compile_options("-march=${target_arch}")
endif()
if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
add_compile_options("SHELL:-Xclang -fexperimental-assignment-tracking=disabled")
endif()
function(maybe_limit_stack_usage_in_KB stack_usage_threshold_in_KB config)
math(EXPR _stack_usage_threshold_in_bytes "${stack_usage_threshold_in_KB} * 1024")
set(_stack_usage_threshold_flag "-Wstack-usage=${_stack_usage_threshold_in_bytes}")

View File

@@ -859,7 +859,6 @@ scylla_core = (['message/messaging_service.cc',
'utils/alien_worker.cc',
'utils/array-search.cc',
'utils/base64.cc',
'utils/crypt_sha512.cc',
'utils/logalloc.cc',
'utils/large_bitset.cc',
'utils/buffer_input_stream.cc',
@@ -1158,7 +1157,6 @@ scylla_core = (['message/messaging_service.cc',
'locator/topology.cc',
'locator/util.cc',
'service/client_state.cc',
'service/client_routes.cc',
'service/storage_service.cc',
'service/session.cc',
'service/task_manager_module.cc',
@@ -1319,8 +1317,6 @@ api = ['api/api.cc',
'api/storage_proxy.cc',
Json2Code('api/api-doc/cache_service.json'),
'api/cache_service.cc',
Json2Code('api/api-doc/client_routes.json'),
'api/client_routes.cc',
Json2Code('api/api-doc/collectd.json'),
'api/collectd.cc',
Json2Code('api/api-doc/endpoint_snitch_info.json'),
@@ -1483,6 +1479,7 @@ deps = {
pure_boost_tests = set([
'test/boost/anchorless_list_test',
'test/boost/auth_passwords_test',
'test/boost/auth_resource_test',
'test/boost/big_decimal_test',
'test/boost/caching_options_test',
@@ -2251,6 +2248,15 @@ def get_extra_cxxflags(mode, mode_config, cxx, debuginfo):
if debuginfo and mode_config['can_have_debug_info']:
cxxflags += ['-g', '-gz']
if 'clang' in cxx:
# Since AssignmentTracking was enabled by default in clang
# (llvm/llvm-project@de6da6ad55d3ca945195d1cb109cb8efdf40a52a)
# coroutine frame debugging info (`coro_frame_ty`) is broken.
#
# It seems that we aren't losing much by disabling AssigmentTracking,
# so for now we choose to disable it to get `coro_frame_ty` back.
cxxflags.append('-Xclang -fexperimental-assignment-tracking=disabled')
return cxxflags

View File

@@ -64,10 +64,6 @@ bool query_processor::topology_global_queue_empty() {
return remote().first.get().ss.topology_global_queue_empty();
}
future<bool> query_processor::ongoing_rf_change(const service::group0_guard& guard, sstring ks) {
return remote().first.get().ss.ongoing_rf_change(guard, std::move(ks));
}
static service::query_state query_state_for_internal_call() {
return {service::client_state::for_internal_calls(), empty_service_permit()};
}

View File

@@ -474,7 +474,6 @@ public:
void reset_cache();
bool topology_global_queue_empty();
future<bool> ongoing_rf_change(const service::group0_guard& guard, sstring ks);
query_options make_internal_options(
const statements::prepared_statement::checked_weak_ptr& p,

View File

@@ -1322,10 +1322,6 @@ const std::vector<expr::expression>& statement_restrictions::index_restrictions(
return _index_restrictions;
}
bool statement_restrictions::is_empty() const {
return !_where.has_value();
}
// Current score table:
// local and restrictions include full partition key: 2
// global: 1

View File

@@ -408,8 +408,6 @@ public:
/// Checks that the primary key restrictions don't contain null values, throws invalid_request_exception otherwise.
void validate_primary_key(const query_options& options) const;
bool is_empty() const;
};
statement_restrictions analyze_statement_restrictions(

View File

@@ -19,7 +19,6 @@
#include "locator/abstract_replication_strategy.hh"
#include "mutation/canonical_mutation.hh"
#include "prepared_statement.hh"
#include "seastar/coroutine/exception.hh"
#include "service/migration_manager.hh"
#include "service/storage_proxy.hh"
#include "service/topology_mutation.hh"
@@ -139,7 +138,6 @@ bool cql3::statements::alter_keyspace_statement::changes_tablets(query_processor
future<std::tuple<::shared_ptr<cql_transport::event::schema_change>, cql3::cql_warnings_vec>>
cql3::statements::alter_keyspace_statement::prepare_schema_mutations(query_processor& qp, service::query_state& state, const query_options& options, service::group0_batch& mc) const {
using namespace cql_transport;
bool unknown_keyspace = false;
try {
event::schema_change::target_type target_type = event::schema_change::target_type::KEYSPACE;
auto ks = qp.db().find_keyspace(_name);
@@ -160,12 +158,8 @@ cql3::statements::alter_keyspace_statement::prepare_schema_mutations(query_proce
// when in reality nothing or only schema is being changed
if (changes_tablets(qp)) {
if (!qp.proxy().features().topology_global_request_queue && !qp.topology_global_queue_empty()) {
co_await coroutine::return_exception(
exceptions::invalid_request_exception("Another global topology request is ongoing, please retry."));
}
if (qp.proxy().features().rack_list_rf && co_await qp.ongoing_rf_change(mc.guard(),_name)) {
co_await coroutine::return_exception(
exceptions::invalid_request_exception(format("Another RF change for this keyspace {} ongoing, please retry.", _name)));
return make_exception_future<std::tuple<::shared_ptr<::cql_transport::event::schema_change>, cql3::cql_warnings_vec>>(
exceptions::invalid_request_exception("Another global topology request is ongoing, please retry."));
}
qp.db().real_database().validate_keyspace_update(*ks_md_update);
@@ -248,15 +242,10 @@ cql3::statements::alter_keyspace_statement::prepare_schema_mutations(query_proce
target_type,
keyspace());
mc.add_mutations(std::move(muts), "CQL alter keyspace");
co_return std::make_tuple(std::move(ret), warnings);
return make_ready_future<std::tuple<::shared_ptr<cql_transport::event::schema_change>, cql3::cql_warnings_vec>>(std::make_tuple(std::move(ret), warnings));
} catch (data_dictionary::no_such_keyspace& e) {
unknown_keyspace = true;
return make_exception_future<std::tuple<::shared_ptr<cql_transport::event::schema_change>, cql3::cql_warnings_vec>>(exceptions::invalid_request_exception("Unknown keyspace " + _name));
}
if (unknown_keyspace) {
co_await coroutine::return_exception(
exceptions::invalid_request_exception("Unknown keyspace " + _name));
}
std::unreachable();
}
std::unique_ptr<cql3::statements::prepared_statement>

View File

@@ -61,7 +61,7 @@ expand_to_racks(const locator::token_metadata& tm,
// Handle ALTER:
// ([]|0) -> numeric is allowed, there are no existing replicas
// numeric -> numeric' is not supported unless numeric == numeric'. User should convert RF to rack list of equal count first.
// numeric -> numeric' is not supported. User should convert RF to rack list of equal count first.
// rack_list -> len(rack_list) is allowed (no-op)
// rack_list -> numeric is not allowed
if (old_options.contains(dc)) {
@@ -75,8 +75,6 @@ expand_to_racks(const locator::token_metadata& tm,
"Cannot change replication factor for '{}' from {} to numeric {}, use rack list instead",
dc, old_rf_val, data.count()));
}
} else if (old_rf.count() == data.count()) {
return rf;
} else if (old_rf.count() > 0) {
throw exceptions::configuration_exception(fmt::format(
"Cannot change replication factor for '{}' from {} to {}, only rack list is allowed",
@@ -155,8 +153,6 @@ static locator::replication_strategy_config_options prepare_options(
}
// Validate options.
bool numeric_to_rack_list_transition = false;
bool rf_change = false;
for (auto&& [dc, opt] : options) {
locator::replication_factor_data rf(opt);
@@ -166,7 +162,6 @@ static locator::replication_strategy_config_options prepare_options(
old_rf = locator::replication_factor_data(i->second);
}
rf_change = rf_change || (old_rf && old_rf->count() != rf.count()) || (!old_rf && rf.count() != 0);
if (!rf.is_rack_based()) {
if (old_rf && old_rf->is_rack_based() && rf.count() != 0) {
if (old_rf->count() != rf.count()) {
@@ -192,11 +187,12 @@ static locator::replication_strategy_config_options prepare_options(
throw exceptions::configuration_exception(fmt::format(
"Rack list for '{}' contains duplicate entries", dc));
}
numeric_to_rack_list_transition = numeric_to_rack_list_transition || (old_rf && !old_rf->is_rack_based() && old_rf->count() != 0);
}
if (numeric_to_rack_list_transition && rf_change) {
throw exceptions::configuration_exception("Cannot change replication factor from numeric to rack list and rf value at the same time");
if (old_rf && !old_rf->is_rack_based() && old_rf->count() != 0) {
// FIXME: Allow this if replicas already conform to the given rack list.
// FIXME: Implement automatic colocation to allow transition to rack list.
throw exceptions::configuration_exception(fmt::format(
"Cannot change replication factor from numeric to rack list for '{}'", dc));
}
}
if (!rf && options.empty() && old_options.empty()) {
@@ -416,7 +412,7 @@ lw_shared_ptr<data_dictionary::keyspace_metadata> ks_prop_defs::as_ks_metadata(s
? std::optional<unsigned>(0) : std::nullopt;
auto initial_tablets = get_initial_tablets(default_initial_tablets, cfg.enforce_tablets());
bool uses_tablets = initial_tablets.has_value();
bool rack_list_enabled = utils::get_local_injector().enter("create_with_numeric") ? false : feat.rack_list_rf;
bool rack_list_enabled = feat.rack_list_rf;
auto options = prepare_options(sc, tm, cfg.rf_rack_valid_keyspaces(), get_replication_options(), {}, rack_list_enabled, uses_tablets);
return data_dictionary::keyspace_metadata::new_keyspace(ks_name, sc,
std::move(options), initial_tablets, get_consistency_option(), get_boolean(KW_DURABLE_WRITES, true), get_storage_options());
@@ -432,7 +428,7 @@ lw_shared_ptr<data_dictionary::keyspace_metadata> ks_prop_defs::as_ks_metadata_u
throw exceptions::invalid_request_exception("Cannot alter replication strategy vnode/tablets flavor");
}
auto sc = get_replication_strategy_class();
bool rack_list_enabled = utils::get_local_injector().enter("create_with_numeric") ? false : feat.rack_list_rf;
bool rack_list_enabled = feat.rack_list_rf;
if (sc) {
options = prepare_options(*sc, tm, cfg.rf_rack_valid_keyspaces(), get_replication_options(), old_options, rack_list_enabled, uses_tablets);
} else {

View File

@@ -1976,7 +1976,7 @@ mutation_fragments_select_statement::do_execute(query_processor& qp, service::qu
if (it == indexes.end()) {
throw exceptions::invalid_request_exception("ANN ordering by vector requires the column to be indexed using 'vector_index'");
}
if (index_opt || parameters->allow_filtering() || !(restrictions->is_empty()) || check_needs_allow_filtering_anyway(*restrictions)) {
if (index_opt || parameters->allow_filtering() || restrictions->need_filtering() || check_needs_allow_filtering_anyway(*restrictions)) {
throw exceptions::invalid_request_exception("ANN ordering by vector does not support filtering");
}
index_opt = *it;

View File

@@ -42,11 +42,6 @@ table::get_index_manager() const {
return _ops->get_index_manager(*this);
}
db_clock::time_point
table::get_truncation_time() const {
return _ops->get_truncation_time(*this);
}
lw_shared_ptr<keyspace_metadata>
keyspace::metadata() const {
return _ops->get_keyspace_metadata(*this);

View File

@@ -77,7 +77,6 @@ public:
schema_ptr schema() const;
const std::vector<view_ptr>& views() const;
const secondary_index::secondary_index_manager& get_index_manager() const;
db_clock::time_point get_truncation_time() const;
};
class keyspace {

View File

@@ -27,7 +27,6 @@ public:
virtual std::optional<table> try_find_table(database db, table_id id) const = 0;
virtual const secondary_index::secondary_index_manager& get_index_manager(table t) const = 0;
virtual schema_ptr get_table_schema(table t) const = 0;
virtual db_clock::time_point get_truncation_time(table t) const = 0;
virtual lw_shared_ptr<keyspace_metadata> get_keyspace_metadata(keyspace ks) const = 0;
virtual bool is_internal(keyspace ks) const = 0;
virtual const locator::abstract_replication_strategy& get_replication_strategy(keyspace ks) const = 0;

View File

@@ -1,20 +0,0 @@
/*
* Copyright (C) 2025-present ScyllaDB
*/
/*
* SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.0 and Apache-2.0)
*/
#pragma once
#include "mutation/mutation.hh"
#include "utils/UUID.hh"
namespace db {
mutation get_batchlog_mutation_for(schema_ptr schema, const utils::chunked_vector<mutation>& mutations, int32_t version, db_clock::time_point now, const utils::UUID& id);
mutation get_batchlog_delete_mutation(schema_ptr schema, int32_t version, db_clock::time_point now, const utils::UUID& id);
}

View File

@@ -10,7 +10,6 @@
#include <chrono>
#include <exception>
#include <ranges>
#include <seastar/core/future-util.hh>
#include <seastar/core/do_with.hh>
#include <seastar/core/semaphore.hh>
@@ -19,14 +18,12 @@
#include <seastar/core/sleep.hh>
#include "batchlog_manager.hh"
#include "batchlog.hh"
#include "data_dictionary/data_dictionary.hh"
#include "mutation/canonical_mutation.hh"
#include "service/storage_proxy.hh"
#include "system_keyspace.hh"
#include "utils/rate_limiter.hh"
#include "utils/log.hh"
#include "utils/murmur_hash.hh"
#include "db_clock.hh"
#include "unimplemented.hh"
#include "idl/frozen_schema.dist.hh"
@@ -36,94 +33,17 @@
#include "cql3/untyped_result_set.hh"
#include "service_permit.hh"
#include "cql3/query_processor.hh"
#include "replica/database.hh"
static logging::logger blogger("batchlog_manager");
namespace db {
// Yields 256 batchlog shards. Even on the largest nodes we currently run on,
// this should be enough to give every core a batchlog partition.
static constexpr unsigned batchlog_shard_bits = 8;
int32_t batchlog_shard_of(db_clock::time_point written_at) {
const int64_t count = written_at.time_since_epoch().count();
std::array<uint64_t, 2> result;
utils::murmur_hash::hash3_x64_128(bytes_view(reinterpret_cast<const signed char*>(&count), sizeof(count)), 0, result);
uint64_t hash = result[0] ^ result[1];
return hash & ((1ULL << batchlog_shard_bits) - 1);
}
std::pair<partition_key, clustering_key>
get_batchlog_key(const schema& schema, int32_t version, db::batchlog_stage stage, int32_t batchlog_shard, db_clock::time_point written_at, std::optional<utils::UUID> id) {
auto pkey = partition_key::from_exploded(schema, {serialized(version), serialized(int8_t(stage)), serialized(batchlog_shard)});
std::vector<bytes> ckey_components;
ckey_components.reserve(2);
ckey_components.push_back(serialized(written_at));
if (id) {
ckey_components.push_back(serialized(*id));
}
auto ckey = clustering_key::from_exploded(schema, ckey_components);
return {std::move(pkey), std::move(ckey)};
}
std::pair<partition_key, clustering_key>
get_batchlog_key(const schema& schema, int32_t version, db::batchlog_stage stage, db_clock::time_point written_at, std::optional<utils::UUID> id) {
return get_batchlog_key(schema, version, stage, batchlog_shard_of(written_at), written_at, id);
}
mutation get_batchlog_mutation_for(schema_ptr schema, managed_bytes data, int32_t version, db::batchlog_stage stage, db_clock::time_point now, const utils::UUID& id) {
auto [key, ckey] = get_batchlog_key(*schema, version, stage, now, id);
auto timestamp = api::new_timestamp();
mutation m(schema, key);
// Avoid going through data_value and therefore `bytes`, as it can be large (#24809).
auto cdef_data = schema->get_column_definition(to_bytes("data"));
m.set_cell(ckey, *cdef_data, atomic_cell::make_live(*cdef_data->type, timestamp, std::move(data)));
return m;
}
mutation get_batchlog_mutation_for(schema_ptr schema, const utils::chunked_vector<mutation>& mutations, int32_t version, db::batchlog_stage stage, db_clock::time_point now, const utils::UUID& id) {
auto data = [&mutations] {
utils::chunked_vector<canonical_mutation> fm(mutations.begin(), mutations.end());
bytes_ostream out;
for (auto& m : fm) {
ser::serialize(out, m);
}
return std::move(out).to_managed_bytes();
}();
return get_batchlog_mutation_for(std::move(schema), std::move(data), version, stage, now, id);
}
mutation get_batchlog_mutation_for(schema_ptr schema, const utils::chunked_vector<mutation>& mutations, int32_t version, db_clock::time_point now, const utils::UUID& id) {
return get_batchlog_mutation_for(std::move(schema), mutations, version, batchlog_stage::initial, now, id);
}
mutation get_batchlog_delete_mutation(schema_ptr schema, int32_t version, db::batchlog_stage stage, db_clock::time_point now, const utils::UUID& id) {
auto [key, ckey] = get_batchlog_key(*schema, version, stage, now, id);
mutation m(schema, key);
auto timestamp = api::new_timestamp();
m.partition().apply_delete(*schema, ckey, tombstone(timestamp, gc_clock::now()));
return m;
}
mutation get_batchlog_delete_mutation(schema_ptr schema, int32_t version, db_clock::time_point now, const utils::UUID& id) {
return get_batchlog_delete_mutation(std::move(schema), version, batchlog_stage::initial, now, id);
}
} // namespace db
const std::chrono::seconds db::batchlog_manager::replay_interval;
const uint32_t db::batchlog_manager::page_size;
db::batchlog_manager::batchlog_manager(cql3::query_processor& qp, db::system_keyspace& sys_ks, batchlog_manager_config config)
: _qp(qp)
, _sys_ks(sys_ks)
, _replay_timeout(config.replay_timeout)
, _write_request_timeout(std::chrono::duration_cast<db_clock::duration>(config.write_request_timeout))
, _replay_rate(config.replay_rate)
, _delay(config.delay)
, _replay_cleanup_after_replays(config.replay_cleanup_after_replays)
@@ -232,75 +152,18 @@ future<> db::batchlog_manager::stop() {
}
future<size_t> db::batchlog_manager::count_all_batches() const {
sstring query = format("SELECT count(*) FROM {}.{} BYPASS CACHE", system_keyspace::NAME, system_keyspace::BATCHLOG_V2);
sstring query = format("SELECT count(*) FROM {}.{} BYPASS CACHE", system_keyspace::NAME, system_keyspace::BATCHLOG);
return _qp.execute_internal(query, cql3::query_processor::cache_internal::yes).then([](::shared_ptr<cql3::untyped_result_set> rs) {
return size_t(rs->one().get_as<int64_t>("count"));
});
}
future<> db::batchlog_manager::maybe_migrate_v1_to_v2() {
if (_migration_done) {
return make_ready_future<>();
}
return with_gate(_gate, [this] () mutable -> future<> {
blogger.info("Migrating batchlog entries from v1 -> v2");
auto schema_v1 = _qp.db().find_schema(system_keyspace::NAME, system_keyspace::BATCHLOG);
auto schema_v2 = _qp.db().find_schema(system_keyspace::NAME, system_keyspace::BATCHLOG_V2);
auto batch = [this, schema_v1, schema_v2] (const cql3::untyped_result_set::row& row) -> future<stop_iteration> {
// check version of serialization format
if (!row.has("version")) {
blogger.warn("Not migrating logged batch because of unknown version");
co_return stop_iteration::no;
}
auto version = row.get_as<int32_t>("version");
if (version != netw::messaging_service::current_version) {
blogger.warn("Not migrating logged batch because of incorrect version");
co_return stop_iteration::no;
}
auto id = row.get_as<utils::UUID>("id");
auto written_at = row.get_as<db_clock::time_point>("written_at");
auto data = row.get_blob_fragmented("data");
auto& sp = _qp.proxy();
utils::get_local_injector().inject("batchlog_manager_fail_migration", [] { throw std::runtime_error("Error injection: failing batchlog migration"); });
auto migrate_mut = get_batchlog_mutation_for(schema_v2, std::move(data), version, batchlog_stage::failed_replay, written_at, id);
co_await sp.mutate_locally(migrate_mut, tracing::trace_state_ptr(), db::commitlog::force_sync::no);
mutation delete_mut(schema_v1, partition_key::from_single_value(*schema_v1, serialized(id)));
delete_mut.partition().apply_delete(*schema_v1, clustering_key_prefix::make_empty(), tombstone(api::new_timestamp(), gc_clock::now()));
co_await sp.mutate_locally(delete_mut, tracing::trace_state_ptr(), db::commitlog::force_sync::no);
co_return stop_iteration::no;
};
try {
co_await _qp.query_internal(
format("SELECT * FROM {}.{} BYPASS CACHE", system_keyspace::NAME, system_keyspace::BATCHLOG),
db::consistency_level::ONE,
{},
page_size,
std::move(batch));
} catch (...) {
blogger.warn("Batchlog v1 to v2 migration failed: {}; will retry", std::current_exception());
co_return;
}
co_await container().invoke_on_all([] (auto& bm) {
bm._migration_done = true;
});
blogger.info("Done migrating batchlog entries from v1 -> v2");
});
db_clock::duration db::batchlog_manager::get_batch_log_timeout() const {
// enough time for the actual write + BM removal mutation
return _write_request_timeout * 2;
}
future<db::all_batches_replayed> db::batchlog_manager::replay_all_failed_batches(post_replay_cleanup cleanup) {
co_await maybe_migrate_v1_to_v2();
typedef db_clock::rep clock_type;
db::all_batches_replayed all_replayed = all_batches_replayed::yes;
@@ -309,26 +172,21 @@ future<db::all_batches_replayed> db::batchlog_manager::replay_all_failed_batches
auto throttle = _replay_rate / _qp.proxy().get_token_metadata_ptr()->count_normal_token_owners();
auto limiter = make_lw_shared<utils::rate_limiter>(throttle);
auto schema = _qp.db().find_schema(system_keyspace::NAME, system_keyspace::BATCHLOG_V2);
struct replay_stats {
std::optional<db_clock::time_point> min_too_fresh;
bool need_cleanup = false;
auto schema = _qp.db().find_schema(system_keyspace::NAME, system_keyspace::BATCHLOG);
auto delete_batch = [this, schema = std::move(schema)] (utils::UUID id) {
auto key = partition_key::from_singular(*schema, id);
mutation m(schema, key);
auto now = service::client_state(service::client_state::internal_tag()).get_timestamp();
m.partition().apply_delete(*schema, clustering_key_prefix::make_empty(), tombstone(now, gc_clock::now()));
return _qp.proxy().mutate_locally(m, tracing::trace_state_ptr(), db::commitlog::force_sync::no);
};
std::unordered_map<int32_t, replay_stats> replay_stats_per_shard;
// Use a stable `now` accross all batches, so skip/replay decisions are the
// same accross a while prefix of written_at (accross all ids).
const auto now = db_clock::now();
auto batch = [this, cleanup, limiter, schema, &all_replayed, &replay_stats_per_shard, now] (const cql3::untyped_result_set::row& row) -> future<stop_iteration> {
const auto stage = static_cast<batchlog_stage>(row.get_as<int8_t>("stage"));
const auto batch_shard = row.get_as<int32_t>("shard");
auto batch = [this, limiter, delete_batch = std::move(delete_batch), &all_replayed](const cql3::untyped_result_set::row& row) -> future<stop_iteration> {
auto written_at = row.get_as<db_clock::time_point>("written_at");
auto id = row.get_as<utils::UUID>("id");
// enough time for the actual write + batchlog entry mutation delivery (two separate requests).
auto timeout = _replay_timeout;
auto now = db_clock::now();
auto timeout = get_batch_log_timeout();
if (utils::get_local_injector().is_enabled("skip_batch_replay")) {
blogger.debug("Skipping batch replay due to skip_batch_replay injection");
@@ -336,48 +194,52 @@ future<db::all_batches_replayed> db::batchlog_manager::replay_all_failed_batches
co_return stop_iteration::no;
}
// check version of serialization format
if (!row.has("version")) {
blogger.warn("Skipping logged batch because of unknown version");
co_await delete_batch(id);
co_return stop_iteration::no;
}
auto version = row.get_as<int32_t>("version");
if (version != netw::messaging_service::current_version) {
blogger.warn("Skipping logged batch because of incorrect version {}; current version = {}", version, netw::messaging_service::current_version);
co_await delete_batch(id);
co_return stop_iteration::no;
}
auto data = row.get_blob_unfragmented("data");
blogger.debug("Replaying batch {} from stage {} and batch shard {}", id, int32_t(stage), batch_shard);
utils::chunked_vector<mutation> mutations;
bool send_failed = false;
auto& shard_written_at = replay_stats_per_shard.try_emplace(batch_shard, replay_stats{}).first->second;
blogger.debug("Replaying batch {}", id);
try {
utils::chunked_vector<std::pair<canonical_mutation, schema_ptr>> fms;
auto fms = make_lw_shared<std::deque<canonical_mutation>>();
auto in = ser::as_input_stream(data);
while (in.size()) {
auto fm = ser::deserialize(in, std::type_identity<canonical_mutation>());
const auto tbl = _qp.db().try_find_table(fm.column_family_id());
if (!tbl) {
continue;
}
if (written_at <= tbl->get_truncation_time()) {
continue;
}
schema_ptr s = tbl->schema();
if (s->tombstone_gc_options().mode() == tombstone_gc_mode::repair) {
timeout = std::min(timeout, std::chrono::duration_cast<db_clock::duration>(s->tombstone_gc_options().propagation_delay_in_seconds()));
}
fms.emplace_back(std::move(fm), std::move(s));
fms->emplace_back(ser::deserialize(in, std::type_identity<canonical_mutation>()));
schema_ptr s = _qp.db().find_schema(fms->back().column_family_id());
timeout = std::min(timeout, std::chrono::duration_cast<db_clock::duration>(s->tombstone_gc_options().propagation_delay_in_seconds()));
}
if (now < written_at + timeout) {
blogger.debug("Skipping replay of {}, too fresh", id);
shard_written_at.min_too_fresh = std::min(shard_written_at.min_too_fresh.value_or(written_at), written_at);
co_return stop_iteration::no;
}
auto size = data.size();
for (const auto& [fm, s] : fms) {
mutations.emplace_back(fm.to_mutation(s));
co_await maybe_yield();
}
auto mutations = co_await map_reduce(*fms, [this, written_at] (canonical_mutation& fm) {
const auto& cf = _qp.proxy().local_db().find_column_family(fm.column_family_id());
return make_ready_future<canonical_mutation*>(written_at > cf.get_truncation_time() ? &fm : nullptr);
},
utils::chunked_vector<mutation>(),
[this] (utils::chunked_vector<mutation> mutations, canonical_mutation* fm) {
if (fm) {
schema_ptr s = _qp.db().find_schema(fm->column_family_id());
mutations.emplace_back(fm->to_mutation(s));
}
return mutations;
});
if (!mutations.empty()) {
const auto ttl = [written_at]() -> clock_type {
@@ -403,11 +265,7 @@ future<db::all_batches_replayed> db::batchlog_manager::replay_all_failed_batches
co_await limiter->reserve(size);
_stats.write_attempts += mutations.size();
auto timeout = db::timeout_clock::now() + write_timeout;
if (cleanup) {
co_await _qp.proxy().send_batchlog_replay_to_all_replicas(mutations, timeout);
} else {
co_await _qp.proxy().send_batchlog_replay_to_all_replicas(std::move(mutations), timeout);
}
co_await _qp.proxy().send_batchlog_replay_to_all_replicas(std::move(mutations), timeout);
}
}
} catch (data_dictionary::no_such_keyspace& ex) {
@@ -421,80 +279,31 @@ future<db::all_batches_replayed> db::batchlog_manager::replay_all_failed_batches
// Do _not_ remove the batch, assuning we got a node write error.
// Since we don't have hints (which origin is satisfied with),
// we have to resort to keeping this batch to next lap.
if (!cleanup || stage == batchlog_stage::failed_replay) {
co_return stop_iteration::no;
}
send_failed = true;
co_return stop_iteration::no;
}
auto& sp = _qp.proxy();
if (send_failed) {
blogger.debug("Moving batch {} to stage failed_replay", id);
auto m = get_batchlog_mutation_for(schema, mutations, netw::messaging_service::current_version, batchlog_stage::failed_replay, written_at, id);
co_await sp.mutate_locally(m, tracing::trace_state_ptr(), db::commitlog::force_sync::no);
}
// delete batch
auto m = get_batchlog_delete_mutation(schema, netw::messaging_service::current_version, stage, written_at, id);
co_await _qp.proxy().mutate_locally(m, tracing::trace_state_ptr(), db::commitlog::force_sync::no);
shard_written_at.need_cleanup = true;
co_await delete_batch(id);
co_return stop_iteration::no;
};
co_await with_gate(_gate, [this, cleanup, &all_replayed, batch = std::move(batch), now, &replay_stats_per_shard] () mutable -> future<> {
blogger.debug("Started replayAllFailedBatches with cleanup: {}", cleanup);
co_await with_gate(_gate, [this, cleanup, batch = std::move(batch)] () mutable -> future<> {
blogger.debug("Started replayAllFailedBatches (cpu {})", this_shard_id());
co_await utils::get_local_injector().inject("add_delay_to_batch_replay", std::chrono::milliseconds(1000));
auto schema = _qp.db().find_schema(system_keyspace::NAME, system_keyspace::BATCHLOG_V2);
co_await coroutine::parallel_for_each(std::views::iota(0, 16), [&] (int32_t chunk) -> future<> {
const int32_t batchlog_chunk_base = chunk * 16;
for (int32_t i = 0; i < 16; ++i) {
int32_t batchlog_shard = batchlog_chunk_base + i;
co_await _qp.query_internal(
format("SELECT * FROM {}.{} WHERE version = ? AND stage = ? AND shard = ? BYPASS CACHE", system_keyspace::NAME, system_keyspace::BATCHLOG_V2),
db::consistency_level::ONE,
{data_value(netw::messaging_service::current_version), data_value(int8_t(batchlog_stage::failed_replay)), data_value(batchlog_shard)},
page_size,
batch);
co_await _qp.query_internal(
format("SELECT * FROM {}.{} WHERE version = ? AND stage = ? AND shard = ? BYPASS CACHE", system_keyspace::NAME, system_keyspace::BATCHLOG_V2),
db::consistency_level::ONE,
{data_value(netw::messaging_service::current_version), data_value(int8_t(batchlog_stage::initial)), data_value(batchlog_shard)},
page_size,
batch);
if (cleanup != post_replay_cleanup::yes) {
continue;
}
auto it = replay_stats_per_shard.find(batchlog_shard);
if (it == replay_stats_per_shard.end() || !it->second.need_cleanup) {
// Nothing was replayed on this batchlog shard, nothing to cleanup.
continue;
}
const auto write_time = it->second.min_too_fresh.value_or(now - _replay_timeout);
const auto end_weight = it->second.min_too_fresh ? bound_weight::before_all_prefixed : bound_weight::after_all_prefixed;
auto [key, ckey] = get_batchlog_key(*schema, netw::messaging_service::current_version, batchlog_stage::initial, batchlog_shard, write_time, {});
auto end_pos = position_in_partition(partition_region::clustered, end_weight, std::move(ckey));
range_tombstone rt(position_in_partition::before_all_clustered_rows(), std::move(end_pos), tombstone(api::new_timestamp(), gc_clock::now()));
blogger.trace("Clean up batchlog shard {} with range tombstone {}", batchlog_shard, rt);
mutation m(schema, key);
m.partition().apply_row_tombstone(*schema, std::move(rt));
co_await _qp.proxy().mutate_locally(m, tracing::trace_state_ptr(), db::commitlog::force_sync::no);
co_await _qp.query_internal(
format("SELECT id, data, written_at, version FROM {}.{} BYPASS CACHE", system_keyspace::NAME, system_keyspace::BATCHLOG),
db::consistency_level::ONE,
{},
page_size,
std::move(batch)).then([this, cleanup] {
if (cleanup == post_replay_cleanup::no) {
return make_ready_future<>();
}
// Replaying batches could have generated tombstones, flush to disk,
// where they can be compacted away.
return replica::database::flush_table_on_all_shards(_qp.proxy().get_db(), system_keyspace::NAME, system_keyspace::BATCHLOG);
}).then([] {
blogger.debug("Finished replayAllFailedBatches");
});
blogger.debug("Finished replayAllFailedBatches with all_replayed: {}", all_replayed);
});
co_return all_replayed;

View File

@@ -34,17 +34,12 @@ class system_keyspace;
using all_batches_replayed = bool_class<struct all_batches_replayed_tag>;
struct batchlog_manager_config {
db_clock::duration replay_timeout;
std::chrono::duration<double> write_request_timeout;
uint64_t replay_rate = std::numeric_limits<uint64_t>::max();
std::chrono::milliseconds delay = std::chrono::milliseconds(0);
unsigned replay_cleanup_after_replays;
};
enum class batchlog_stage : int8_t {
initial,
failed_replay
};
class batchlog_manager : public peering_sharded_service<batchlog_manager> {
public:
using post_replay_cleanup = bool_class<class post_replay_cleanup_tag>;
@@ -64,7 +59,7 @@ private:
cql3::query_processor& _qp;
db::system_keyspace& _sys_ks;
db_clock::duration _replay_timeout;
db_clock::duration _write_request_timeout;
uint64_t _replay_rate;
std::chrono::milliseconds _delay;
unsigned _replay_cleanup_after_replays = 100;
@@ -76,14 +71,6 @@ private:
gc_clock::time_point _last_replay;
// Was the v1 -> v2 migration already done since last restart?
// The migration is attempted once after each restart. This is redundant but
// keeps thing simple. Once no upgrade path exists from a ScyllaDB version
// which can still produce v1 entries, this migration code can be removed.
bool _migration_done = false;
future<> maybe_migrate_v1_to_v2();
future<all_batches_replayed> replay_all_failed_batches(post_replay_cleanup cleanup);
public:
// Takes a QP, not a distributes. Because this object is supposed
@@ -98,13 +85,10 @@ public:
future<all_batches_replayed> do_batch_log_replay(post_replay_cleanup cleanup);
future<size_t> count_all_batches() const;
db_clock::duration get_batch_log_timeout() const;
gc_clock::time_point get_last_replay() const {
return _last_replay;
}
const stats& stats() const {
return _stats;
}
private:
future<> batchlog_replay_loop();
};

View File

@@ -54,14 +54,12 @@ public:
uint64_t applied_mutations = 0;
uint64_t corrupt_bytes = 0;
uint64_t truncated_at = 0;
uint64_t broken_files = 0;
stats& operator+=(const stats& s) {
invalid_mutations += s.invalid_mutations;
skipped_mutations += s.skipped_mutations;
applied_mutations += s.applied_mutations;
corrupt_bytes += s.corrupt_bytes;
broken_files += s.broken_files;
return *this;
}
stats operator+(const stats& s) const {
@@ -194,8 +192,6 @@ db::commitlog_replayer::impl::recover(const commitlog::descriptor& d, const comm
s->corrupt_bytes += e.bytes();
} catch (commitlog::segment_truncation& e) {
s->truncated_at = e.position();
} catch (commitlog::header_checksum_error&) {
++s->broken_files;
} catch (...) {
throw;
}
@@ -374,9 +370,6 @@ future<> db::commitlog_replayer::recover(std::vector<sstring> files, sstring fna
if (stats.truncated_at != 0) {
rlogger.warn("Truncated file: {} at position {}.", f, stats.truncated_at);
}
if (stats.broken_files != 0) {
rlogger.warn("Corrupted file header: {}. Skipped.", f);
}
rlogger.debug("Log replay of {} complete, {} replayed mutations ({} invalid, {} skipped)"
, f
, stats.applied_mutations

View File

@@ -1152,7 +1152,7 @@ db::config::config(std::shared_ptr<db::extensions> exts)
"Number of threads with which to deliver hints. In multiple data-center deployments, consider increasing this number because cross data-center handoff is generally slower.")
, batchlog_replay_throttle_in_kb(this, "batchlog_replay_throttle_in_kb", value_status::Unused, 1024,
"Total maximum throttle. Throttling is reduced proportionally to the number of nodes in the cluster.")
, batchlog_replay_cleanup_after_replays(this, "batchlog_replay_cleanup_after_replays", liveness::LiveUpdate, value_status::Used, 1,
, batchlog_replay_cleanup_after_replays(this, "batchlog_replay_cleanup_after_replays", liveness::LiveUpdate, value_status::Used, 60,
"Clean up batchlog memtable after every N replays. Replays are issued on a timer, every 60 seconds. So if batchlog_replay_cleanup_after_replays is set to 60, the batchlog memtable is flushed every 60 * 60 seconds.")
/**
* @Group Request scheduler properties

View File

@@ -248,7 +248,7 @@ future<db::commitlog> hint_endpoint_manager::add_store() noexcept {
// which is larger than the segment ID of the RP of the last written hint.
cfg.base_segment_id = _last_written_rp.base_id();
return commitlog::create_commitlog(std::move(cfg)).then([this] (this auto, commitlog l) -> future<commitlog> {
return commitlog::create_commitlog(std::move(cfg)).then([this] (commitlog l) -> future<commitlog> {
// add_store() is triggered every time hint files are forcefully flushed to I/O (every hints_flush_period).
// When this happens we want to refill _sender's segments only if it has finished with the segments he had before.
if (_sender.have_segments()) {

View File

@@ -135,5 +135,5 @@ const std::string db::object_storage_endpoint_param::gs_type = "gs";
auto fmt::formatter<db::object_storage_endpoint_param>::format(const db::object_storage_endpoint_param& e, fmt::format_context& ctx) const
-> decltype(ctx.out()) {
return fmt::format_to(ctx.out(), "object_storage_endpoint_param{}", e.to_json_string());
return fmt::format_to(ctx.out(), "object_storage_endpoint_param{{}}", e.to_json_string());
}

View File

@@ -1262,9 +1262,16 @@ static future<> do_merge_schema(sharded<service::storage_proxy>& proxy, sharded
{
slogger.trace("do_merge_schema: {}", mutations);
schema_applier ap(proxy, ss, sys_ks, reload);
co_await execute_do_merge_schema(proxy, ap, std::move(mutations)).finally([&ap]() {
return ap.destroy();
});
std::exception_ptr ex;
try {
co_await execute_do_merge_schema(proxy, ap, std::move(mutations));
} catch (...) {
ex = std::current_exception();
}
co_await ap.destroy();
if (ex) {
throw ex;
}
}
/**

View File

@@ -65,7 +65,7 @@ future<> snapshot_ctl::run_snapshot_modify_operation(noncopyable_function<future
});
}
future<> snapshot_ctl::take_snapshot(sstring tag, std::vector<sstring> keyspace_names, skip_flush sf) {
future<> snapshot_ctl::take_snapshot(sstring tag, std::vector<sstring> keyspace_names, snapshot_options opts) {
if (tag.empty()) {
throw std::runtime_error("You must supply a snapshot name.");
}
@@ -74,21 +74,21 @@ future<> snapshot_ctl::take_snapshot(sstring tag, std::vector<sstring> keyspace_
std::ranges::copy(_db.local().get_keyspaces() | std::views::keys, std::back_inserter(keyspace_names));
};
return run_snapshot_modify_operation([tag = std::move(tag), keyspace_names = std::move(keyspace_names), sf, this] () mutable {
return do_take_snapshot(std::move(tag), std::move(keyspace_names), sf);
return run_snapshot_modify_operation([tag = std::move(tag), keyspace_names = std::move(keyspace_names), opts, this] () mutable {
return do_take_snapshot(std::move(tag), std::move(keyspace_names), opts);
});
}
future<> snapshot_ctl::do_take_snapshot(sstring tag, std::vector<sstring> keyspace_names, skip_flush sf) {
future<> snapshot_ctl::do_take_snapshot(sstring tag, std::vector<sstring> keyspace_names, snapshot_options opts) {
co_await coroutine::parallel_for_each(keyspace_names, [tag, this] (const auto& ks_name) {
return check_snapshot_not_exist(ks_name, tag);
});
co_await coroutine::parallel_for_each(keyspace_names, [this, tag = std::move(tag), sf] (const auto& ks_name) {
return replica::database::snapshot_keyspace_on_all_shards(_db, ks_name, tag, bool(sf));
co_await coroutine::parallel_for_each(keyspace_names, [this, tag = std::move(tag), opts] (const auto& ks_name) {
return replica::database::snapshot_keyspace_on_all_shards(_db, ks_name, tag, opts);
});
}
future<> snapshot_ctl::take_column_family_snapshot(sstring ks_name, std::vector<sstring> tables, sstring tag, skip_flush sf) {
future<> snapshot_ctl::take_column_family_snapshot(sstring ks_name, std::vector<sstring> tables, sstring tag, snapshot_options opts) {
if (ks_name.empty()) {
throw std::runtime_error("You must supply a keyspace name");
}
@@ -99,14 +99,14 @@ future<> snapshot_ctl::take_column_family_snapshot(sstring ks_name, std::vector<
throw std::runtime_error("You must supply a snapshot name.");
}
return run_snapshot_modify_operation([this, ks_name = std::move(ks_name), tables = std::move(tables), tag = std::move(tag), sf] () mutable {
return do_take_column_family_snapshot(std::move(ks_name), std::move(tables), std::move(tag), sf);
return run_snapshot_modify_operation([this, ks_name = std::move(ks_name), tables = std::move(tables), tag = std::move(tag), opts] () mutable {
return do_take_column_family_snapshot(std::move(ks_name), std::move(tables), std::move(tag), opts);
});
}
future<> snapshot_ctl::do_take_column_family_snapshot(sstring ks_name, std::vector<sstring> tables, sstring tag, skip_flush sf) {
future<> snapshot_ctl::do_take_column_family_snapshot(sstring ks_name, std::vector<sstring> tables, sstring tag, snapshot_options opts) {
co_await check_snapshot_not_exist(ks_name, tag, tables);
co_await replica::database::snapshot_tables_on_all_shards(_db, ks_name, std::move(tables), std::move(tag), bool(sf));
co_await replica::database::snapshot_tables_on_all_shards(_db, ks_name, std::move(tables), std::move(tag), opts);
}
future<> snapshot_ctl::clear_snapshot(sstring tag, std::vector<sstring> keyspace_names, sstring cf_name) {

View File

@@ -38,10 +38,13 @@ class backup_task_impl;
} // snapshot namespace
struct snapshot_options {
bool skip_flush = false;
bool use_sstable_identifier = false;
};
class snapshot_ctl : public peering_sharded_service<snapshot_ctl> {
public:
using skip_flush = bool_class<class skip_flush_tag>;
struct table_snapshot_details {
int64_t total;
int64_t live;
@@ -70,8 +73,8 @@ public:
*
* @param tag the tag given to the snapshot; may not be null or empty
*/
future<> take_snapshot(sstring tag, skip_flush sf = skip_flush::no) {
return take_snapshot(tag, {}, sf);
future<> take_snapshot(sstring tag, snapshot_options opts = {}) {
return take_snapshot(tag, {}, opts);
}
/**
@@ -80,7 +83,7 @@ public:
* @param tag the tag given to the snapshot; may not be null or empty
* @param keyspace_names the names of the keyspaces to snapshot; empty means "all"
*/
future<> take_snapshot(sstring tag, std::vector<sstring> keyspace_names, skip_flush sf = skip_flush::no);
future<> take_snapshot(sstring tag, std::vector<sstring> keyspace_names, snapshot_options opts = {});
/**
* Takes the snapshot of multiple tables. A snapshot name must be specified.
@@ -89,7 +92,7 @@ public:
* @param tables a vector of tables names to snapshot
* @param tag the tag given to the snapshot; may not be null or empty
*/
future<> take_column_family_snapshot(sstring ks_name, std::vector<sstring> tables, sstring tag, skip_flush sf = skip_flush::no);
future<> take_column_family_snapshot(sstring ks_name, std::vector<sstring> tables, sstring tag, snapshot_options opts = {});
/**
* Remove the snapshot with the given name from the given keyspaces.
@@ -127,8 +130,8 @@ private:
friend class snapshot::backup_task_impl;
future<> do_take_snapshot(sstring tag, std::vector<sstring> keyspace_names, skip_flush sf = skip_flush::no);
future<> do_take_column_family_snapshot(sstring ks_name, std::vector<sstring> tables, sstring tag, skip_flush sf = skip_flush::no);
future<> do_take_snapshot(sstring tag, std::vector<sstring> keyspace_names, snapshot_options opts = {} );
future<> do_take_column_family_snapshot(sstring ks_name, std::vector<sstring> tables, sstring tag, snapshot_options opts = {});
};
}

View File

@@ -110,7 +110,6 @@ namespace {
system_keyspace::v3::CDC_LOCAL,
system_keyspace::DICTS,
system_keyspace::VIEW_BUILDING_TASKS,
system_keyspace::CLIENT_ROUTES,
};
if (ks_name == system_keyspace::NAME && tables.contains(cf_name)) {
props.enable_schema_commitlog();
@@ -138,7 +137,8 @@ namespace {
system_keyspace::ROLE_PERMISSIONS,
system_keyspace::DICTS,
system_keyspace::VIEW_BUILDING_TASKS,
system_keyspace::CLIENT_ROUTES,
// repair tasks
system_keyspace::REPAIR_TASKS,
};
if (ks_name == system_keyspace::NAME && tables.contains(cf_name)) {
props.is_group0_table = true;
@@ -215,30 +215,6 @@ schema_ptr system_keyspace::batchlog() {
return batchlog;
}
schema_ptr system_keyspace::batchlog_v2() {
static thread_local auto batchlog_v2 = [] {
schema_builder builder(generate_legacy_id(NAME, BATCHLOG_V2), NAME, BATCHLOG_V2,
// partition key
{{"version", int32_type}, {"stage", byte_type}, {"shard", int32_type}},
// clustering key
{{"written_at", timestamp_type}, {"id", uuid_type}},
// regular columns
{{"data", bytes_type}},
// static columns
{},
// regular column name type
utf8_type,
// comment
"batches awaiting replay"
);
builder.set_gc_grace_seconds(0);
builder.set_caching_options(caching_options::get_disabled_caching_options());
builder.with_hash_version();
return builder.build(schema_builder::compact_storage::no);
}();
return batchlog_v2;
}
/*static*/ schema_ptr system_keyspace::paxos() {
static thread_local auto paxos = [] {
// FIXME: switch to the new schema_builder interface (with_column(...), etc)
@@ -311,7 +287,6 @@ schema_ptr system_keyspace::topology() {
.with_column("tablet_balancing_enabled", boolean_type, column_kind::static_column)
.with_column("upgrade_state", utf8_type, column_kind::static_column)
.with_column("global_requests", set_type_impl::get_instance(timeuuid_type, true), column_kind::static_column)
.with_column("paused_rf_change_requests", set_type_impl::get_instance(timeuuid_type, true), column_kind::static_column)
.set_comment("Current state of topology change machine")
.with_hash_version()
.build();
@@ -489,6 +464,24 @@ schema_ptr system_keyspace::repair_history() {
return schema;
}
schema_ptr system_keyspace::repair_tasks() {
static thread_local auto schema = [] {
auto id = generate_legacy_id(NAME, REPAIR_TASKS);
return schema_builder(NAME, REPAIR_TASKS, std::optional(id))
.with_column("task_uuid", uuid_type, column_kind::partition_key)
.with_column("operation", utf8_type, column_kind::clustering_key)
// First and last token for of the tablet
.with_column("first_token", long_type, column_kind::clustering_key)
.with_column("last_token", long_type, column_kind::clustering_key)
.with_column("timestamp", timestamp_type)
.with_column("table_uuid", uuid_type, column_kind::static_column)
.set_comment("Record tablet repair tasks")
.with_hash_version()
.build();
}();
return schema;
}
schema_ptr system_keyspace::built_indexes() {
static thread_local auto built_indexes = [] {
schema_builder builder(generate_legacy_id(NAME, BUILT_INDEXES), NAME, BUILT_INDEXES,
@@ -1418,23 +1411,6 @@ schema_ptr system_keyspace::view_building_tasks() {
return schema;
}
schema_ptr system_keyspace::client_routes() {
static thread_local auto schema = [] {
auto id = generate_legacy_id(NAME, CLIENT_ROUTES);
return schema_builder(NAME, CLIENT_ROUTES, std::make_optional(id))
.with_column("connection_id", utf8_type, column_kind::partition_key)
.with_column("host_id", uuid_type, column_kind::clustering_key)
.with_column("address", utf8_type)
.with_column("port", int32_type)
.with_column("tls_port", int32_type)
.with_column("alternator_port", int32_type)
.with_column("alternator_https_port", int32_type)
.with_hash_version()
.build();
}();
return schema;
}
future<system_keyspace::local_info> system_keyspace::load_local_info() {
auto msg = co_await execute_cql(format("SELECT host_id, cluster_name, data_center, rack FROM system.{} WHERE key=?", LOCAL), sstring(LOCAL));
@@ -2348,13 +2324,14 @@ std::vector<schema_ptr> system_keyspace::all_tables(const db::config& cfg) {
std::copy(schema_tables.begin(), schema_tables.end(), std::back_inserter(r));
auto auth_tables = system_keyspace::auth_tables();
std::copy(auth_tables.begin(), auth_tables.end(), std::back_inserter(r));
r.insert(r.end(), { built_indexes(), hints(), batchlog(), batchlog_v2(), paxos(), local(),
r.insert(r.end(), { built_indexes(), hints(), batchlog(), paxos(), local(),
peers(), peer_events(), range_xfers(),
compactions_in_progress(), compaction_history(),
sstable_activity(), size_estimates(), large_partitions(), large_rows(), large_cells(),
corrupt_data(),
scylla_local(), db::schema_tables::scylla_table_schema_history(),
repair_history(),
repair_tasks(),
v3::views_builds_in_progress(), v3::built_views(),
v3::scylla_views_builds_in_progress(),
v3::truncated(),
@@ -2362,7 +2339,7 @@ std::vector<schema_ptr> system_keyspace::all_tables(const db::config& cfg) {
v3::cdc_local(),
raft(), raft_snapshots(), raft_snapshot_config(), group0_history(), discovery(),
topology(), cdc_generations_v3(), topology_requests(), service_levels_v2(), view_build_status_v2(),
dicts(), view_building_tasks(), client_routes(), cdc_streams_state(), cdc_streams_history()
dicts(), view_building_tasks(), cdc_streams_state(), cdc_streams_history()
});
if (cfg.check_experimental(db::experimental_features_t::feature::BROADCAST_TABLES)) {
@@ -2379,9 +2356,7 @@ std::vector<schema_ptr> system_keyspace::all_tables(const db::config& cfg) {
}
static bool maybe_write_in_user_memory(schema_ptr s) {
return (s.get() == system_keyspace::batchlog().get())
|| (s.get() == system_keyspace::batchlog_v2().get())
|| (s.get() == system_keyspace::paxos().get())
return (s.get() == system_keyspace::batchlog().get()) || (s.get() == system_keyspace::paxos().get())
|| s == system_keyspace::v3::scylla_views_builds_in_progress();
}
@@ -2598,6 +2573,32 @@ future<> system_keyspace::get_repair_history(::table_id table_id, repair_history
});
}
future<utils::chunked_vector<canonical_mutation>> system_keyspace::get_update_repair_task_mutations(const repair_task_entry& entry, api::timestamp_type ts) {
// Default to timeout the repair task entries in 10 days, this should be enough time for the management tools to query
constexpr int ttl = 10 * 24 * 3600;
sstring req = format("INSERT INTO system.{} (task_uuid, operation, first_token, last_token, timestamp, table_uuid) VALUES (?, ?, ?, ?, ?, ?) USING TTL {}", REPAIR_TASKS, ttl);
auto muts = co_await _qp.get_mutations_internal(req, internal_system_query_state(), ts,
{entry.task_uuid.uuid(), repair_task_operation_to_string(entry.operation),
entry.first_token, entry.last_token, entry.timestamp, entry.table_uuid.uuid()});
utils::chunked_vector<canonical_mutation> cmuts = {muts.begin(), muts.end()};
co_return cmuts;
}
future<> system_keyspace::get_repair_task(tasks::task_id task_uuid, repair_task_consumer f) {
sstring req = format("SELECT * from system.{} WHERE task_uuid = {}", REPAIR_TASKS, task_uuid);
co_await _qp.query_internal(req, [&f] (const cql3::untyped_result_set::row& row) mutable -> future<stop_iteration> {
repair_task_entry ent;
ent.task_uuid = tasks::task_id(row.get_as<utils::UUID>("task_uuid"));
ent.operation = repair_task_operation_from_string(row.get_as<sstring>("operation"));
ent.first_token = row.get_as<int64_t>("first_token");
ent.last_token = row.get_as<int64_t>("last_token");
ent.timestamp = row.get_as<db_clock::time_point>("timestamp");
ent.table_uuid = ::table_id(row.get_as<utils::UUID>("table_uuid"));
co_await f(std::move(ent));
co_return stop_iteration::no;
});
}
future<gms::generation_type> system_keyspace::increment_and_get_generation() {
auto req = format("SELECT gossip_generation FROM system.{} WHERE key='{}'", LOCAL, LOCAL);
auto rs = co_await _qp.execute_internal(req, cql3::query_processor::cache_internal::yes);
@@ -3157,10 +3158,7 @@ static bool must_have_tokens(service::node_state nst) {
// A decommissioning node doesn't have tokens at the end, they are
// removed during transition to the left_token_ring state.
case service::node_state::decommissioning: return false;
// A removing node might or might not have tokens depending on whether
// REMOVENODE_WITH_LEFT_TOKEN_RING feature is enabled. To support both
// cases, we allow removing nodes to not have tokens.
case service::node_state::removing: return false;
case service::node_state::removing: return true;
case service::node_state::rebuilding: return true;
case service::node_state::normal: return true;
case service::node_state::left: return false;
@@ -3400,12 +3398,6 @@ future<service::topology> system_keyspace::load_topology_state(const std::unorde
}
}
if (some_row.has("paused_rf_change_requests")) {
for (auto&& v : deserialize_set_column(*topology(), some_row, "paused_rf_change_requests")) {
ret.paused_rf_change_requests.insert(value_cast<utils::UUID>(v));
}
}
if (some_row.has("enabled_features")) {
ret.enabled_features = decode_features(deserialize_set_column(*topology(), some_row, "enabled_features"));
}
@@ -3617,43 +3609,35 @@ system_keyspace::topology_requests_entry system_keyspace::topology_request_row_t
return entry;
}
future<system_keyspace::topology_requests_entry> system_keyspace::get_topology_request_entry(utils::UUID id) {
auto r = co_await get_topology_request_entry_opt(id);
if (!r) {
on_internal_error(slogger, format("no entry for request id {}", id));
}
co_return std::move(*r);
}
future<std::optional<system_keyspace::topology_requests_entry>> system_keyspace::get_topology_request_entry_opt(utils::UUID id) {
future<system_keyspace::topology_requests_entry> system_keyspace::get_topology_request_entry(utils::UUID id, bool require_entry) {
auto rs = co_await execute_cql(
format("SELECT * FROM system.{} WHERE id = {}", TOPOLOGY_REQUESTS, id));
if (!rs || rs->empty()) {
co_return std::nullopt;
if (require_entry) {
on_internal_error(slogger, format("no entry for request id {}", id));
} else {
co_return topology_requests_entry{
.id = utils::null_uuid()
};
}
}
const auto& row = rs->one();
co_return topology_request_row_to_entry(id, row);
}
future<system_keyspace::topology_requests_entries> system_keyspace::get_topology_request_entries(std::vector<std::variant<service::topology_request, service::global_topology_request>> request_types, db_clock::time_point end_time_limit) {
sstring request_types_str = "";
bool first = true;
for (const auto& rt : request_types) {
if (!std::exchange(first, false)) {
request_types_str += ", ";
}
request_types_str += std::visit([] (auto&& arg) { return fmt::format("'{}'", arg); }, rt);
}
future<system_keyspace::topology_requests_entries> system_keyspace::get_node_ops_request_entries(db_clock::time_point end_time_limit) {
// Running requests.
auto rs_running = co_await execute_cql(
format("SELECT * FROM system.{} WHERE done = false AND request_type IN ({}) ALLOW FILTERING", TOPOLOGY_REQUESTS, request_types_str));
format("SELECT * FROM system.{} WHERE done = false AND request_type IN ('{}', '{}', '{}', '{}', '{}') ALLOW FILTERING", TOPOLOGY_REQUESTS,
service::topology_request::join, service::topology_request::replace, service::topology_request::rebuild, service::topology_request::leave, service::topology_request::remove));
// Requests which finished after end_time_limit.
auto rs_done = co_await execute_cql(
format("SELECT * FROM system.{} WHERE end_time > {} AND request_type IN ({}) ALLOW FILTERING", TOPOLOGY_REQUESTS, end_time_limit.time_since_epoch().count(), request_types_str));
format("SELECT * FROM system.{} WHERE end_time > {} AND request_type IN ('{}', '{}', '{}', '{}', '{}') ALLOW FILTERING", TOPOLOGY_REQUESTS, end_time_limit.time_since_epoch().count(),
service::topology_request::join, service::topology_request::replace, service::topology_request::rebuild, service::topology_request::leave, service::topology_request::remove));
topology_requests_entries m;
for (const auto& row: *rs_done) {
@@ -3671,16 +3655,6 @@ future<system_keyspace::topology_requests_entries> system_keyspace::get_topology
co_return m;
}
future<system_keyspace::topology_requests_entries> system_keyspace::get_node_ops_request_entries(db_clock::time_point end_time_limit) {
return get_topology_request_entries({
service::topology_request::join,
service::topology_request::replace,
service::topology_request::rebuild,
service::topology_request::leave,
service::topology_request::remove
}, end_time_limit);
}
future<mutation> system_keyspace::get_insert_dict_mutation(
std::string_view name,
bytes data,
@@ -3796,4 +3770,35 @@ future<> system_keyspace::apply_mutation(mutation m) {
return _qp.proxy().mutate_locally(m, {}, db::commitlog::force_sync(m.schema()->static_props().wait_for_sync_to_commitlog), db::no_timeout);
}
// The names are persisted in system tables so should not be changed.
static const std::unordered_map<system_keyspace::repair_task_operation, sstring> repair_task_operation_to_name = {
{system_keyspace::repair_task_operation::requested, "requested"},
{system_keyspace::repair_task_operation::finished, "finished"},
};
static const std::unordered_map<sstring, system_keyspace::repair_task_operation> repair_task_operation_from_name = std::invoke([] {
std::unordered_map<sstring, system_keyspace::repair_task_operation> result;
for (auto&& [v, s] : repair_task_operation_to_name) {
result.emplace(s, v);
}
return result;
});
sstring system_keyspace::repair_task_operation_to_string(system_keyspace::repair_task_operation op) {
auto i = repair_task_operation_to_name.find(op);
if (i == repair_task_operation_to_name.end()) {
on_internal_error(slogger, format("Invalid repair task operation: {}", static_cast<int>(op)));
}
return i->second;
}
system_keyspace::repair_task_operation system_keyspace::repair_task_operation_from_string(const sstring& name) {
return repair_task_operation_from_name.at(name);
}
} // namespace db
auto fmt::formatter<db::system_keyspace::repair_task_operation>::format(const db::system_keyspace::repair_task_operation& op, fmt::format_context& ctx) const
-> decltype(ctx.out()) {
return fmt::format_to(ctx.out(), "{}", db::system_keyspace::repair_task_operation_to_string(op));
}

View File

@@ -57,6 +57,8 @@ namespace paxos {
struct topology_request_state;
class group0_guard;
class raft_group0_client;
}
namespace netw {
@@ -163,7 +165,6 @@ public:
static constexpr auto NAME = "system";
static constexpr auto HINTS = "hints";
static constexpr auto BATCHLOG = "batchlog";
static constexpr auto BATCHLOG_V2 = "batchlog_v2";
static constexpr auto PAXOS = "paxos";
static constexpr auto BUILT_INDEXES = "IndexInfo";
static constexpr auto LOCAL = "local";
@@ -185,6 +186,7 @@ public:
static constexpr auto RAFT_SNAPSHOTS = "raft_snapshots";
static constexpr auto RAFT_SNAPSHOT_CONFIG = "raft_snapshot_config";
static constexpr auto REPAIR_HISTORY = "repair_history";
static constexpr auto REPAIR_TASKS = "repair_tasks";
static constexpr auto GROUP0_HISTORY = "group0_history";
static constexpr auto DISCOVERY = "discovery";
static constexpr auto BROADCAST_KV_STORE = "broadcast_kv_store";
@@ -199,7 +201,6 @@ public:
static constexpr auto VIEW_BUILD_STATUS_V2 = "view_build_status_v2";
static constexpr auto DICTS = "dicts";
static constexpr auto VIEW_BUILDING_TASKS = "view_building_tasks";
static constexpr auto CLIENT_ROUTES = "client_routes";
// auth
static constexpr auto ROLES = "roles";
@@ -257,12 +258,12 @@ public:
static schema_ptr hints();
static schema_ptr batchlog();
static schema_ptr batchlog_v2();
static schema_ptr paxos();
static schema_ptr built_indexes(); // TODO (from Cassandra): make private
static schema_ptr raft();
static schema_ptr raft_snapshots();
static schema_ptr repair_history();
static schema_ptr repair_tasks();
static schema_ptr group0_history();
static schema_ptr discovery();
static schema_ptr broadcast_kv_store();
@@ -277,7 +278,6 @@ public:
static schema_ptr view_build_status_v2();
static schema_ptr dicts();
static schema_ptr view_building_tasks();
static schema_ptr client_routes();
// auth
static schema_ptr roles();
@@ -402,6 +402,22 @@ public:
int64_t range_end;
};
enum class repair_task_operation {
requested,
finished,
};
static sstring repair_task_operation_to_string(repair_task_operation op);
static repair_task_operation repair_task_operation_from_string(const sstring& name);
struct repair_task_entry {
tasks::task_id task_uuid;
repair_task_operation operation;
int64_t first_token;
int64_t last_token;
db_clock::time_point timestamp;
table_id table_uuid;
};
struct topology_requests_entry {
utils::UUID id;
utils::UUID initiating_host;
@@ -423,6 +439,10 @@ public:
using repair_history_consumer = noncopyable_function<future<>(const repair_history_entry&)>;
future<> get_repair_history(table_id, repair_history_consumer f);
future<utils::chunked_vector<canonical_mutation>> get_update_repair_task_mutations(const repair_task_entry& entry, api::timestamp_type ts);
using repair_task_consumer = noncopyable_function<future<>(const repair_task_entry&)>;
future<> get_repair_task(tasks::task_id task_uuid, repair_task_consumer f);
future<> save_truncation_record(const replica::column_family&, db_clock::time_point truncated_at, db::replay_position);
future<replay_positions> get_truncated_positions(table_id);
future<> drop_truncation_rp_records();
@@ -669,9 +689,7 @@ public:
future<service::topology_request_state> get_topology_request_state(utils::UUID id, bool require_entry);
topology_requests_entry topology_request_row_to_entry(utils::UUID id, const cql3::untyped_result_set_row& row);
future<topology_requests_entry> get_topology_request_entry(utils::UUID id);
future<std::optional<topology_requests_entry>> get_topology_request_entry_opt(utils::UUID id);
future<system_keyspace::topology_requests_entries> get_topology_request_entries(std::vector<std::variant<service::topology_request, service::global_topology_request>> request_types, db_clock::time_point end_time_limit);
future<topology_requests_entry> get_topology_request_entry(utils::UUID id, bool require_entry);
future<topology_requests_entries> get_node_ops_request_entries(db_clock::time_point end_time_limit);
public:
@@ -732,3 +750,8 @@ public:
}; // class system_keyspace
} // namespace db
template <>
struct fmt::formatter<db::system_keyspace::repair_task_operation> : fmt::formatter<string_view> {
auto format(const db::system_keyspace::repair_task_operation&, fmt::format_context& ctx) const -> decltype(ctx.out());
};

View File

@@ -198,7 +198,6 @@ future<> view_building_worker::register_staging_sstable_tasks(std::vector<sstabl
future<> view_building_worker::run_staging_sstables_registrator() {
while (!_as.abort_requested()) {
bool sleep = false;
try {
auto lock = co_await get_units(_staging_sstables_mutex, 1, _as);
co_await create_staging_sstable_tasks();
@@ -215,14 +214,6 @@ future<> view_building_worker::run_staging_sstables_registrator() {
vbw_logger.warn("Got group0_concurrent_modification while creating staging sstable tasks");
} catch (raft::request_aborted&) {
vbw_logger.warn("Got raft::request_aborted while creating staging sstable tasks");
} catch (...) {
vbw_logger.error("Exception while creating staging sstable tasks: {}", std::current_exception());
sleep = true;
}
if (sleep) {
vbw_logger.debug("Sleeping after exception.");
co_await seastar::sleep_abortable(1s, _as).handle_exception([] (auto x) { return make_ready_future<>(); });
}
}
}
@@ -426,12 +417,9 @@ future<> view_building_worker::check_for_aborted_tasks() {
auto my_host_id = vbw._db.get_token_metadata().get_topology().my_host_id();
auto my_replica = locator::tablet_replica{my_host_id, this_shard_id()};
auto it = vbw._state._batch->tasks.begin();
while (it != vbw._state._batch->tasks.end()) {
auto id = it->first;
auto task_opt = building_state.get_task(it->second.base_id, my_replica, id);
++it; // Advance the iterator before potentially removing the entry from the map.
auto tasks_map = vbw._state._batch->tasks; // Potentially, we'll remove elements from the map, so we need a copy to iterate over it
for (auto& [id, t]: tasks_map) {
auto task_opt = building_state.get_task(t.base_id, my_replica, id);
if (!task_opt || task_opt->get().aborted) {
co_await vbw._state._batch->abort_task(id);
}
@@ -461,7 +449,7 @@ static std::unordered_set<table_id> get_ids_of_all_views(replica::database& db,
}) | std::ranges::to<std::unordered_set>();;
}
// If `state::processing_base_table` is different that the `view_building_state::currently_processed_base_table`,
// If `state::processing_base_table` is diffrent that the `view_building_state::currently_processed_base_table`,
// clear the state, save and flush new base table
future<> view_building_worker::state::update_processing_base_table(replica::database& db, const view_building_state& building_state, abort_source& as) {
if (processing_base_table != building_state.currently_processed_base_table) {
@@ -583,6 +571,8 @@ future<> view_building_worker::batch::do_work() {
break;
}
}
_vbw.local()._vb_state_machine.event.broadcast();
}
future<> view_building_worker::do_build_range(table_id base_id, std::vector<table_id> views_ids, dht::token last_token, abort_source& as) {
@@ -784,15 +774,13 @@ future<std::vector<utils::UUID>> view_building_worker::work_on_tasks(raft::term_
tasks.insert({id, *task_opt});
}
#ifdef SEASTAR_DEBUG
{
auto& some_task = tasks.begin()->second;
for (auto& [_, t]: tasks) {
SCYLLA_ASSERT(t.base_id == some_task.base_id);
SCYLLA_ASSERT(t.last_token == some_task.last_token);
SCYLLA_ASSERT(t.replica == some_task.replica);
SCYLLA_ASSERT(t.type == some_task.type);
SCYLLA_ASSERT(t.replica.shard == this_shard_id());
}
auto& some_task = tasks.begin()->second;
for (auto& [_, t]: tasks) {
SCYLLA_ASSERT(t.base_id == some_task.base_id);
SCYLLA_ASSERT(t.last_token == some_task.last_token);
SCYLLA_ASSERT(t.replica == some_task.replica);
SCYLLA_ASSERT(t.type == some_task.type);
SCYLLA_ASSERT(t.replica.shard == this_shard_id());
}
#endif
@@ -823,6 +811,25 @@ future<std::vector<utils::UUID>> view_building_worker::work_on_tasks(raft::term_
co_return collect_completed_tasks();
}
}
}

View File

@@ -749,7 +749,6 @@ class clients_table : public streaming_virtual_table {
.with_column("ssl_protocol", utf8_type)
.with_column("username", utf8_type)
.with_column("scheduling_group", utf8_type)
.with_column("client_options", map_type_impl::get_instance(utf8_type, utf8_type, false))
.with_hash_version()
.build();
}
@@ -767,7 +766,7 @@ class clients_table : public streaming_virtual_table {
future<> execute(reader_permit permit, result_collector& result, const query_restrictions& qr) override {
// Collect
using client_data_vec = utils::chunked_vector<foreign_ptr<std::unique_ptr<client_data>>>;
using client_data_vec = utils::chunked_vector<client_data>;
using shard_client_data = std::vector<client_data_vec>;
std::vector<foreign_ptr<std::unique_ptr<shard_client_data>>> cd_vec;
cd_vec.resize(smp::count);
@@ -807,13 +806,13 @@ class clients_table : public streaming_virtual_table {
for (unsigned i = 0; i < smp::count; i++) {
for (auto&& ps_cdc : *cd_vec[i]) {
for (auto&& cd : ps_cdc) {
if (cd_map.contains(cd->ip)) {
cd_map[cd->ip].emplace_back(std::move(cd));
if (cd_map.contains(cd.ip)) {
cd_map[cd.ip].emplace_back(std::move(cd));
} else {
dht::decorated_key key = make_partition_key(cd->ip);
dht::decorated_key key = make_partition_key(cd.ip);
if (this_shard_owns(key) && contains_key(qr.partition_range(), key)) {
ips.insert(decorated_ip{std::move(key), cd->ip});
cd_map[cd->ip].emplace_back(std::move(cd));
ips.insert(decorated_ip{std::move(key), cd.ip});
cd_map[cd.ip].emplace_back(std::move(cd));
}
}
co_await coroutine::maybe_yield();
@@ -826,58 +825,39 @@ class clients_table : public streaming_virtual_table {
co_await result.emit_partition_start(dip.key);
auto& clients = cd_map[dip.ip];
std::ranges::sort(clients, [] (const foreign_ptr<std::unique_ptr<client_data>>& a, const foreign_ptr<std::unique_ptr<client_data>>& b) {
return a->port < b->port || a->client_type_str() < b->client_type_str();
std::ranges::sort(clients, [] (const client_data& a, const client_data& b) {
return a.port < b.port || a.client_type_str() < b.client_type_str();
});
for (const auto& cd : clients) {
clustering_row cr(make_clustering_key(cd->port, cd->client_type_str()));
set_cell(cr.cells(), "shard_id", cd->shard_id);
set_cell(cr.cells(), "connection_stage", cd->stage_str());
if (cd->driver_name) {
set_cell(cr.cells(), "driver_name", cd->driver_name->key());
clustering_row cr(make_clustering_key(cd.port, cd.client_type_str()));
set_cell(cr.cells(), "shard_id", cd.shard_id);
set_cell(cr.cells(), "connection_stage", cd.stage_str());
if (cd.driver_name) {
set_cell(cr.cells(), "driver_name", *cd.driver_name);
}
if (cd->driver_version) {
set_cell(cr.cells(), "driver_version", cd->driver_version->key());
if (cd.driver_version) {
set_cell(cr.cells(), "driver_version", *cd.driver_version);
}
if (cd->hostname) {
set_cell(cr.cells(), "hostname", *cd->hostname);
if (cd.hostname) {
set_cell(cr.cells(), "hostname", *cd.hostname);
}
if (cd->protocol_version) {
set_cell(cr.cells(), "protocol_version", *cd->protocol_version);
if (cd.protocol_version) {
set_cell(cr.cells(), "protocol_version", *cd.protocol_version);
}
if (cd->ssl_cipher_suite) {
set_cell(cr.cells(), "ssl_cipher_suite", *cd->ssl_cipher_suite);
if (cd.ssl_cipher_suite) {
set_cell(cr.cells(), "ssl_cipher_suite", *cd.ssl_cipher_suite);
}
if (cd->ssl_enabled) {
set_cell(cr.cells(), "ssl_enabled", *cd->ssl_enabled);
if (cd.ssl_enabled) {
set_cell(cr.cells(), "ssl_enabled", *cd.ssl_enabled);
}
if (cd->ssl_protocol) {
set_cell(cr.cells(), "ssl_protocol", *cd->ssl_protocol);
if (cd.ssl_protocol) {
set_cell(cr.cells(), "ssl_protocol", *cd.ssl_protocol);
}
set_cell(cr.cells(), "username", cd->username ? *cd->username : sstring("anonymous"));
if (cd->scheduling_group_name) {
set_cell(cr.cells(), "scheduling_group", *cd->scheduling_group_name);
set_cell(cr.cells(), "username", cd.username ? *cd.username : sstring("anonymous"));
if (cd.scheduling_group_name) {
set_cell(cr.cells(), "scheduling_group", *cd.scheduling_group_name);
}
auto map_type = map_type_impl::get_instance(
utf8_type,
utf8_type,
false
);
auto prepare_client_options = [] (const auto& client_options) {
map_type_impl::native_type tmp;
for (auto& co: client_options) {
auto map_element = std::make_pair(data_value(co.key.key()), data_value(co.value.key()));
tmp.push_back(std::move(map_element));
}
return tmp;
};
set_cell(cr.cells(), "client_options",
make_map_value(map_type, prepare_client_options(cd->client_options)));
co_await result.emit_row(std::move(cr));
}
co_await result.emit_partition_end();

View File

@@ -71,7 +71,7 @@ Use "Bash on Ubuntu on Windows" for the same tools and capabilities as on Linux
### Building the Docs
1. Run `make preview` in the `docs/` directory to build the documentation.
1. Run `make preview` to build the documentation.
1. Preview the built documentation locally at http://127.0.0.1:5500/.
### Cleanup

View File

@@ -41,8 +41,6 @@ class MetricsProcessor:
# Get metrics from the file
try:
metrics_file = metrics.get_metrics_from_file(relative_path, "scylla_", metrics_info, strict=strict)
except SystemExit:
pass
finally:
os.chdir(old_cwd)
if metrics_file:

View File

@@ -1,17 +1,17 @@
# Alternator: DynamoDB API in ScyllaDB
# Alternator: DynamoDB API in Scylla
## Introduction
Alternator is a ScyllaDB feature adding compatibility with Amazon DynamoDB(TM).
Alternator is a Scylla feature adding compatibility with Amazon DynamoDB(TM).
DynamoDB's API uses JSON-encoded requests and responses which are sent over
an HTTP or HTTPS transport. It is described in detail in Amazon's [DynamoDB
API Reference](https://docs.aws.amazon.com/amazondynamodb/latest/APIReference/).
Our goal is that any application written to use Amazon DynamoDB could
be run, unmodified, against ScyllaDB with Alternator enabled. Alternator's
be run, unmodified, against Scylla with Alternator enabled. Alternator's
compatibility with DynamoDB is fairly complete, but users should be aware
of some differences and some unimplemented features. The extent of
Alternator's compatibility with DynamoDB is described in the
[ScyllaDB Alternator for DynamoDB users](compatibility.md) document,
[Scylla Alternator for DynamoDB users](compatibility.md) document,
which is updated as the work on Alternator progresses and compatibility
continues to improve.
@@ -19,8 +19,8 @@ Alternator also adds several features and APIs that are not available in
DynamoDB. These are described in [Alternator-specific APIs](new-apis.md).
## Running Alternator
By default, ScyllaDB does not listen for DynamoDB API requests. To enable
this API in ScyllaDB you must set at least two configuration options,
By default, Scylla does not listen for DynamoDB API requests. To enable
this API in Scylla you must set at least two configuration options,
**alternator_port** and **alternator_write_isolation**. For example in the
YAML configuration file:
```yaml
@@ -30,7 +30,7 @@ alternator_write_isolation: only_rmw_uses_lwt # or always, forbid or unsafe
or, equivalently, via command-line arguments: `--alternator-port=8000
--alternator-write-isolation=only_rmw_uses_lwt.
the **alternator_port** option determines on which port ScyllaDB listens for
the **alternator_port** option determines on which port Scylla listens for
DynamoDB API requests. By default, it listens on this port on all network
interfaces. To listen only on a specific interface, configure also the
**alternator_address** option.
@@ -41,12 +41,12 @@ Alternator has four different choices
for the implementation of writes, each with different advantages. You should
carefully consider which of the options makes more sense for your intended
use case and configure alternator_write_isolation accordingly. There is
currently no default for this option: Trying to run ScyllaDB with an Alternator
currently no default for this option: Trying to run Scylla with an Alternator
port selected but without configuring write isolation will result in an error message,
asking you to set it.
In addition to (or instead of) serving HTTP requests on alternator_port,
ScyllaDB can accept DynamoDB API requests over HTTPS (encrypted), on the port
Scylla can accept DynamoDB API requests over HTTPS (encrypted), on the port
specified by **alternator_https_port**. As usual for HTTPS servers, the
operator must specify certificate and key files. By default these should
be placed in `/etc/scylla/scylla.crt` and `/etc/scylla/scylla.key`, but
@@ -54,7 +54,7 @@ these default locations can overridden by specifying
`--alternator-encryption-options keyfile="..."` and
`--alternator-encryption-options certificate="..."`.
By default, ScyllaDB saves a snapshot of deleted tables. But Alternator does
By default, Scylla saves a snapshot of deleted tables. But Alternator does
not offer an API to restore these snapshots, so these snapshots are not useful
and waste disk space - deleting a table does not recover any disk space.
It is therefore recommended to disable this automatic-snapshotting feature
@@ -73,11 +73,11 @@ itself. Instructions, code and examples for doing this can be found in the
This section provides only a very brief introduction to Alternator's
design. A much more detailed document about the features of the DynamoDB
API and how they are, or could be, implemented in ScyllaDB can be found in:
API and how they are, or could be, implemented in Scylla can be found in:
<https://docs.google.com/document/d/1i4yjF5OSAazAY_-T8CBce9-2ykW4twx_E_Nt2zDoOVs>
Almost all of Alternator's source code (except some initialization code)
can be found in the alternator/ subdirectory of ScyllaDB's source code.
can be found in the alternator/ subdirectory of Scylla's source code.
Extensive functional tests can be found in the test/alternator
subdirectory. These tests are written in Python, and can be run against
both Alternator and Amazon's DynamoDB; This allows verifying that
@@ -85,15 +85,15 @@ Alternator's behavior matches the one observed on DynamoDB.
See test/alternator/README.md for more information about the tests and
how to run them.
With Alternator enabled on port 8000 (for example), every ScyllaDB node
With Alternator enabled on port 8000 (for example), every Scylla node
listens for DynamoDB API requests on this port. These requests, in
JSON format over HTTP, are parsed and result in calls to internal Scylla
C++ functions - there is no CQL generation or parsing involved.
In ScyllaDB terminology, the node receiving the request acts as the
In Scylla terminology, the node receiving the request acts as the
*coordinator*, and often passes the request on to one or more other nodes -
*replicas* which hold copies of the requested data.
Alternator tables are stored as ScyllaDB tables, each in a separate keyspace.
Alternator tables are stored as Scylla tables, each in a separate keyspace.
Each keyspace is initialized when the corresponding Alternator table is
created (with a CreateTable request). The replication factor (RF) for this
keyspace is chosen at that point, depending on the size of the cluster:
@@ -101,19 +101,19 @@ RF=3 is used on clusters with three or more nodes, and RF=1 is used for
smaller clusters. Such smaller clusters are, of course, only recommended
for tests because of the risk of data loss.
Each table in Alternator is stored as a ScyllaDB table in a separate
Each table in Alternator is stored as a Scylla table in a separate
keyspace. The DynamoDB key columns (hash and sort key) have known types,
and become partition and clustering key columns of the ScyllaDB table.
and become partition and clustering key columns of the Scylla table.
All other attributes may be different for each row, so are stored in one
map column in ScyllaDB, and not as separate columns.
map column in Scylla, and not as separate columns.
DynamoDB supports two consistency levels for reads, "eventual consistency"
and "strong consistency". These two modes are implemented using ScyllaDB's CL
and "strong consistency". These two modes are implemented using Scylla's CL
(consistency level) feature: All writes are done using the `LOCAL_QUORUM`
consistency level, then strongly-consistent reads are done with
`LOCAL_QUORUM`, while eventually-consistent reads are with just `LOCAL_ONE`.
In ScyllaDB (and its inspiration, Cassandra), high write performance is
In Scylla (and its inspiration, Cassandra), high write performance is
achieved by ensuring that writes do not require reads from disk.
The DynamoDB API, however, provides many types of requests that need a read
before the write (a.k.a. RMW requests - read-modify-write). For example,
@@ -121,7 +121,7 @@ a request may copy an existing attribute, increment an attribute,
be conditional on some expression involving existing values of attribute,
or request that the previous values of attributes be returned. These
read-modify-write transactions should be _isolated_ from each other, so
by default Alternator implements every write operation using ScyllaDB's
by default Alternator implements every write operation using Scylla's
LWT (lightweight transactions). This default can be overridden on a per-table
basis, by tagging the table as explained above in the "write isolation
policies" section.

View File

@@ -1,6 +1,6 @@
# ScyllaDB Alternator for DynamoDB users
ScyllaDB supports the DynamoDB API (this feature is codenamed "Alternator").
Scylla supports the DynamoDB API (this feature is codenamed "Alternator").
Our goal is to support any application written for Amazon DynamoDB.
Nevertheless, there are a few differences between DynamoDB and Scylla, and
and a few DynamoDB features that have not yet been implemented in Scylla.
@@ -8,16 +8,16 @@ The purpose of this document is to inform users of these differences.
## Provisioning
The most obvious difference between DynamoDB and ScyllaDB is that while
DynamoDB is a shared cloud service, ScyllaDB is a dedicated service running
The most obvious difference between DynamoDB and Scylla is that while
DynamoDB is a shared cloud service, Scylla is a dedicated service running
on your private cluster. Whereas DynamoDB allows you to "provision" the
number of requests per second you'll need - or at an extra cost not even
provision that - ScyllaDB requires you to provision your cluster. You need
provision that - Scylla requires you to provision your cluster. You need
to reason about the number and size of your nodes - not the throughput.
Moreover, DynamoDB's per-table provisioning (`BillingMode=PROVISIONED`) is
not yet supported by Scylla. The BillingMode and ProvisionedThroughput options
on a table need to be valid but are ignored, and ScyllaDB behaves like DynamoDB's
on a table need to be valid but are ignored, and Scylla behaves like DynamoDB's
`BillingMode=PAY_PER_REQUEST`: All requests are accepted without a per-table
throughput cap.
@@ -33,7 +33,7 @@ Instructions for doing this can be found in:
## Write isolation policies
ScyllaDB was designed to optimize the performance of pure write operations -
Scylla was designed to optimize the performance of pure write operations -
writes which do not need to read the previous value of the item.
In CQL, writes which do need the previous value of the item must explicitly
use the slower LWT ("LightWeight Transaction") feature to be correctly
@@ -79,11 +79,11 @@ a _higher_ timestamp - and this will be the "last write" that wins.
To avoid or mitigate this write reordering issue, users may consider
one or more of the following:
1. Use NTP to keep the clocks on the different ScyllaDB nodes synchronized.
1. Use NTP to keep the clocks on the different Scylla nodes synchronized.
If the delay between the two writes is longer than NTP's accuracy,
they will not be reordered.
2. If an application wants to ensure that two specific writes are not
reordered, it should send both requests to the same ScyllaDB node.
reordered, it should send both requests to the same Scylla node.
Care should be taken when using a load balancer - which might redirect
two requests to two different nodes.
3. Consider using the `always_use_lwt` write isolation policy.
@@ -210,7 +210,7 @@ CREATE SERVICE_LEVEL IF NOT EXISTS oltp WITH SHARES = 1000;
ATTACH SERVICE_LEVEL olap TO alice;
ATTACH SERVICE_LEVEL oltp TO bob;
```
Note that `alternator_enforce_authorization` has to be enabled in ScyllaDB configuration.
Note that `alternator_enforce_authorization` has to be enabled in Scylla configuration.
See [Authorization](##Authorization) section to learn more about roles and authorization.
See [Workload Prioritization](../features/workload-prioritization)
@@ -218,11 +218,11 @@ to read about Workload Prioritization in detail.
## Metrics
ScyllaDB has an advanced and extensive monitoring framework for inspecting
and graphing hundreds of different metrics of ScyllaDB's usage and performance.
ScyllaDB's monitoring stack, based on Grafana and Prometheus, is described in
Scylla has an advanced and extensive monitoring framework for inspecting
and graphing hundreds of different metrics of Scylla's usage and performance.
Scylla's monitoring stack, based on Grafana and Prometheus, is described in
<https://docs.scylladb.com/operating-scylla/monitoring/>.
This monitoring stack is different from DynamoDB's offering - but ScyllaDB's
This monitoring stack is different from DynamoDB's offering - but Scylla's
is significantly more powerful and gives the user better insights on
the internals of the database and its performance.
@@ -248,7 +248,7 @@ data in different partition order. Applications mustn't rely on that
undocumented order.
Note that inside each partition, the individual items will be sorted the same
in DynamoDB and ScyllaDB - determined by the _sort key_ defined for that table.
in DynamoDB and Scylla - determined by the _sort key_ defined for that table.
---
@@ -274,7 +274,7 @@ is different, or can be configured in Alternator:
## Experimental API features
Some DynamoDB API features are supported by Alternator, but considered
**experimental** in this release. An experimental feature in ScyllaDB is a
**experimental** in this release. An experimental feature in Scylla is a
feature whose functionality is complete, or mostly complete, but it is not
as thoroughly tested or optimized as regular features. Also, an experimental
feature's implementation is still subject to change and upgrades may not be
@@ -351,8 +351,8 @@ they should be easy to detect. Here is a list of these unimplemented features:
* The on-demand backup APIs are not supported: CreateBackup, DescribeBackup,
DeleteBackup, ListBackups, RestoreTableFromBackup.
For now, users can use ScyllaDB's existing backup solutions such as snapshots
or ScyllaDB Manager.
For now, users can use Scylla's existing backup solutions such as snapshots
or Scylla Manager.
<https://github.com/scylladb/scylla/issues/5063>
* Continuous backup (the ability to restore any point in time) is also not
@@ -370,7 +370,7 @@ they should be easy to detect. Here is a list of these unimplemented features:
<https://github.com/scylladb/scylla/issues/5068>
* DAX (DynamoDB Accelerator), an in-memory cache for DynamoDB, is not
available in for Alternator. Anyway, it should not be necessary - ScyllaDB's
available in for Alternator. Anyway, it should not be necessary - Scylla's
internal cache is already rather advanced and there is no need to place
another cache in front of the it. We wrote more about this here:
<https://www.scylladb.com/2017/07/31/database-caches-not-good/>
@@ -384,7 +384,7 @@ they should be easy to detect. Here is a list of these unimplemented features:
* The PartiQL syntax (SQL-like SELECT/UPDATE/INSERT/DELETE expressions)
and the operations ExecuteStatement, BatchExecuteStatement and
ExecuteTransaction are not yet supported.
A user that is interested in an SQL-like syntax can consider using ScyllaDB's
A user that is interested in an SQL-like syntax can consider using Scylla's
CQL protocol instead.
This feature was added to DynamoDB in November 2020.
<https://github.com/scylladb/scylla/issues/8787>
@@ -393,7 +393,7 @@ they should be easy to detect. Here is a list of these unimplemented features:
which is different from AWS's. In particular, the operations
DescribeContributorInsights, ListContributorInsights and
UpdateContributorInsights that configure Amazon's "CloudWatch Contributor
Insights" are not yet supported. ScyllaDB has different ways to retrieve the
Insights" are not yet supported. Scylla has different ways to retrieve the
same information, such as which items were accessed most often.
<https://github.com/scylladb/scylla/issues/8788>

View File

@@ -11,7 +11,7 @@ This section will guide you through the steps for setting up the cluster:
<https://hub.docker.com/r/scylladb/scylla/>, but add to every `docker run`
command a `-p 8000:8000` before the image name and
`--alternator-port=8000 --alternator-write-isolation=always` at the end.
The "alternator-port" option specifies on which port ScyllaDB will listen for
The "alternator-port" option specifies on which port Scylla will listen for
the (unencrypted) DynamoDB API, and the "alternator-write-isolation" chooses
whether or not Alternator will use LWT for every write.
For example,
@@ -24,10 +24,10 @@ This section will guide you through the steps for setting up the cluster:
By default, ScyllaDB run in this way will not have authentication or
authorization enabled, and any DynamoDB API request will be honored without
requiring them to be signed appropriately. See the
[ScyllaDB Alternator for DynamoDB users](compatibility.md#authentication-and-authorization)
[Scylla Alternator for DynamoDB users](compatibility.md#authentication-and-authorization)
document on how to configure authentication and authorization.
## Testing ScyllaDB's DynamoDB API support:
## Testing Scylla's DynamoDB API support:
### Running AWS Tic Tac Toe demo app to test the cluster:
1. Follow the instructions on the [AWS github page](https://github.com/awsdocs/amazon-dynamodb-developer-guide/blob/master/doc_source/TicTacToe.Phase1.md)
2. Enjoy your tic-tac-toe game :-)

View File

@@ -2,9 +2,9 @@
Alternator's primary goal is to be compatible with Amazon DynamoDB(TM)
and its APIs, so that any application written to use Amazon DynamoDB could
be run, unmodified, against ScyllaDB with Alternator enabled. The extent of
be run, unmodified, against Scylla with Alternator enabled. The extent of
Alternator's compatibility with DynamoDB is described in the
[ScyllaDB Alternator for DynamoDB users](compatibility.md) document.
[Scylla Alternator for DynamoDB users](compatibility.md) document.
But Alternator also adds several features and APIs that are not available in
DynamoDB. These Alternator-specific APIs are documented here.
@@ -15,7 +15,7 @@ _conditional_ update or an update based on the old value of an attribute.
The read and the write should be treated as a single transaction - protected
(_isolated_) from other parallel writes to the same item.
Alternator could do this isolation by using ScyllaDB's LWT (lightweight
Alternator could do this isolation by using Scylla's LWT (lightweight
transactions) for every write operation, but this significantly slows
down writes, and not necessary for workloads which don't use read-modify-write
(RMW) updates.
@@ -41,7 +41,7 @@ isolation policy for a specific table can be overridden by tagging the table
which need a read before the write. An attempt to use such statements
(e.g., UpdateItem with a ConditionExpression) will result in an error.
In this mode, the remaining write requests which are allowed - pure writes
without a read - are performed using standard ScyllaDB writes, not LWT,
without a read - are performed using standard Scylla writes, not LWT,
so they are significantly faster than they would have been in the
`always_use_lwt`, but their isolation is still correct.
@@ -65,19 +65,19 @@ isolation policy for a specific table can be overridden by tagging the table
read-modify-write updates. This mode is not recommended for any use case,
and will likely be removed in the future.
## Accessing system tables from ScyllaDB
ScyllaDB exposes lots of useful information via its internal system tables,
## Accessing system tables from Scylla
Scylla exposes lots of useful information via its internal system tables,
which can be found in system keyspaces: 'system', 'system\_auth', etc.
In order to access to these tables via alternator interface,
Scan and Query requests can use a special table name:
`.scylla.alternator.KEYSPACE_NAME.TABLE_NAME`
which will return results fetched from corresponding ScyllaDB table.
which will return results fetched from corresponding Scylla table.
This interface can be used only to fetch data from system tables.
Attempts to read regular tables via the virtual interface will result
in an error.
Example: in order to query the contents of ScyllaDB's `system.large_rows`,
Example: in order to query the contents of Scylla's `system.large_rows`,
pass `TableName='.scylla.alternator.system.large_rows'` to a Query/Scan
request.
@@ -113,14 +113,14 @@ connection (either active or idle), not necessarily an active request as
in Alternator.
## Service discovery
As explained in [ScyllaDB Alternator for DynamoDB users](compatibility.md),
As explained in [Scylla Alternator for DynamoDB users](compatibility.md),
Alternator requires a load-balancer or a client-side load-balancing library
to distribute requests between all ScyllaDB nodes. This load-balancer needs
to be able to _discover_ the ScyllaDB nodes. Alternator provides two special
to distribute requests between all Scylla nodes. This load-balancer needs
to be able to _discover_ the Scylla nodes. Alternator provides two special
requests, `/` and `/localnodes`, to help with this service discovery, which
we will now explain.
Some setups know exactly which ScyllaDB nodes were brought up, so all that
Some setups know exactly which Scylla nodes were brought up, so all that
remains is to periodically verify that each node is still functional. The
easiest way to do this is to make an HTTP (or HTTPS) GET request to the node,
with URL `/`. This is a trivial GET request and does **not** need to be
@@ -133,10 +133,10 @@ $ curl http://localhost:8000/
healthy: localhost:8000
```
In other setups, the load balancer might not know which ScyllaDB nodes exist.
For example, it may be possible to add or remove ScyllaDB nodes without a
In other setups, the load balancer might not know which Scylla nodes exist.
For example, it may be possible to add or remove Scylla nodes without a
client-side load balancer knowing. For these setups we have the `/localnodes`
request that can be used to discover which ScyllaDB nodes exist: A load balancer
request that can be used to discover which Scylla nodes exist: A load balancer
that already knows at least one live node can discover the rest by sending
a `/localnodes` request to the known node. It's again an unauthenticated
HTTP (or HTTPS) GET request:
@@ -160,7 +160,7 @@ list the nodes in a specific _data center_ or _rack_. These options are
useful for certain use cases:
* A `dc` option (e.g., `/localnodes?dc=dc1`) can be passed to list the
nodes in a specific ScyllaDB data center, not the data center of the node
nodes in a specific Scylla data center, not the data center of the node
being contacted. This is useful when a client knowns of _some_ Scylla
node belonging to an unknown DC, but wants to list the nodes in _its_
DC, which it knows by name.
@@ -191,7 +191,7 @@ tells them to.
If you want to influence whether a specific Alternator table is created with tablets or vnodes,
you can do this by specifying the `system:initial_tablets` tag
(in earlier versions of ScyllaDB the tag was `experimental:initial_tablets`)
(in earlier versions of Scylla the tag was `experimental:initial_tablets`)
in the CreateTable operation. The value of this tag can be:
* Any valid integer as the value of this tag enables tablets.

View File

@@ -365,7 +365,7 @@ Modifying a keyspace with tablets enabled is possible and doesn't require any sp
- The replication factor (RF) can be increased or decreased by at most 1 at a time. To reach the desired RF value, modify the RF repeatedly.
- The ``ALTER`` statement rejects the ``replication_factor`` tag. List the DCs explicitly when altering a keyspace. See :ref:`NetworkTopologyStrategy <replication-strategy>`.
- An RF change cannot be requested while another RF change is pending for the same keyspace. Attempting to execute an ``ALTER`` statement in this scenario will fail with an explicit error. Wait for the ongoing RF change to complete before issuing another ``ALTER`` statement.
- If there's any other ongoing global topology operation, executing the ``ALTER`` statement will fail (with an explicit and specific error) and needs to be repeated.
- The ``ALTER`` statement may take longer than the regular query timeout, and even if it times out, it will continue to execute in the background.
- The replication strategy cannot be modified, as keyspaces with tablets only support ``NetworkTopologyStrategy``.
- The ``ALTER`` statement will fail if it would make the keyspace :term:`RF-rack-invalid <RF-rack-valid keyspace>`.
@@ -1043,8 +1043,6 @@ The following modes are available:
* - ``immediate``
- Tombstone GC is immediately performed. There is no wait time or repair requirement. This mode is useful for a table that uses the TWCS compaction strategy with no user deletes. After data is expired after TTL, ScyllaDB can perform compaction to drop the expired data immediately.
.. warning:: The ``repair`` mode is not supported for :term:`Colocated Tables <Colocated Table>` in this version.
.. _cql-per-table-tablet-options:
Per-table tablet options

View File

@@ -102,7 +102,6 @@ Additional Information
To learn more about TTL, and see a hands-on example, check out `this lesson <https://university.scylladb.com/courses/data-modeling/lessons/advanced-data-modeling/topic/expiring-data-with-ttl-time-to-live/>`_ on ScyllaDB University.
* `Video: Managing data expiration with Time-To-Live <https://www.youtube.com/watch?v=SXkbu7mFHeA>`_
* :doc:`Apache Cassandra Query Language (CQL) Reference </cql/index>`
* :doc:`KB Article:How to Change gc_grace_seconds for a Table </kb/gc-grace-seconds/>`
* :doc:`KB Article:Time to Live (TTL) and Compaction </kb/ttl-facts/>`

View File

@@ -74,8 +74,6 @@ The keys and values are:
as an indicator to which shard client wants to connect. The desired shard number
is calculated as: `desired_shard_no = client_port % SCYLLA_NR_SHARDS`.
Its value is a decimal representation of type `uint16_t`, by default `19142`.
- `CLIENT_OPTIONS` is a string containing a JSON object representation that
contains CQL Driver configuration, e.g. load balancing policy, retry policy, timeouts, etc.
Currently, one `SCYLLA_SHARDING_ALGORITHM` is defined,
`biased-token-round-robin`. To apply the algorithm,
@@ -238,26 +236,3 @@ the same mechanism for other protocol versions, such as CQLv4.
The feature is identified by the `SCYLLA_USE_METADATA_ID` key, which is meant to be sent
in the SUPPORTED message.
## Sending the CLIENT_ROUTES_CHANGE event
This extension allows a driver to update its connections when the
`system.client_routes` table is modified.
In some network topologies a specific mapping of addresses and ports is required (e.g.
to support Private Link). This mapping can change dynamically even when no nodes are
added or removed. The driver must adapt to those changes; otherwise connectivity can be
lost.
The extension is implemented as a new `EVENT` type: `CLIENT_ROUTES_CHANGE`. The event
body consists of:
- [string] change
- [string list] connection_ids
- [string list] host_ids
There is only one change value: `UPDATE_NODES`, which means at least one client route
was inserted, updated, or deleted.
Events already have a subscription mechanism similar to protocol extensions (that is,
the driver only receives the events it explicitly subscribed to), so no additional
`cql_protocol_extension` key is introduced for this feature.

View File

@@ -86,7 +86,6 @@ stateDiagram-v2
de_left_token_ring --> [*]
}
state removing {
re_left_token_ring : left_token_ring
re_tablet_draining : tablet_draining
re_tablet_migration : tablet_migration
re_write_both_read_old : write_both_read_old
@@ -99,8 +98,7 @@ stateDiagram-v2
re_tablet_draining --> re_write_both_read_old
re_write_both_read_old --> re_write_both_read_new: streaming completed
re_write_both_read_old --> re_rollback_to_normal: rollback
re_write_both_read_new --> re_left_token_ring
re_left_token_ring --> [*]
re_write_both_read_new --> [*]
}
rebuilding --> normal: streaming completed
decommissioning --> left: operation succeeded
@@ -124,10 +122,9 @@ Note that these are not all states, as there are other states specific to tablet
Writes to vnodes-based tables are going to both new and old replicas (new replicas means calculated according
to modified token ring), reads are using old replicas.
- `write_both_read_new` - as above, but reads are using new replicas.
- `left_token_ring` - the decommissioning or removing node left the token ring, but we still need to wait until other
nodes observe it and stop sending writes to this node. For decommission, we tell the node to shut down,
then remove it from group 0. For removenode, the node is already down, so we skip the shutdown step.
We also use this state to rollback a failed bootstrap or decommission.
- `left_token_ring` - the decommissioning node left the token ring, but we still need to wait until other
nodes observe it and stop sending writes to this node. Then, we tell the node to shut down and remove
it from group 0. We also use this state to rollback a failed bootstrap or decommission.
- `rollback_to_normal` - the decommission or removenode operation failed. Rollback the operation by
moving the node we tried to decommission/remove back to the normal state.
- `lock` - the topology stays in this state until externally changed (to null state), preventing topology
@@ -144,9 +141,7 @@ reads that started before this point exist in the system. Finally we remove the
transitioning state.
Decommission, removenode and replace work similarly, except they don't go through
`commit_cdc_generation`. Both decommission and removenode go through the
`left_token_ring` state to run a global barrier ensuring all nodes are aware
of the topology change before the operation completes.
`commit_cdc_generation`.
The state machine may also go only through the `commit_cdc_generation` state
after getting a request from the user to create a new CDC generation if the

View File

@@ -41,12 +41,12 @@ Unless the task was aborted, the worker will eventually reply that the task was
it temporarily saves list of ids of finished tasks and removes those tasks from group0 state (pernamently marking them as finished) in 200ms intervals. (*)
This batching of removing finished tasks is done in order to reduce number of generated group0 operations.
On the other hand, view building tasks can can also be aborted due to 2 main reasons:
On the other hand, view buildind tasks can can also be aborted due to 2 main reasons:
- a keyspace/view was dropped
- tablet operations (see [tablet operations section](#tablet-operations))
In the first case we simply delete relevant view building tasks as they are no longer needed.
But if a task needs to be aborted due to tablet operation, we're firstly setting the `aborted` flag to true. We need to do this because we need the task information
to create new adjusted tasks (if the operation succeeded) or rollback them (if the operation failed).
But if a task needs to be aborted due to tablet operation, we're firstly setting the `aborted` flag to true. We need to do this because we need the task informations
to created a new adjusted tasks (if the operation succeeded) or rollback them (if the operation failed).
Once a task is aborted by setting the flag, this cannot be revoked, so rolling back a task means creating its duplicate and removing the original task.
(*) - Because there is a time gap between when the coordinator learns that a task is finished (from the RPC response) and when the task is marked as completed,

View File

@@ -29,6 +29,9 @@ A CDC generation consists of:
This is the mapping used to decide on which stream IDs to use when making writes, as explained in the :doc:`./cdc-streams` document. It is a global property of the cluster: it doesn't depend on the table you're making writes to.
.. caution::
The tables mentioned in the following sections: ``system_distributed.cdc_generation_timestamps`` and ``system_distributed.cdc_streams_descriptions_v2`` have been introduced in ScyllaDB 4.4. It is highly recommended to upgrade to 4.4 for efficient CDC usage. The last section explains how to run the below examples in ScyllaDB 4.3.
When CDC generations change
---------------------------

View File

@@ -28,8 +28,7 @@ Incremental Repair is only supported for tables that use the tablets architectur
Incremental Repair Modes
------------------------
Incremental is currently disabled by default. You can control its behavior for a given repair operation using the ``incremental_mode`` parameter.
This is useful for enabling incremental repair, or in situations where you might need to force a full data validation.
While incremental repair is the default and recommended mode, you can control its behavior for a given repair operation using the ``incremental_mode`` parameter. This is useful for situations where you might need to force a full data validation.
The available modes are:

View File

@@ -17,7 +17,6 @@ This document highlights ScyllaDB's key data modeling features.
Workload Prioritization </features/workload-prioritization>
Backup and Restore </features/backup-and-restore>
Incremental Repair </features/incremental-repair/>
Vector Search </features/vector-search/>
.. panel-box::
:title: ScyllaDB Features
@@ -44,5 +43,3 @@ This document highlights ScyllaDB's key data modeling features.
* :doc:`Incremental Repair </features/incremental-repair/>` provides a much more
efficient and lightweight approach to maintaining data consistency by
repairing only the data that has changed since the last repair.
* :doc:`Vector Search in ScyllaDB </features/vector-search/>` enables
similarity-based queries on vector embeddings.

View File

@@ -1,55 +0,0 @@
=================================
Vector Search in ScyllaDB
=================================
.. note::
This feature is currently available only in `ScyllaDB Cloud <https://cloud.docs.scylladb.com/>`_.
What Is Vector Search
-------------------------
Vector Search enables similarity-based queries over high-dimensional data,
such as text, images, audio, or user behavior. Instead of searching for exact
matches, it allows applications to find items that are semantically similar to
a given input.
To do this, Vector Search works on vector embeddings, which are numerical
representations of data that capture semantic meaning. This enables queries
such as:
* “Find documents similar to this paragraph”
* “Find products similar to what the user just viewed”
* “Find previous tickets related to this support request”
Rather than relying on exact values or keywords, Vector Search returns results
based on distance or similarity between vectors. This capability is
increasingly used in modern workloads such as AI-powered search, recommendation
systems, and retrieval-augmented generation (RAG).
Why Vector Search Matters
------------------------------------
Many applications already rely on ScyllaDB for high throughput, low and
predictable latency, and large-scale data storage.
Vector Search complements these strengths by enabling new classes of workloads,
including:
* Semantic search over text or documents
* Recommendations based on user or item similarity
* AI and ML applications, including RAG pipelines
* Anomaly and pattern detection
With Vector Search, ScyllaDB can serve as the similarity search backend for
AI-driven applications.
Availability
--------------
Vector Search is currently available only in ScyllaDB Cloud, the fully managed
ScyllaDB service.
👉 For details on using Vector Search, refer to the
`ScyllaDB Cloud documentation <https://cloud.docs.scylladb.com/stable/vector-search/index.html>`_.

View File

@@ -20,10 +20,7 @@ You can run your ScyllaDB workloads on AWS, GCE, and Azure using a ScyllaDB imag
Amazon Web Services (AWS)
-----------------------------
The recommended instance types are :ref:`i3en <system-requirements-i3en-instances>`,
:ref:`i4i <system-requirements-i4i-instances>`, :ref:`i7i <system-requirements-i7i-instances>`,
:ref:`i7ie <system-requirements-i7ie-instances>`, :ref:`i8g<system-requirements-i8g-instances>`,
and :ref:`i8ge <system-requirements-i8ge-instances>`.
The recommended instance types are :ref:`i3en <system-requirements-i3en-instances>`, :ref:`i4i <system-requirements-i4i-instances>`, :ref:`i7i <system-requirements-i7i-instances>`, and :ref:`i7ie <system-requirements-i7ie-instances>`.
.. note::
@@ -198,118 +195,6 @@ All i7i instances have the following specs:
See `Amazon EC2 I7i Instances <https://aws.amazon.com/ec2/instance-types/i7i/>`_ for details.
.. _system-requirements-i8g-instances:
i8g instances
^^^^^^^^^^^^^^
The following i8g instances are supported.
.. list-table::
:widths: 30 20 20 30
:header-rows: 1
* - Model
- vCPU
- Mem (GiB)
- Storage (GB)
* - i8g.large
- 2
- 16
- 1 x 468 GB
* - i8g.xlarge
- 4
- 32
- 1 x 937 GB
* - i8g.2xlarge
- 8
- 64
- 1 x 1,875 GB
* - i8g.4xlarge
- 16
- 128
- 1 x 3,750 GB
* - i8g.8xlarge
- 32
- 256
- 2 x 3,750 GB
* - i8g.12xlarge
- 48
- 384
- 3 x 3,750 GB
* - i8g.16xlarge
- 64
- 512
- 4 x 3,750 GB
All i8g instances have the following specs:
* Powered by AWS Graviton4 processors
* 3rd generation AWS Nitro SSD storage
* DDR5-5600 memory for improved throughput
* Up to 100 Gbps of networking bandwidth and up to 60 Gbps of bandwidth to
Amazon Elastic Block Store (EBS)
* Instance sizes offer up to 45 TB of total local NVMe instance storage
See `Amazon EC2 I8g Instances <https://aws.amazon.com/ec2/instance-types/i8g/>`_ for details.
.. _system-requirements-i8ge-instances:
i8ge instances
^^^^^^^^^^^^^^
The following i8ge instances are supported.
.. list-table::
:widths: 30 20 20 30
:header-rows: 1
* - Model
- vCPU
- Mem (GiB)
- Storage (GB)
* - i8ge.large
- 2
- 16
- 1 x 1,250 GB
* - i8ge.xlarge
- 4
- 32
- 1 x 2,500 GB
* - i8ge.2xlarge
- 8
- 64
- 2 x 2,500 GB
* - i8ge.3xlarge
- 12
- 96
- 1 x 7,500 GB
* - i8ge.6xlarge
- 24
- 192
- 2 x 7,500 GB
* - i8ge.12xlarge
- 48
- 384
- 4 x 7,500 GB
* - i8ge.18xlarge
- 72
- 576
- 6 x 7,500 GB
All i8ge instances have the following specs:
* Powered by AWS Graviton4 processors
* 3rd generation AWS Nitro SSD storage
* DDR5-5600 memory for improved throughput
* Up to 300 Gbps of networking bandwidth and up to 60 Gbps of bandwidth to
Amazon Elastic Block Store (EBS)
* Instance sizes offer up to 120 TB of total local NVMe instance storage
See `Amazon EC2 I8g Instances <https://aws.amazon.com/ec2/instance-types/i8g/>`_ for details.
Im4gn and Is4gen instances
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ScyllaDB supports Arm-based Im4gn and Is4gen instances. See `Amazon EC2 Im4gn and Is4gen instances <https://aws.amazon.com/ec2/instance-types/i4g/>`_ for specification details.

View File

@@ -25,7 +25,8 @@ Getting Started
:id: "getting-started"
:class: my-panel
* :doc:`Install ScyllaDB </getting-started/install-scylla/index/>`
* `Install ScyllaDB (Binary Packages, Docker, or EC2) <https://www.scylladb.com/download/#core>`_ - Links to the ScyllaDB Download Center
* :doc:`Configure ScyllaDB </getting-started/system-configuration/>`
* :doc:`Run ScyllaDB in a Shared Environment </getting-started/scylla-in-a-shared-environment>`
* :doc:`Create a ScyllaDB Cluster - Single Data Center (DC) </operating-scylla/procedures/cluster-management/create-cluster/>`

View File

@@ -3,7 +3,8 @@
ScyllaDB Housekeeping and how to disable it
============================================
It is always recommended to run the latest stable version of ScyllaDB.
It is always recommended to run the latest version of ScyllaDB.
The latest stable release version is always available from the `Download Center <https://www.scylladb.com/download/>`_.
When you install ScyllaDB, it installs by default two services: **scylla-housekeeping-restart** and **scylla-housekeeping-daily**. These services check for the latest ScyllaDB version and prompt the user if they are using a version that is older than what is publicly available.
Information about your ScyllaDB deployment, including the ScyllaDB version currently used, as well as unique user and server identifiers, are collected by a centralized service.

View File

@@ -9,8 +9,6 @@ Running ``cluster repair`` on a **single node** synchronizes all data on all nod
To synchronize all data in clusters that have both tablets-based and vnodes-based keyspaces, run :doc:`nodetool repair -pr </operating-scylla/nodetool-commands/repair/>` on **all**
of the nodes in the cluster, and :doc:`nodetool cluster repair </operating-scylla/nodetool-commands/cluster/repair/>` on **any** of the nodes in the cluster.
.. warning:: :term:`Colocated Tables <Colocated Table>` cannot be synchronized using cluster repair in this version.
To check if a keyspace enables tablets, use:
.. code-block:: cql
@@ -55,13 +53,13 @@ ScyllaDB nodetool cluster repair command supports the following options:
nodetool cluster repair --tablet-tokens 1,10474535988
- ``--incremental-mode`` specifies the incremental repair mode. Can be 'disabled', 'incremental', or 'full'. 'incremental': The incremental repair logic is enabled. Unrepaired sstables will be included for repair. Repaired sstables will be skipped. The incremental repair states will be updated after repair. 'full': The incremental repair logic is enabled. Both repaired and unrepaired sstables will be included for repair. The incremental repair states will be updated after repair. 'disabled': The incremental repair logic is disabled completely. The incremental repair states, e.g., repaired_at in sstables and sstables_repaired_at in the system.tablets table, will not be updated after repair. When the option is not provided, it defaults to 'disabled'.
- ``--incremental-mode`` specifies the incremental repair mode. Can be 'disabled', 'incremental', or 'full'. 'incremental': The incremental repair logic is enabled. Unrepaired sstables will be included for repair. Repaired sstables will be skipped. The incremental repair states will be updated after repair. 'full': The incremental repair logic is enabled. Both repaired and unrepaired sstables will be included for repair. The incremental repair states will be updated after repair. 'disabled': The incremental repair logic is disabled completely. The incremental repair states, e.g., repaired_at in sstables and sstables_repaired_at in the system.tablets table, will not be updated after repair. When the option is not provided, it defaults to incremental.
For example:
::
nodetool cluster repair --incremental-mode disabled
nodetool cluster repair --incremental-mode regular
- ``keyspace`` executes a repair on a specific keyspace. The default is all keyspaces.

View File

@@ -17,7 +17,7 @@ SYNOPSIS
[(-u <username> | --username <username>)] snapshot
[(-cf <table> | --column-family <table> | --table <table>)]
[(-kc <kclist> | --kc.list <kclist>)]
[(-sf | --skip-flush)] [(-t <tag> | --tag <tag>)] [--] [<keyspaces...>]
[(-sf | --skip-flush)] [--use-sstable-identifier] [(-t <tag> | --tag <tag>)] [--] [<keyspaces...>]
OPTIONS
.......
@@ -37,6 +37,8 @@ Parameter Descriptio
-------------------------------------------------------------------- -------------------------------------------------------------------------------------
-sf / --skip-flush Do not flush memtables before snapshotting (snapshot will not contain unflushed data)
-------------------------------------------------------------------- -------------------------------------------------------------------------------------
--use-sstable-identifier Use the sstable identifier UUID, if available, rather than the sstable generation.
-------------------------------------------------------------------- -------------------------------------------------------------------------------------
-t <tag> / --tag <tag> The name of the snapshot
==================================================================== =====================================================================================

View File

@@ -64,12 +64,13 @@ ADMIN Logs service level operations: create, alter, drop, attach, detach, l
auditing.
========= =========================================================================================
Note that enabling audit may negatively impact performance and audit-to-table may consume extra storage. That's especially true when auditing DML and QUERY categories, which generate a high volume of audit messages.
Note that audit for every DML or QUERY might impact performance and consume a lot of storage.
Configuring Audit Storage
---------------------------
Auditing messages can be sent to :ref:`Syslog <auditing-syslog-storage>` or stored in a Scylla :ref:`table <auditing-table-storage>` or both.
Auditing messages can be sent to :ref:`Syslog <auditing-syslog-storage>` or stored in a Scylla :ref:`table <auditing-table-storage>`.
Currently, auditing messages can only be saved to one location at a time. You cannot log into both a table and the Syslog.
.. _auditing-syslog-storage:
@@ -192,23 +193,6 @@ For example:
2018-03-18 00:00:00+0000 | 10.143.2.108 | 3429b1a5-2a94-11e8-8f4e-000000000001 | DDL | ONE | False | nba | DROP TABLE nba.team_roster ; | 127.0.0.1 | team_roster | Scylla |
(1 row)
.. _auditing-table-and-syslog-storage:
Storing Audit Messages in a Table and Syslog Simultaneously
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**Procedure**
#. Follow both procedures from above, and set the ``audit`` parameter in the ``scylla.yaml`` file to both ``syslog`` and ``table``. You need to restart scylla only once.
To have both syslog and table you need to specify both backends separated by a comma:
.. code-block:: shell
audit: "syslog,table"
Handling Audit Failures
---------------------------

95
docs/poetry.lock generated
View File

@@ -2,35 +2,36 @@
[[package]]
name = "alabaster"
version = "1.0.0"
version = "0.7.16"
description = "A light, configurable Sphinx theme"
optional = false
python-versions = ">=3.10"
python-versions = ">=3.9"
groups = ["main"]
files = [
{file = "alabaster-1.0.0-py3-none-any.whl", hash = "sha256:fc6786402dc3fcb2de3cabd5fe455a2db534b371124f1f21de8731783dec828b"},
{file = "alabaster-1.0.0.tar.gz", hash = "sha256:c00dca57bca26fa62a6d7d0a9fcce65f3e026e9bfe33e9c538fd3fbb2144fd9e"},
{file = "alabaster-0.7.16-py3-none-any.whl", hash = "sha256:b46733c07dce03ae4e150330b975c75737fa60f0a7c591b6c8bf4928a28e2c92"},
{file = "alabaster-0.7.16.tar.gz", hash = "sha256:75a8b99c28a5dad50dd7f8ccdd447a121ddb3892da9e53d1ca5cca3106d58d65"},
]
[[package]]
name = "anyio"
version = "4.12.0"
version = "4.11.0"
description = "High-level concurrency and networking framework on top of asyncio or Trio"
optional = false
python-versions = ">=3.9"
groups = ["main"]
files = [
{file = "anyio-4.12.0-py3-none-any.whl", hash = "sha256:dad2376a628f98eeca4881fc56cd06affd18f659b17a747d3ff0307ced94b1bb"},
{file = "anyio-4.12.0.tar.gz", hash = "sha256:73c693b567b0c55130c104d0b43a9baf3aa6a31fc6110116509f27bf75e21ec0"},
{file = "anyio-4.11.0-py3-none-any.whl", hash = "sha256:0287e96f4d26d4149305414d4e3bc32f0dcd0862365a4bddea19d7a1ec38c4fc"},
{file = "anyio-4.11.0.tar.gz", hash = "sha256:82a8d0b81e318cc5ce71a5f1f8b5c4e63619620b63141ef8c995fa0db95a57c4"},
]
[package.dependencies]
exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""}
idna = ">=2.8"
sniffio = ">=1.1"
typing_extensions = {version = ">=4.5", markers = "python_version < \"3.13\""}
[package.extras]
trio = ["trio (>=0.31.0) ; python_version < \"3.10\"", "trio (>=0.32.0) ; python_version >= \"3.10\""]
trio = ["trio (>=0.31.0)"]
[[package]]
name = "babel"
@@ -49,14 +50,14 @@ dev = ["backports.zoneinfo ; python_version < \"3.9\"", "freezegun (>=1.0,<2.0)"
[[package]]
name = "beartype"
version = "0.22.8"
version = "0.22.6"
description = "Unbearably fast near-real-time pure-Python runtime-static type-checker."
optional = false
python-versions = ">=3.10"
groups = ["main"]
files = [
{file = "beartype-0.22.8-py3-none-any.whl", hash = "sha256:b832882d04e41a4097bab9f63e6992bc6de58c414ee84cba9b45b67314f5ab2e"},
{file = "beartype-0.22.8.tar.gz", hash = "sha256:b19b21c9359722ee3f7cc433f063b3e13997b27ae8226551ea5062e621f61165"},
{file = "beartype-0.22.6-py3-none-any.whl", hash = "sha256:0584bc46a2ea2a871509679278cda992eadde676c01356ab0ac77421f3c9a093"},
{file = "beartype-0.22.6.tar.gz", hash = "sha256:97fbda69c20b48c5780ac2ca60ce3c1bb9af29b3a1a0216898ffabdd523e48f4"},
]
[package.extras]
@@ -69,18 +70,18 @@ test-tox-coverage = ["coverage (>=5.5)"]
[[package]]
name = "beautifulsoup4"
version = "4.14.3"
version = "4.14.2"
description = "Screen-scraping library"
optional = false
python-versions = ">=3.7.0"
groups = ["main"]
files = [
{file = "beautifulsoup4-4.14.3-py3-none-any.whl", hash = "sha256:0918bfe44902e6ad8d57732ba310582e98da931428d231a5ecb9e7c703a735bb"},
{file = "beautifulsoup4-4.14.3.tar.gz", hash = "sha256:6292b1c5186d356bba669ef9f7f051757099565ad9ada5dd630bd9de5fa7fb86"},
{file = "beautifulsoup4-4.14.2-py3-none-any.whl", hash = "sha256:5ef6fa3a8cbece8488d66985560f97ed091e22bbc4e9c2338508a9d5de6d4515"},
{file = "beautifulsoup4-4.14.2.tar.gz", hash = "sha256:2a98ab9f944a11acee9cc848508ec28d9228abfd522ef0fad6a02a72e0ded69e"},
]
[package.dependencies]
soupsieve = ">=1.6.1"
soupsieve = ">1.2"
typing-extensions = ">=4.0.0"
[package.extras]
@@ -801,6 +802,18 @@ files = [
{file = "shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de"},
]
[[package]]
name = "sniffio"
version = "1.3.1"
description = "Sniff out which async library your code is running under"
optional = false
python-versions = ">=3.7"
groups = ["main"]
files = [
{file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"},
{file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
]
[[package]]
name = "snowballstemmer"
version = "3.0.1"
@@ -827,18 +840,18 @@ files = [
[[package]]
name = "sphinx"
version = "8.1.3"
version = "7.4.7"
description = "Python documentation generator"
optional = false
python-versions = ">=3.10"
python-versions = ">=3.9"
groups = ["main"]
files = [
{file = "sphinx-8.1.3-py3-none-any.whl", hash = "sha256:09719015511837b76bf6e03e42eb7595ac8c2e41eeb9c29c5b755c6b677992a2"},
{file = "sphinx-8.1.3.tar.gz", hash = "sha256:43c1911eecb0d3e161ad78611bc905d1ad0e523e4ddc202a58a821773dc4c927"},
{file = "sphinx-7.4.7-py3-none-any.whl", hash = "sha256:c2419e2135d11f1951cd994d6eb18a1835bd8fdd8429f9ca375dc1f3281bd239"},
{file = "sphinx-7.4.7.tar.gz", hash = "sha256:242f92a7ea7e6c5b406fdc2615413890ba9f699114a9c09192d7dfead2ee9cfe"},
]
[package.dependencies]
alabaster = ">=0.7.14"
alabaster = ">=0.7.14,<0.8.0"
babel = ">=2.13"
colorama = {version = ">=0.4.6", markers = "sys_platform == \"win32\""}
docutils = ">=0.20,<0.22"
@@ -848,17 +861,17 @@ packaging = ">=23.0"
Pygments = ">=2.17"
requests = ">=2.30.0"
snowballstemmer = ">=2.2"
sphinxcontrib-applehelp = ">=1.0.7"
sphinxcontrib-devhelp = ">=1.0.6"
sphinxcontrib-htmlhelp = ">=2.0.6"
sphinxcontrib-jsmath = ">=1.0.1"
sphinxcontrib-qthelp = ">=1.0.6"
sphinxcontrib-applehelp = "*"
sphinxcontrib-devhelp = "*"
sphinxcontrib-htmlhelp = ">=2.0.0"
sphinxcontrib-jsmath = "*"
sphinxcontrib-qthelp = "*"
sphinxcontrib-serializinghtml = ">=1.1.9"
tomli = {version = ">=2", markers = "python_version < \"3.11\""}
[package.extras]
docs = ["sphinxcontrib-websupport"]
lint = ["flake8 (>=6.0)", "mypy (==1.11.1)", "pyright (==1.1.384)", "pytest (>=6.0)", "ruff (==0.6.9)", "sphinx-lint (>=0.9)", "tomli (>=2)", "types-Pillow (==10.2.0.20240822)", "types-Pygments (==2.18.0.20240506)", "types-colorama (==0.4.15.20240311)", "types-defusedxml (==0.7.0.20240218)", "types-docutils (==0.21.0.20241005)", "types-requests (==2.32.0.20240914)", "types-urllib3 (==1.26.25.14)"]
lint = ["flake8 (>=6.0)", "importlib-metadata (>=6.0)", "mypy (==1.10.1)", "pytest (>=6.0)", "ruff (==0.5.2)", "sphinx-lint (>=0.9)", "tomli (>=2)", "types-docutils (==0.21.0.20240711)", "types-requests (>=2.30.0)"]
test = ["cython (>=3.0)", "defusedxml (>=0.7.1)", "pytest (>=8.0)", "setuptools (>=70.0)", "typing_extensions (>=4.9)"]
[[package]]
@@ -988,14 +1001,13 @@ test = ["tox"]
[[package]]
name = "sphinx-scylladb-markdown"
version = "0.1.4"
version = "0.1.3"
description = "Sphinx extension for ScyllaDB documentation with enhanced Markdown support through MystParser and recommonmark."
optional = false
python-versions = "*"
groups = ["main"]
files = [
{file = "sphinx_scylladb_markdown-0.1.4-py3-none-any.whl", hash = "sha256:598753e01cf159d4698eb1a707958828446e21749038d3d42c5b9c7e86eda6e4"},
{file = "sphinx_scylladb_markdown-0.1.4.tar.gz", hash = "sha256:9db3ae0dcf7c3519262da65e48c7f9e4db0ad1ce9c5f874864ea218f4cbc4c68"},
{file = "sphinx_scylladb_markdown-0.1.3-py3-none-any.whl", hash = "sha256:f20160b4aadf4c8cf95637f0a544121954b792914ab6ec05b67cae75e20a5566"},
]
[package.dependencies]
@@ -1047,25 +1059,24 @@ dev = ["build", "flake8", "pre-commit", "pytest", "sphinx", "sphinx-last-updated
[[package]]
name = "sphinx-substitution-extensions"
version = "2025.11.17"
version = "2025.1.2"
description = "Extensions for Sphinx which allow for substitutions."
optional = false
python-versions = ">=3.10"
groups = ["main"]
files = [
{file = "sphinx_substitution_extensions-2025.11.17-py2.py3-none-any.whl", hash = "sha256:ac18455bdc8324b337b0fe7498c1c0d0b1cb65c74d131459be4dea9edb6abbef"},
{file = "sphinx_substitution_extensions-2025.11.17.tar.gz", hash = "sha256:aae17f8db9efc3d454a304373ae3df763f8739e05e0b98d5381db46f6d250b27"},
{file = "sphinx_substitution_extensions-2025.1.2-py2.py3-none-any.whl", hash = "sha256:ff14f40e4393bd7434a196badb8d47983355d9755af884b902e3023fb456b958"},
{file = "sphinx_substitution_extensions-2025.1.2.tar.gz", hash = "sha256:53b8d394d5098a09aef36bc687fa310aeb28466319d2c750e996e46400fb2474"},
]
[package.dependencies]
beartype = ">=0.18.5"
docutils = ">=0.19"
myst-parser = ">=4.0.0"
sphinx = ">=8.1.0"
sphinx = ">=7.3.5"
[package.extras]
dev = ["actionlint-py (==1.7.8.24)", "check-manifest (==0.51)", "deptry (==0.24.0)", "doc8 (==2.0.0)", "doccmd (==2025.11.8.1)", "docformatter (==1.7.7)", "interrogate (==1.7.0)", "mypy-strict-kwargs (==2025.4.3)", "mypy[faster-cache] (==1.18.2)", "pre-commit (==4.4.0)", "pylint[spelling] (==4.0.3)", "pyproject-fmt (==2.11.1)", "pyright (==1.1.407)", "pyroma (==5.0)", "pytest (==9.0.1)", "pytest-cov (==7.0.0)", "ruff (==0.14.5)", "shellcheck-py (==0.11.0.1)", "shfmt-py (==3.12.0.2)", "sphinx-lint (==1.0.1)", "sphinx-toolbox (==4.0.0)", "types-docutils (==0.22.2.20251006)", "vulture (==2.14)", "yamlfix (==1.19.0)"]
release = ["check-wheel-contents (==0.6.3)"]
dev = ["actionlint-py (==1.7.5.21)", "check-manifest (==0.50)", "deptry (==0.21.2)", "doc8 (==1.1.2)", "doccmd (==2024.12.26)", "docformatter (==1.7.5)", "interrogate (==1.7.0)", "mypy-strict-kwargs (==2024.12.25)", "mypy[faster-cache] (==1.14.1)", "myst-parser (==4.0.0)", "pre-commit (==4.0.1)", "pyenchant (==3.3.0rc1)", "pylint (==3.3.3)", "pyproject-fmt (==2.5.0)", "pyright (==1.1.391)", "pyroma (==4.2)", "pytest (==8.3.4)", "pytest-cov (==6.0.0)", "ruff (==0.8.4)", "shellcheck-py (==0.10.0.1)", "shfmt-py (==3.7.0.1)", "sphinx-toolbox (==3.8.1)", "sphinx[test] (==8.1.3)", "types-docutils (==0.21.0.20241128)", "vulture (==2.14)", "yamlfix (==1.17.0)"]
release = ["check-wheel-contents (==0.6.1)"]
[[package]]
name = "sphinx-tabs"
@@ -1352,21 +1363,21 @@ files = [
[[package]]
name = "urllib3"
version = "2.6.2"
version = "2.5.0"
description = "HTTP library with thread-safe connection pooling, file post, and more."
optional = false
python-versions = ">=3.9"
groups = ["main"]
files = [
{file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
{file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
{file = "urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc"},
{file = "urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760"},
]
[package.extras]
brotli = ["brotli (>=1.2.0) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=1.2.0.0) ; platform_python_implementation != \"CPython\""]
brotli = ["brotli (>=1.0.9) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\""]
h2 = ["h2 (>=4,<5)"]
socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"]
zstd = ["backports-zstd (>=1.0.0) ; python_version < \"3.14\""]
zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "uvicorn"
@@ -1592,4 +1603,4 @@ files = [
[metadata]
lock-version = "2.1"
python-versions = "^3.10"
content-hash = "9a17caa38b3c88f3fe3d1a60fdb73a96aa12ff1e30ecb00e2f9249e7ba9f859c"
content-hash = "0ae673106f45d3465cbdabbf511e165ca44feadd34d7753f2e68093afaa95c79"

View File

@@ -12,10 +12,10 @@ redirects_cli ="^0.1.3"
sphinx-scylladb-theme = "^1.8.10"
sphinx-sitemap = "^2.6.0"
sphinx-autobuild = "^2024.4.19"
Sphinx = "^8.0.0"
Sphinx = "^7.3.7"
sphinx-multiversion-scylla = "^0.3.4"
sphinxcontrib-datatemplates = "^0.9.2"
sphinx-scylladb-markdown = "^0.1.4"
sphinx-scylladb-markdown = "^0.1.2"
sphinx_collapse ="^0.1.3"
[build-system]

View File

@@ -202,7 +202,3 @@ Glossary
The name comes from two basic operations, multiply (MU) and rotate (R), used in its inner loop.
The MurmurHash3 version used in ScyllaDB originated from `Apache Cassandra <https://commons.apache.org/proper/commons-codec/apidocs/org/apache/commons/codec/digest/MurmurHash3.html>`_, and is **not** identical to the `official MurmurHash3 calculation <https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/utils/MurmurHash.java#L31-L33>`_. More `here <https://github.com/russss/murmur3-cassandra>`_.
Colocated Table
An internal table of a special type in a :doc:`tablets </architecture/tablets>` enabled keyspace that is colocated with another base table, meaning it always has the same tablet replicas as the base table.
Current types of colocated tables include CDC log tables, local indexes, and materialized views that have the same partition key as their base table.

View File

@@ -816,6 +816,7 @@ public:
future<data_sink> wrap_sink(const sstables::sstable& sst, sstables::component_type type, data_sink sink) override {
switch (type) {
case sstables::component_type::Scylla:
case sstables::component_type::TemporaryScylla:
case sstables::component_type::TemporaryTOC:
case sstables::component_type::TOC:
co_return sink;
@@ -844,6 +845,7 @@ public:
sstables::component_type type,
data_source src) override {
switch (type) {
case sstables::component_type::TemporaryScylla:
case sstables::component_type::Scylla:
case sstables::component_type::TemporaryTOC:
case sstables::component_type::TOC:

View File

@@ -143,6 +143,7 @@ public:
gms::feature tablet_incremental_repair { *this, "TABLET_INCREMENTAL_REPAIR"sv };
gms::feature tablet_repair_scheduler { *this, "TABLET_REPAIR_SCHEDULER"sv };
gms::feature tablet_repair_tasks_table { *this, "TABLET_REPAIR_TASKS_TABLE"sv };
gms::feature tablet_merge { *this, "TABLET_MERGE"sv };
gms::feature tablet_rack_aware_view_pairing { *this, "TABLET_RACK_AWARE_VIEW_PAIRING"sv };
@@ -176,8 +177,6 @@ public:
gms::feature rack_list_rf { *this, "RACK_LIST_RF"sv };
gms::feature driver_service_level { *this, "DRIVER_SERVICE_LEVEL"sv };
gms::feature strongly_consistent_tables { *this, "STRONGLY_CONSISTENT_TABLES"sv };
gms::feature client_routes { *this, "CLIENT_ROUTES"sv };
gms::feature removenode_with_left_token_ring { *this, "REMOVENODE_WITH_LEFT_TOKEN_RING"sv };
public:
const std::unordered_map<sstring, std::reference_wrapper<feature>>& registered_features() const;

View File

@@ -15,22 +15,3 @@ with the Apache License (version 2) and ScyllaDB-Source-Available-1.0.
They contain the following tag:
SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.0 and Apache-2.0)
### `musl libc` files
`licenses/musl-license.txt` is obtained from:
https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT
`utils/crypt_sha512.cc` is obtained from:
https://git.musl-libc.org/cgit/musl/tree/src/crypt/crypt_sha512.c
Both files are obtained from git.musl-libc.org.
Import commit:
commit 1b76ff0767d01df72f692806ee5adee13c67ef88
Author: Alex Rønne Petersen <alex@alexrp.com>
Date: Sun Oct 12 05:35:19 2025 +0200
s390x: shuffle register usage in __tls_get_offset to avoid r0 as address
musl as a whole is licensed under the standard MIT license included in
`licenses/musl-license.txt`.

View File

@@ -1,193 +0,0 @@
musl as a whole is licensed under the following standard MIT license:
----------------------------------------------------------------------
Copyright © 2005-2020 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
----------------------------------------------------------------------
Authors/contributors include:
A. Wilcox
Ada Worcester
Alex Dowad
Alex Suykov
Alexander Monakov
Andre McCurdy
Andrew Kelley
Anthony G. Basile
Aric Belsito
Arvid Picciani
Bartosz Brachaczek
Benjamin Peterson
Bobby Bingham
Boris Brezillon
Brent Cook
Chris Spiegel
Clément Vasseur
Daniel Micay
Daniel Sabogal
Daurnimator
David Carlier
David Edelsohn
Denys Vlasenko
Dmitry Ivanov
Dmitry V. Levin
Drew DeVault
Emil Renner Berthing
Fangrui Song
Felix Fietkau
Felix Janda
Gianluca Anzolin
Hauke Mehrtens
He X
Hiltjo Posthuma
Isaac Dunham
Jaydeep Patil
Jens Gustedt
Jeremy Huntwork
Jo-Philipp Wich
Joakim Sindholt
John Spencer
Julien Ramseier
Justin Cormack
Kaarle Ritvanen
Khem Raj
Kylie McClain
Leah Neukirchen
Luca Barbato
Luka Perkov
Lynn Ochs
M Farkas-Dyck (Strake)
Mahesh Bodapati
Markus Wichmann
Masanori Ogino
Michael Clark
Michael Forney
Mikhail Kremnyov
Natanael Copa
Nicholas J. Kain
orc
Pascal Cuoq
Patrick Oppenlander
Petr Hosek
Petr Skocik
Pierre Carrier
Reini Urban
Rich Felker
Richard Pennington
Ryan Fairfax
Samuel Holland
Segev Finer
Shiz
sin
Solar Designer
Stefan Kristiansson
Stefan O'Rear
Szabolcs Nagy
Timo Teräs
Trutz Behn
Will Dietz
William Haddon
William Pitcock
Portions of this software are derived from third-party works licensed
under terms compatible with the above MIT license:
The TRE regular expression implementation (src/regex/reg* and
src/regex/tre*) is Copyright © 2001-2008 Ville Laurikari and licensed
under a 2-clause BSD license (license text in the source files). The
included version has been heavily modified by Rich Felker in 2012, in
the interests of size, simplicity, and namespace cleanliness.
Much of the math library code (src/math/* and src/complex/*) is
Copyright © 1993,2004 Sun Microsystems or
Copyright © 2003-2011 David Schultz or
Copyright © 2003-2009 Steven G. Kargl or
Copyright © 2003-2009 Bruce D. Evans or
Copyright © 2008 Stephen L. Moshier or
Copyright © 2017-2018 Arm Limited
and labelled as such in comments in the individual source files. All
have been licensed under extremely permissive terms.
The ARM memcpy code (src/string/arm/memcpy.S) is Copyright © 2008
The Android Open Source Project and is licensed under a two-clause BSD
license. It was taken from Bionic libc, used on Android.
The AArch64 memcpy and memset code (src/string/aarch64/*) are
Copyright © 1999-2019, Arm Limited.
The implementation of DES for crypt (src/crypt/crypt_des.c) is
Copyright © 1994 David Burren. It is licensed under a BSD license.
The implementation of blowfish crypt (src/crypt/crypt_blowfish.c) was
originally written by Solar Designer and placed into the public
domain. The code also comes with a fallback permissive license for use
in jurisdictions that may not recognize the public domain.
The smoothsort implementation (src/stdlib/qsort.c) is Copyright © 2011
Lynn Ochs and is licensed under an MIT-style license.
The x86_64 port was written by Nicholas J. Kain and is licensed under
the standard MIT terms.
The mips and microblaze ports were originally written by Richard
Pennington for use in the ellcc project. The original code was adapted
by Rich Felker for build system and code conventions during upstream
integration. It is licensed under the standard MIT terms.
The mips64 port was contributed by Imagination Technologies and is
licensed under the standard MIT terms.
The powerpc port was also originally written by Richard Pennington,
and later supplemented and integrated by John Spencer. It is licensed
under the standard MIT terms.
All other files which have no copyright comments are original works
produced specifically for use as part of this library, written either
by Rich Felker, the main author of the library, or by one or more
contibutors listed above. Details on authorship of individual files
can be found in the git version control history of the project. The
omission of copyright and license comments in each file is in the
interest of source tree size.
In addition, permission is hereby granted for all public header files
(include/* and arch/*/bits/*) and crt files intended to be linked into
applications (crt/*, ldso/dlstart.c, and arch/*/crt_arch.h) to omit
the copyright notice and permission notice otherwise required by the
license, and to use these files without any requirement of
attribution. These files include substantial contributions from:
Bobby Bingham
John Spencer
Nicholas J. Kain
Rich Felker
Richard Pennington
Stefan Kristiansson
Szabolcs Nagy
all of whom have explicitly granted such permission.
This file previously contained text expressing a belief that most of
the files covered by the above exception were sufficiently trivial not
to be subject to copyright, resulting in confusion over whether it
negated the permissions granted in the license. In the spirit of
permissive licensing, and of not having licensing issues being an
obstacle to adoption, that text has been removed.

View File

@@ -200,10 +200,7 @@ enum class tablet_repair_incremental_mode : uint8_t {
disabled,
};
// FIXME: Incremental repair is disabled by default due to
// https://github.com/scylladb/scylladb/issues/26041 and
// https://github.com/scylladb/scylladb/issues/27414
constexpr tablet_repair_incremental_mode default_tablet_repair_incremental_mode{tablet_repair_incremental_mode::disabled};
constexpr tablet_repair_incremental_mode default_tablet_repair_incremental_mode{tablet_repair_incremental_mode::incremental};
sstring tablet_repair_incremental_mode_to_string(tablet_repair_incremental_mode);
tablet_repair_incremental_mode tablet_repair_incremental_mode_from_string(const sstring&);

View File

@@ -235,6 +235,9 @@ public:
const topology& get_topology() const;
void debug_show() const;
/** Return the unique host ID for an end-point. */
host_id get_host_id(inet_address endpoint) const;
/** @return a copy of the endpoint-to-id map for read-only operations */
std::unordered_set<host_id> get_host_ids() const;

26
main.cc
View File

@@ -23,7 +23,6 @@
#include <seastar/core/future.hh>
#include <seastar/core/signal.hh>
#include <seastar/core/timer.hh>
#include "service/client_routes.hh"
#include "service/qos/raft_service_level_distributed_data_accessor.hh"
#include "db/view/view_building_state.hh"
#include "tasks/task_manager.hh"
@@ -749,6 +748,8 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
// inherit Seastar's CPU affinity masks. We want this thread to be free
// to migrate between CPUs; we think that's what makes the most sense.
auto rpc_dict_training_worker = utils::alien_worker(startlog, 19, "rpc-dict");
// niceness=10 is ~10% of normal process time
auto hashing_worker = utils::alien_worker(startlog, 10, "pwd-hash");
return app.run(ac, av, [&] () -> future<int> {
@@ -778,7 +779,8 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
return seastar::async([&app, cfg, ext, &disk_space_monitor_shard0, &cm, &sstm, &db, &qp, &bm, &proxy, &mapreduce_service, &mm, &mm_notifier, &ctx, &opts, &dirs,
&prometheus_server, &cf_cache_hitrate_calculator, &load_meter, &feature_service, &gossiper, &snitch,
&token_metadata, &erm_factory, &snapshot_ctl, &messaging, &sst_dir_semaphore, &raft_gr, &service_memory_limiter,
&repair, &sst_loader, &auth_cache, &ss, &lifecycle_notifier, &stream_manager, &task_manager, &rpc_dict_training_worker, &vector_store_client] {
&repair, &sst_loader, &auth_cache, &ss, &lifecycle_notifier, &stream_manager, &task_manager, &rpc_dict_training_worker,
&hashing_worker, &vector_store_client] {
try {
if (opts.contains("relabel-config-file") && !opts["relabel-config-file"].as<sstring>().empty()) {
// calling update_relabel_config_from_file can cause an exception that would stop startup
@@ -1796,13 +1798,6 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
auth_cache.stop().get();
});
checkpoint(stop_signal, "initializing client routes service");
static sharded<service::client_routes_service> client_routes;
client_routes.start(std::ref(stop_signal.as_sharded_abort_source()), std::ref(feature_service), std::ref(group0_client), std::ref(qp), std::ref(lifecycle_notifier)).get();
auto stop_client_routes = defer_verbose_shutdown("client_routes", [&] {
client_routes.stop().get();
});
checkpoint(stop_signal, "initializing storage service");
debug::the_storage_service = &ss;
ss.start(std::ref(stop_signal.as_sharded_abort_source()),
@@ -1811,7 +1806,7 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
std::ref(messaging), std::ref(repair),
std::ref(stream_manager), std::ref(lifecycle_notifier), std::ref(bm), std::ref(snitch),
std::ref(tablet_allocator), std::ref(cdc_generation_service), std::ref(view_builder), std::ref(view_building_worker), std::ref(qp), std::ref(sl_controller),
std::ref(auth_cache), std::ref(client_routes),
std::ref(auth_cache),
std::ref(tsm), std::ref(vbsm), std::ref(task_manager), std::ref(gossip_address_map),
compression_dict_updated_callback,
only_on_shard0(&*disk_space_monitor_shard0)
@@ -2065,7 +2060,7 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
maintenance_auth_config.authenticator_java_name = sstring{auth::allow_all_authenticator_name};
maintenance_auth_config.role_manager_java_name = sstring{auth::maintenance_socket_role_manager_name};
maintenance_auth_service.start(perm_cache_config, std::ref(qp), std::ref(group0_client), std::ref(mm_notifier), std::ref(mm), maintenance_auth_config, maintenance_socket_enabled::yes, std::ref(auth_cache)).get();
maintenance_auth_service.start(perm_cache_config, std::ref(qp), std::ref(group0_client), std::ref(mm_notifier), std::ref(mm), maintenance_auth_config, maintenance_socket_enabled::yes, std::ref(auth_cache), std::ref(hashing_worker)).get();
cql_maintenance_server_ctl.emplace(maintenance_auth_service, mm_notifier, gossiper, qp, service_memory_limiter, sl_controller, lifecycle_notifier, *cfg, maintenance_cql_sg_stats_key, maintenance_socket_enabled::yes, dbcfg.statement_scheduling_group);
@@ -2199,11 +2194,6 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
});
}).get();
api::set_server_client_routes(ctx, client_routes).get();
auto stop_cr_api = defer_verbose_shutdown("client routes API", [&ctx] {
api::unset_server_client_routes(ctx).get();
});
checkpoint(stop_signal, "join cluster");
// Allow abort during join_cluster since bootstrap or replace
// can take a long time.
@@ -2346,7 +2336,7 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
auth_config.authenticator_java_name = qualified_authenticator_name;
auth_config.role_manager_java_name = qualified_role_manager_name;
auth_service.start(std::move(perm_cache_config), std::ref(qp), std::ref(group0_client), std::ref(mm_notifier), std::ref(mm), auth_config, maintenance_socket_enabled::no, std::ref(auth_cache)).get();
auth_service.start(std::move(perm_cache_config), std::ref(qp), std::ref(group0_client), std::ref(mm_notifier), std::ref(mm), auth_config, maintenance_socket_enabled::no, std::ref(auth_cache), std::ref(hashing_worker)).get();
std::any stop_auth_service;
// Has to be called after node joined the cluster (join_cluster())
@@ -2390,7 +2380,7 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
checkpoint(stop_signal, "starting batchlog manager");
db::batchlog_manager_config bm_cfg;
bm_cfg.replay_timeout = cfg->write_request_timeout_in_ms() * 1ms * 2;
bm_cfg.write_request_timeout = cfg->write_request_timeout_in_ms() * 1ms;
bm_cfg.replay_rate = cfg->batchlog_replay_throttle_in_kb() * 1000;
bm_cfg.delay = std::chrono::milliseconds(cfg->ring_delay_ms());
bm_cfg.replay_cleanup_after_replays = cfg->batchlog_replay_cleanup_after_replays();

View File

@@ -56,16 +56,33 @@ static tasks::task_manager::task_state get_state(const db::system_keyspace::topo
}
}
static future<db::system_keyspace::topology_requests_entries> get_entries(db::system_keyspace& sys_ks, std::chrono::seconds ttl) {
return sys_ks.get_node_ops_request_entries(db_clock::now() - ttl);
static std::set<tasks::task_id> get_pending_ids(service::topology& topology) {
std::set<tasks::task_id> ids;
for (auto& request : topology.requests) {
ids.emplace(topology.find(request.first)->second.request_id);
}
return ids;
}
future<std::optional<tasks::task_status>> node_ops_virtual_task::get_status(tasks::task_id id, tasks::virtual_task_hint hint) {
auto entry_opt = co_await _ss._sys_ks.local().get_topology_request_entry_opt(id.uuid());
if (!entry_opt) {
static future<db::system_keyspace::topology_requests_entries> get_entries(db::system_keyspace& sys_ks, service::topology& topology, std::chrono::seconds ttl) {
// Started requests.
auto entries = co_await sys_ks.get_node_ops_request_entries(db_clock::now() - ttl);
// Pending requests.
for (auto& id : get_pending_ids(topology)) {
entries.try_emplace(id.uuid(), db::system_keyspace::topology_requests_entry{});
}
co_return entries;
}
future<std::optional<tasks::task_status>> node_ops_virtual_task::get_status_helper(tasks::task_id id, tasks::virtual_task_hint hint) const {
auto entry = co_await _ss._sys_ks.local().get_topology_request_entry(id.uuid(), false);
auto started = entry.id;
service::topology& topology = _ss._topology_state_machine._topology;
if (!started && !get_pending_ids(topology).contains(id)) {
co_return std::nullopt;
}
auto& entry = *entry_opt;
co_return tasks::task_status{
.task_id = id,
.type = request_type_to_task_type(entry.request_type),
@@ -84,7 +101,7 @@ future<std::optional<tasks::task_status>> node_ops_virtual_task::get_status(task
.entity = "",
.progress_units = "",
.progress = tasks::task_manager::task::progress{},
.children = co_await get_children(get_module(), id, std::bind_front(&gms::gossiper::is_alive, &_ss.gossiper()))
.children = started ? co_await get_children(get_module(), id, std::bind_front(&gms::gossiper::is_alive, &_ss.gossiper())) : utils::chunked_vector<tasks::task_identity>{}
};
}
@@ -106,22 +123,26 @@ future<std::optional<tasks::virtual_task_hint>> node_ops_virtual_task::contains(
}
}
auto entry = co_await _ss._sys_ks.local().get_topology_request_entry_opt(task_id.uuid());
co_return entry && std::holds_alternative<service::topology_request>(entry->request_type) ? empty_hint : std::nullopt;
auto entry = co_await _ss._sys_ks.local().get_topology_request_entry(task_id.uuid(), false);
co_return bool(entry.id) && std::holds_alternative<service::topology_request>(entry.request_type) ? empty_hint : std::nullopt;
}
future<tasks::is_abortable> node_ops_virtual_task::is_abortable(tasks::virtual_task_hint) const {
return make_ready_future<tasks::is_abortable>(tasks::is_abortable::no);
}
future<std::optional<tasks::task_status>> node_ops_virtual_task::get_status(tasks::task_id id, tasks::virtual_task_hint hint) {
return get_status_helper(id, std::move(hint));
}
future<std::optional<tasks::task_status>> node_ops_virtual_task::wait(tasks::task_id id, tasks::virtual_task_hint hint) {
auto entry = co_await get_status(id, hint);
auto entry = co_await get_status_helper(id, hint);
if (!entry) {
co_return std::nullopt;
}
co_await _ss.wait_for_topology_request_completion(id.uuid(), false);
co_return co_await get_status(id, std::move(hint));
co_return co_await get_status_helper(id, std::move(hint));
}
future<> node_ops_virtual_task::abort(tasks::task_id id, tasks::virtual_task_hint) noexcept {
@@ -130,7 +151,8 @@ future<> node_ops_virtual_task::abort(tasks::task_id id, tasks::virtual_task_hin
future<std::vector<tasks::task_stats>> node_ops_virtual_task::get_stats() {
db::system_keyspace& sys_ks = _ss._sys_ks.local();
co_return std::ranges::to<std::vector<tasks::task_stats>>(co_await get_entries(sys_ks, get_task_manager().get_user_task_ttl())
service::topology& topology = _ss._topology_state_machine._topology;
co_return std::ranges::to<std::vector<tasks::task_stats>>(co_await get_entries(sys_ks, topology, get_task_manager().get_user_task_ttl())
| std::views::transform([] (const auto& e) {
auto id = e.first;
auto& entry = e.second;

Some files were not shown because too many files have changed in this diff Show More