Compare commits
262 Commits
copilot/fi
...
copilot/fi
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c06760cf15 | ||
|
|
c684456eba | ||
|
|
cd2568ad00 | ||
|
|
7586c5ccbd | ||
|
|
d60b908a8e | ||
|
|
20ff2fcc18 | ||
|
|
6ffdada0ea | ||
|
|
4c247a5d08 | ||
|
|
288d4b49e9 | ||
|
|
e304d912b4 | ||
|
|
846a6e700b | ||
|
|
af5e73def9 | ||
|
|
9793a45288 | ||
|
|
033579ad6f | ||
|
|
c1da552fa4 | ||
|
|
cb3b96b8f4 | ||
|
|
b105ad8379 | ||
|
|
addac8b3f7 | ||
|
|
ea95cdaaec | ||
|
|
28cbaef110 | ||
|
|
85adf6bdb1 | ||
|
|
3a54bab193 | ||
|
|
f65db4e8eb | ||
|
|
df2ac0f257 | ||
|
|
093e97a539 | ||
|
|
fa6e5d0754 | ||
|
|
08518b2c12 | ||
|
|
2a75b1374e | ||
|
|
2cb9bb8f3a | ||
|
|
f1d63d014c | ||
|
|
33f7bc28da | ||
|
|
f831ca5ab5 | ||
|
|
1fe0509a9b | ||
|
|
e7d76fd8f3 | ||
|
|
700853740d | ||
|
|
3c5dd5e5ae | ||
|
|
5971b2ad97 | ||
|
|
f89315d02f | ||
|
|
d5c205194b | ||
|
|
6ad10b141a | ||
|
|
8cf8e6c87d | ||
|
|
3a06c32749 | ||
|
|
74ab5addd3 | ||
|
|
55f4a2b754 | ||
|
|
1642c686c2 | ||
|
|
9431826c52 | ||
|
|
ba6fabfc88 | ||
|
|
a6618f225c | ||
|
|
0bfd07a268 | ||
|
|
c077283352 | ||
|
|
7061384a27 | ||
|
|
7bc59e93b2 | ||
|
|
a61c221902 | ||
|
|
798714183e | ||
|
|
f5ca3657e2 | ||
|
|
dc00461adf | ||
|
|
be6d87648c | ||
|
|
004c08f525 | ||
|
|
386ec0af4e | ||
|
|
c4496dd63c | ||
|
|
84df5cfaf8 | ||
|
|
f06db096bd | ||
|
|
31f90c089c | ||
|
|
c58739de6a | ||
|
|
9e18cfbe17 | ||
|
|
7900aa5319 | ||
|
|
9d20f0a3d2 | ||
|
|
0476e8d272 | ||
|
|
e48789cf6c | ||
|
|
9039dfa4a5 | ||
|
|
1884e655d6 | ||
|
|
640c491388 | ||
|
|
cd83d1d4dc | ||
|
|
bbe0b01b14 | ||
|
|
2e7ba1f8ce | ||
|
|
b3a0e4c2dc | ||
|
|
08e5f35527 | ||
|
|
d66a36058b | ||
|
|
6681c0f33f | ||
|
|
13e9ee3f6f | ||
|
|
71e6ef90f4 | ||
|
|
902803babd | ||
|
|
4ed17c9e88 | ||
|
|
73db5c94de | ||
|
|
85f05fbe1b | ||
|
|
83f46fa7f5 | ||
|
|
ea6f2a21c6 | ||
|
|
30c4bc3f96 | ||
|
|
77fa936edc | ||
|
|
0ec485845b | ||
|
|
dace39fd6c | ||
|
|
5f8f724d78 | ||
|
|
df93ea626b | ||
|
|
74347625f9 | ||
|
|
f1fc5cc808 | ||
|
|
61bbea51ad | ||
|
|
c2b1b10ca0 | ||
|
|
ec87b92ba1 | ||
|
|
9c9371511f | ||
|
|
2e80997630 | ||
|
|
1143acaf5b | ||
|
|
e153cc434f | ||
|
|
4e106b9820 | ||
|
|
64d9c370ee | ||
|
|
a3959fe3db | ||
|
|
4fa4f40712 | ||
|
|
844545bb74 | ||
|
|
ccacea621f | ||
|
|
f287484f4d | ||
|
|
f4a4671ad6 | ||
|
|
70a0418102 | ||
|
|
6fcc1ecf94 | ||
|
|
8dde70d04c | ||
|
|
2e7070d3b7 | ||
|
|
3f7ee3ce5d | ||
|
|
a9442e6d56 | ||
|
|
1e14c08eee | ||
|
|
b9ec1180f5 | ||
|
|
12483d8c3c | ||
|
|
d5641398f5 | ||
|
|
d21faab9dc | ||
|
|
30f6a40ae6 | ||
|
|
5579489c4c | ||
|
|
17c9d640fe | ||
|
|
f98af582a7 | ||
|
|
c06e63daed | ||
|
|
c1c3b2c5bb | ||
|
|
5e7456936e | ||
|
|
e6f5f2537e | ||
|
|
76aacc00f2 | ||
|
|
7e7e378a4b | ||
|
|
77ee7f3417 | ||
|
|
0ff89a58be | ||
|
|
f7ffa395a8 | ||
|
|
3fa3b920de | ||
|
|
e7ca52ee79 | ||
|
|
730eca5dac | ||
|
|
c8cff94a5a | ||
|
|
5fae4cdf80 | ||
|
|
8bbcaacba1 | ||
|
|
3dfa5ebd7f | ||
|
|
24264e24bb | ||
|
|
0c64e3be9a | ||
|
|
b3b0860e7c | ||
|
|
db15c212a6 | ||
|
|
3595941020 | ||
|
|
102516a787 | ||
|
|
d5b63df46e | ||
|
|
f545ed37bc | ||
|
|
5f13880a91 | ||
|
|
8c4ac457af | ||
|
|
e48170ca8e | ||
|
|
11ad32c85e | ||
|
|
4c8c9cd548 | ||
|
|
98f431dd81 | ||
|
|
4ffdb0721f | ||
|
|
775906d749 | ||
|
|
11eca621b0 | ||
|
|
d7818b56df | ||
|
|
033fed5734 | ||
|
|
c6c30b7d0a | ||
|
|
5afcec4a3d | ||
|
|
9b5f3d12a3 | ||
|
|
0e51a1f812 | ||
|
|
8b807b299e | ||
|
|
07ff659849 | ||
|
|
d3e199984e | ||
|
|
8822c23ad4 | ||
|
|
be9992cfb3 | ||
|
|
daf00a7f24 | ||
|
|
62962f33bb | ||
|
|
060c2f7c0d | ||
|
|
64149b57c3 | ||
|
|
4b004fcdfc | ||
|
|
5e38b3071b | ||
|
|
225b3351fc | ||
|
|
80c6718ea8 | ||
|
|
855b91ec20 | ||
|
|
95e303faf3 | ||
|
|
8ba595e472 | ||
|
|
608eee0357 | ||
|
|
0bcc2977bb | ||
|
|
3a865fe991 | ||
|
|
fb32e1c7ee | ||
|
|
b6895f0fa7 | ||
|
|
c30b326033 | ||
|
|
807fc68dc5 | ||
|
|
51843195f7 | ||
|
|
7038b8b544 | ||
|
|
7df610b73d | ||
|
|
386309d6a0 | ||
|
|
a213e41250 | ||
|
|
58dc414912 | ||
|
|
d883ff2317 | ||
|
|
1f777da863 | ||
|
|
faad0167d7 | ||
|
|
0115a21b9a | ||
|
|
71834ce7dd | ||
|
|
df21112c39 | ||
|
|
bd339cc4d8 | ||
|
|
91bf23eea1 | ||
|
|
f925ed176b | ||
|
|
68dcd1b1b2 | ||
|
|
6fd5160947 | ||
|
|
3fc914ca59 | ||
|
|
6ef7ad9b5a | ||
|
|
581b8ace83 | ||
|
|
8192f45e84 | ||
|
|
c6eec4eeef | ||
|
|
9bef142328 | ||
|
|
74bf24a4a7 | ||
|
|
e60bcd0011 | ||
|
|
aa908ba99c | ||
|
|
529cd25c51 | ||
|
|
4fc5fcaec4 | ||
|
|
3253b05ec9 | ||
|
|
597a2ce5f9 | ||
|
|
a5f19af050 | ||
|
|
b4fe565f07 | ||
|
|
19b6207f17 | ||
|
|
ff52550739 | ||
|
|
e654045755 | ||
|
|
07b92a1ee8 | ||
|
|
7504d10d9e | ||
|
|
28cb300d0a | ||
|
|
9b3fbedc8c | ||
|
|
420fb1fd53 | ||
|
|
7c62417b54 | ||
|
|
1c45ad7cee | ||
|
|
c18133b6cb | ||
|
|
e762027943 | ||
|
|
8edd5b80ab | ||
|
|
fb84b30f88 | ||
|
|
8545f7eedd | ||
|
|
e52e1f842e | ||
|
|
0a7df4b8ac | ||
|
|
9bb8156f02 | ||
|
|
d1b796bc43 | ||
|
|
1ad64731bc | ||
|
|
abadb8ebfb | ||
|
|
54f16f9019 | ||
|
|
b584e1e18e | ||
|
|
aa1d3f1170 | ||
|
|
e309b5dbe1 | ||
|
|
846b656610 | ||
|
|
ee851266be | ||
|
|
9434ec2fd1 | ||
|
|
f54602daf0 | ||
|
|
097c2cd676 | ||
|
|
4f30807f01 | ||
|
|
55704908a0 | ||
|
|
337f417b13 | ||
|
|
6221c58325 | ||
|
|
705af2bc16 | ||
|
|
5b5f9120d0 | ||
|
|
6c115c691f | ||
|
|
5924c36b50 | ||
|
|
ad6a73c29b | ||
|
|
4ec0fa6eb5 | ||
|
|
c313b215e4 | ||
|
|
7c612e1789 | ||
|
|
f0e2941e34 |
8
.github/CODEOWNERS
vendored
8
.github/CODEOWNERS
vendored
@@ -1,5 +1,5 @@
|
|||||||
# AUTH
|
# AUTH
|
||||||
auth/* @nuivall @ptrsmrn
|
auth/* @nuivall
|
||||||
|
|
||||||
# CACHE
|
# CACHE
|
||||||
row_cache* @tgrabiec
|
row_cache* @tgrabiec
|
||||||
@@ -25,11 +25,11 @@ compaction/* @raphaelsc
|
|||||||
transport/*
|
transport/*
|
||||||
|
|
||||||
# CQL QUERY LANGUAGE
|
# CQL QUERY LANGUAGE
|
||||||
cql3/* @tgrabiec @nuivall @ptrsmrn
|
cql3/* @tgrabiec @nuivall
|
||||||
|
|
||||||
# COUNTERS
|
# COUNTERS
|
||||||
counters* @nuivall @ptrsmrn
|
counters* @nuivall
|
||||||
tests/counter_test* @nuivall @ptrsmrn
|
tests/counter_test* @nuivall
|
||||||
|
|
||||||
# DOCS
|
# DOCS
|
||||||
docs/* @annastuchlik @tzach
|
docs/* @annastuchlik @tzach
|
||||||
|
|||||||
2
.github/scripts/auto-backport.py
vendored
2
.github/scripts/auto-backport.py
vendored
@@ -62,7 +62,7 @@ def create_pull_request(repo, new_branch_name, base_branch_name, pr, backport_pr
|
|||||||
if is_draft:
|
if is_draft:
|
||||||
labels_to_add.append("conflicts")
|
labels_to_add.append("conflicts")
|
||||||
pr_comment = f"@{pr.user.login} - This PR was marked as draft because it has conflicts\n"
|
pr_comment = f"@{pr.user.login} - This PR was marked as draft because it has conflicts\n"
|
||||||
pr_comment += "Please resolve them and mark this PR as ready for review"
|
pr_comment += "Please resolve them and remove the 'conflicts' label. The PR will be made ready for review automatically."
|
||||||
backport_pr.create_issue_comment(pr_comment)
|
backport_pr.create_issue_comment(pr_comment)
|
||||||
|
|
||||||
# Apply all labels at once if we have any
|
# Apply all labels at once if we have any
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ jobs:
|
|||||||
|
|
||||||
// Regular expression pattern to check for "Fixes" prefix
|
// Regular expression pattern to check for "Fixes" prefix
|
||||||
// Adjusted to dynamically insert the repository full name
|
// Adjusted to dynamically insert the repository full name
|
||||||
const pattern = `Fixes:? (?:#|${repo.replace('/', '\\/')}#|https://github\\.com/${repo.replace('/', '\\/')}/issues/)(\\d+)`;
|
const pattern = `Fixes:? ((?:#|${repo.replace('/', '\\/')}#|https://github\\.com/${repo.replace('/', '\\/')}/issues/)(\\d+)|([A-Z]+-\\d+))`;
|
||||||
const regex = new RegExp(pattern);
|
const regex = new RegExp(pattern);
|
||||||
|
|
||||||
if (!regex.test(body)) {
|
if (!regex.test(body)) {
|
||||||
|
|||||||
14
.github/workflows/call_sync_milestone_to_jira.yml
vendored
Normal file
14
.github/workflows/call_sync_milestone_to_jira.yml
vendored
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
name: Call Jira release creation for new milestone
|
||||||
|
|
||||||
|
on:
|
||||||
|
milestone:
|
||||||
|
types: [created]
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
sync-milestone-to-jira:
|
||||||
|
uses: scylladb/github-automation/.github/workflows/main_sync_milestone_to_jira_release.yml@main
|
||||||
|
with:
|
||||||
|
# Comma-separated list of Jira project keys
|
||||||
|
jira_project_keys: "SCYLLADB,CUSTOMER"
|
||||||
|
secrets:
|
||||||
|
caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
|
||||||
10
.github/workflows/docs-validate-metrics.yml
vendored
10
.github/workflows/docs-validate-metrics.yml
vendored
@@ -7,7 +7,7 @@ on:
|
|||||||
- enterprise
|
- enterprise
|
||||||
paths:
|
paths:
|
||||||
- '**/*.cc'
|
- '**/*.cc'
|
||||||
- 'scripts/metrics-config.yml'
|
- 'scripts/metrics-config.yml'
|
||||||
- 'scripts/get_description.py'
|
- 'scripts/get_description.py'
|
||||||
- 'docs/_ext/scylladb_metrics.py'
|
- 'docs/_ext/scylladb_metrics.py'
|
||||||
|
|
||||||
@@ -15,20 +15,20 @@ jobs:
|
|||||||
validate-metrics:
|
validate-metrics:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
name: Check metrics documentation coverage
|
name: Check metrics documentation coverage
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
|
|
||||||
- name: Set up Python
|
- name: Set up Python
|
||||||
uses: actions/setup-python@v6
|
uses: actions/setup-python@v6
|
||||||
with:
|
with:
|
||||||
python-version: '3.10'
|
python-version: '3.10'
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: pip install PyYAML
|
run: pip install PyYAML
|
||||||
|
|
||||||
- name: Validate metrics
|
- name: Validate metrics
|
||||||
run: python3 scripts/get_description.py --validate -c scripts/metrics-config.yml
|
run: python3 scripts/get_description.py --validate -c scripts/metrics-config.yml
|
||||||
|
|||||||
5
.github/workflows/trigger-scylla-ci.yaml
vendored
5
.github/workflows/trigger-scylla-ci.yaml
vendored
@@ -3,10 +3,13 @@ name: Trigger Scylla CI Route
|
|||||||
on:
|
on:
|
||||||
issue_comment:
|
issue_comment:
|
||||||
types: [created]
|
types: [created]
|
||||||
|
pull_request_target:
|
||||||
|
types:
|
||||||
|
- unlabeled
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
trigger-jenkins:
|
trigger-jenkins:
|
||||||
if: github.event.comment.user.login != 'scylladbbot' && contains(github.event.comment.body, '@scylladbbot') && contains(github.event.comment.body, 'trigger-ci')
|
if: (github.event.comment.user.login != 'scylladbbot' && contains(github.event.comment.body, '@scylladbbot') && contains(github.event.comment.body, 'trigger-ci')) || github.event.label.name == 'conflicts'
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Trigger Scylla-CI-Route Jenkins Job
|
- name: Trigger Scylla-CI-Route Jenkins Job
|
||||||
|
|||||||
@@ -42,7 +42,7 @@ comparison_operator_type get_comparison_operator(const rjson::value& comparison_
|
|||||||
if (!comparison_operator.IsString()) {
|
if (!comparison_operator.IsString()) {
|
||||||
throw api_error::validation(fmt::format("Invalid comparison operator definition {}", rjson::print(comparison_operator)));
|
throw api_error::validation(fmt::format("Invalid comparison operator definition {}", rjson::print(comparison_operator)));
|
||||||
}
|
}
|
||||||
std::string op = comparison_operator.GetString();
|
std::string op = rjson::to_string(comparison_operator);
|
||||||
auto it = ops.find(op);
|
auto it = ops.find(op);
|
||||||
if (it == ops.end()) {
|
if (it == ops.end()) {
|
||||||
throw api_error::validation(fmt::format("Unsupported comparison operator {}", op));
|
throw api_error::validation(fmt::format("Unsupported comparison operator {}", op));
|
||||||
@@ -377,8 +377,8 @@ bool check_compare(const rjson::value* v1, const rjson::value& v2, const Compara
|
|||||||
return cmp(unwrap_number(*v1, cmp.diagnostic), unwrap_number(v2, cmp.diagnostic));
|
return cmp(unwrap_number(*v1, cmp.diagnostic), unwrap_number(v2, cmp.diagnostic));
|
||||||
}
|
}
|
||||||
if (kv1.name == "S") {
|
if (kv1.name == "S") {
|
||||||
return cmp(std::string_view(kv1.value.GetString(), kv1.value.GetStringLength()),
|
return cmp(rjson::to_string_view(kv1.value),
|
||||||
std::string_view(kv2.value.GetString(), kv2.value.GetStringLength()));
|
rjson::to_string_view(kv2.value));
|
||||||
}
|
}
|
||||||
if (kv1.name == "B") {
|
if (kv1.name == "B") {
|
||||||
auto d_kv1 = unwrap_bytes(kv1.value, v1_from_query);
|
auto d_kv1 = unwrap_bytes(kv1.value, v1_from_query);
|
||||||
@@ -470,9 +470,9 @@ static bool check_BETWEEN(const rjson::value* v, const rjson::value& lb, const r
|
|||||||
return check_BETWEEN(unwrap_number(*v, diag), unwrap_number(lb, diag), unwrap_number(ub, diag), bounds_from_query);
|
return check_BETWEEN(unwrap_number(*v, diag), unwrap_number(lb, diag), unwrap_number(ub, diag), bounds_from_query);
|
||||||
}
|
}
|
||||||
if (kv_v.name == "S") {
|
if (kv_v.name == "S") {
|
||||||
return check_BETWEEN(std::string_view(kv_v.value.GetString(), kv_v.value.GetStringLength()),
|
return check_BETWEEN(rjson::to_string_view(kv_v.value),
|
||||||
std::string_view(kv_lb.value.GetString(), kv_lb.value.GetStringLength()),
|
rjson::to_string_view(kv_lb.value),
|
||||||
std::string_view(kv_ub.value.GetString(), kv_ub.value.GetStringLength()),
|
rjson::to_string_view(kv_ub.value),
|
||||||
bounds_from_query);
|
bounds_from_query);
|
||||||
}
|
}
|
||||||
if (kv_v.name == "B") {
|
if (kv_v.name == "B") {
|
||||||
|
|||||||
@@ -8,6 +8,8 @@
|
|||||||
|
|
||||||
#include "consumed_capacity.hh"
|
#include "consumed_capacity.hh"
|
||||||
#include "error.hh"
|
#include "error.hh"
|
||||||
|
#include "utils/rjson.hh"
|
||||||
|
#include <fmt/format.h>
|
||||||
|
|
||||||
namespace alternator {
|
namespace alternator {
|
||||||
|
|
||||||
@@ -32,12 +34,12 @@ bool consumed_capacity_counter::should_add_capacity(const rjson::value& request)
|
|||||||
if (!return_consumed->IsString()) {
|
if (!return_consumed->IsString()) {
|
||||||
throw api_error::validation("Non-string ReturnConsumedCapacity field in request");
|
throw api_error::validation("Non-string ReturnConsumedCapacity field in request");
|
||||||
}
|
}
|
||||||
std::string consumed = return_consumed->GetString();
|
std::string_view consumed = rjson::to_string_view(*return_consumed);
|
||||||
if (consumed == "INDEXES") {
|
if (consumed == "INDEXES") {
|
||||||
throw api_error::validation("INDEXES consumed capacity is not supported");
|
throw api_error::validation("INDEXES consumed capacity is not supported");
|
||||||
}
|
}
|
||||||
if (consumed != "TOTAL") {
|
if (consumed != "TOTAL") {
|
||||||
throw api_error::validation("Unknown consumed capacity "+ consumed);
|
throw api_error::validation(fmt::format("Unknown consumed capacity {}", consumed));
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -169,7 +169,7 @@ future<> controller::request_stop_server() {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
future<utils::chunked_vector<client_data>> controller::get_client_data() {
|
future<utils::chunked_vector<foreign_ptr<std::unique_ptr<client_data>>>> controller::get_client_data() {
|
||||||
return _server.local().get_client_data();
|
return _server.local().get_client_data();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -93,7 +93,7 @@ public:
|
|||||||
// This virtual function is called (on each shard separately) when the
|
// This virtual function is called (on each shard separately) when the
|
||||||
// virtual table "system.clients" is read. It is expected to generate a
|
// virtual table "system.clients" is read. It is expected to generate a
|
||||||
// list of clients connected to this server (on this shard).
|
// list of clients connected to this server (on this shard).
|
||||||
virtual future<utils::chunked_vector<client_data>> get_client_data() override;
|
virtual future<utils::chunked_vector<foreign_ptr<std::unique_ptr<client_data>>>> get_client_data() override;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -419,7 +419,7 @@ static std::optional<std::string> find_table_name(const rjson::value& request) {
|
|||||||
if (!table_name_value->IsString()) {
|
if (!table_name_value->IsString()) {
|
||||||
throw api_error::validation("Non-string TableName field in request");
|
throw api_error::validation("Non-string TableName field in request");
|
||||||
}
|
}
|
||||||
std::string table_name = table_name_value->GetString();
|
std::string table_name = rjson::to_string(*table_name_value);
|
||||||
return table_name;
|
return table_name;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -546,7 +546,7 @@ get_table_or_view(service::storage_proxy& proxy, const rjson::value& request) {
|
|||||||
// does exist but the index does not (ValidationException).
|
// does exist but the index does not (ValidationException).
|
||||||
if (proxy.data_dictionary().has_schema(keyspace_name, orig_table_name)) {
|
if (proxy.data_dictionary().has_schema(keyspace_name, orig_table_name)) {
|
||||||
throw api_error::validation(
|
throw api_error::validation(
|
||||||
fmt::format("Requested resource not found: Index '{}' for table '{}'", index_name->GetString(), orig_table_name));
|
fmt::format("Requested resource not found: Index '{}' for table '{}'", rjson::to_string_view(*index_name), orig_table_name));
|
||||||
} else {
|
} else {
|
||||||
throw api_error::resource_not_found(
|
throw api_error::resource_not_found(
|
||||||
fmt::format("Requested resource not found: Table: {} not found", orig_table_name));
|
fmt::format("Requested resource not found: Table: {} not found", orig_table_name));
|
||||||
@@ -587,7 +587,7 @@ static std::string get_string_attribute(const rjson::value& value, std::string_v
|
|||||||
throw api_error::validation(fmt::format("Expected string value for attribute {}, got: {}",
|
throw api_error::validation(fmt::format("Expected string value for attribute {}, got: {}",
|
||||||
attribute_name, value));
|
attribute_name, value));
|
||||||
}
|
}
|
||||||
return std::string(attribute_value->GetString(), attribute_value->GetStringLength());
|
return rjson::to_string(*attribute_value);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convenience function for getting the value of a boolean attribute, or a
|
// Convenience function for getting the value of a boolean attribute, or a
|
||||||
@@ -1080,8 +1080,8 @@ static void add_column(schema_builder& builder, const std::string& name, const r
|
|||||||
}
|
}
|
||||||
for (auto it = attribute_definitions.Begin(); it != attribute_definitions.End(); ++it) {
|
for (auto it = attribute_definitions.Begin(); it != attribute_definitions.End(); ++it) {
|
||||||
const rjson::value& attribute_info = *it;
|
const rjson::value& attribute_info = *it;
|
||||||
if (attribute_info["AttributeName"].GetString() == name) {
|
if (rjson::to_string_view(attribute_info["AttributeName"]) == name) {
|
||||||
auto type = attribute_info["AttributeType"].GetString();
|
std::string_view type = rjson::to_string_view(attribute_info["AttributeType"]);
|
||||||
data_type dt = parse_key_type(type);
|
data_type dt = parse_key_type(type);
|
||||||
if (computed_column) {
|
if (computed_column) {
|
||||||
// Computed column for GSI (doesn't choose a real column as-is
|
// Computed column for GSI (doesn't choose a real column as-is
|
||||||
@@ -1116,7 +1116,7 @@ static std::pair<std::string, std::string> parse_key_schema(const rjson::value&
|
|||||||
throw api_error::validation("First element of KeySchema must be an object");
|
throw api_error::validation("First element of KeySchema must be an object");
|
||||||
}
|
}
|
||||||
const rjson::value *v = rjson::find((*key_schema)[0], "KeyType");
|
const rjson::value *v = rjson::find((*key_schema)[0], "KeyType");
|
||||||
if (!v || !v->IsString() || v->GetString() != std::string("HASH")) {
|
if (!v || !v->IsString() || rjson::to_string_view(*v) != "HASH") {
|
||||||
throw api_error::validation("First key in KeySchema must be a HASH key");
|
throw api_error::validation("First key in KeySchema must be a HASH key");
|
||||||
}
|
}
|
||||||
v = rjson::find((*key_schema)[0], "AttributeName");
|
v = rjson::find((*key_schema)[0], "AttributeName");
|
||||||
@@ -1124,14 +1124,14 @@ static std::pair<std::string, std::string> parse_key_schema(const rjson::value&
|
|||||||
throw api_error::validation("First key in KeySchema must have string AttributeName");
|
throw api_error::validation("First key in KeySchema must have string AttributeName");
|
||||||
}
|
}
|
||||||
validate_attr_name_length(supplementary_context, v->GetStringLength(), true, "HASH key in KeySchema - ");
|
validate_attr_name_length(supplementary_context, v->GetStringLength(), true, "HASH key in KeySchema - ");
|
||||||
std::string hash_key = v->GetString();
|
std::string hash_key = rjson::to_string(*v);
|
||||||
std::string range_key;
|
std::string range_key;
|
||||||
if (key_schema->Size() == 2) {
|
if (key_schema->Size() == 2) {
|
||||||
if (!(*key_schema)[1].IsObject()) {
|
if (!(*key_schema)[1].IsObject()) {
|
||||||
throw api_error::validation("Second element of KeySchema must be an object");
|
throw api_error::validation("Second element of KeySchema must be an object");
|
||||||
}
|
}
|
||||||
v = rjson::find((*key_schema)[1], "KeyType");
|
v = rjson::find((*key_schema)[1], "KeyType");
|
||||||
if (!v || !v->IsString() || v->GetString() != std::string("RANGE")) {
|
if (!v || !v->IsString() || rjson::to_string_view(*v) != "RANGE") {
|
||||||
throw api_error::validation("Second key in KeySchema must be a RANGE key");
|
throw api_error::validation("Second key in KeySchema must be a RANGE key");
|
||||||
}
|
}
|
||||||
v = rjson::find((*key_schema)[1], "AttributeName");
|
v = rjson::find((*key_schema)[1], "AttributeName");
|
||||||
@@ -1799,6 +1799,11 @@ static future<executor::request_return_type> create_table_on_shard0(service::cli
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Creating an index in tablets mode requires the rf_rack_valid_keyspaces option to be enabled.
|
||||||
|
// GSI and LSI indexes are based on materialized views which require this option to avoid consistency issues.
|
||||||
|
if (!view_builders.empty() && ksm->uses_tablets() && !sp.data_dictionary().get_config().rf_rack_valid_keyspaces()) {
|
||||||
|
co_return api_error::validation("GlobalSecondaryIndexes and LocalSecondaryIndexes with tablets require the rf_rack_valid_keyspaces option to be enabled.");
|
||||||
|
}
|
||||||
try {
|
try {
|
||||||
schema_mutations = service::prepare_new_keyspace_announcement(sp.local_db(), ksm, ts);
|
schema_mutations = service::prepare_new_keyspace_announcement(sp.local_db(), ksm, ts);
|
||||||
} catch (exceptions::already_exists_exception&) {
|
} catch (exceptions::already_exists_exception&) {
|
||||||
@@ -1887,8 +1892,8 @@ future<executor::request_return_type> executor::create_table(client_state& clien
|
|||||||
std::string def_type = type_to_string(def.type);
|
std::string def_type = type_to_string(def.type);
|
||||||
for (auto it = attribute_definitions.Begin(); it != attribute_definitions.End(); ++it) {
|
for (auto it = attribute_definitions.Begin(); it != attribute_definitions.End(); ++it) {
|
||||||
const rjson::value& attribute_info = *it;
|
const rjson::value& attribute_info = *it;
|
||||||
if (attribute_info["AttributeName"].GetString() == def.name_as_text()) {
|
if (rjson::to_string_view(attribute_info["AttributeName"]) == def.name_as_text()) {
|
||||||
auto type = attribute_info["AttributeType"].GetString();
|
std::string_view type = rjson::to_string_view(attribute_info["AttributeType"]);
|
||||||
if (type != def_type) {
|
if (type != def_type) {
|
||||||
throw api_error::validation(fmt::format("AttributeDefinitions redefined {} to {} already a key attribute of type {} in this table", def.name_as_text(), type, def_type));
|
throw api_error::validation(fmt::format("AttributeDefinitions redefined {} to {} already a key attribute of type {} in this table", def.name_as_text(), type, def_type));
|
||||||
}
|
}
|
||||||
@@ -2019,6 +2024,10 @@ future<executor::request_return_type> executor::update_table(client_state& clien
|
|||||||
co_return api_error::validation(fmt::format(
|
co_return api_error::validation(fmt::format(
|
||||||
"LSI {} already exists in table {}, can't use same name for GSI", index_name, table_name));
|
"LSI {} already exists in table {}, can't use same name for GSI", index_name, table_name));
|
||||||
}
|
}
|
||||||
|
if (p.local().local_db().find_keyspace(keyspace_name).get_replication_strategy().uses_tablets() &&
|
||||||
|
!p.local().data_dictionary().get_config().rf_rack_valid_keyspaces()) {
|
||||||
|
co_return api_error::validation("GlobalSecondaryIndexes with tablets require the rf_rack_valid_keyspaces option to be enabled.");
|
||||||
|
}
|
||||||
|
|
||||||
elogger.trace("Adding GSI {}", index_name);
|
elogger.trace("Adding GSI {}", index_name);
|
||||||
// FIXME: read and handle "Projection" parameter. This will
|
// FIXME: read and handle "Projection" parameter. This will
|
||||||
@@ -2362,7 +2371,7 @@ put_or_delete_item::put_or_delete_item(const rjson::value& item, schema_ptr sche
|
|||||||
_cells = std::vector<cell>();
|
_cells = std::vector<cell>();
|
||||||
_cells->reserve(item.MemberCount());
|
_cells->reserve(item.MemberCount());
|
||||||
for (auto it = item.MemberBegin(); it != item.MemberEnd(); ++it) {
|
for (auto it = item.MemberBegin(); it != item.MemberEnd(); ++it) {
|
||||||
bytes column_name = to_bytes(it->name.GetString());
|
bytes column_name = to_bytes(rjson::to_string_view(it->name));
|
||||||
validate_value(it->value, "PutItem");
|
validate_value(it->value, "PutItem");
|
||||||
const column_definition* cdef = find_attribute(*schema, column_name);
|
const column_definition* cdef = find_attribute(*schema, column_name);
|
||||||
validate_attr_name_length("", column_name.size(), cdef && cdef->is_primary_key());
|
validate_attr_name_length("", column_name.size(), cdef && cdef->is_primary_key());
|
||||||
@@ -2783,10 +2792,10 @@ static void verify_all_are_used(const rjson::value* field,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
for (auto it = field->MemberBegin(); it != field->MemberEnd(); ++it) {
|
for (auto it = field->MemberBegin(); it != field->MemberEnd(); ++it) {
|
||||||
if (!used.contains(it->name.GetString())) {
|
if (!used.contains(rjson::to_string(it->name))) {
|
||||||
throw api_error::validation(
|
throw api_error::validation(
|
||||||
format("{} has spurious '{}', not used in {}",
|
format("{} has spurious '{}', not used in {}",
|
||||||
field_name, it->name.GetString(), operation));
|
field_name, rjson::to_string_view(it->name), operation));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -3000,7 +3009,7 @@ future<executor::request_return_type> executor::delete_item(client_state& client
|
|||||||
}
|
}
|
||||||
|
|
||||||
static schema_ptr get_table_from_batch_request(const service::storage_proxy& proxy, const rjson::value::ConstMemberIterator& batch_request) {
|
static schema_ptr get_table_from_batch_request(const service::storage_proxy& proxy, const rjson::value::ConstMemberIterator& batch_request) {
|
||||||
sstring table_name = batch_request->name.GetString(); // JSON keys are always strings
|
sstring table_name = rjson::to_sstring(batch_request->name); // JSON keys are always strings
|
||||||
try {
|
try {
|
||||||
return proxy.data_dictionary().find_schema(sstring(executor::KEYSPACE_NAME_PREFIX) + table_name, table_name);
|
return proxy.data_dictionary().find_schema(sstring(executor::KEYSPACE_NAME_PREFIX) + table_name, table_name);
|
||||||
} catch(data_dictionary::no_such_column_family&) {
|
} catch(data_dictionary::no_such_column_family&) {
|
||||||
@@ -3055,17 +3064,44 @@ public:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
static future<> cas_write(service::storage_proxy& proxy, schema_ptr schema, service::cas_shard cas_shard, const dht::decorated_key& dk, const std::vector<put_or_delete_item>& mutation_builders,
|
future<> executor::cas_write(schema_ptr schema, service::cas_shard cas_shard, const dht::decorated_key& dk,
|
||||||
service::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit) {
|
const std::vector<put_or_delete_item>& mutation_builders, service::client_state& client_state,
|
||||||
|
tracing::trace_state_ptr trace_state, service_permit permit)
|
||||||
|
{
|
||||||
|
if (!cas_shard.this_shard()) {
|
||||||
|
_stats.shard_bounce_for_lwt++;
|
||||||
|
return container().invoke_on(cas_shard.shard(), _ssg,
|
||||||
|
[cs = client_state.move_to_other_shard(),
|
||||||
|
&mb = mutation_builders,
|
||||||
|
&dk,
|
||||||
|
ks = schema->ks_name(),
|
||||||
|
cf = schema->cf_name(),
|
||||||
|
gt = tracing::global_trace_state_ptr(trace_state),
|
||||||
|
permit = std::move(permit)]
|
||||||
|
(executor& self) mutable {
|
||||||
|
return do_with(cs.get(), [&mb, &dk, ks = std::move(ks), cf = std::move(cf),
|
||||||
|
trace_state = tracing::trace_state_ptr(gt), &self]
|
||||||
|
(service::client_state& client_state) mutable {
|
||||||
|
auto schema = self._proxy.data_dictionary().find_schema(ks, cf);
|
||||||
|
service::cas_shard cas_shard(*schema, dk.token());
|
||||||
|
|
||||||
|
//FIXME: Instead of passing empty_service_permit() to the background operation,
|
||||||
|
// the current permit's lifetime should be prolonged, so that it's destructed
|
||||||
|
// only after all background operations are finished as well.
|
||||||
|
return self.cas_write(schema, std::move(cas_shard), dk, mb, client_state, std::move(trace_state), empty_service_permit());
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
auto timeout = executor::default_timeout();
|
auto timeout = executor::default_timeout();
|
||||||
auto op = std::make_unique<put_or_delete_item_cas_request>(schema, mutation_builders);
|
auto op = std::make_unique<put_or_delete_item_cas_request>(schema, mutation_builders);
|
||||||
auto* op_ptr = op.get();
|
auto* op_ptr = op.get();
|
||||||
auto cdc_opts = cdc::per_request_options{
|
auto cdc_opts = cdc::per_request_options{
|
||||||
.alternator = true,
|
.alternator = true,
|
||||||
.alternator_streams_increased_compatibility =
|
.alternator_streams_increased_compatibility =
|
||||||
schema->cdc_options().enabled() && proxy.data_dictionary().get_config().alternator_streams_increased_compatibility(),
|
schema->cdc_options().enabled() && _proxy.data_dictionary().get_config().alternator_streams_increased_compatibility(),
|
||||||
};
|
};
|
||||||
return proxy.cas(schema, std::move(cas_shard), *op_ptr, nullptr, to_partition_ranges(dk),
|
return _proxy.cas(schema, std::move(cas_shard), *op_ptr, nullptr, to_partition_ranges(dk),
|
||||||
{timeout, std::move(permit), client_state, trace_state},
|
{timeout, std::move(permit), client_state, trace_state},
|
||||||
db::consistency_level::LOCAL_SERIAL, db::consistency_level::LOCAL_QUORUM,
|
db::consistency_level::LOCAL_SERIAL, db::consistency_level::LOCAL_QUORUM,
|
||||||
timeout, timeout, true, std::move(cdc_opts)).finally([op = std::move(op)]{}).discard_result();
|
timeout, timeout, true, std::move(cdc_opts)).finally([op = std::move(op)]{}).discard_result();
|
||||||
@@ -3091,13 +3127,11 @@ struct schema_decorated_key_equal {
|
|||||||
|
|
||||||
// FIXME: if we failed writing some of the mutations, need to return a list
|
// FIXME: if we failed writing some of the mutations, need to return a list
|
||||||
// of these failed mutations rather than fail the whole write (issue #5650).
|
// of these failed mutations rather than fail the whole write (issue #5650).
|
||||||
static future<> do_batch_write(service::storage_proxy& proxy,
|
future<> executor::do_batch_write(
|
||||||
smp_service_group ssg,
|
|
||||||
std::vector<std::pair<schema_ptr, put_or_delete_item>> mutation_builders,
|
std::vector<std::pair<schema_ptr, put_or_delete_item>> mutation_builders,
|
||||||
service::client_state& client_state,
|
service::client_state& client_state,
|
||||||
tracing::trace_state_ptr trace_state,
|
tracing::trace_state_ptr trace_state,
|
||||||
service_permit permit,
|
service_permit permit) {
|
||||||
stats& stats) {
|
|
||||||
if (mutation_builders.empty()) {
|
if (mutation_builders.empty()) {
|
||||||
return make_ready_future<>();
|
return make_ready_future<>();
|
||||||
}
|
}
|
||||||
@@ -3119,7 +3153,7 @@ static future<> do_batch_write(service::storage_proxy& proxy,
|
|||||||
mutations.push_back(b.second.build(b.first, now));
|
mutations.push_back(b.second.build(b.first, now));
|
||||||
any_cdc_enabled |= b.first->cdc_options().enabled();
|
any_cdc_enabled |= b.first->cdc_options().enabled();
|
||||||
}
|
}
|
||||||
return proxy.mutate(std::move(mutations),
|
return _proxy.mutate(std::move(mutations),
|
||||||
db::consistency_level::LOCAL_QUORUM,
|
db::consistency_level::LOCAL_QUORUM,
|
||||||
executor::default_timeout(),
|
executor::default_timeout(),
|
||||||
trace_state,
|
trace_state,
|
||||||
@@ -3128,7 +3162,7 @@ static future<> do_batch_write(service::storage_proxy& proxy,
|
|||||||
false,
|
false,
|
||||||
cdc::per_request_options{
|
cdc::per_request_options{
|
||||||
.alternator = true,
|
.alternator = true,
|
||||||
.alternator_streams_increased_compatibility = any_cdc_enabled && proxy.data_dictionary().get_config().alternator_streams_increased_compatibility(),
|
.alternator_streams_increased_compatibility = any_cdc_enabled && _proxy.data_dictionary().get_config().alternator_streams_increased_compatibility(),
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
// Do the write via LWT:
|
// Do the write via LWT:
|
||||||
@@ -3140,46 +3174,35 @@ static future<> do_batch_write(service::storage_proxy& proxy,
|
|||||||
schema_decorated_key_hash,
|
schema_decorated_key_hash,
|
||||||
schema_decorated_key_equal>;
|
schema_decorated_key_equal>;
|
||||||
auto key_builders = std::make_unique<map_type>(1, schema_decorated_key_hash{}, schema_decorated_key_equal{});
|
auto key_builders = std::make_unique<map_type>(1, schema_decorated_key_hash{}, schema_decorated_key_equal{});
|
||||||
for (auto& b : mutation_builders) {
|
for (auto&& b : std::move(mutation_builders)) {
|
||||||
auto dk = dht::decorate_key(*b.first, b.second.pk());
|
auto [it, added] = key_builders->try_emplace(schema_decorated_key {
|
||||||
auto [it, added] = key_builders->try_emplace(schema_decorated_key{b.first, dk});
|
.schema = b.first,
|
||||||
|
.dk = dht::decorate_key(*b.first, b.second.pk())
|
||||||
|
});
|
||||||
it->second.push_back(std::move(b.second));
|
it->second.push_back(std::move(b.second));
|
||||||
}
|
}
|
||||||
auto* key_builders_ptr = key_builders.get();
|
auto* key_builders_ptr = key_builders.get();
|
||||||
return parallel_for_each(*key_builders_ptr, [&proxy, &client_state, &stats, trace_state, ssg, permit = std::move(permit)] (const auto& e) {
|
return parallel_for_each(*key_builders_ptr, [this, &client_state, trace_state, permit = std::move(permit)] (const auto& e) {
|
||||||
stats.write_using_lwt++;
|
_stats.write_using_lwt++;
|
||||||
auto desired_shard = service::cas_shard(*e.first.schema, e.first.dk.token());
|
auto desired_shard = service::cas_shard(*e.first.schema, e.first.dk.token());
|
||||||
if (desired_shard.this_shard()) {
|
auto s = e.first.schema;
|
||||||
return cas_write(proxy, e.first.schema, std::move(desired_shard), e.first.dk, e.second, client_state, trace_state, permit);
|
|
||||||
} else {
|
|
||||||
stats.shard_bounce_for_lwt++;
|
|
||||||
return proxy.container().invoke_on(desired_shard.shard(), ssg,
|
|
||||||
[cs = client_state.move_to_other_shard(),
|
|
||||||
&mb = e.second,
|
|
||||||
&dk = e.first.dk,
|
|
||||||
ks = e.first.schema->ks_name(),
|
|
||||||
cf = e.first.schema->cf_name(),
|
|
||||||
gt = tracing::global_trace_state_ptr(trace_state),
|
|
||||||
permit = std::move(permit)]
|
|
||||||
(service::storage_proxy& proxy) mutable {
|
|
||||||
return do_with(cs.get(), [&proxy, &mb, &dk, ks = std::move(ks), cf = std::move(cf),
|
|
||||||
trace_state = tracing::trace_state_ptr(gt)]
|
|
||||||
(service::client_state& client_state) mutable {
|
|
||||||
auto schema = proxy.data_dictionary().find_schema(ks, cf);
|
|
||||||
|
|
||||||
// The desired_shard on the original shard remains alive for the duration
|
static const auto* injection_name = "alternator_executor_batch_write_wait";
|
||||||
// of cas_write on this shard and prevents any tablet operations.
|
return utils::get_local_injector().inject(injection_name, [s = std::move(s)] (auto& handler) -> future<> {
|
||||||
// However, we need a local instance of cas_shard on this shard
|
const auto ks = handler.get("keyspace");
|
||||||
// to pass it to sp::cas, so we just create a new one.
|
const auto cf = handler.get("table");
|
||||||
service::cas_shard cas_shard(*schema, dk.token());
|
const auto shard = std::atoll(handler.get("shard")->data());
|
||||||
|
if (ks == s->ks_name() && cf == s->cf_name() && shard == this_shard_id()) {
|
||||||
//FIXME: Instead of passing empty_service_permit() to the background operation,
|
elogger.info("{}: hit", injection_name);
|
||||||
// the current permit's lifetime should be prolonged, so that it's destructed
|
co_await handler.wait_for_message(std::chrono::steady_clock::now() + std::chrono::minutes{5});
|
||||||
// only after all background operations are finished as well.
|
elogger.info("{}: continue", injection_name);
|
||||||
return cas_write(proxy, schema, std::move(cas_shard), dk, mb, client_state, std::move(trace_state), empty_service_permit());
|
}
|
||||||
});
|
}).then([&e, desired_shard = std::move(desired_shard),
|
||||||
}).finally([desired_shard = std::move(desired_shard)]{});
|
&client_state, trace_state = std::move(trace_state), permit = std::move(permit), this]() mutable
|
||||||
}
|
{
|
||||||
|
return cas_write(e.first.schema, std::move(desired_shard), e.first.dk,
|
||||||
|
std::move(e.second), client_state, std::move(trace_state), std::move(permit));
|
||||||
|
});
|
||||||
}).finally([key_builders = std::move(key_builders)]{});
|
}).finally([key_builders = std::move(key_builders)]{});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -3327,7 +3350,7 @@ future<executor::request_return_type> executor::batch_write_item(client_state& c
|
|||||||
_stats.wcu_total[stats::DELETE_ITEM] += wcu_delete_units;
|
_stats.wcu_total[stats::DELETE_ITEM] += wcu_delete_units;
|
||||||
_stats.api_operations.batch_write_item_batch_total += total_items;
|
_stats.api_operations.batch_write_item_batch_total += total_items;
|
||||||
_stats.api_operations.batch_write_item_histogram.add(total_items);
|
_stats.api_operations.batch_write_item_histogram.add(total_items);
|
||||||
co_await do_batch_write(_proxy, _ssg, std::move(mutation_builders), client_state, trace_state, std::move(permit), _stats);
|
co_await do_batch_write(std::move(mutation_builders), client_state, trace_state, std::move(permit));
|
||||||
// FIXME: Issue #5650: If we failed writing some of the updates,
|
// FIXME: Issue #5650: If we failed writing some of the updates,
|
||||||
// need to return a list of these failed updates in UnprocessedItems
|
// need to return a list of these failed updates in UnprocessedItems
|
||||||
// rather than fail the whole write (issue #5650).
|
// rather than fail the whole write (issue #5650).
|
||||||
@@ -3372,7 +3395,7 @@ static bool hierarchy_filter(rjson::value& val, const attribute_path_map_node<T>
|
|||||||
}
|
}
|
||||||
rjson::value newv = rjson::empty_object();
|
rjson::value newv = rjson::empty_object();
|
||||||
for (auto it = v.MemberBegin(); it != v.MemberEnd(); ++it) {
|
for (auto it = v.MemberBegin(); it != v.MemberEnd(); ++it) {
|
||||||
std::string attr = it->name.GetString();
|
std::string attr = rjson::to_string(it->name);
|
||||||
auto x = members.find(attr);
|
auto x = members.find(attr);
|
||||||
if (x != members.end()) {
|
if (x != members.end()) {
|
||||||
if (x->second) {
|
if (x->second) {
|
||||||
@@ -3592,7 +3615,7 @@ static std::optional<attrs_to_get> calculate_attrs_to_get(const rjson::value& re
|
|||||||
const rjson::value& attributes_to_get = req["AttributesToGet"];
|
const rjson::value& attributes_to_get = req["AttributesToGet"];
|
||||||
attrs_to_get ret;
|
attrs_to_get ret;
|
||||||
for (auto it = attributes_to_get.Begin(); it != attributes_to_get.End(); ++it) {
|
for (auto it = attributes_to_get.Begin(); it != attributes_to_get.End(); ++it) {
|
||||||
attribute_path_map_add("AttributesToGet", ret, it->GetString());
|
attribute_path_map_add("AttributesToGet", ret, rjson::to_string(*it));
|
||||||
validate_attr_name_length("AttributesToGet", it->GetStringLength(), false);
|
validate_attr_name_length("AttributesToGet", it->GetStringLength(), false);
|
||||||
}
|
}
|
||||||
if (ret.empty()) {
|
if (ret.empty()) {
|
||||||
@@ -4258,12 +4281,12 @@ inline void update_item_operation::apply_attribute_updates(const std::unique_ptr
|
|||||||
attribute_collector& modified_attrs, bool& any_updates, bool& any_deletes) const {
|
attribute_collector& modified_attrs, bool& any_updates, bool& any_deletes) const {
|
||||||
for (auto it = _attribute_updates->MemberBegin(); it != _attribute_updates->MemberEnd(); ++it) {
|
for (auto it = _attribute_updates->MemberBegin(); it != _attribute_updates->MemberEnd(); ++it) {
|
||||||
// Note that it.key() is the name of the column, *it is the operation
|
// Note that it.key() is the name of the column, *it is the operation
|
||||||
bytes column_name = to_bytes(it->name.GetString());
|
bytes column_name = to_bytes(rjson::to_string_view(it->name));
|
||||||
const column_definition* cdef = _schema->get_column_definition(column_name);
|
const column_definition* cdef = _schema->get_column_definition(column_name);
|
||||||
if (cdef && cdef->is_primary_key()) {
|
if (cdef && cdef->is_primary_key()) {
|
||||||
throw api_error::validation(format("UpdateItem cannot update key column {}", it->name.GetString()));
|
throw api_error::validation(format("UpdateItem cannot update key column {}", rjson::to_string_view(it->name)));
|
||||||
}
|
}
|
||||||
std::string action = (it->value)["Action"].GetString();
|
std::string action = rjson::to_string((it->value)["Action"]);
|
||||||
if (action == "DELETE") {
|
if (action == "DELETE") {
|
||||||
// The DELETE operation can do two unrelated tasks. Without a
|
// The DELETE operation can do two unrelated tasks. Without a
|
||||||
// "Value" option, it is used to delete an attribute. With a
|
// "Value" option, it is used to delete an attribute. With a
|
||||||
@@ -5460,7 +5483,7 @@ calculate_bounds_conditions(schema_ptr schema, const rjson::value& conditions) {
|
|||||||
std::vector<query::clustering_range> ck_bounds;
|
std::vector<query::clustering_range> ck_bounds;
|
||||||
|
|
||||||
for (auto it = conditions.MemberBegin(); it != conditions.MemberEnd(); ++it) {
|
for (auto it = conditions.MemberBegin(); it != conditions.MemberEnd(); ++it) {
|
||||||
std::string key = it->name.GetString();
|
sstring key = rjson::to_sstring(it->name);
|
||||||
const rjson::value& condition = it->value;
|
const rjson::value& condition = it->value;
|
||||||
|
|
||||||
const rjson::value& comp_definition = rjson::get(condition, "ComparisonOperator");
|
const rjson::value& comp_definition = rjson::get(condition, "ComparisonOperator");
|
||||||
@@ -5468,13 +5491,13 @@ calculate_bounds_conditions(schema_ptr schema, const rjson::value& conditions) {
|
|||||||
|
|
||||||
const column_definition& pk_cdef = schema->partition_key_columns().front();
|
const column_definition& pk_cdef = schema->partition_key_columns().front();
|
||||||
const column_definition* ck_cdef = schema->clustering_key_size() > 0 ? &schema->clustering_key_columns().front() : nullptr;
|
const column_definition* ck_cdef = schema->clustering_key_size() > 0 ? &schema->clustering_key_columns().front() : nullptr;
|
||||||
if (sstring(key) == pk_cdef.name_as_text()) {
|
if (key == pk_cdef.name_as_text()) {
|
||||||
if (!partition_ranges.empty()) {
|
if (!partition_ranges.empty()) {
|
||||||
throw api_error::validation("Currently only a single restriction per key is allowed");
|
throw api_error::validation("Currently only a single restriction per key is allowed");
|
||||||
}
|
}
|
||||||
partition_ranges.push_back(calculate_pk_bound(schema, pk_cdef, comp_definition, attr_list));
|
partition_ranges.push_back(calculate_pk_bound(schema, pk_cdef, comp_definition, attr_list));
|
||||||
}
|
}
|
||||||
if (ck_cdef && sstring(key) == ck_cdef->name_as_text()) {
|
if (ck_cdef && key == ck_cdef->name_as_text()) {
|
||||||
if (!ck_bounds.empty()) {
|
if (!ck_bounds.empty()) {
|
||||||
throw api_error::validation("Currently only a single restriction per key is allowed");
|
throw api_error::validation("Currently only a single restriction per key is allowed");
|
||||||
}
|
}
|
||||||
@@ -5875,7 +5898,7 @@ future<executor::request_return_type> executor::list_tables(client_state& client
|
|||||||
|
|
||||||
rjson::value* exclusive_start_json = rjson::find(request, "ExclusiveStartTableName");
|
rjson::value* exclusive_start_json = rjson::find(request, "ExclusiveStartTableName");
|
||||||
rjson::value* limit_json = rjson::find(request, "Limit");
|
rjson::value* limit_json = rjson::find(request, "Limit");
|
||||||
std::string exclusive_start = exclusive_start_json ? exclusive_start_json->GetString() : "";
|
std::string exclusive_start = exclusive_start_json ? rjson::to_string(*exclusive_start_json) : "";
|
||||||
int limit = limit_json ? limit_json->GetInt() : 100;
|
int limit = limit_json ? limit_json->GetInt() : 100;
|
||||||
if (limit < 1 || limit > 100) {
|
if (limit < 1 || limit > 100) {
|
||||||
co_return api_error::validation("Limit must be greater than 0 and no greater than 100");
|
co_return api_error::validation("Limit must be greater than 0 and no greater than 100");
|
||||||
|
|||||||
@@ -40,6 +40,7 @@ namespace cql3::selection {
|
|||||||
|
|
||||||
namespace service {
|
namespace service {
|
||||||
class storage_proxy;
|
class storage_proxy;
|
||||||
|
class cas_shard;
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace cdc {
|
namespace cdc {
|
||||||
@@ -57,6 +58,7 @@ class schema_builder;
|
|||||||
namespace alternator {
|
namespace alternator {
|
||||||
|
|
||||||
class rmw_operation;
|
class rmw_operation;
|
||||||
|
class put_or_delete_item;
|
||||||
|
|
||||||
schema_ptr get_table(service::storage_proxy& proxy, const rjson::value& request);
|
schema_ptr get_table(service::storage_proxy& proxy, const rjson::value& request);
|
||||||
bool is_alternator_keyspace(const sstring& ks_name);
|
bool is_alternator_keyspace(const sstring& ks_name);
|
||||||
@@ -219,6 +221,16 @@ private:
|
|||||||
|
|
||||||
static void describe_key_schema(rjson::value& parent, const schema&, std::unordered_map<std::string,std::string> * = nullptr, const std::map<sstring, sstring> *tags = nullptr);
|
static void describe_key_schema(rjson::value& parent, const schema&, std::unordered_map<std::string,std::string> * = nullptr, const std::map<sstring, sstring> *tags = nullptr);
|
||||||
|
|
||||||
|
future<> do_batch_write(
|
||||||
|
std::vector<std::pair<schema_ptr, put_or_delete_item>> mutation_builders,
|
||||||
|
service::client_state& client_state,
|
||||||
|
tracing::trace_state_ptr trace_state,
|
||||||
|
service_permit permit);
|
||||||
|
|
||||||
|
future<> cas_write(schema_ptr schema, service::cas_shard cas_shard, const dht::decorated_key& dk,
|
||||||
|
const std::vector<put_or_delete_item>& mutation_builders, service::client_state& client_state,
|
||||||
|
tracing::trace_state_ptr trace_state, service_permit permit);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
static void describe_key_schema(rjson::value& parent, const schema& schema, std::unordered_map<std::string,std::string>&, const std::map<sstring, sstring> *tags = nullptr);
|
static void describe_key_schema(rjson::value& parent, const schema& schema, std::unordered_map<std::string,std::string>&, const std::map<sstring, sstring> *tags = nullptr);
|
||||||
|
|
||||||
|
|||||||
@@ -496,7 +496,7 @@ const std::pair<std::string, const rjson::value*> unwrap_set(const rjson::value&
|
|||||||
return {"", nullptr};
|
return {"", nullptr};
|
||||||
}
|
}
|
||||||
auto it = v.MemberBegin();
|
auto it = v.MemberBegin();
|
||||||
const std::string it_key = it->name.GetString();
|
const std::string it_key = rjson::to_string(it->name);
|
||||||
if (it_key != "SS" && it_key != "BS" && it_key != "NS") {
|
if (it_key != "SS" && it_key != "BS" && it_key != "NS") {
|
||||||
return {std::move(it_key), nullptr};
|
return {std::move(it_key), nullptr};
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -708,8 +708,12 @@ future<executor::request_return_type> server::handle_api_request(std::unique_ptr
|
|||||||
// As long as the system_clients_entry object is alive, this request will
|
// As long as the system_clients_entry object is alive, this request will
|
||||||
// be visible in the "system.clients" virtual table. When requested, this
|
// be visible in the "system.clients" virtual table. When requested, this
|
||||||
// entry will be formatted by server::ongoing_request::make_client_data().
|
// entry will be formatted by server::ongoing_request::make_client_data().
|
||||||
|
auto user_agent_header = co_await _connection_options_keys_and_values.get_or_load(req->get_header("User-Agent"), [] (const client_options_cache_key_type&) {
|
||||||
|
return make_ready_future<options_cache_value_type>(options_cache_value_type{});
|
||||||
|
});
|
||||||
|
|
||||||
auto system_clients_entry = _ongoing_requests.emplace(
|
auto system_clients_entry = _ongoing_requests.emplace(
|
||||||
req->get_client_address(), req->get_header("User-Agent"),
|
req->get_client_address(), std::move(user_agent_header),
|
||||||
username, current_scheduling_group(),
|
username, current_scheduling_group(),
|
||||||
req->get_protocol_name() == "https");
|
req->get_protocol_name() == "https");
|
||||||
|
|
||||||
@@ -985,10 +989,10 @@ client_data server::ongoing_request::make_client_data() const {
|
|||||||
return cd;
|
return cd;
|
||||||
}
|
}
|
||||||
|
|
||||||
future<utils::chunked_vector<client_data>> server::get_client_data() {
|
future<utils::chunked_vector<foreign_ptr<std::unique_ptr<client_data>>>> server::get_client_data() {
|
||||||
utils::chunked_vector<client_data> ret;
|
utils::chunked_vector<foreign_ptr<std::unique_ptr<client_data>>> ret;
|
||||||
co_await _ongoing_requests.for_each_gently([&ret] (const ongoing_request& r) {
|
co_await _ongoing_requests.for_each_gently([&ret] (const ongoing_request& r) {
|
||||||
ret.emplace_back(r.make_client_data());
|
ret.emplace_back(make_foreign(std::make_unique<client_data>(r.make_client_data())));
|
||||||
});
|
});
|
||||||
co_return ret;
|
co_return ret;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -55,6 +55,7 @@ class server : public peering_sharded_service<server> {
|
|||||||
// though it isn't really relevant for Alternator which defines its own
|
// though it isn't really relevant for Alternator which defines its own
|
||||||
// timeouts separately. We can create this object only once.
|
// timeouts separately. We can create this object only once.
|
||||||
updateable_timeout_config _timeout_config;
|
updateable_timeout_config _timeout_config;
|
||||||
|
client_options_cache_type _connection_options_keys_and_values;
|
||||||
|
|
||||||
alternator_callbacks_map _callbacks;
|
alternator_callbacks_map _callbacks;
|
||||||
|
|
||||||
@@ -88,7 +89,7 @@ class server : public peering_sharded_service<server> {
|
|||||||
// is called when reading the "system.clients" virtual table.
|
// is called when reading the "system.clients" virtual table.
|
||||||
struct ongoing_request {
|
struct ongoing_request {
|
||||||
socket_address _client_address;
|
socket_address _client_address;
|
||||||
sstring _user_agent;
|
client_options_cache_entry_type _user_agent;
|
||||||
sstring _username;
|
sstring _username;
|
||||||
scheduling_group _scheduling_group;
|
scheduling_group _scheduling_group;
|
||||||
bool _is_https;
|
bool _is_https;
|
||||||
@@ -107,7 +108,7 @@ public:
|
|||||||
// table "system.clients" is read. It is expected to generate a list of
|
// table "system.clients" is read. It is expected to generate a list of
|
||||||
// clients connected to this server (on this shard). This function is
|
// clients connected to this server (on this shard). This function is
|
||||||
// called by alternator::controller::get_client_data().
|
// called by alternator::controller::get_client_data().
|
||||||
future<utils::chunked_vector<client_data>> get_client_data();
|
future<utils::chunked_vector<foreign_ptr<std::unique_ptr<client_data>>>> get_client_data();
|
||||||
private:
|
private:
|
||||||
void set_routes(seastar::httpd::routes& r);
|
void set_routes(seastar::httpd::routes& r);
|
||||||
// If verification succeeds, returns the authenticated user's username
|
// If verification succeeds, returns the authenticated user's username
|
||||||
|
|||||||
@@ -93,7 +93,7 @@ future<executor::request_return_type> executor::update_time_to_live(client_state
|
|||||||
if (v->GetStringLength() < 1 || v->GetStringLength() > 255) {
|
if (v->GetStringLength() < 1 || v->GetStringLength() > 255) {
|
||||||
co_return api_error::validation("The length of AttributeName must be between 1 and 255");
|
co_return api_error::validation("The length of AttributeName must be between 1 and 255");
|
||||||
}
|
}
|
||||||
sstring attribute_name(v->GetString(), v->GetStringLength());
|
sstring attribute_name = rjson::to_sstring(*v);
|
||||||
|
|
||||||
co_await verify_permission(_enforce_authorization, _warn_authorization, client_state, schema, auth::permission::ALTER, _stats);
|
co_await verify_permission(_enforce_authorization, _warn_authorization, client_state, schema, auth::permission::ALTER, _stats);
|
||||||
co_await db::modify_tags(_mm, schema->ks_name(), schema->cf_name(), [&](std::map<sstring, sstring>& tags_map) {
|
co_await db::modify_tags(_mm, schema->ks_name(), schema->cf_name(), [&](std::map<sstring, sstring>& tags_map) {
|
||||||
|
|||||||
@@ -31,6 +31,7 @@ set(swagger_files
|
|||||||
api-doc/column_family.json
|
api-doc/column_family.json
|
||||||
api-doc/commitlog.json
|
api-doc/commitlog.json
|
||||||
api-doc/compaction_manager.json
|
api-doc/compaction_manager.json
|
||||||
|
api-doc/client_routes.json
|
||||||
api-doc/config.json
|
api-doc/config.json
|
||||||
api-doc/cql_server_test.json
|
api-doc/cql_server_test.json
|
||||||
api-doc/endpoint_snitch_info.json
|
api-doc/endpoint_snitch_info.json
|
||||||
@@ -68,6 +69,7 @@ target_sources(api
|
|||||||
PRIVATE
|
PRIVATE
|
||||||
api.cc
|
api.cc
|
||||||
cache_service.cc
|
cache_service.cc
|
||||||
|
client_routes.cc
|
||||||
collectd.cc
|
collectd.cc
|
||||||
column_family.cc
|
column_family.cc
|
||||||
commitlog.cc
|
commitlog.cc
|
||||||
|
|||||||
23
api/api-doc/client_routes.def.json
Normal file
23
api/api-doc/client_routes.def.json
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
, "client_routes_entry": {
|
||||||
|
"id": "client_routes_entry",
|
||||||
|
"summary": "An entry storing client routes",
|
||||||
|
"properties": {
|
||||||
|
"connection_id": {"type": "string"},
|
||||||
|
"host_id": {"type": "string", "format": "uuid"},
|
||||||
|
"address": {"type": "string"},
|
||||||
|
"port": {"type": "integer"},
|
||||||
|
"tls_port": {"type": "integer"},
|
||||||
|
"alternator_port": {"type": "integer"},
|
||||||
|
"alternator_https_port": {"type": "integer"}
|
||||||
|
},
|
||||||
|
"required": ["connection_id", "host_id", "address"]
|
||||||
|
}
|
||||||
|
, "client_routes_key": {
|
||||||
|
"id": "client_routes_key",
|
||||||
|
"summary": "A key of client_routes_entry",
|
||||||
|
"properties": {
|
||||||
|
"connection_id": {"type": "string"},
|
||||||
|
"host_id": {"type": "string", "format": "uuid"}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
74
api/api-doc/client_routes.json
Normal file
74
api/api-doc/client_routes.json
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
, "/v2/client-routes":{
|
||||||
|
"get": {
|
||||||
|
"description":"List all client route entries",
|
||||||
|
"operationId":"get_client_routes",
|
||||||
|
"tags":["client_routes"],
|
||||||
|
"produces":[
|
||||||
|
"application/json"
|
||||||
|
],
|
||||||
|
"parameters":[],
|
||||||
|
"responses":{
|
||||||
|
"200":{
|
||||||
|
"schema":{
|
||||||
|
"type":"array",
|
||||||
|
"items":{ "$ref":"#/definitions/client_routes_entry" }
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"default":{
|
||||||
|
"description":"unexpected error",
|
||||||
|
"schema":{"$ref":"#/definitions/ErrorModel"}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"post": {
|
||||||
|
"description":"Upsert one or more client route entries",
|
||||||
|
"operationId":"set_client_routes",
|
||||||
|
"tags":["client_routes"],
|
||||||
|
"parameters":[
|
||||||
|
{
|
||||||
|
"name":"body",
|
||||||
|
"in":"body",
|
||||||
|
"required":true,
|
||||||
|
"schema":{
|
||||||
|
"type":"array",
|
||||||
|
"items":{ "$ref":"#/definitions/client_routes_entry" }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"responses":{
|
||||||
|
"200":{ "description": "OK" },
|
||||||
|
"default":{
|
||||||
|
"description":"unexpected error",
|
||||||
|
"schema":{ "$ref":"#/definitions/ErrorModel" }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"delete": {
|
||||||
|
"description":"Delete one or more client route entries",
|
||||||
|
"operationId":"delete_client_routes",
|
||||||
|
"tags":["client_routes"],
|
||||||
|
"parameters":[
|
||||||
|
{
|
||||||
|
"name":"body",
|
||||||
|
"in":"body",
|
||||||
|
"required":true,
|
||||||
|
"schema":{
|
||||||
|
"type":"array",
|
||||||
|
"items":{ "$ref":"#/definitions/client_routes_key" }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"responses":{
|
||||||
|
"200":{
|
||||||
|
"description": "OK"
|
||||||
|
},
|
||||||
|
"default":{
|
||||||
|
"description":"unexpected error",
|
||||||
|
"schema":{
|
||||||
|
"$ref":"#/definitions/ErrorModel"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@@ -3051,7 +3051,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name":"incremental_mode",
|
"name":"incremental_mode",
|
||||||
"description":"Set the incremental repair mode. Can be 'disabled', 'incremental', or 'full'. 'incremental': The incremental repair logic is enabled. Unrepaired sstables will be included for repair. Repaired sstables will be skipped. The incremental repair states will be updated after repair. 'full': The incremental repair logic is enabled. Both repaired and unrepaired sstables will be included for repair. The incremental repair states will be updated after repair. 'disabled': The incremental repair logic is disabled completely. The incremental repair states, e.g., repaired_at in sstables and sstables_repaired_at in the system.tablets table, will not be updated after repair. When the option is not provided, it defaults to incremental mode.",
|
"description":"Set the incremental repair mode. Can be 'disabled', 'incremental', or 'full'. 'incremental': The incremental repair logic is enabled. Unrepaired sstables will be included for repair. Repaired sstables will be skipped. The incremental repair states will be updated after repair. 'full': The incremental repair logic is enabled. Both repaired and unrepaired sstables will be included for repair. The incremental repair states will be updated after repair. 'disabled': The incremental repair logic is disabled completely. The incremental repair states, e.g., repaired_at in sstables and sstables_repaired_at in the system.tablets table, will not be updated after repair. When the option is not provided, it defaults to 'disabled' mode.",
|
||||||
"required":false,
|
"required":false,
|
||||||
"allowMultiple":false,
|
"allowMultiple":false,
|
||||||
"type":"string",
|
"type":"string",
|
||||||
|
|||||||
@@ -349,13 +349,9 @@
|
|||||||
"type":"long",
|
"type":"long",
|
||||||
"description":"The shard the task is running on"
|
"description":"The shard the task is running on"
|
||||||
},
|
},
|
||||||
"creation_time":{
|
|
||||||
"type":"datetime",
|
|
||||||
"description":"The creation time of the task (when it was queued); extracted from the task_id UUID"
|
|
||||||
},
|
|
||||||
"start_time":{
|
"start_time":{
|
||||||
"type":"datetime",
|
"type":"datetime",
|
||||||
"description":"The start time of the task (when execution began); unspecified (equal to epoch) when state == created"
|
"description":"The start time of the task; unspecified (equal to epoch) when state == created"
|
||||||
},
|
},
|
||||||
"end_time":{
|
"end_time":{
|
||||||
"type":"datetime",
|
"type":"datetime",
|
||||||
@@ -402,17 +398,13 @@
|
|||||||
"type":"boolean",
|
"type":"boolean",
|
||||||
"description":"Boolean flag indicating whether the task can be aborted"
|
"description":"Boolean flag indicating whether the task can be aborted"
|
||||||
},
|
},
|
||||||
"creation_time":{
|
|
||||||
"type":"datetime",
|
|
||||||
"description":"The creation time of the task (when it was queued); extracted from the task_id UUID"
|
|
||||||
},
|
|
||||||
"start_time":{
|
"start_time":{
|
||||||
"type":"datetime",
|
"type":"datetime",
|
||||||
"description":"The start time of the task (when execution began); unspecified (equal to epoch) when state == created"
|
"description":"The start time of the task"
|
||||||
},
|
},
|
||||||
"end_time":{
|
"end_time":{
|
||||||
"type":"datetime",
|
"type":"datetime",
|
||||||
"description":"The end time of the task (when execution completed); unspecified (equal to epoch) when the task is not completed"
|
"description":"The end time of the task (unspecified when the task is not completed)"
|
||||||
},
|
},
|
||||||
"error":{
|
"error":{
|
||||||
"type":"string",
|
"type":"string",
|
||||||
|
|||||||
13
api/api.cc
13
api/api.cc
@@ -37,6 +37,7 @@
|
|||||||
#include "raft.hh"
|
#include "raft.hh"
|
||||||
#include "gms/gossip_address_map.hh"
|
#include "gms/gossip_address_map.hh"
|
||||||
#include "service_levels.hh"
|
#include "service_levels.hh"
|
||||||
|
#include "client_routes.hh"
|
||||||
|
|
||||||
logging::logger apilog("api");
|
logging::logger apilog("api");
|
||||||
|
|
||||||
@@ -67,9 +68,11 @@ future<> set_server_init(http_context& ctx) {
|
|||||||
rb02->set_api_doc(r);
|
rb02->set_api_doc(r);
|
||||||
rb02->register_api_file(r, "swagger20_header");
|
rb02->register_api_file(r, "swagger20_header");
|
||||||
rb02->register_api_file(r, "metrics");
|
rb02->register_api_file(r, "metrics");
|
||||||
|
rb02->register_api_file(r, "client_routes");
|
||||||
rb->register_function(r, "system",
|
rb->register_function(r, "system",
|
||||||
"The system related API");
|
"The system related API");
|
||||||
rb02->add_definitions_file(r, "metrics");
|
rb02->add_definitions_file(r, "metrics");
|
||||||
|
rb02->add_definitions_file(r, "client_routes");
|
||||||
set_system(ctx, r);
|
set_system(ctx, r);
|
||||||
rb->register_function(r, "error_injection",
|
rb->register_function(r, "error_injection",
|
||||||
"The error injection API");
|
"The error injection API");
|
||||||
@@ -129,6 +132,16 @@ future<> unset_server_storage_service(http_context& ctx) {
|
|||||||
return ctx.http_server.set_routes([&ctx] (routes& r) { unset_storage_service(ctx, r); });
|
return ctx.http_server.set_routes([&ctx] (routes& r) { unset_storage_service(ctx, r); });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
future<> set_server_client_routes(http_context& ctx, sharded<service::client_routes_service>& cr) {
|
||||||
|
return ctx.http_server.set_routes([&ctx, &cr] (routes& r) {
|
||||||
|
set_client_routes(ctx, r, cr);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
future<> unset_server_client_routes(http_context& ctx) {
|
||||||
|
return ctx.http_server.set_routes([&ctx] (routes& r) { unset_client_routes(ctx, r); });
|
||||||
|
}
|
||||||
|
|
||||||
future<> set_load_meter(http_context& ctx, service::load_meter& lm) {
|
future<> set_load_meter(http_context& ctx, service::load_meter& lm) {
|
||||||
return ctx.http_server.set_routes([&ctx, &lm] (routes& r) { set_load_meter(ctx, r, lm); });
|
return ctx.http_server.set_routes([&ctx, &lm] (routes& r) { set_load_meter(ctx, r, lm); });
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -29,6 +29,7 @@ class storage_proxy;
|
|||||||
class storage_service;
|
class storage_service;
|
||||||
class raft_group0_client;
|
class raft_group0_client;
|
||||||
class raft_group_registry;
|
class raft_group_registry;
|
||||||
|
class client_routes_service;
|
||||||
|
|
||||||
} // namespace service
|
} // namespace service
|
||||||
|
|
||||||
@@ -99,6 +100,8 @@ future<> set_server_snitch(http_context& ctx, sharded<locator::snitch_ptr>& snit
|
|||||||
future<> unset_server_snitch(http_context& ctx);
|
future<> unset_server_snitch(http_context& ctx);
|
||||||
future<> set_server_storage_service(http_context& ctx, sharded<service::storage_service>& ss, service::raft_group0_client&);
|
future<> set_server_storage_service(http_context& ctx, sharded<service::storage_service>& ss, service::raft_group0_client&);
|
||||||
future<> unset_server_storage_service(http_context& ctx);
|
future<> unset_server_storage_service(http_context& ctx);
|
||||||
|
future<> set_server_client_routes(http_context& ctx, sharded<service::client_routes_service>& cr);
|
||||||
|
future<> unset_server_client_routes(http_context& ctx);
|
||||||
future<> set_server_sstables_loader(http_context& ctx, sharded<sstables_loader>& sst_loader);
|
future<> set_server_sstables_loader(http_context& ctx, sharded<sstables_loader>& sst_loader);
|
||||||
future<> unset_server_sstables_loader(http_context& ctx);
|
future<> unset_server_sstables_loader(http_context& ctx);
|
||||||
future<> set_server_view_builder(http_context& ctx, sharded<db::view::view_builder>& vb, sharded<gms::gossiper>& g);
|
future<> set_server_view_builder(http_context& ctx, sharded<db::view::view_builder>& vb, sharded<gms::gossiper>& g);
|
||||||
|
|||||||
176
api/client_routes.cc
Normal file
176
api/client_routes.cc
Normal file
@@ -0,0 +1,176 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2025-present ScyllaDB
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <seastar/http/short_streams.hh>
|
||||||
|
|
||||||
|
#include "client_routes.hh"
|
||||||
|
#include "api/api.hh"
|
||||||
|
#include "service/storage_service.hh"
|
||||||
|
#include "service/client_routes.hh"
|
||||||
|
#include "utils/rjson.hh"
|
||||||
|
|
||||||
|
|
||||||
|
#include "api/api-doc/client_routes.json.hh"
|
||||||
|
|
||||||
|
using namespace seastar::httpd;
|
||||||
|
using namespace std::chrono_literals;
|
||||||
|
using namespace json;
|
||||||
|
|
||||||
|
extern logging::logger apilog;
|
||||||
|
|
||||||
|
namespace api {
|
||||||
|
|
||||||
|
static void validate_client_routes_endpoint(sharded<service::client_routes_service>& cr, sstring endpoint_name) {
|
||||||
|
if (!cr.local().get_feature_service().client_routes) {
|
||||||
|
apilog.warn("{}: called before the cluster feature was enabled", endpoint_name);
|
||||||
|
throw std::runtime_error(fmt::format("{} requires all nodes to support the CLIENT_ROUTES cluster feature", endpoint_name));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static sstring parse_string(const char* name, rapidjson::Value const& v) {
|
||||||
|
const auto it = v.FindMember(name);
|
||||||
|
if (it == v.MemberEnd()) {
|
||||||
|
throw bad_param_exception(fmt::format("Missing '{}'", name));
|
||||||
|
}
|
||||||
|
if (!it->value.IsString()) {
|
||||||
|
throw bad_param_exception(fmt::format("'{}' must be a string", name));
|
||||||
|
}
|
||||||
|
return {it->value.GetString(), it->value.GetStringLength()};
|
||||||
|
}
|
||||||
|
|
||||||
|
static std::optional<uint32_t> parse_port(const char* name, rapidjson::Value const& v) {
|
||||||
|
const auto it = v.FindMember(name);
|
||||||
|
if (it == v.MemberEnd()) {
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
if (!it->value.IsInt()) {
|
||||||
|
throw bad_param_exception(fmt::format("'{}' must be an integer", name));
|
||||||
|
}
|
||||||
|
auto port = it->value.GetInt();
|
||||||
|
if (port < 1 || port > 65535) {
|
||||||
|
throw bad_param_exception(fmt::format("'{}' value={} is outside the allowed port range", name, port));
|
||||||
|
}
|
||||||
|
return port;
|
||||||
|
}
|
||||||
|
|
||||||
|
static std::vector<service::client_routes_service::client_route_entry> parse_set_client_array(const rapidjson::Document& root) {
|
||||||
|
if (!root.IsArray()) {
|
||||||
|
throw bad_param_exception("Body must be a JSON array");
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<service::client_routes_service::client_route_entry> v;
|
||||||
|
v.reserve(root.GetArray().Size());
|
||||||
|
for (const auto& element : root.GetArray()) {
|
||||||
|
if (!element.IsObject()) { throw bad_param_exception("Each element must be object"); }
|
||||||
|
|
||||||
|
const auto port = parse_port("port", element);
|
||||||
|
const auto tls_port = parse_port("tls_port", element);
|
||||||
|
const auto alternator_port = parse_port("alternator_port", element);
|
||||||
|
const auto alternator_https_port = parse_port("alternator_https_port", element);
|
||||||
|
|
||||||
|
if (!port.has_value() && !tls_port.has_value() && !alternator_port.has_value() && !alternator_https_port.has_value()) {
|
||||||
|
throw bad_param_exception("At least one port field ('port', 'tls_port', 'alternator_port', 'alternator_https_port') must be specified");
|
||||||
|
}
|
||||||
|
|
||||||
|
v.emplace_back(
|
||||||
|
parse_string("connection_id", element),
|
||||||
|
utils::UUID{parse_string("host_id", element)},
|
||||||
|
parse_string("address", element),
|
||||||
|
port,
|
||||||
|
tls_port,
|
||||||
|
alternator_port,
|
||||||
|
alternator_https_port
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
future<json::json_return_type>
|
||||||
|
rest_set_client_routes(http_context& ctx, sharded<service::client_routes_service>& cr, std::unique_ptr<http::request> req) {
|
||||||
|
validate_client_routes_endpoint(cr, "rest_set_client_routes");
|
||||||
|
|
||||||
|
rapidjson::Document root;
|
||||||
|
auto content = co_await util::read_entire_stream_contiguous(*req->content_stream);
|
||||||
|
root.Parse(content.c_str());
|
||||||
|
|
||||||
|
co_await cr.local().set_client_routes(parse_set_client_array(root));
|
||||||
|
co_return seastar::json::json_void();
|
||||||
|
}
|
||||||
|
|
||||||
|
static std::vector<service::client_routes_service::client_route_key> parse_delete_client_array(const rapidjson::Document& root) {
|
||||||
|
if (!root.IsArray()) {
|
||||||
|
throw bad_param_exception("Body must be a JSON array");
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<service::client_routes_service::client_route_key> v;
|
||||||
|
v.reserve(root.GetArray().Size());
|
||||||
|
for (const auto& element : root.GetArray()) {
|
||||||
|
v.emplace_back(
|
||||||
|
parse_string("connection_id", element),
|
||||||
|
utils::UUID{parse_string("host_id", element)}
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
future<json::json_return_type>
|
||||||
|
rest_delete_client_routes(http_context& ctx, sharded<service::client_routes_service>& cr, std::unique_ptr<http::request> req) {
|
||||||
|
validate_client_routes_endpoint(cr, "delete_client_routes");
|
||||||
|
|
||||||
|
rapidjson::Document root;
|
||||||
|
auto content = co_await util::read_entire_stream_contiguous(*req->content_stream);
|
||||||
|
root.Parse(content.c_str());
|
||||||
|
|
||||||
|
co_await cr.local().delete_client_routes(parse_delete_client_array(root));
|
||||||
|
co_return seastar::json::json_void();
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
future<json::json_return_type>
|
||||||
|
rest_get_client_routes(http_context& ctx, sharded<service::client_routes_service>& cr, std::unique_ptr<http::request> req) {
|
||||||
|
validate_client_routes_endpoint(cr, "get_client_routes");
|
||||||
|
|
||||||
|
co_return co_await cr.invoke_on(0, [] (service::client_routes_service& cr) -> future<json::json_return_type> {
|
||||||
|
co_return json::json_return_type(stream_range_as_array(co_await cr.get_client_routes(), [](const service::client_routes_service::client_route_entry & entry) {
|
||||||
|
seastar::httpd::client_routes_json::client_routes_entry obj;
|
||||||
|
obj.connection_id = entry.connection_id;
|
||||||
|
obj.host_id = fmt::to_string(entry.host_id);
|
||||||
|
obj.address = entry.address;
|
||||||
|
if (entry.port.has_value()) { obj.port = entry.port.value(); }
|
||||||
|
if (entry.tls_port.has_value()) { obj.tls_port = entry.tls_port.value(); }
|
||||||
|
if (entry.alternator_port.has_value()) { obj.alternator_port = entry.alternator_port.value(); }
|
||||||
|
if (entry.alternator_https_port.has_value()) { obj.alternator_https_port = entry.alternator_https_port.value(); }
|
||||||
|
return obj;
|
||||||
|
}));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
void set_client_routes(http_context& ctx, routes& r, sharded<service::client_routes_service>& cr) {
|
||||||
|
seastar::httpd::client_routes_json::set_client_routes.set(r, [&ctx, &cr] (std::unique_ptr<seastar::http::request> req) {
|
||||||
|
return rest_set_client_routes(ctx, cr, std::move(req));
|
||||||
|
});
|
||||||
|
seastar::httpd::client_routes_json::delete_client_routes.set(r, [&ctx, &cr] (std::unique_ptr<seastar::http::request> req) {
|
||||||
|
return rest_delete_client_routes(ctx, cr, std::move(req));
|
||||||
|
});
|
||||||
|
seastar::httpd::client_routes_json::get_client_routes.set(r, [&ctx, &cr] (std::unique_ptr<seastar::http::request> req) {
|
||||||
|
return rest_get_client_routes(ctx, cr, std::move(req));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
void unset_client_routes(http_context& ctx, routes& r) {
|
||||||
|
seastar::httpd::client_routes_json::set_client_routes.unset(r);
|
||||||
|
seastar::httpd::client_routes_json::delete_client_routes.unset(r);
|
||||||
|
seastar::httpd::client_routes_json::get_client_routes.unset(r);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
20
api/client_routes.hh
Normal file
20
api/client_routes.hh
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2025-present ScyllaDB
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||||
|
*/
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <seastar/core/sharded.hh>
|
||||||
|
#include <seastar/json/json_elements.hh>
|
||||||
|
#include "api/api_init.hh"
|
||||||
|
|
||||||
|
namespace api {
|
||||||
|
|
||||||
|
void set_client_routes(http_context& ctx, httpd::routes& r, sharded<service::client_routes_service>& cr);
|
||||||
|
void unset_client_routes(http_context& ctx, httpd::routes& r);
|
||||||
|
|
||||||
|
}
|
||||||
@@ -55,7 +55,6 @@ tm::task_status make_status(tasks::task_status status, sharded<gms::gossiper>& g
|
|||||||
res.scope = status.scope;
|
res.scope = status.scope;
|
||||||
res.state = status.state;
|
res.state = status.state;
|
||||||
res.is_abortable = bool(status.is_abortable);
|
res.is_abortable = bool(status.is_abortable);
|
||||||
res.creation_time = get_time(status.creation_time);
|
|
||||||
res.start_time = get_time(status.start_time);
|
res.start_time = get_time(status.start_time);
|
||||||
res.end_time = get_time(status.end_time);
|
res.end_time = get_time(status.end_time);
|
||||||
res.error = status.error;
|
res.error = status.error;
|
||||||
@@ -84,7 +83,6 @@ tm::task_stats make_stats(tasks::task_stats stats) {
|
|||||||
res.table = stats.table;
|
res.table = stats.table;
|
||||||
res.entity = stats.entity;
|
res.entity = stats.entity;
|
||||||
res.shard = stats.shard;
|
res.shard = stats.shard;
|
||||||
res.creation_time = get_time(stats.creation_time);
|
|
||||||
res.start_time = get_time(stats.start_time);
|
res.start_time = get_time(stats.start_time);
|
||||||
res.end_time = get_time(stats.end_time);;
|
res.end_time = get_time(stats.end_time);;
|
||||||
return res;
|
return res;
|
||||||
|
|||||||
@@ -9,7 +9,6 @@
|
|||||||
#include "auth/allow_all_authenticator.hh"
|
#include "auth/allow_all_authenticator.hh"
|
||||||
|
|
||||||
#include "service/migration_manager.hh"
|
#include "service/migration_manager.hh"
|
||||||
#include "utils/alien_worker.hh"
|
|
||||||
#include "utils/class_registrator.hh"
|
#include "utils/class_registrator.hh"
|
||||||
|
|
||||||
namespace auth {
|
namespace auth {
|
||||||
@@ -23,7 +22,6 @@ static const class_registrator<
|
|||||||
cql3::query_processor&,
|
cql3::query_processor&,
|
||||||
::service::raft_group0_client&,
|
::service::raft_group0_client&,
|
||||||
::service::migration_manager&,
|
::service::migration_manager&,
|
||||||
cache&,
|
cache&> registration("org.apache.cassandra.auth.AllowAllAuthenticator");
|
||||||
utils::alien_worker&> registration("org.apache.cassandra.auth.AllowAllAuthenticator");
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -14,7 +14,6 @@
|
|||||||
#include "auth/authenticator.hh"
|
#include "auth/authenticator.hh"
|
||||||
#include "auth/cache.hh"
|
#include "auth/cache.hh"
|
||||||
#include "auth/common.hh"
|
#include "auth/common.hh"
|
||||||
#include "utils/alien_worker.hh"
|
|
||||||
|
|
||||||
namespace cql3 {
|
namespace cql3 {
|
||||||
class query_processor;
|
class query_processor;
|
||||||
@@ -30,7 +29,7 @@ extern const std::string_view allow_all_authenticator_name;
|
|||||||
|
|
||||||
class allow_all_authenticator final : public authenticator {
|
class allow_all_authenticator final : public authenticator {
|
||||||
public:
|
public:
|
||||||
allow_all_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&, utils::alien_worker&) {
|
allow_all_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&) {
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual future<> start() override {
|
virtual future<> start() override {
|
||||||
|
|||||||
@@ -35,14 +35,13 @@ static const class_registrator<auth::authenticator
|
|||||||
, cql3::query_processor&
|
, cql3::query_processor&
|
||||||
, ::service::raft_group0_client&
|
, ::service::raft_group0_client&
|
||||||
, ::service::migration_manager&
|
, ::service::migration_manager&
|
||||||
, auth::cache&
|
, auth::cache&> cert_auth_reg(CERT_AUTH_NAME);
|
||||||
, utils::alien_worker&> cert_auth_reg(CERT_AUTH_NAME);
|
|
||||||
|
|
||||||
enum class auth::certificate_authenticator::query_source {
|
enum class auth::certificate_authenticator::query_source {
|
||||||
subject, altname
|
subject, altname
|
||||||
};
|
};
|
||||||
|
|
||||||
auth::certificate_authenticator::certificate_authenticator(cql3::query_processor& qp, ::service::raft_group0_client&, ::service::migration_manager&, auth::cache&, utils::alien_worker&)
|
auth::certificate_authenticator::certificate_authenticator(cql3::query_processor& qp, ::service::raft_group0_client&, ::service::migration_manager&, auth::cache&)
|
||||||
: _queries([&] {
|
: _queries([&] {
|
||||||
auto& conf = qp.db().get_config();
|
auto& conf = qp.db().get_config();
|
||||||
auto queries = conf.auth_certificate_role_queries();
|
auto queries = conf.auth_certificate_role_queries();
|
||||||
@@ -77,9 +76,9 @@ auth::certificate_authenticator::certificate_authenticator(cql3::query_processor
|
|||||||
throw std::invalid_argument(fmt::format("Invalid source: {}", map.at(cfg_source_attr)));
|
throw std::invalid_argument(fmt::format("Invalid source: {}", map.at(cfg_source_attr)));
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
} catch (std::out_of_range&) {
|
} catch (const std::out_of_range&) {
|
||||||
// just fallthrough
|
// just fallthrough
|
||||||
} catch (boost::regex_error&) {
|
} catch (const boost::regex_error&) {
|
||||||
std::throw_with_nested(std::invalid_argument(fmt::format("Invalid query expression: {}", map.at(cfg_query_attr))));
|
std::throw_with_nested(std::invalid_argument(fmt::format("Invalid query expression: {}", map.at(cfg_query_attr))));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -10,7 +10,6 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "auth/authenticator.hh"
|
#include "auth/authenticator.hh"
|
||||||
#include "utils/alien_worker.hh"
|
|
||||||
#include <boost/regex_fwd.hpp> // IWYU pragma: keep
|
#include <boost/regex_fwd.hpp> // IWYU pragma: keep
|
||||||
|
|
||||||
namespace cql3 {
|
namespace cql3 {
|
||||||
@@ -34,7 +33,7 @@ class certificate_authenticator : public authenticator {
|
|||||||
enum class query_source;
|
enum class query_source;
|
||||||
std::vector<std::pair<query_source, boost::regex>> _queries;
|
std::vector<std::pair<query_source, boost::regex>> _queries;
|
||||||
public:
|
public:
|
||||||
certificate_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&, utils::alien_worker&);
|
certificate_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&);
|
||||||
~certificate_authenticator();
|
~certificate_authenticator();
|
||||||
|
|
||||||
future<> start() override;
|
future<> start() override;
|
||||||
|
|||||||
@@ -94,7 +94,7 @@ static future<> create_legacy_metadata_table_if_missing_impl(
|
|||||||
try {
|
try {
|
||||||
co_return co_await mm.announce(co_await ::service::prepare_new_column_family_announcement(qp.proxy(), table, ts),
|
co_return co_await mm.announce(co_await ::service::prepare_new_column_family_announcement(qp.proxy(), table, ts),
|
||||||
std::move(group0_guard), format("auth: create {} metadata table", table->cf_name()));
|
std::move(group0_guard), format("auth: create {} metadata table", table->cf_name()));
|
||||||
} catch (exceptions::already_exists_exception&) {}
|
} catch (const exceptions::already_exists_exception&) {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -256,7 +256,7 @@ future<> default_authorizer::revoke_all(std::string_view role_name, ::service::g
|
|||||||
} else {
|
} else {
|
||||||
co_await collect_mutations(_qp, mc, query, {sstring(role_name)});
|
co_await collect_mutations(_qp, mc, query, {sstring(role_name)});
|
||||||
}
|
}
|
||||||
} catch (exceptions::request_execution_exception& e) {
|
} catch (const exceptions::request_execution_exception& e) {
|
||||||
alogger.warn("CassandraAuthorizer failed to revoke all permissions of {}: {}", role_name, e);
|
alogger.warn("CassandraAuthorizer failed to revoke all permissions of {}: {}", role_name, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -293,13 +293,13 @@ future<> default_authorizer::revoke_all_legacy(const resource& resource) {
|
|||||||
[resource](auto ep) {
|
[resource](auto ep) {
|
||||||
try {
|
try {
|
||||||
std::rethrow_exception(ep);
|
std::rethrow_exception(ep);
|
||||||
} catch (exceptions::request_execution_exception& e) {
|
} catch (const exceptions::request_execution_exception& e) {
|
||||||
alogger.warn("CassandraAuthorizer failed to revoke all permissions on {}: {}", resource, e);
|
alogger.warn("CassandraAuthorizer failed to revoke all permissions on {}: {}", resource, e);
|
||||||
}
|
}
|
||||||
|
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
} catch (exceptions::request_execution_exception& e) {
|
} catch (const exceptions::request_execution_exception& e) {
|
||||||
alogger.warn("CassandraAuthorizer failed to revoke all permissions on {}: {}", resource, e);
|
alogger.warn("CassandraAuthorizer failed to revoke all permissions on {}: {}", resource, e);
|
||||||
return make_ready_future();
|
return make_ready_future();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -49,8 +49,7 @@ static const class_registrator<
|
|||||||
cql3::query_processor&,
|
cql3::query_processor&,
|
||||||
::service::raft_group0_client&,
|
::service::raft_group0_client&,
|
||||||
::service::migration_manager&,
|
::service::migration_manager&,
|
||||||
cache&,
|
cache&> password_auth_reg("org.apache.cassandra.auth.PasswordAuthenticator");
|
||||||
utils::alien_worker&> password_auth_reg("org.apache.cassandra.auth.PasswordAuthenticator");
|
|
||||||
|
|
||||||
static thread_local auto rng_for_salt = std::default_random_engine(std::random_device{}());
|
static thread_local auto rng_for_salt = std::default_random_engine(std::random_device{}());
|
||||||
|
|
||||||
@@ -64,14 +63,13 @@ std::string password_authenticator::default_superuser(const db::config& cfg) {
|
|||||||
password_authenticator::~password_authenticator() {
|
password_authenticator::~password_authenticator() {
|
||||||
}
|
}
|
||||||
|
|
||||||
password_authenticator::password_authenticator(cql3::query_processor& qp, ::service::raft_group0_client& g0, ::service::migration_manager& mm, cache& cache, utils::alien_worker& hashing_worker)
|
password_authenticator::password_authenticator(cql3::query_processor& qp, ::service::raft_group0_client& g0, ::service::migration_manager& mm, cache& cache)
|
||||||
: _qp(qp)
|
: _qp(qp)
|
||||||
, _group0_client(g0)
|
, _group0_client(g0)
|
||||||
, _migration_manager(mm)
|
, _migration_manager(mm)
|
||||||
, _cache(cache)
|
, _cache(cache)
|
||||||
, _stopped(make_ready_future<>())
|
, _stopped(make_ready_future<>())
|
||||||
, _superuser(default_superuser(qp.db().get_config()))
|
, _superuser(default_superuser(qp.db().get_config()))
|
||||||
, _hashing_worker(hashing_worker)
|
|
||||||
{}
|
{}
|
||||||
|
|
||||||
static bool has_salted_hash(const cql3::untyped_result_set_row& row) {
|
static bool has_salted_hash(const cql3::untyped_result_set_row& row) {
|
||||||
@@ -330,20 +328,18 @@ future<authenticated_user> password_authenticator::authenticate(
|
|||||||
}
|
}
|
||||||
salted_hash = role->salted_hash;
|
salted_hash = role->salted_hash;
|
||||||
}
|
}
|
||||||
const bool password_match = co_await _hashing_worker.submit<bool>([password = std::move(password), salted_hash] {
|
const bool password_match = co_await passwords::check(password, *salted_hash);
|
||||||
return passwords::check(password, *salted_hash);
|
|
||||||
});
|
|
||||||
if (!password_match) {
|
if (!password_match) {
|
||||||
throw exceptions::authentication_exception("Username and/or password are incorrect");
|
throw exceptions::authentication_exception("Username and/or password are incorrect");
|
||||||
}
|
}
|
||||||
co_return username;
|
co_return username;
|
||||||
} catch (std::system_error &) {
|
} catch (const std::system_error &) {
|
||||||
std::throw_with_nested(exceptions::authentication_exception("Could not verify password"));
|
std::throw_with_nested(exceptions::authentication_exception("Could not verify password"));
|
||||||
} catch (exceptions::request_execution_exception& e) {
|
} catch (const exceptions::request_execution_exception& e) {
|
||||||
std::throw_with_nested(exceptions::authentication_exception(e.what()));
|
std::throw_with_nested(exceptions::authentication_exception(e.what()));
|
||||||
} catch (exceptions::authentication_exception& e) {
|
} catch (const exceptions::authentication_exception& e) {
|
||||||
std::throw_with_nested(e);
|
std::throw_with_nested(e);
|
||||||
} catch (exceptions::unavailable_exception& e) {
|
} catch (const exceptions::unavailable_exception& e) {
|
||||||
std::throw_with_nested(exceptions::authentication_exception(e.get_message()));
|
std::throw_with_nested(exceptions::authentication_exception(e.get_message()));
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
std::throw_with_nested(exceptions::authentication_exception("authentication failed"));
|
std::throw_with_nested(exceptions::authentication_exception("authentication failed"));
|
||||||
|
|||||||
@@ -18,7 +18,6 @@
|
|||||||
#include "auth/passwords.hh"
|
#include "auth/passwords.hh"
|
||||||
#include "auth/cache.hh"
|
#include "auth/cache.hh"
|
||||||
#include "service/raft/raft_group0_client.hh"
|
#include "service/raft/raft_group0_client.hh"
|
||||||
#include "utils/alien_worker.hh"
|
|
||||||
|
|
||||||
namespace db {
|
namespace db {
|
||||||
class config;
|
class config;
|
||||||
@@ -49,13 +48,12 @@ class password_authenticator : public authenticator {
|
|||||||
shared_promise<> _superuser_created_promise;
|
shared_promise<> _superuser_created_promise;
|
||||||
// We used to also support bcrypt, SHA-256, and MD5 (ref. scylladb#24524).
|
// We used to also support bcrypt, SHA-256, and MD5 (ref. scylladb#24524).
|
||||||
constexpr static auth::passwords::scheme _scheme = passwords::scheme::sha_512;
|
constexpr static auth::passwords::scheme _scheme = passwords::scheme::sha_512;
|
||||||
utils::alien_worker& _hashing_worker;
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
static db::consistency_level consistency_for_user(std::string_view role_name);
|
static db::consistency_level consistency_for_user(std::string_view role_name);
|
||||||
static std::string default_superuser(const db::config&);
|
static std::string default_superuser(const db::config&);
|
||||||
|
|
||||||
password_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&, utils::alien_worker&);
|
password_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&);
|
||||||
|
|
||||||
~password_authenticator();
|
~password_authenticator();
|
||||||
|
|
||||||
|
|||||||
@@ -7,6 +7,8 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "auth/passwords.hh"
|
#include "auth/passwords.hh"
|
||||||
|
#include "utils/crypt_sha512.hh"
|
||||||
|
#include <seastar/core/coroutine.hh>
|
||||||
|
|
||||||
#include <cerrno>
|
#include <cerrno>
|
||||||
|
|
||||||
@@ -21,27 +23,48 @@ static thread_local crypt_data tlcrypt = {};
|
|||||||
|
|
||||||
namespace detail {
|
namespace detail {
|
||||||
|
|
||||||
|
void verify_hashing_output(const char * res) {
|
||||||
|
if (!res || (res[0] == '*')) {
|
||||||
|
throw std::system_error(errno, std::system_category());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void verify_scheme(scheme scheme) {
|
void verify_scheme(scheme scheme) {
|
||||||
const sstring random_part_of_salt = "aaaabbbbccccdddd";
|
const sstring random_part_of_salt = "aaaabbbbccccdddd";
|
||||||
|
|
||||||
const sstring salt = sstring(prefix_for_scheme(scheme)) + random_part_of_salt;
|
const sstring salt = sstring(prefix_for_scheme(scheme)) + random_part_of_salt;
|
||||||
const char* e = crypt_r("fisk", salt.c_str(), &tlcrypt);
|
const char* e = crypt_r("fisk", salt.c_str(), &tlcrypt);
|
||||||
|
try {
|
||||||
if (e && (e[0] != '*')) {
|
verify_hashing_output(e);
|
||||||
return;
|
} catch (const std::system_error& ex) {
|
||||||
|
throw no_supported_schemes();
|
||||||
}
|
}
|
||||||
|
|
||||||
throw no_supported_schemes();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
sstring hash_with_salt(const sstring& pass, const sstring& salt) {
|
sstring hash_with_salt(const sstring& pass, const sstring& salt) {
|
||||||
auto res = crypt_r(pass.c_str(), salt.c_str(), &tlcrypt);
|
auto res = crypt_r(pass.c_str(), salt.c_str(), &tlcrypt);
|
||||||
if (!res || (res[0] == '*')) {
|
verify_hashing_output(res);
|
||||||
throw std::system_error(errno, std::system_category());
|
|
||||||
}
|
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
seastar::future<sstring> hash_with_salt_async(const sstring& pass, const sstring& salt) {
|
||||||
|
sstring res;
|
||||||
|
// Only SHA-512 hashes for passphrases shorter than 256 bytes can be computed using
|
||||||
|
// the __crypt_sha512 method. For other computations, we fall back to the
|
||||||
|
// crypt_r implementation from `<crypt.h>`, which can stall.
|
||||||
|
if (salt.starts_with(prefix_for_scheme(scheme::sha_512)) && pass.size() <= 255) {
|
||||||
|
char buf[128];
|
||||||
|
const char * output_ptr = co_await __crypt_sha512(pass.c_str(), salt.c_str(), buf);
|
||||||
|
verify_hashing_output(output_ptr);
|
||||||
|
res = output_ptr;
|
||||||
|
} else {
|
||||||
|
const char * output_ptr = crypt_r(pass.c_str(), salt.c_str(), &tlcrypt);
|
||||||
|
verify_hashing_output(output_ptr);
|
||||||
|
res = output_ptr;
|
||||||
|
}
|
||||||
|
co_return res;
|
||||||
|
}
|
||||||
|
|
||||||
std::string_view prefix_for_scheme(scheme c) noexcept {
|
std::string_view prefix_for_scheme(scheme c) noexcept {
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case scheme::bcrypt_y: return "$2y$";
|
case scheme::bcrypt_y: return "$2y$";
|
||||||
@@ -58,8 +81,9 @@ no_supported_schemes::no_supported_schemes()
|
|||||||
: std::runtime_error("No allowed hashing schemes are supported on this system") {
|
: std::runtime_error("No allowed hashing schemes are supported on this system") {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool check(const sstring& pass, const sstring& salted_hash) {
|
seastar::future<bool> check(const sstring& pass, const sstring& salted_hash) {
|
||||||
return detail::hash_with_salt(pass, salted_hash) == salted_hash;
|
const auto pwd_hash = co_await detail::hash_with_salt_async(pass, salted_hash);
|
||||||
|
co_return pwd_hash == salted_hash;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace auth::passwords
|
} // namespace auth::passwords
|
||||||
|
|||||||
@@ -11,6 +11,7 @@
|
|||||||
#include <random>
|
#include <random>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
|
|
||||||
|
#include <seastar/core/future.hh>
|
||||||
#include <seastar/core/sstring.hh>
|
#include <seastar/core/sstring.hh>
|
||||||
|
|
||||||
#include "seastarx.hh"
|
#include "seastarx.hh"
|
||||||
@@ -75,10 +76,19 @@ sstring generate_salt(RandomNumberEngine& g, scheme scheme) {
|
|||||||
|
|
||||||
///
|
///
|
||||||
/// Hash a password combined with an implementation-specific salt string.
|
/// Hash a password combined with an implementation-specific salt string.
|
||||||
|
/// Deprecated in favor of `hash_with_salt_async`.
|
||||||
///
|
///
|
||||||
/// \throws \ref std::system_error when an unexpected implementation-specific error occurs.
|
/// \throws \ref std::system_error when an unexpected implementation-specific error occurs.
|
||||||
///
|
///
|
||||||
sstring hash_with_salt(const sstring& pass, const sstring& salt);
|
[[deprecated("Use hash_with_salt_async instead")]] sstring hash_with_salt(const sstring& pass, const sstring& salt);
|
||||||
|
|
||||||
|
///
|
||||||
|
/// Async version of `hash_with_salt` that returns a future.
|
||||||
|
/// If possible, hashing uses `coroutine::maybe_yield` to prevent reactor stalls.
|
||||||
|
///
|
||||||
|
/// \throws \ref std::system_error when an unexpected implementation-specific error occurs.
|
||||||
|
///
|
||||||
|
seastar::future<sstring> hash_with_salt_async(const sstring& pass, const sstring& salt);
|
||||||
|
|
||||||
} // namespace detail
|
} // namespace detail
|
||||||
|
|
||||||
@@ -107,6 +117,6 @@ sstring hash(const sstring& pass, RandomNumberEngine& g, scheme scheme) {
|
|||||||
///
|
///
|
||||||
/// \throws \ref std::system_error when an unexpected implementation-specific error occurs.
|
/// \throws \ref std::system_error when an unexpected implementation-specific error occurs.
|
||||||
///
|
///
|
||||||
bool check(const sstring& pass, const sstring& salted_hash);
|
seastar::future<bool> check(const sstring& pass, const sstring& salted_hash);
|
||||||
|
|
||||||
} // namespace auth::passwords
|
} // namespace auth::passwords
|
||||||
|
|||||||
@@ -35,10 +35,9 @@ static const class_registrator<
|
|||||||
cql3::query_processor&,
|
cql3::query_processor&,
|
||||||
::service::raft_group0_client&,
|
::service::raft_group0_client&,
|
||||||
::service::migration_manager&,
|
::service::migration_manager&,
|
||||||
cache&,
|
cache&> saslauthd_auth_reg("com.scylladb.auth.SaslauthdAuthenticator");
|
||||||
utils::alien_worker&> saslauthd_auth_reg("com.scylladb.auth.SaslauthdAuthenticator");
|
|
||||||
|
|
||||||
saslauthd_authenticator::saslauthd_authenticator(cql3::query_processor& qp, ::service::raft_group0_client&, ::service::migration_manager&, cache&, utils::alien_worker&)
|
saslauthd_authenticator::saslauthd_authenticator(cql3::query_processor& qp, ::service::raft_group0_client&, ::service::migration_manager&, cache&)
|
||||||
: _socket_path(qp.db().get_config().saslauthd_socket_path())
|
: _socket_path(qp.db().get_config().saslauthd_socket_path())
|
||||||
{}
|
{}
|
||||||
|
|
||||||
|
|||||||
@@ -12,7 +12,6 @@
|
|||||||
|
|
||||||
#include "auth/authenticator.hh"
|
#include "auth/authenticator.hh"
|
||||||
#include "auth/cache.hh"
|
#include "auth/cache.hh"
|
||||||
#include "utils/alien_worker.hh"
|
|
||||||
|
|
||||||
namespace cql3 {
|
namespace cql3 {
|
||||||
class query_processor;
|
class query_processor;
|
||||||
@@ -30,7 +29,7 @@ namespace auth {
|
|||||||
class saslauthd_authenticator : public authenticator {
|
class saslauthd_authenticator : public authenticator {
|
||||||
sstring _socket_path; ///< Path to the domain socket on which saslauthd is listening.
|
sstring _socket_path; ///< Path to the domain socket on which saslauthd is listening.
|
||||||
public:
|
public:
|
||||||
saslauthd_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&,utils::alien_worker&);
|
saslauthd_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&);
|
||||||
|
|
||||||
future<> start() override;
|
future<> start() override;
|
||||||
|
|
||||||
|
|||||||
@@ -191,8 +191,7 @@ service::service(
|
|||||||
::service::migration_manager& mm,
|
::service::migration_manager& mm,
|
||||||
const service_config& sc,
|
const service_config& sc,
|
||||||
maintenance_socket_enabled used_by_maintenance_socket,
|
maintenance_socket_enabled used_by_maintenance_socket,
|
||||||
cache& cache,
|
cache& cache)
|
||||||
utils::alien_worker& hashing_worker)
|
|
||||||
: service(
|
: service(
|
||||||
std::move(c),
|
std::move(c),
|
||||||
cache,
|
cache,
|
||||||
@@ -200,7 +199,7 @@ service::service(
|
|||||||
g0,
|
g0,
|
||||||
mn,
|
mn,
|
||||||
create_object<authorizer>(sc.authorizer_java_name, qp, g0, mm),
|
create_object<authorizer>(sc.authorizer_java_name, qp, g0, mm),
|
||||||
create_object<authenticator>(sc.authenticator_java_name, qp, g0, mm, cache, hashing_worker),
|
create_object<authenticator>(sc.authenticator_java_name, qp, g0, mm, cache),
|
||||||
create_object<role_manager>(sc.role_manager_java_name, qp, g0, mm, cache),
|
create_object<role_manager>(sc.role_manager_java_name, qp, g0, mm, cache),
|
||||||
used_by_maintenance_socket) {
|
used_by_maintenance_socket) {
|
||||||
}
|
}
|
||||||
@@ -226,7 +225,7 @@ future<> service::create_legacy_keyspace_if_missing(::service::migration_manager
|
|||||||
try {
|
try {
|
||||||
co_return co_await mm.announce(::service::prepare_new_keyspace_announcement(db.real_database(), ksm, ts),
|
co_return co_await mm.announce(::service::prepare_new_keyspace_announcement(db.real_database(), ksm, ts),
|
||||||
std::move(group0_guard), seastar::format("auth_service: create {} keyspace", meta::legacy::AUTH_KS));
|
std::move(group0_guard), seastar::format("auth_service: create {} keyspace", meta::legacy::AUTH_KS));
|
||||||
} catch (::service::group0_concurrent_modification&) {
|
} catch (const ::service::group0_concurrent_modification&) {
|
||||||
log.info("Concurrent operation is detected while creating {} keyspace, retrying.", meta::legacy::AUTH_KS);
|
log.info("Concurrent operation is detected while creating {} keyspace, retrying.", meta::legacy::AUTH_KS);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -27,7 +27,6 @@
|
|||||||
#include "cql3/description.hh"
|
#include "cql3/description.hh"
|
||||||
#include "seastarx.hh"
|
#include "seastarx.hh"
|
||||||
#include "service/raft/raft_group0_client.hh"
|
#include "service/raft/raft_group0_client.hh"
|
||||||
#include "utils/alien_worker.hh"
|
|
||||||
#include "utils/observable.hh"
|
#include "utils/observable.hh"
|
||||||
#include "utils/serialized_action.hh"
|
#include "utils/serialized_action.hh"
|
||||||
#include "service/maintenance_mode.hh"
|
#include "service/maintenance_mode.hh"
|
||||||
@@ -131,8 +130,7 @@ public:
|
|||||||
::service::migration_manager&,
|
::service::migration_manager&,
|
||||||
const service_config&,
|
const service_config&,
|
||||||
maintenance_socket_enabled,
|
maintenance_socket_enabled,
|
||||||
cache&,
|
cache&);
|
||||||
utils::alien_worker&);
|
|
||||||
|
|
||||||
future<> start(::service::migration_manager&, db::system_keyspace&);
|
future<> start(::service::migration_manager&, db::system_keyspace&);
|
||||||
|
|
||||||
|
|||||||
@@ -192,7 +192,7 @@ future<> standard_role_manager::legacy_create_default_role_if_missing() {
|
|||||||
{_superuser},
|
{_superuser},
|
||||||
cql3::query_processor::cache_internal::no).discard_result();
|
cql3::query_processor::cache_internal::no).discard_result();
|
||||||
log.info("Created default superuser role '{}'.", _superuser);
|
log.info("Created default superuser role '{}'.", _superuser);
|
||||||
} catch(const exceptions::unavailable_exception& e) {
|
} catch (const exceptions::unavailable_exception& e) {
|
||||||
log.warn("Skipped default role setup: some nodes were not ready; will retry");
|
log.warn("Skipped default role setup: some nodes were not ready; will retry");
|
||||||
throw e;
|
throw e;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -38,8 +38,8 @@ class transitional_authenticator : public authenticator {
|
|||||||
public:
|
public:
|
||||||
static const sstring PASSWORD_AUTHENTICATOR_NAME;
|
static const sstring PASSWORD_AUTHENTICATOR_NAME;
|
||||||
|
|
||||||
transitional_authenticator(cql3::query_processor& qp, ::service::raft_group0_client& g0, ::service::migration_manager& mm, cache& cache, utils::alien_worker& hashing_worker)
|
transitional_authenticator(cql3::query_processor& qp, ::service::raft_group0_client& g0, ::service::migration_manager& mm, cache& cache)
|
||||||
: transitional_authenticator(std::make_unique<password_authenticator>(qp, g0, mm, cache, hashing_worker)) {
|
: transitional_authenticator(std::make_unique<password_authenticator>(qp, g0, mm, cache)) {
|
||||||
}
|
}
|
||||||
transitional_authenticator(std::unique_ptr<authenticator> a)
|
transitional_authenticator(std::unique_ptr<authenticator> a)
|
||||||
: _authenticator(std::move(a)) {
|
: _authenticator(std::move(a)) {
|
||||||
@@ -81,7 +81,7 @@ public:
|
|||||||
}).handle_exception([](auto ep) {
|
}).handle_exception([](auto ep) {
|
||||||
try {
|
try {
|
||||||
std::rethrow_exception(ep);
|
std::rethrow_exception(ep);
|
||||||
} catch (exceptions::authentication_exception&) {
|
} catch (const exceptions::authentication_exception&) {
|
||||||
// return anon user
|
// return anon user
|
||||||
return make_ready_future<authenticated_user>(anonymous_user());
|
return make_ready_future<authenticated_user>(anonymous_user());
|
||||||
}
|
}
|
||||||
@@ -126,7 +126,7 @@ public:
|
|||||||
virtual bytes evaluate_response(bytes_view client_response) override {
|
virtual bytes evaluate_response(bytes_view client_response) override {
|
||||||
try {
|
try {
|
||||||
return _sasl->evaluate_response(client_response);
|
return _sasl->evaluate_response(client_response);
|
||||||
} catch (exceptions::authentication_exception&) {
|
} catch (const exceptions::authentication_exception&) {
|
||||||
_complete = true;
|
_complete = true;
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
@@ -141,7 +141,7 @@ public:
|
|||||||
return _sasl->get_authenticated_user().handle_exception([](auto ep) {
|
return _sasl->get_authenticated_user().handle_exception([](auto ep) {
|
||||||
try {
|
try {
|
||||||
std::rethrow_exception(ep);
|
std::rethrow_exception(ep);
|
||||||
} catch (exceptions::authentication_exception&) {
|
} catch (const exceptions::authentication_exception&) {
|
||||||
// return anon user
|
// return anon user
|
||||||
return make_ready_future<authenticated_user>(anonymous_user());
|
return make_ready_future<authenticated_user>(anonymous_user());
|
||||||
}
|
}
|
||||||
@@ -241,8 +241,7 @@ static const class_registrator<
|
|||||||
cql3::query_processor&,
|
cql3::query_processor&,
|
||||||
::service::raft_group0_client&,
|
::service::raft_group0_client&,
|
||||||
::service::migration_manager&,
|
::service::migration_manager&,
|
||||||
auth::cache&,
|
auth::cache&> transitional_authenticator_reg(auth::PACKAGE_NAME + "TransitionalAuthenticator");
|
||||||
utils::alien_worker&> transitional_authenticator_reg(auth::PACKAGE_NAME + "TransitionalAuthenticator");
|
|
||||||
|
|
||||||
static const class_registrator<
|
static const class_registrator<
|
||||||
auth::authorizer,
|
auth::authorizer,
|
||||||
|
|||||||
@@ -10,7 +10,9 @@
|
|||||||
#include <seastar/net/inet_address.hh>
|
#include <seastar/net/inet_address.hh>
|
||||||
#include <seastar/core/sstring.hh>
|
#include <seastar/core/sstring.hh>
|
||||||
#include "seastarx.hh"
|
#include "seastarx.hh"
|
||||||
|
#include "utils/loading_shared_values.hh"
|
||||||
|
|
||||||
|
#include <list>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
|
|
||||||
enum class client_type {
|
enum class client_type {
|
||||||
@@ -27,6 +29,20 @@ enum class client_connection_stage {
|
|||||||
ready,
|
ready,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// We implement a keys cache using a map-like utils::loading_shared_values container by storing empty values.
|
||||||
|
struct options_cache_value_type {};
|
||||||
|
using client_options_cache_type = utils::loading_shared_values<sstring, options_cache_value_type>;
|
||||||
|
using client_options_cache_entry_type = client_options_cache_type::entry_ptr;
|
||||||
|
using client_options_cache_key_type = client_options_cache_type::key_type;
|
||||||
|
|
||||||
|
// This struct represents a single OPTION key-value pair from the client's connection options.
|
||||||
|
// Both key and value are represented by corresponding "references" to their cached values.
|
||||||
|
// Each "reference" is effectively a lw_shared_ptr value.
|
||||||
|
struct client_option_key_value_cached_entry {
|
||||||
|
client_options_cache_entry_type key;
|
||||||
|
client_options_cache_entry_type value;
|
||||||
|
};
|
||||||
|
|
||||||
sstring to_string(client_connection_stage ct);
|
sstring to_string(client_connection_stage ct);
|
||||||
|
|
||||||
// Representation of a row in `system.clients'. std::optionals are for nullable cells.
|
// Representation of a row in `system.clients'. std::optionals are for nullable cells.
|
||||||
@@ -37,8 +53,8 @@ struct client_data {
|
|||||||
client_connection_stage connection_stage = client_connection_stage::established;
|
client_connection_stage connection_stage = client_connection_stage::established;
|
||||||
int32_t shard_id; /// ID of server-side shard which is processing the connection.
|
int32_t shard_id; /// ID of server-side shard which is processing the connection.
|
||||||
|
|
||||||
std::optional<sstring> driver_name;
|
std::optional<client_options_cache_entry_type> driver_name;
|
||||||
std::optional<sstring> driver_version;
|
std::optional<client_options_cache_entry_type> driver_version;
|
||||||
std::optional<sstring> hostname;
|
std::optional<sstring> hostname;
|
||||||
std::optional<int32_t> protocol_version;
|
std::optional<int32_t> protocol_version;
|
||||||
std::optional<sstring> ssl_cipher_suite;
|
std::optional<sstring> ssl_cipher_suite;
|
||||||
@@ -46,6 +62,7 @@ struct client_data {
|
|||||||
std::optional<sstring> ssl_protocol;
|
std::optional<sstring> ssl_protocol;
|
||||||
std::optional<sstring> username;
|
std::optional<sstring> username;
|
||||||
std::optional<sstring> scheduling_group_name;
|
std::optional<sstring> scheduling_group_name;
|
||||||
|
std::list<client_option_key_value_cached_entry> client_options;
|
||||||
|
|
||||||
sstring stage_str() const { return to_string(connection_stage); }
|
sstring stage_str() const { return to_string(connection_stage); }
|
||||||
sstring client_type_str() const { return to_string(ct); }
|
sstring client_type_str() const { return to_string(ct); }
|
||||||
|
|||||||
@@ -125,10 +125,6 @@ if(target_arch)
|
|||||||
add_compile_options("-march=${target_arch}")
|
add_compile_options("-march=${target_arch}")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
|
|
||||||
add_compile_options("SHELL:-Xclang -fexperimental-assignment-tracking=disabled")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
function(maybe_limit_stack_usage_in_KB stack_usage_threshold_in_KB config)
|
function(maybe_limit_stack_usage_in_KB stack_usage_threshold_in_KB config)
|
||||||
math(EXPR _stack_usage_threshold_in_bytes "${stack_usage_threshold_in_KB} * 1024")
|
math(EXPR _stack_usage_threshold_in_bytes "${stack_usage_threshold_in_KB} * 1024")
|
||||||
set(_stack_usage_threshold_flag "-Wstack-usage=${_stack_usage_threshold_in_bytes}")
|
set(_stack_usage_threshold_flag "-Wstack-usage=${_stack_usage_threshold_in_bytes}")
|
||||||
|
|||||||
14
configure.py
14
configure.py
@@ -859,6 +859,7 @@ scylla_core = (['message/messaging_service.cc',
|
|||||||
'utils/alien_worker.cc',
|
'utils/alien_worker.cc',
|
||||||
'utils/array-search.cc',
|
'utils/array-search.cc',
|
||||||
'utils/base64.cc',
|
'utils/base64.cc',
|
||||||
|
'utils/crypt_sha512.cc',
|
||||||
'utils/logalloc.cc',
|
'utils/logalloc.cc',
|
||||||
'utils/large_bitset.cc',
|
'utils/large_bitset.cc',
|
||||||
'utils/buffer_input_stream.cc',
|
'utils/buffer_input_stream.cc',
|
||||||
@@ -1157,6 +1158,7 @@ scylla_core = (['message/messaging_service.cc',
|
|||||||
'locator/topology.cc',
|
'locator/topology.cc',
|
||||||
'locator/util.cc',
|
'locator/util.cc',
|
||||||
'service/client_state.cc',
|
'service/client_state.cc',
|
||||||
|
'service/client_routes.cc',
|
||||||
'service/storage_service.cc',
|
'service/storage_service.cc',
|
||||||
'service/session.cc',
|
'service/session.cc',
|
||||||
'service/task_manager_module.cc',
|
'service/task_manager_module.cc',
|
||||||
@@ -1317,6 +1319,8 @@ api = ['api/api.cc',
|
|||||||
'api/storage_proxy.cc',
|
'api/storage_proxy.cc',
|
||||||
Json2Code('api/api-doc/cache_service.json'),
|
Json2Code('api/api-doc/cache_service.json'),
|
||||||
'api/cache_service.cc',
|
'api/cache_service.cc',
|
||||||
|
Json2Code('api/api-doc/client_routes.json'),
|
||||||
|
'api/client_routes.cc',
|
||||||
Json2Code('api/api-doc/collectd.json'),
|
Json2Code('api/api-doc/collectd.json'),
|
||||||
'api/collectd.cc',
|
'api/collectd.cc',
|
||||||
Json2Code('api/api-doc/endpoint_snitch_info.json'),
|
Json2Code('api/api-doc/endpoint_snitch_info.json'),
|
||||||
@@ -1479,7 +1483,6 @@ deps = {
|
|||||||
|
|
||||||
pure_boost_tests = set([
|
pure_boost_tests = set([
|
||||||
'test/boost/anchorless_list_test',
|
'test/boost/anchorless_list_test',
|
||||||
'test/boost/auth_passwords_test',
|
|
||||||
'test/boost/auth_resource_test',
|
'test/boost/auth_resource_test',
|
||||||
'test/boost/big_decimal_test',
|
'test/boost/big_decimal_test',
|
||||||
'test/boost/caching_options_test',
|
'test/boost/caching_options_test',
|
||||||
@@ -2248,15 +2251,6 @@ def get_extra_cxxflags(mode, mode_config, cxx, debuginfo):
|
|||||||
if debuginfo and mode_config['can_have_debug_info']:
|
if debuginfo and mode_config['can_have_debug_info']:
|
||||||
cxxflags += ['-g', '-gz']
|
cxxflags += ['-g', '-gz']
|
||||||
|
|
||||||
if 'clang' in cxx:
|
|
||||||
# Since AssignmentTracking was enabled by default in clang
|
|
||||||
# (llvm/llvm-project@de6da6ad55d3ca945195d1cb109cb8efdf40a52a)
|
|
||||||
# coroutine frame debugging info (`coro_frame_ty`) is broken.
|
|
||||||
#
|
|
||||||
# It seems that we aren't losing much by disabling AssigmentTracking,
|
|
||||||
# so for now we choose to disable it to get `coro_frame_ty` back.
|
|
||||||
cxxflags.append('-Xclang -fexperimental-assignment-tracking=disabled')
|
|
||||||
|
|
||||||
return cxxflags
|
return cxxflags
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -64,6 +64,10 @@ bool query_processor::topology_global_queue_empty() {
|
|||||||
return remote().first.get().ss.topology_global_queue_empty();
|
return remote().first.get().ss.topology_global_queue_empty();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
future<bool> query_processor::ongoing_rf_change(const service::group0_guard& guard, sstring ks) {
|
||||||
|
return remote().first.get().ss.ongoing_rf_change(guard, std::move(ks));
|
||||||
|
}
|
||||||
|
|
||||||
static service::query_state query_state_for_internal_call() {
|
static service::query_state query_state_for_internal_call() {
|
||||||
return {service::client_state::for_internal_calls(), empty_service_permit()};
|
return {service::client_state::for_internal_calls(), empty_service_permit()};
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -474,6 +474,7 @@ public:
|
|||||||
void reset_cache();
|
void reset_cache();
|
||||||
|
|
||||||
bool topology_global_queue_empty();
|
bool topology_global_queue_empty();
|
||||||
|
future<bool> ongoing_rf_change(const service::group0_guard& guard, sstring ks);
|
||||||
|
|
||||||
query_options make_internal_options(
|
query_options make_internal_options(
|
||||||
const statements::prepared_statement::checked_weak_ptr& p,
|
const statements::prepared_statement::checked_weak_ptr& p,
|
||||||
|
|||||||
@@ -1322,6 +1322,10 @@ const std::vector<expr::expression>& statement_restrictions::index_restrictions(
|
|||||||
return _index_restrictions;
|
return _index_restrictions;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool statement_restrictions::is_empty() const {
|
||||||
|
return !_where.has_value();
|
||||||
|
}
|
||||||
|
|
||||||
// Current score table:
|
// Current score table:
|
||||||
// local and restrictions include full partition key: 2
|
// local and restrictions include full partition key: 2
|
||||||
// global: 1
|
// global: 1
|
||||||
|
|||||||
@@ -408,6 +408,8 @@ public:
|
|||||||
|
|
||||||
/// Checks that the primary key restrictions don't contain null values, throws invalid_request_exception otherwise.
|
/// Checks that the primary key restrictions don't contain null values, throws invalid_request_exception otherwise.
|
||||||
void validate_primary_key(const query_options& options) const;
|
void validate_primary_key(const query_options& options) const;
|
||||||
|
|
||||||
|
bool is_empty() const;
|
||||||
};
|
};
|
||||||
|
|
||||||
statement_restrictions analyze_statement_restrictions(
|
statement_restrictions analyze_statement_restrictions(
|
||||||
|
|||||||
@@ -19,6 +19,7 @@
|
|||||||
#include "locator/abstract_replication_strategy.hh"
|
#include "locator/abstract_replication_strategy.hh"
|
||||||
#include "mutation/canonical_mutation.hh"
|
#include "mutation/canonical_mutation.hh"
|
||||||
#include "prepared_statement.hh"
|
#include "prepared_statement.hh"
|
||||||
|
#include "seastar/coroutine/exception.hh"
|
||||||
#include "service/migration_manager.hh"
|
#include "service/migration_manager.hh"
|
||||||
#include "service/storage_proxy.hh"
|
#include "service/storage_proxy.hh"
|
||||||
#include "service/topology_mutation.hh"
|
#include "service/topology_mutation.hh"
|
||||||
@@ -138,6 +139,7 @@ bool cql3::statements::alter_keyspace_statement::changes_tablets(query_processor
|
|||||||
future<std::tuple<::shared_ptr<cql_transport::event::schema_change>, cql3::cql_warnings_vec>>
|
future<std::tuple<::shared_ptr<cql_transport::event::schema_change>, cql3::cql_warnings_vec>>
|
||||||
cql3::statements::alter_keyspace_statement::prepare_schema_mutations(query_processor& qp, service::query_state& state, const query_options& options, service::group0_batch& mc) const {
|
cql3::statements::alter_keyspace_statement::prepare_schema_mutations(query_processor& qp, service::query_state& state, const query_options& options, service::group0_batch& mc) const {
|
||||||
using namespace cql_transport;
|
using namespace cql_transport;
|
||||||
|
bool unknown_keyspace = false;
|
||||||
try {
|
try {
|
||||||
event::schema_change::target_type target_type = event::schema_change::target_type::KEYSPACE;
|
event::schema_change::target_type target_type = event::schema_change::target_type::KEYSPACE;
|
||||||
auto ks = qp.db().find_keyspace(_name);
|
auto ks = qp.db().find_keyspace(_name);
|
||||||
@@ -158,14 +160,19 @@ cql3::statements::alter_keyspace_statement::prepare_schema_mutations(query_proce
|
|||||||
// when in reality nothing or only schema is being changed
|
// when in reality nothing or only schema is being changed
|
||||||
if (changes_tablets(qp)) {
|
if (changes_tablets(qp)) {
|
||||||
if (!qp.proxy().features().topology_global_request_queue && !qp.topology_global_queue_empty()) {
|
if (!qp.proxy().features().topology_global_request_queue && !qp.topology_global_queue_empty()) {
|
||||||
return make_exception_future<std::tuple<::shared_ptr<::cql_transport::event::schema_change>, cql3::cql_warnings_vec>>(
|
co_await coroutine::return_exception(
|
||||||
exceptions::invalid_request_exception("Another global topology request is ongoing, please retry."));
|
exceptions::invalid_request_exception("Another global topology request is ongoing, please retry."));
|
||||||
|
}
|
||||||
|
if (qp.proxy().features().rack_list_rf && co_await qp.ongoing_rf_change(mc.guard(),_name)) {
|
||||||
|
co_await coroutine::return_exception(
|
||||||
|
exceptions::invalid_request_exception(format("Another RF change for this keyspace {} ongoing, please retry.", _name)));
|
||||||
}
|
}
|
||||||
qp.db().real_database().validate_keyspace_update(*ks_md_update);
|
qp.db().real_database().validate_keyspace_update(*ks_md_update);
|
||||||
|
|
||||||
service::topology_mutation_builder builder(ts);
|
service::topology_mutation_builder builder(ts);
|
||||||
service::topology_request_tracking_mutation_builder rtbuilder{global_request_id, qp.proxy().features().topology_requests_type_column};
|
service::topology_request_tracking_mutation_builder rtbuilder{global_request_id, qp.proxy().features().topology_requests_type_column};
|
||||||
rtbuilder.set("done", false);
|
rtbuilder.set("done", false)
|
||||||
|
.set("start_time", db_clock::now());
|
||||||
if (!qp.proxy().features().topology_global_request_queue) {
|
if (!qp.proxy().features().topology_global_request_queue) {
|
||||||
builder.set_global_topology_request(service::global_topology_request::keyspace_rf_change);
|
builder.set_global_topology_request(service::global_topology_request::keyspace_rf_change);
|
||||||
builder.set_global_topology_request_id(global_request_id);
|
builder.set_global_topology_request_id(global_request_id);
|
||||||
@@ -241,10 +248,15 @@ cql3::statements::alter_keyspace_statement::prepare_schema_mutations(query_proce
|
|||||||
target_type,
|
target_type,
|
||||||
keyspace());
|
keyspace());
|
||||||
mc.add_mutations(std::move(muts), "CQL alter keyspace");
|
mc.add_mutations(std::move(muts), "CQL alter keyspace");
|
||||||
return make_ready_future<std::tuple<::shared_ptr<cql_transport::event::schema_change>, cql3::cql_warnings_vec>>(std::make_tuple(std::move(ret), warnings));
|
co_return std::make_tuple(std::move(ret), warnings);
|
||||||
} catch (data_dictionary::no_such_keyspace& e) {
|
} catch (data_dictionary::no_such_keyspace& e) {
|
||||||
return make_exception_future<std::tuple<::shared_ptr<cql_transport::event::schema_change>, cql3::cql_warnings_vec>>(exceptions::invalid_request_exception("Unknown keyspace " + _name));
|
unknown_keyspace = true;
|
||||||
}
|
}
|
||||||
|
if (unknown_keyspace) {
|
||||||
|
co_await coroutine::return_exception(
|
||||||
|
exceptions::invalid_request_exception("Unknown keyspace " + _name));
|
||||||
|
}
|
||||||
|
std::unreachable();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::unique_ptr<cql3::statements::prepared_statement>
|
std::unique_ptr<cql3::statements::prepared_statement>
|
||||||
|
|||||||
@@ -61,7 +61,7 @@ expand_to_racks(const locator::token_metadata& tm,
|
|||||||
|
|
||||||
// Handle ALTER:
|
// Handle ALTER:
|
||||||
// ([]|0) -> numeric is allowed, there are no existing replicas
|
// ([]|0) -> numeric is allowed, there are no existing replicas
|
||||||
// numeric -> numeric' is not supported. User should convert RF to rack list of equal count first.
|
// numeric -> numeric' is not supported unless numeric == numeric'. User should convert RF to rack list of equal count first.
|
||||||
// rack_list -> len(rack_list) is allowed (no-op)
|
// rack_list -> len(rack_list) is allowed (no-op)
|
||||||
// rack_list -> numeric is not allowed
|
// rack_list -> numeric is not allowed
|
||||||
if (old_options.contains(dc)) {
|
if (old_options.contains(dc)) {
|
||||||
@@ -75,6 +75,8 @@ expand_to_racks(const locator::token_metadata& tm,
|
|||||||
"Cannot change replication factor for '{}' from {} to numeric {}, use rack list instead",
|
"Cannot change replication factor for '{}' from {} to numeric {}, use rack list instead",
|
||||||
dc, old_rf_val, data.count()));
|
dc, old_rf_val, data.count()));
|
||||||
}
|
}
|
||||||
|
} else if (old_rf.count() == data.count()) {
|
||||||
|
return rf;
|
||||||
} else if (old_rf.count() > 0) {
|
} else if (old_rf.count() > 0) {
|
||||||
throw exceptions::configuration_exception(fmt::format(
|
throw exceptions::configuration_exception(fmt::format(
|
||||||
"Cannot change replication factor for '{}' from {} to {}, only rack list is allowed",
|
"Cannot change replication factor for '{}' from {} to {}, only rack list is allowed",
|
||||||
@@ -153,6 +155,8 @@ static locator::replication_strategy_config_options prepare_options(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Validate options.
|
// Validate options.
|
||||||
|
bool numeric_to_rack_list_transition = false;
|
||||||
|
bool rf_change = false;
|
||||||
for (auto&& [dc, opt] : options) {
|
for (auto&& [dc, opt] : options) {
|
||||||
locator::replication_factor_data rf(opt);
|
locator::replication_factor_data rf(opt);
|
||||||
|
|
||||||
@@ -162,6 +166,7 @@ static locator::replication_strategy_config_options prepare_options(
|
|||||||
old_rf = locator::replication_factor_data(i->second);
|
old_rf = locator::replication_factor_data(i->second);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
rf_change = rf_change || (old_rf && old_rf->count() != rf.count()) || (!old_rf && rf.count() != 0);
|
||||||
if (!rf.is_rack_based()) {
|
if (!rf.is_rack_based()) {
|
||||||
if (old_rf && old_rf->is_rack_based() && rf.count() != 0) {
|
if (old_rf && old_rf->is_rack_based() && rf.count() != 0) {
|
||||||
if (old_rf->count() != rf.count()) {
|
if (old_rf->count() != rf.count()) {
|
||||||
@@ -187,12 +192,11 @@ static locator::replication_strategy_config_options prepare_options(
|
|||||||
throw exceptions::configuration_exception(fmt::format(
|
throw exceptions::configuration_exception(fmt::format(
|
||||||
"Rack list for '{}' contains duplicate entries", dc));
|
"Rack list for '{}' contains duplicate entries", dc));
|
||||||
}
|
}
|
||||||
if (old_rf && !old_rf->is_rack_based() && old_rf->count() != 0) {
|
numeric_to_rack_list_transition = numeric_to_rack_list_transition || (old_rf && !old_rf->is_rack_based() && old_rf->count() != 0);
|
||||||
// FIXME: Allow this if replicas already conform to the given rack list.
|
}
|
||||||
// FIXME: Implement automatic colocation to allow transition to rack list.
|
|
||||||
throw exceptions::configuration_exception(fmt::format(
|
if (numeric_to_rack_list_transition && rf_change) {
|
||||||
"Cannot change replication factor from numeric to rack list for '{}'", dc));
|
throw exceptions::configuration_exception("Cannot change replication factor from numeric to rack list and rf value at the same time");
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!rf && options.empty() && old_options.empty()) {
|
if (!rf && options.empty() && old_options.empty()) {
|
||||||
@@ -412,7 +416,7 @@ lw_shared_ptr<data_dictionary::keyspace_metadata> ks_prop_defs::as_ks_metadata(s
|
|||||||
? std::optional<unsigned>(0) : std::nullopt;
|
? std::optional<unsigned>(0) : std::nullopt;
|
||||||
auto initial_tablets = get_initial_tablets(default_initial_tablets, cfg.enforce_tablets());
|
auto initial_tablets = get_initial_tablets(default_initial_tablets, cfg.enforce_tablets());
|
||||||
bool uses_tablets = initial_tablets.has_value();
|
bool uses_tablets = initial_tablets.has_value();
|
||||||
bool rack_list_enabled = feat.rack_list_rf;
|
bool rack_list_enabled = utils::get_local_injector().enter("create_with_numeric") ? false : feat.rack_list_rf;
|
||||||
auto options = prepare_options(sc, tm, cfg.rf_rack_valid_keyspaces(), get_replication_options(), {}, rack_list_enabled, uses_tablets);
|
auto options = prepare_options(sc, tm, cfg.rf_rack_valid_keyspaces(), get_replication_options(), {}, rack_list_enabled, uses_tablets);
|
||||||
return data_dictionary::keyspace_metadata::new_keyspace(ks_name, sc,
|
return data_dictionary::keyspace_metadata::new_keyspace(ks_name, sc,
|
||||||
std::move(options), initial_tablets, get_consistency_option(), get_boolean(KW_DURABLE_WRITES, true), get_storage_options());
|
std::move(options), initial_tablets, get_consistency_option(), get_boolean(KW_DURABLE_WRITES, true), get_storage_options());
|
||||||
@@ -428,7 +432,7 @@ lw_shared_ptr<data_dictionary::keyspace_metadata> ks_prop_defs::as_ks_metadata_u
|
|||||||
throw exceptions::invalid_request_exception("Cannot alter replication strategy vnode/tablets flavor");
|
throw exceptions::invalid_request_exception("Cannot alter replication strategy vnode/tablets flavor");
|
||||||
}
|
}
|
||||||
auto sc = get_replication_strategy_class();
|
auto sc = get_replication_strategy_class();
|
||||||
bool rack_list_enabled = feat.rack_list_rf;
|
bool rack_list_enabled = utils::get_local_injector().enter("create_with_numeric") ? false : feat.rack_list_rf;
|
||||||
if (sc) {
|
if (sc) {
|
||||||
options = prepare_options(*sc, tm, cfg.rf_rack_valid_keyspaces(), get_replication_options(), old_options, rack_list_enabled, uses_tablets);
|
options = prepare_options(*sc, tm, cfg.rf_rack_valid_keyspaces(), get_replication_options(), old_options, rack_list_enabled, uses_tablets);
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -1976,7 +1976,7 @@ mutation_fragments_select_statement::do_execute(query_processor& qp, service::qu
|
|||||||
if (it == indexes.end()) {
|
if (it == indexes.end()) {
|
||||||
throw exceptions::invalid_request_exception("ANN ordering by vector requires the column to be indexed using 'vector_index'");
|
throw exceptions::invalid_request_exception("ANN ordering by vector requires the column to be indexed using 'vector_index'");
|
||||||
}
|
}
|
||||||
if (index_opt || parameters->allow_filtering() || restrictions->need_filtering() || check_needs_allow_filtering_anyway(*restrictions)) {
|
if (index_opt || parameters->allow_filtering() || !(restrictions->is_empty()) || check_needs_allow_filtering_anyway(*restrictions)) {
|
||||||
throw exceptions::invalid_request_exception("ANN ordering by vector does not support filtering");
|
throw exceptions::invalid_request_exception("ANN ordering by vector does not support filtering");
|
||||||
}
|
}
|
||||||
index_opt = *it;
|
index_opt = *it;
|
||||||
|
|||||||
@@ -42,6 +42,11 @@ table::get_index_manager() const {
|
|||||||
return _ops->get_index_manager(*this);
|
return _ops->get_index_manager(*this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
db_clock::time_point
|
||||||
|
table::get_truncation_time() const {
|
||||||
|
return _ops->get_truncation_time(*this);
|
||||||
|
}
|
||||||
|
|
||||||
lw_shared_ptr<keyspace_metadata>
|
lw_shared_ptr<keyspace_metadata>
|
||||||
keyspace::metadata() const {
|
keyspace::metadata() const {
|
||||||
return _ops->get_keyspace_metadata(*this);
|
return _ops->get_keyspace_metadata(*this);
|
||||||
|
|||||||
@@ -77,6 +77,7 @@ public:
|
|||||||
schema_ptr schema() const;
|
schema_ptr schema() const;
|
||||||
const std::vector<view_ptr>& views() const;
|
const std::vector<view_ptr>& views() const;
|
||||||
const secondary_index::secondary_index_manager& get_index_manager() const;
|
const secondary_index::secondary_index_manager& get_index_manager() const;
|
||||||
|
db_clock::time_point get_truncation_time() const;
|
||||||
};
|
};
|
||||||
|
|
||||||
class keyspace {
|
class keyspace {
|
||||||
|
|||||||
@@ -27,6 +27,7 @@ public:
|
|||||||
virtual std::optional<table> try_find_table(database db, table_id id) const = 0;
|
virtual std::optional<table> try_find_table(database db, table_id id) const = 0;
|
||||||
virtual const secondary_index::secondary_index_manager& get_index_manager(table t) const = 0;
|
virtual const secondary_index::secondary_index_manager& get_index_manager(table t) const = 0;
|
||||||
virtual schema_ptr get_table_schema(table t) const = 0;
|
virtual schema_ptr get_table_schema(table t) const = 0;
|
||||||
|
virtual db_clock::time_point get_truncation_time(table t) const = 0;
|
||||||
virtual lw_shared_ptr<keyspace_metadata> get_keyspace_metadata(keyspace ks) const = 0;
|
virtual lw_shared_ptr<keyspace_metadata> get_keyspace_metadata(keyspace ks) const = 0;
|
||||||
virtual bool is_internal(keyspace ks) const = 0;
|
virtual bool is_internal(keyspace ks) const = 0;
|
||||||
virtual const locator::abstract_replication_strategy& get_replication_strategy(keyspace ks) const = 0;
|
virtual const locator::abstract_replication_strategy& get_replication_strategy(keyspace ks) const = 0;
|
||||||
|
|||||||
20
db/batchlog.hh
Normal file
20
db/batchlog.hh
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2025-present ScyllaDB
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.0 and Apache-2.0)
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "mutation/mutation.hh"
|
||||||
|
#include "utils/UUID.hh"
|
||||||
|
|
||||||
|
namespace db {
|
||||||
|
|
||||||
|
mutation get_batchlog_mutation_for(schema_ptr schema, const utils::chunked_vector<mutation>& mutations, int32_t version, db_clock::time_point now, const utils::UUID& id);
|
||||||
|
|
||||||
|
mutation get_batchlog_delete_mutation(schema_ptr schema, int32_t version, db_clock::time_point now, const utils::UUID& id);
|
||||||
|
|
||||||
|
}
|
||||||
@@ -10,6 +10,7 @@
|
|||||||
|
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
#include <exception>
|
#include <exception>
|
||||||
|
#include <ranges>
|
||||||
#include <seastar/core/future-util.hh>
|
#include <seastar/core/future-util.hh>
|
||||||
#include <seastar/core/do_with.hh>
|
#include <seastar/core/do_with.hh>
|
||||||
#include <seastar/core/semaphore.hh>
|
#include <seastar/core/semaphore.hh>
|
||||||
@@ -18,12 +19,14 @@
|
|||||||
#include <seastar/core/sleep.hh>
|
#include <seastar/core/sleep.hh>
|
||||||
|
|
||||||
#include "batchlog_manager.hh"
|
#include "batchlog_manager.hh"
|
||||||
|
#include "batchlog.hh"
|
||||||
#include "data_dictionary/data_dictionary.hh"
|
#include "data_dictionary/data_dictionary.hh"
|
||||||
#include "mutation/canonical_mutation.hh"
|
#include "mutation/canonical_mutation.hh"
|
||||||
#include "service/storage_proxy.hh"
|
#include "service/storage_proxy.hh"
|
||||||
#include "system_keyspace.hh"
|
#include "system_keyspace.hh"
|
||||||
#include "utils/rate_limiter.hh"
|
#include "utils/rate_limiter.hh"
|
||||||
#include "utils/log.hh"
|
#include "utils/log.hh"
|
||||||
|
#include "utils/murmur_hash.hh"
|
||||||
#include "db_clock.hh"
|
#include "db_clock.hh"
|
||||||
#include "unimplemented.hh"
|
#include "unimplemented.hh"
|
||||||
#include "idl/frozen_schema.dist.hh"
|
#include "idl/frozen_schema.dist.hh"
|
||||||
@@ -33,17 +36,94 @@
|
|||||||
#include "cql3/untyped_result_set.hh"
|
#include "cql3/untyped_result_set.hh"
|
||||||
#include "service_permit.hh"
|
#include "service_permit.hh"
|
||||||
#include "cql3/query_processor.hh"
|
#include "cql3/query_processor.hh"
|
||||||
#include "replica/database.hh"
|
|
||||||
|
|
||||||
static logging::logger blogger("batchlog_manager");
|
static logging::logger blogger("batchlog_manager");
|
||||||
|
|
||||||
|
namespace db {
|
||||||
|
|
||||||
|
// Yields 256 batchlog shards. Even on the largest nodes we currently run on,
|
||||||
|
// this should be enough to give every core a batchlog partition.
|
||||||
|
static constexpr unsigned batchlog_shard_bits = 8;
|
||||||
|
|
||||||
|
int32_t batchlog_shard_of(db_clock::time_point written_at) {
|
||||||
|
const int64_t count = written_at.time_since_epoch().count();
|
||||||
|
std::array<uint64_t, 2> result;
|
||||||
|
utils::murmur_hash::hash3_x64_128(bytes_view(reinterpret_cast<const signed char*>(&count), sizeof(count)), 0, result);
|
||||||
|
uint64_t hash = result[0] ^ result[1];
|
||||||
|
return hash & ((1ULL << batchlog_shard_bits) - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::pair<partition_key, clustering_key>
|
||||||
|
get_batchlog_key(const schema& schema, int32_t version, db::batchlog_stage stage, int32_t batchlog_shard, db_clock::time_point written_at, std::optional<utils::UUID> id) {
|
||||||
|
auto pkey = partition_key::from_exploded(schema, {serialized(version), serialized(int8_t(stage)), serialized(batchlog_shard)});
|
||||||
|
|
||||||
|
std::vector<bytes> ckey_components;
|
||||||
|
ckey_components.reserve(2);
|
||||||
|
ckey_components.push_back(serialized(written_at));
|
||||||
|
if (id) {
|
||||||
|
ckey_components.push_back(serialized(*id));
|
||||||
|
}
|
||||||
|
auto ckey = clustering_key::from_exploded(schema, ckey_components);
|
||||||
|
|
||||||
|
return {std::move(pkey), std::move(ckey)};
|
||||||
|
}
|
||||||
|
|
||||||
|
std::pair<partition_key, clustering_key>
|
||||||
|
get_batchlog_key(const schema& schema, int32_t version, db::batchlog_stage stage, db_clock::time_point written_at, std::optional<utils::UUID> id) {
|
||||||
|
return get_batchlog_key(schema, version, stage, batchlog_shard_of(written_at), written_at, id);
|
||||||
|
}
|
||||||
|
|
||||||
|
mutation get_batchlog_mutation_for(schema_ptr schema, managed_bytes data, int32_t version, db::batchlog_stage stage, db_clock::time_point now, const utils::UUID& id) {
|
||||||
|
auto [key, ckey] = get_batchlog_key(*schema, version, stage, now, id);
|
||||||
|
|
||||||
|
auto timestamp = api::new_timestamp();
|
||||||
|
|
||||||
|
mutation m(schema, key);
|
||||||
|
// Avoid going through data_value and therefore `bytes`, as it can be large (#24809).
|
||||||
|
auto cdef_data = schema->get_column_definition(to_bytes("data"));
|
||||||
|
m.set_cell(ckey, *cdef_data, atomic_cell::make_live(*cdef_data->type, timestamp, std::move(data)));
|
||||||
|
|
||||||
|
return m;
|
||||||
|
}
|
||||||
|
|
||||||
|
mutation get_batchlog_mutation_for(schema_ptr schema, const utils::chunked_vector<mutation>& mutations, int32_t version, db::batchlog_stage stage, db_clock::time_point now, const utils::UUID& id) {
|
||||||
|
auto data = [&mutations] {
|
||||||
|
utils::chunked_vector<canonical_mutation> fm(mutations.begin(), mutations.end());
|
||||||
|
bytes_ostream out;
|
||||||
|
for (auto& m : fm) {
|
||||||
|
ser::serialize(out, m);
|
||||||
|
}
|
||||||
|
return std::move(out).to_managed_bytes();
|
||||||
|
}();
|
||||||
|
|
||||||
|
return get_batchlog_mutation_for(std::move(schema), std::move(data), version, stage, now, id);
|
||||||
|
}
|
||||||
|
|
||||||
|
mutation get_batchlog_mutation_for(schema_ptr schema, const utils::chunked_vector<mutation>& mutations, int32_t version, db_clock::time_point now, const utils::UUID& id) {
|
||||||
|
return get_batchlog_mutation_for(std::move(schema), mutations, version, batchlog_stage::initial, now, id);
|
||||||
|
}
|
||||||
|
|
||||||
|
mutation get_batchlog_delete_mutation(schema_ptr schema, int32_t version, db::batchlog_stage stage, db_clock::time_point now, const utils::UUID& id) {
|
||||||
|
auto [key, ckey] = get_batchlog_key(*schema, version, stage, now, id);
|
||||||
|
mutation m(schema, key);
|
||||||
|
auto timestamp = api::new_timestamp();
|
||||||
|
m.partition().apply_delete(*schema, ckey, tombstone(timestamp, gc_clock::now()));
|
||||||
|
return m;
|
||||||
|
}
|
||||||
|
|
||||||
|
mutation get_batchlog_delete_mutation(schema_ptr schema, int32_t version, db_clock::time_point now, const utils::UUID& id) {
|
||||||
|
return get_batchlog_delete_mutation(std::move(schema), version, batchlog_stage::initial, now, id);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace db
|
||||||
|
|
||||||
const std::chrono::seconds db::batchlog_manager::replay_interval;
|
const std::chrono::seconds db::batchlog_manager::replay_interval;
|
||||||
const uint32_t db::batchlog_manager::page_size;
|
const uint32_t db::batchlog_manager::page_size;
|
||||||
|
|
||||||
db::batchlog_manager::batchlog_manager(cql3::query_processor& qp, db::system_keyspace& sys_ks, batchlog_manager_config config)
|
db::batchlog_manager::batchlog_manager(cql3::query_processor& qp, db::system_keyspace& sys_ks, batchlog_manager_config config)
|
||||||
: _qp(qp)
|
: _qp(qp)
|
||||||
, _sys_ks(sys_ks)
|
, _sys_ks(sys_ks)
|
||||||
, _write_request_timeout(std::chrono::duration_cast<db_clock::duration>(config.write_request_timeout))
|
, _replay_timeout(config.replay_timeout)
|
||||||
, _replay_rate(config.replay_rate)
|
, _replay_rate(config.replay_rate)
|
||||||
, _delay(config.delay)
|
, _delay(config.delay)
|
||||||
, _replay_cleanup_after_replays(config.replay_cleanup_after_replays)
|
, _replay_cleanup_after_replays(config.replay_cleanup_after_replays)
|
||||||
@@ -152,18 +232,75 @@ future<> db::batchlog_manager::stop() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
future<size_t> db::batchlog_manager::count_all_batches() const {
|
future<size_t> db::batchlog_manager::count_all_batches() const {
|
||||||
sstring query = format("SELECT count(*) FROM {}.{} BYPASS CACHE", system_keyspace::NAME, system_keyspace::BATCHLOG);
|
sstring query = format("SELECT count(*) FROM {}.{} BYPASS CACHE", system_keyspace::NAME, system_keyspace::BATCHLOG_V2);
|
||||||
return _qp.execute_internal(query, cql3::query_processor::cache_internal::yes).then([](::shared_ptr<cql3::untyped_result_set> rs) {
|
return _qp.execute_internal(query, cql3::query_processor::cache_internal::yes).then([](::shared_ptr<cql3::untyped_result_set> rs) {
|
||||||
return size_t(rs->one().get_as<int64_t>("count"));
|
return size_t(rs->one().get_as<int64_t>("count"));
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
db_clock::duration db::batchlog_manager::get_batch_log_timeout() const {
|
future<> db::batchlog_manager::maybe_migrate_v1_to_v2() {
|
||||||
// enough time for the actual write + BM removal mutation
|
if (_migration_done) {
|
||||||
return _write_request_timeout * 2;
|
return make_ready_future<>();
|
||||||
|
}
|
||||||
|
return with_gate(_gate, [this] () mutable -> future<> {
|
||||||
|
blogger.info("Migrating batchlog entries from v1 -> v2");
|
||||||
|
|
||||||
|
auto schema_v1 = _qp.db().find_schema(system_keyspace::NAME, system_keyspace::BATCHLOG);
|
||||||
|
auto schema_v2 = _qp.db().find_schema(system_keyspace::NAME, system_keyspace::BATCHLOG_V2);
|
||||||
|
|
||||||
|
auto batch = [this, schema_v1, schema_v2] (const cql3::untyped_result_set::row& row) -> future<stop_iteration> {
|
||||||
|
// check version of serialization format
|
||||||
|
if (!row.has("version")) {
|
||||||
|
blogger.warn("Not migrating logged batch because of unknown version");
|
||||||
|
co_return stop_iteration::no;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto version = row.get_as<int32_t>("version");
|
||||||
|
if (version != netw::messaging_service::current_version) {
|
||||||
|
blogger.warn("Not migrating logged batch because of incorrect version");
|
||||||
|
co_return stop_iteration::no;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto id = row.get_as<utils::UUID>("id");
|
||||||
|
auto written_at = row.get_as<db_clock::time_point>("written_at");
|
||||||
|
auto data = row.get_blob_fragmented("data");
|
||||||
|
|
||||||
|
auto& sp = _qp.proxy();
|
||||||
|
|
||||||
|
utils::get_local_injector().inject("batchlog_manager_fail_migration", [] { throw std::runtime_error("Error injection: failing batchlog migration"); });
|
||||||
|
|
||||||
|
auto migrate_mut = get_batchlog_mutation_for(schema_v2, std::move(data), version, batchlog_stage::failed_replay, written_at, id);
|
||||||
|
co_await sp.mutate_locally(migrate_mut, tracing::trace_state_ptr(), db::commitlog::force_sync::no);
|
||||||
|
|
||||||
|
mutation delete_mut(schema_v1, partition_key::from_single_value(*schema_v1, serialized(id)));
|
||||||
|
delete_mut.partition().apply_delete(*schema_v1, clustering_key_prefix::make_empty(), tombstone(api::new_timestamp(), gc_clock::now()));
|
||||||
|
co_await sp.mutate_locally(delete_mut, tracing::trace_state_ptr(), db::commitlog::force_sync::no);
|
||||||
|
|
||||||
|
co_return stop_iteration::no;
|
||||||
|
};
|
||||||
|
try {
|
||||||
|
co_await _qp.query_internal(
|
||||||
|
format("SELECT * FROM {}.{} BYPASS CACHE", system_keyspace::NAME, system_keyspace::BATCHLOG),
|
||||||
|
db::consistency_level::ONE,
|
||||||
|
{},
|
||||||
|
page_size,
|
||||||
|
std::move(batch));
|
||||||
|
} catch (...) {
|
||||||
|
blogger.warn("Batchlog v1 to v2 migration failed: {}; will retry", std::current_exception());
|
||||||
|
co_return;
|
||||||
|
}
|
||||||
|
|
||||||
|
co_await container().invoke_on_all([] (auto& bm) {
|
||||||
|
bm._migration_done = true;
|
||||||
|
});
|
||||||
|
|
||||||
|
blogger.info("Done migrating batchlog entries from v1 -> v2");
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
future<db::all_batches_replayed> db::batchlog_manager::replay_all_failed_batches(post_replay_cleanup cleanup) {
|
future<db::all_batches_replayed> db::batchlog_manager::replay_all_failed_batches(post_replay_cleanup cleanup) {
|
||||||
|
co_await maybe_migrate_v1_to_v2();
|
||||||
|
|
||||||
typedef db_clock::rep clock_type;
|
typedef db_clock::rep clock_type;
|
||||||
|
|
||||||
db::all_batches_replayed all_replayed = all_batches_replayed::yes;
|
db::all_batches_replayed all_replayed = all_batches_replayed::yes;
|
||||||
@@ -172,21 +309,26 @@ future<db::all_batches_replayed> db::batchlog_manager::replay_all_failed_batches
|
|||||||
auto throttle = _replay_rate / _qp.proxy().get_token_metadata_ptr()->count_normal_token_owners();
|
auto throttle = _replay_rate / _qp.proxy().get_token_metadata_ptr()->count_normal_token_owners();
|
||||||
auto limiter = make_lw_shared<utils::rate_limiter>(throttle);
|
auto limiter = make_lw_shared<utils::rate_limiter>(throttle);
|
||||||
|
|
||||||
auto schema = _qp.db().find_schema(system_keyspace::NAME, system_keyspace::BATCHLOG);
|
auto schema = _qp.db().find_schema(system_keyspace::NAME, system_keyspace::BATCHLOG_V2);
|
||||||
auto delete_batch = [this, schema = std::move(schema)] (utils::UUID id) {
|
|
||||||
auto key = partition_key::from_singular(*schema, id);
|
struct replay_stats {
|
||||||
mutation m(schema, key);
|
std::optional<db_clock::time_point> min_too_fresh;
|
||||||
auto now = service::client_state(service::client_state::internal_tag()).get_timestamp();
|
bool need_cleanup = false;
|
||||||
m.partition().apply_delete(*schema, clustering_key_prefix::make_empty(), tombstone(now, gc_clock::now()));
|
|
||||||
return _qp.proxy().mutate_locally(m, tracing::trace_state_ptr(), db::commitlog::force_sync::no);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
auto batch = [this, limiter, delete_batch = std::move(delete_batch), &all_replayed](const cql3::untyped_result_set::row& row) -> future<stop_iteration> {
|
std::unordered_map<int32_t, replay_stats> replay_stats_per_shard;
|
||||||
|
|
||||||
|
// Use a stable `now` accross all batches, so skip/replay decisions are the
|
||||||
|
// same accross a while prefix of written_at (accross all ids).
|
||||||
|
const auto now = db_clock::now();
|
||||||
|
|
||||||
|
auto batch = [this, cleanup, limiter, schema, &all_replayed, &replay_stats_per_shard, now] (const cql3::untyped_result_set::row& row) -> future<stop_iteration> {
|
||||||
|
const auto stage = static_cast<batchlog_stage>(row.get_as<int8_t>("stage"));
|
||||||
|
const auto batch_shard = row.get_as<int32_t>("shard");
|
||||||
auto written_at = row.get_as<db_clock::time_point>("written_at");
|
auto written_at = row.get_as<db_clock::time_point>("written_at");
|
||||||
auto id = row.get_as<utils::UUID>("id");
|
auto id = row.get_as<utils::UUID>("id");
|
||||||
// enough time for the actual write + batchlog entry mutation delivery (two separate requests).
|
// enough time for the actual write + batchlog entry mutation delivery (two separate requests).
|
||||||
auto now = db_clock::now();
|
auto timeout = _replay_timeout;
|
||||||
auto timeout = get_batch_log_timeout();
|
|
||||||
|
|
||||||
if (utils::get_local_injector().is_enabled("skip_batch_replay")) {
|
if (utils::get_local_injector().is_enabled("skip_batch_replay")) {
|
||||||
blogger.debug("Skipping batch replay due to skip_batch_replay injection");
|
blogger.debug("Skipping batch replay due to skip_batch_replay injection");
|
||||||
@@ -194,52 +336,48 @@ future<db::all_batches_replayed> db::batchlog_manager::replay_all_failed_batches
|
|||||||
co_return stop_iteration::no;
|
co_return stop_iteration::no;
|
||||||
}
|
}
|
||||||
|
|
||||||
// check version of serialization format
|
|
||||||
if (!row.has("version")) {
|
|
||||||
blogger.warn("Skipping logged batch because of unknown version");
|
|
||||||
co_await delete_batch(id);
|
|
||||||
co_return stop_iteration::no;
|
|
||||||
}
|
|
||||||
|
|
||||||
auto version = row.get_as<int32_t>("version");
|
|
||||||
if (version != netw::messaging_service::current_version) {
|
|
||||||
blogger.warn("Skipping logged batch because of incorrect version {}; current version = {}", version, netw::messaging_service::current_version);
|
|
||||||
co_await delete_batch(id);
|
|
||||||
co_return stop_iteration::no;
|
|
||||||
}
|
|
||||||
|
|
||||||
auto data = row.get_blob_unfragmented("data");
|
auto data = row.get_blob_unfragmented("data");
|
||||||
|
|
||||||
blogger.debug("Replaying batch {}", id);
|
blogger.debug("Replaying batch {} from stage {} and batch shard {}", id, int32_t(stage), batch_shard);
|
||||||
|
|
||||||
|
utils::chunked_vector<mutation> mutations;
|
||||||
|
bool send_failed = false;
|
||||||
|
|
||||||
|
auto& shard_written_at = replay_stats_per_shard.try_emplace(batch_shard, replay_stats{}).first->second;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
auto fms = make_lw_shared<std::deque<canonical_mutation>>();
|
utils::chunked_vector<std::pair<canonical_mutation, schema_ptr>> fms;
|
||||||
auto in = ser::as_input_stream(data);
|
auto in = ser::as_input_stream(data);
|
||||||
while (in.size()) {
|
while (in.size()) {
|
||||||
fms->emplace_back(ser::deserialize(in, std::type_identity<canonical_mutation>()));
|
auto fm = ser::deserialize(in, std::type_identity<canonical_mutation>());
|
||||||
schema_ptr s = _qp.db().find_schema(fms->back().column_family_id());
|
const auto tbl = _qp.db().try_find_table(fm.column_family_id());
|
||||||
timeout = std::min(timeout, std::chrono::duration_cast<db_clock::duration>(s->tombstone_gc_options().propagation_delay_in_seconds()));
|
if (!tbl) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (written_at <= tbl->get_truncation_time()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
schema_ptr s = tbl->schema();
|
||||||
|
if (s->tombstone_gc_options().mode() == tombstone_gc_mode::repair) {
|
||||||
|
timeout = std::min(timeout, std::chrono::duration_cast<db_clock::duration>(s->tombstone_gc_options().propagation_delay_in_seconds()));
|
||||||
|
}
|
||||||
|
fms.emplace_back(std::move(fm), std::move(s));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (now < written_at + timeout) {
|
if (now < written_at + timeout) {
|
||||||
blogger.debug("Skipping replay of {}, too fresh", id);
|
blogger.debug("Skipping replay of {}, too fresh", id);
|
||||||
|
|
||||||
|
shard_written_at.min_too_fresh = std::min(shard_written_at.min_too_fresh.value_or(written_at), written_at);
|
||||||
|
|
||||||
co_return stop_iteration::no;
|
co_return stop_iteration::no;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto size = data.size();
|
auto size = data.size();
|
||||||
|
|
||||||
auto mutations = co_await map_reduce(*fms, [this, written_at] (canonical_mutation& fm) {
|
for (const auto& [fm, s] : fms) {
|
||||||
const auto& cf = _qp.proxy().local_db().find_column_family(fm.column_family_id());
|
mutations.emplace_back(fm.to_mutation(s));
|
||||||
return make_ready_future<canonical_mutation*>(written_at > cf.get_truncation_time() ? &fm : nullptr);
|
co_await maybe_yield();
|
||||||
},
|
}
|
||||||
utils::chunked_vector<mutation>(),
|
|
||||||
[this] (utils::chunked_vector<mutation> mutations, canonical_mutation* fm) {
|
|
||||||
if (fm) {
|
|
||||||
schema_ptr s = _qp.db().find_schema(fm->column_family_id());
|
|
||||||
mutations.emplace_back(fm->to_mutation(s));
|
|
||||||
}
|
|
||||||
return mutations;
|
|
||||||
});
|
|
||||||
|
|
||||||
if (!mutations.empty()) {
|
if (!mutations.empty()) {
|
||||||
const auto ttl = [written_at]() -> clock_type {
|
const auto ttl = [written_at]() -> clock_type {
|
||||||
@@ -265,7 +403,11 @@ future<db::all_batches_replayed> db::batchlog_manager::replay_all_failed_batches
|
|||||||
co_await limiter->reserve(size);
|
co_await limiter->reserve(size);
|
||||||
_stats.write_attempts += mutations.size();
|
_stats.write_attempts += mutations.size();
|
||||||
auto timeout = db::timeout_clock::now() + write_timeout;
|
auto timeout = db::timeout_clock::now() + write_timeout;
|
||||||
co_await _qp.proxy().send_batchlog_replay_to_all_replicas(std::move(mutations), timeout);
|
if (cleanup) {
|
||||||
|
co_await _qp.proxy().send_batchlog_replay_to_all_replicas(mutations, timeout);
|
||||||
|
} else {
|
||||||
|
co_await _qp.proxy().send_batchlog_replay_to_all_replicas(std::move(mutations), timeout);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (data_dictionary::no_such_keyspace& ex) {
|
} catch (data_dictionary::no_such_keyspace& ex) {
|
||||||
@@ -279,31 +421,80 @@ future<db::all_batches_replayed> db::batchlog_manager::replay_all_failed_batches
|
|||||||
// Do _not_ remove the batch, assuning we got a node write error.
|
// Do _not_ remove the batch, assuning we got a node write error.
|
||||||
// Since we don't have hints (which origin is satisfied with),
|
// Since we don't have hints (which origin is satisfied with),
|
||||||
// we have to resort to keeping this batch to next lap.
|
// we have to resort to keeping this batch to next lap.
|
||||||
co_return stop_iteration::no;
|
if (!cleanup || stage == batchlog_stage::failed_replay) {
|
||||||
|
co_return stop_iteration::no;
|
||||||
|
}
|
||||||
|
send_failed = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
auto& sp = _qp.proxy();
|
||||||
|
|
||||||
|
if (send_failed) {
|
||||||
|
blogger.debug("Moving batch {} to stage failed_replay", id);
|
||||||
|
auto m = get_batchlog_mutation_for(schema, mutations, netw::messaging_service::current_version, batchlog_stage::failed_replay, written_at, id);
|
||||||
|
co_await sp.mutate_locally(m, tracing::trace_state_ptr(), db::commitlog::force_sync::no);
|
||||||
|
}
|
||||||
|
|
||||||
// delete batch
|
// delete batch
|
||||||
co_await delete_batch(id);
|
auto m = get_batchlog_delete_mutation(schema, netw::messaging_service::current_version, stage, written_at, id);
|
||||||
|
co_await _qp.proxy().mutate_locally(m, tracing::trace_state_ptr(), db::commitlog::force_sync::no);
|
||||||
|
|
||||||
|
shard_written_at.need_cleanup = true;
|
||||||
|
|
||||||
co_return stop_iteration::no;
|
co_return stop_iteration::no;
|
||||||
};
|
};
|
||||||
|
|
||||||
co_await with_gate(_gate, [this, cleanup, batch = std::move(batch)] () mutable -> future<> {
|
co_await with_gate(_gate, [this, cleanup, &all_replayed, batch = std::move(batch), now, &replay_stats_per_shard] () mutable -> future<> {
|
||||||
blogger.debug("Started replayAllFailedBatches (cpu {})", this_shard_id());
|
blogger.debug("Started replayAllFailedBatches with cleanup: {}", cleanup);
|
||||||
co_await utils::get_local_injector().inject("add_delay_to_batch_replay", std::chrono::milliseconds(1000));
|
co_await utils::get_local_injector().inject("add_delay_to_batch_replay", std::chrono::milliseconds(1000));
|
||||||
co_await _qp.query_internal(
|
|
||||||
format("SELECT id, data, written_at, version FROM {}.{} BYPASS CACHE", system_keyspace::NAME, system_keyspace::BATCHLOG),
|
auto schema = _qp.db().find_schema(system_keyspace::NAME, system_keyspace::BATCHLOG_V2);
|
||||||
db::consistency_level::ONE,
|
|
||||||
{},
|
co_await coroutine::parallel_for_each(std::views::iota(0, 16), [&] (int32_t chunk) -> future<> {
|
||||||
page_size,
|
const int32_t batchlog_chunk_base = chunk * 16;
|
||||||
std::move(batch)).then([this, cleanup] {
|
for (int32_t i = 0; i < 16; ++i) {
|
||||||
if (cleanup == post_replay_cleanup::no) {
|
int32_t batchlog_shard = batchlog_chunk_base + i;
|
||||||
return make_ready_future<>();
|
|
||||||
|
co_await _qp.query_internal(
|
||||||
|
format("SELECT * FROM {}.{} WHERE version = ? AND stage = ? AND shard = ? BYPASS CACHE", system_keyspace::NAME, system_keyspace::BATCHLOG_V2),
|
||||||
|
db::consistency_level::ONE,
|
||||||
|
{data_value(netw::messaging_service::current_version), data_value(int8_t(batchlog_stage::failed_replay)), data_value(batchlog_shard)},
|
||||||
|
page_size,
|
||||||
|
batch);
|
||||||
|
|
||||||
|
co_await _qp.query_internal(
|
||||||
|
format("SELECT * FROM {}.{} WHERE version = ? AND stage = ? AND shard = ? BYPASS CACHE", system_keyspace::NAME, system_keyspace::BATCHLOG_V2),
|
||||||
|
db::consistency_level::ONE,
|
||||||
|
{data_value(netw::messaging_service::current_version), data_value(int8_t(batchlog_stage::initial)), data_value(batchlog_shard)},
|
||||||
|
page_size,
|
||||||
|
batch);
|
||||||
|
|
||||||
|
if (cleanup != post_replay_cleanup::yes) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto it = replay_stats_per_shard.find(batchlog_shard);
|
||||||
|
if (it == replay_stats_per_shard.end() || !it->second.need_cleanup) {
|
||||||
|
// Nothing was replayed on this batchlog shard, nothing to cleanup.
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto write_time = it->second.min_too_fresh.value_or(now - _replay_timeout);
|
||||||
|
const auto end_weight = it->second.min_too_fresh ? bound_weight::before_all_prefixed : bound_weight::after_all_prefixed;
|
||||||
|
auto [key, ckey] = get_batchlog_key(*schema, netw::messaging_service::current_version, batchlog_stage::initial, batchlog_shard, write_time, {});
|
||||||
|
auto end_pos = position_in_partition(partition_region::clustered, end_weight, std::move(ckey));
|
||||||
|
|
||||||
|
range_tombstone rt(position_in_partition::before_all_clustered_rows(), std::move(end_pos), tombstone(api::new_timestamp(), gc_clock::now()));
|
||||||
|
|
||||||
|
blogger.trace("Clean up batchlog shard {} with range tombstone {}", batchlog_shard, rt);
|
||||||
|
|
||||||
|
mutation m(schema, key);
|
||||||
|
m.partition().apply_row_tombstone(*schema, std::move(rt));
|
||||||
|
co_await _qp.proxy().mutate_locally(m, tracing::trace_state_ptr(), db::commitlog::force_sync::no);
|
||||||
}
|
}
|
||||||
// Replaying batches could have generated tombstones, flush to disk,
|
|
||||||
// where they can be compacted away.
|
|
||||||
return replica::database::flush_table_on_all_shards(_qp.proxy().get_db(), system_keyspace::NAME, system_keyspace::BATCHLOG);
|
|
||||||
}).then([] {
|
|
||||||
blogger.debug("Finished replayAllFailedBatches");
|
|
||||||
});
|
});
|
||||||
|
|
||||||
|
blogger.debug("Finished replayAllFailedBatches with all_replayed: {}", all_replayed);
|
||||||
});
|
});
|
||||||
|
|
||||||
co_return all_replayed;
|
co_return all_replayed;
|
||||||
|
|||||||
@@ -34,12 +34,17 @@ class system_keyspace;
|
|||||||
using all_batches_replayed = bool_class<struct all_batches_replayed_tag>;
|
using all_batches_replayed = bool_class<struct all_batches_replayed_tag>;
|
||||||
|
|
||||||
struct batchlog_manager_config {
|
struct batchlog_manager_config {
|
||||||
std::chrono::duration<double> write_request_timeout;
|
db_clock::duration replay_timeout;
|
||||||
uint64_t replay_rate = std::numeric_limits<uint64_t>::max();
|
uint64_t replay_rate = std::numeric_limits<uint64_t>::max();
|
||||||
std::chrono::milliseconds delay = std::chrono::milliseconds(0);
|
std::chrono::milliseconds delay = std::chrono::milliseconds(0);
|
||||||
unsigned replay_cleanup_after_replays;
|
unsigned replay_cleanup_after_replays;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
enum class batchlog_stage : int8_t {
|
||||||
|
initial,
|
||||||
|
failed_replay
|
||||||
|
};
|
||||||
|
|
||||||
class batchlog_manager : public peering_sharded_service<batchlog_manager> {
|
class batchlog_manager : public peering_sharded_service<batchlog_manager> {
|
||||||
public:
|
public:
|
||||||
using post_replay_cleanup = bool_class<class post_replay_cleanup_tag>;
|
using post_replay_cleanup = bool_class<class post_replay_cleanup_tag>;
|
||||||
@@ -59,7 +64,7 @@ private:
|
|||||||
|
|
||||||
cql3::query_processor& _qp;
|
cql3::query_processor& _qp;
|
||||||
db::system_keyspace& _sys_ks;
|
db::system_keyspace& _sys_ks;
|
||||||
db_clock::duration _write_request_timeout;
|
db_clock::duration _replay_timeout;
|
||||||
uint64_t _replay_rate;
|
uint64_t _replay_rate;
|
||||||
std::chrono::milliseconds _delay;
|
std::chrono::milliseconds _delay;
|
||||||
unsigned _replay_cleanup_after_replays = 100;
|
unsigned _replay_cleanup_after_replays = 100;
|
||||||
@@ -71,6 +76,14 @@ private:
|
|||||||
|
|
||||||
gc_clock::time_point _last_replay;
|
gc_clock::time_point _last_replay;
|
||||||
|
|
||||||
|
// Was the v1 -> v2 migration already done since last restart?
|
||||||
|
// The migration is attempted once after each restart. This is redundant but
|
||||||
|
// keeps thing simple. Once no upgrade path exists from a ScyllaDB version
|
||||||
|
// which can still produce v1 entries, this migration code can be removed.
|
||||||
|
bool _migration_done = false;
|
||||||
|
|
||||||
|
future<> maybe_migrate_v1_to_v2();
|
||||||
|
|
||||||
future<all_batches_replayed> replay_all_failed_batches(post_replay_cleanup cleanup);
|
future<all_batches_replayed> replay_all_failed_batches(post_replay_cleanup cleanup);
|
||||||
public:
|
public:
|
||||||
// Takes a QP, not a distributes. Because this object is supposed
|
// Takes a QP, not a distributes. Because this object is supposed
|
||||||
@@ -85,10 +98,13 @@ public:
|
|||||||
future<all_batches_replayed> do_batch_log_replay(post_replay_cleanup cleanup);
|
future<all_batches_replayed> do_batch_log_replay(post_replay_cleanup cleanup);
|
||||||
|
|
||||||
future<size_t> count_all_batches() const;
|
future<size_t> count_all_batches() const;
|
||||||
db_clock::duration get_batch_log_timeout() const;
|
|
||||||
gc_clock::time_point get_last_replay() const {
|
gc_clock::time_point get_last_replay() const {
|
||||||
return _last_replay;
|
return _last_replay;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const stats& stats() const {
|
||||||
|
return _stats;
|
||||||
|
}
|
||||||
private:
|
private:
|
||||||
future<> batchlog_replay_loop();
|
future<> batchlog_replay_loop();
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -54,12 +54,14 @@ public:
|
|||||||
uint64_t applied_mutations = 0;
|
uint64_t applied_mutations = 0;
|
||||||
uint64_t corrupt_bytes = 0;
|
uint64_t corrupt_bytes = 0;
|
||||||
uint64_t truncated_at = 0;
|
uint64_t truncated_at = 0;
|
||||||
|
uint64_t broken_files = 0;
|
||||||
|
|
||||||
stats& operator+=(const stats& s) {
|
stats& operator+=(const stats& s) {
|
||||||
invalid_mutations += s.invalid_mutations;
|
invalid_mutations += s.invalid_mutations;
|
||||||
skipped_mutations += s.skipped_mutations;
|
skipped_mutations += s.skipped_mutations;
|
||||||
applied_mutations += s.applied_mutations;
|
applied_mutations += s.applied_mutations;
|
||||||
corrupt_bytes += s.corrupt_bytes;
|
corrupt_bytes += s.corrupt_bytes;
|
||||||
|
broken_files += s.broken_files;
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
stats operator+(const stats& s) const {
|
stats operator+(const stats& s) const {
|
||||||
@@ -192,6 +194,8 @@ db::commitlog_replayer::impl::recover(const commitlog::descriptor& d, const comm
|
|||||||
s->corrupt_bytes += e.bytes();
|
s->corrupt_bytes += e.bytes();
|
||||||
} catch (commitlog::segment_truncation& e) {
|
} catch (commitlog::segment_truncation& e) {
|
||||||
s->truncated_at = e.position();
|
s->truncated_at = e.position();
|
||||||
|
} catch (commitlog::header_checksum_error&) {
|
||||||
|
++s->broken_files;
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
throw;
|
throw;
|
||||||
}
|
}
|
||||||
@@ -370,6 +374,9 @@ future<> db::commitlog_replayer::recover(std::vector<sstring> files, sstring fna
|
|||||||
if (stats.truncated_at != 0) {
|
if (stats.truncated_at != 0) {
|
||||||
rlogger.warn("Truncated file: {} at position {}.", f, stats.truncated_at);
|
rlogger.warn("Truncated file: {} at position {}.", f, stats.truncated_at);
|
||||||
}
|
}
|
||||||
|
if (stats.broken_files != 0) {
|
||||||
|
rlogger.warn("Corrupted file header: {}. Skipped.", f);
|
||||||
|
}
|
||||||
rlogger.debug("Log replay of {} complete, {} replayed mutations ({} invalid, {} skipped)"
|
rlogger.debug("Log replay of {} complete, {} replayed mutations ({} invalid, {} skipped)"
|
||||||
, f
|
, f
|
||||||
, stats.applied_mutations
|
, stats.applied_mutations
|
||||||
|
|||||||
@@ -1152,7 +1152,7 @@ db::config::config(std::shared_ptr<db::extensions> exts)
|
|||||||
"Number of threads with which to deliver hints. In multiple data-center deployments, consider increasing this number because cross data-center handoff is generally slower.")
|
"Number of threads with which to deliver hints. In multiple data-center deployments, consider increasing this number because cross data-center handoff is generally slower.")
|
||||||
, batchlog_replay_throttle_in_kb(this, "batchlog_replay_throttle_in_kb", value_status::Unused, 1024,
|
, batchlog_replay_throttle_in_kb(this, "batchlog_replay_throttle_in_kb", value_status::Unused, 1024,
|
||||||
"Total maximum throttle. Throttling is reduced proportionally to the number of nodes in the cluster.")
|
"Total maximum throttle. Throttling is reduced proportionally to the number of nodes in the cluster.")
|
||||||
, batchlog_replay_cleanup_after_replays(this, "batchlog_replay_cleanup_after_replays", liveness::LiveUpdate, value_status::Used, 60,
|
, batchlog_replay_cleanup_after_replays(this, "batchlog_replay_cleanup_after_replays", liveness::LiveUpdate, value_status::Used, 1,
|
||||||
"Clean up batchlog memtable after every N replays. Replays are issued on a timer, every 60 seconds. So if batchlog_replay_cleanup_after_replays is set to 60, the batchlog memtable is flushed every 60 * 60 seconds.")
|
"Clean up batchlog memtable after every N replays. Replays are issued on a timer, every 60 seconds. So if batchlog_replay_cleanup_after_replays is set to 60, the batchlog memtable is flushed every 60 * 60 seconds.")
|
||||||
/**
|
/**
|
||||||
* @Group Request scheduler properties
|
* @Group Request scheduler properties
|
||||||
|
|||||||
@@ -248,7 +248,7 @@ future<db::commitlog> hint_endpoint_manager::add_store() noexcept {
|
|||||||
// which is larger than the segment ID of the RP of the last written hint.
|
// which is larger than the segment ID of the RP of the last written hint.
|
||||||
cfg.base_segment_id = _last_written_rp.base_id();
|
cfg.base_segment_id = _last_written_rp.base_id();
|
||||||
|
|
||||||
return commitlog::create_commitlog(std::move(cfg)).then([this] (commitlog l) -> future<commitlog> {
|
return commitlog::create_commitlog(std::move(cfg)).then([this] (this auto, commitlog l) -> future<commitlog> {
|
||||||
// add_store() is triggered every time hint files are forcefully flushed to I/O (every hints_flush_period).
|
// add_store() is triggered every time hint files are forcefully flushed to I/O (every hints_flush_period).
|
||||||
// When this happens we want to refill _sender's segments only if it has finished with the segments he had before.
|
// When this happens we want to refill _sender's segments only if it has finished with the segments he had before.
|
||||||
if (_sender.have_segments()) {
|
if (_sender.have_segments()) {
|
||||||
|
|||||||
@@ -135,5 +135,5 @@ const std::string db::object_storage_endpoint_param::gs_type = "gs";
|
|||||||
|
|
||||||
auto fmt::formatter<db::object_storage_endpoint_param>::format(const db::object_storage_endpoint_param& e, fmt::format_context& ctx) const
|
auto fmt::formatter<db::object_storage_endpoint_param>::format(const db::object_storage_endpoint_param& e, fmt::format_context& ctx) const
|
||||||
-> decltype(ctx.out()) {
|
-> decltype(ctx.out()) {
|
||||||
return fmt::format_to(ctx.out(), "object_storage_endpoint_param{{}}", e.to_json_string());
|
return fmt::format_to(ctx.out(), "object_storage_endpoint_param{}", e.to_json_string());
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1262,16 +1262,9 @@ static future<> do_merge_schema(sharded<service::storage_proxy>& proxy, sharded
|
|||||||
{
|
{
|
||||||
slogger.trace("do_merge_schema: {}", mutations);
|
slogger.trace("do_merge_schema: {}", mutations);
|
||||||
schema_applier ap(proxy, ss, sys_ks, reload);
|
schema_applier ap(proxy, ss, sys_ks, reload);
|
||||||
std::exception_ptr ex;
|
co_await execute_do_merge_schema(proxy, ap, std::move(mutations)).finally([&ap]() {
|
||||||
try {
|
return ap.destroy();
|
||||||
co_await execute_do_merge_schema(proxy, ap, std::move(mutations));
|
});
|
||||||
} catch (...) {
|
|
||||||
ex = std::current_exception();
|
|
||||||
}
|
|
||||||
co_await ap.destroy();
|
|
||||||
if (ex) {
|
|
||||||
throw ex;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -110,6 +110,7 @@ namespace {
|
|||||||
system_keyspace::v3::CDC_LOCAL,
|
system_keyspace::v3::CDC_LOCAL,
|
||||||
system_keyspace::DICTS,
|
system_keyspace::DICTS,
|
||||||
system_keyspace::VIEW_BUILDING_TASKS,
|
system_keyspace::VIEW_BUILDING_TASKS,
|
||||||
|
system_keyspace::CLIENT_ROUTES,
|
||||||
};
|
};
|
||||||
if (ks_name == system_keyspace::NAME && tables.contains(cf_name)) {
|
if (ks_name == system_keyspace::NAME && tables.contains(cf_name)) {
|
||||||
props.enable_schema_commitlog();
|
props.enable_schema_commitlog();
|
||||||
@@ -137,6 +138,7 @@ namespace {
|
|||||||
system_keyspace::ROLE_PERMISSIONS,
|
system_keyspace::ROLE_PERMISSIONS,
|
||||||
system_keyspace::DICTS,
|
system_keyspace::DICTS,
|
||||||
system_keyspace::VIEW_BUILDING_TASKS,
|
system_keyspace::VIEW_BUILDING_TASKS,
|
||||||
|
system_keyspace::CLIENT_ROUTES,
|
||||||
};
|
};
|
||||||
if (ks_name == system_keyspace::NAME && tables.contains(cf_name)) {
|
if (ks_name == system_keyspace::NAME && tables.contains(cf_name)) {
|
||||||
props.is_group0_table = true;
|
props.is_group0_table = true;
|
||||||
@@ -213,6 +215,30 @@ schema_ptr system_keyspace::batchlog() {
|
|||||||
return batchlog;
|
return batchlog;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
schema_ptr system_keyspace::batchlog_v2() {
|
||||||
|
static thread_local auto batchlog_v2 = [] {
|
||||||
|
schema_builder builder(generate_legacy_id(NAME, BATCHLOG_V2), NAME, BATCHLOG_V2,
|
||||||
|
// partition key
|
||||||
|
{{"version", int32_type}, {"stage", byte_type}, {"shard", int32_type}},
|
||||||
|
// clustering key
|
||||||
|
{{"written_at", timestamp_type}, {"id", uuid_type}},
|
||||||
|
// regular columns
|
||||||
|
{{"data", bytes_type}},
|
||||||
|
// static columns
|
||||||
|
{},
|
||||||
|
// regular column name type
|
||||||
|
utf8_type,
|
||||||
|
// comment
|
||||||
|
"batches awaiting replay"
|
||||||
|
);
|
||||||
|
builder.set_gc_grace_seconds(0);
|
||||||
|
builder.set_caching_options(caching_options::get_disabled_caching_options());
|
||||||
|
builder.with_hash_version();
|
||||||
|
return builder.build(schema_builder::compact_storage::no);
|
||||||
|
}();
|
||||||
|
return batchlog_v2;
|
||||||
|
}
|
||||||
|
|
||||||
/*static*/ schema_ptr system_keyspace::paxos() {
|
/*static*/ schema_ptr system_keyspace::paxos() {
|
||||||
static thread_local auto paxos = [] {
|
static thread_local auto paxos = [] {
|
||||||
// FIXME: switch to the new schema_builder interface (with_column(...), etc)
|
// FIXME: switch to the new schema_builder interface (with_column(...), etc)
|
||||||
@@ -285,6 +311,7 @@ schema_ptr system_keyspace::topology() {
|
|||||||
.with_column("tablet_balancing_enabled", boolean_type, column_kind::static_column)
|
.with_column("tablet_balancing_enabled", boolean_type, column_kind::static_column)
|
||||||
.with_column("upgrade_state", utf8_type, column_kind::static_column)
|
.with_column("upgrade_state", utf8_type, column_kind::static_column)
|
||||||
.with_column("global_requests", set_type_impl::get_instance(timeuuid_type, true), column_kind::static_column)
|
.with_column("global_requests", set_type_impl::get_instance(timeuuid_type, true), column_kind::static_column)
|
||||||
|
.with_column("paused_rf_change_requests", set_type_impl::get_instance(timeuuid_type, true), column_kind::static_column)
|
||||||
.set_comment("Current state of topology change machine")
|
.set_comment("Current state of topology change machine")
|
||||||
.with_hash_version()
|
.with_hash_version()
|
||||||
.build();
|
.build();
|
||||||
@@ -1391,6 +1418,23 @@ schema_ptr system_keyspace::view_building_tasks() {
|
|||||||
return schema;
|
return schema;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
schema_ptr system_keyspace::client_routes() {
|
||||||
|
static thread_local auto schema = [] {
|
||||||
|
auto id = generate_legacy_id(NAME, CLIENT_ROUTES);
|
||||||
|
return schema_builder(NAME, CLIENT_ROUTES, std::make_optional(id))
|
||||||
|
.with_column("connection_id", utf8_type, column_kind::partition_key)
|
||||||
|
.with_column("host_id", uuid_type, column_kind::clustering_key)
|
||||||
|
.with_column("address", utf8_type)
|
||||||
|
.with_column("port", int32_type)
|
||||||
|
.with_column("tls_port", int32_type)
|
||||||
|
.with_column("alternator_port", int32_type)
|
||||||
|
.with_column("alternator_https_port", int32_type)
|
||||||
|
.with_hash_version()
|
||||||
|
.build();
|
||||||
|
}();
|
||||||
|
return schema;
|
||||||
|
}
|
||||||
|
|
||||||
future<system_keyspace::local_info> system_keyspace::load_local_info() {
|
future<system_keyspace::local_info> system_keyspace::load_local_info() {
|
||||||
auto msg = co_await execute_cql(format("SELECT host_id, cluster_name, data_center, rack FROM system.{} WHERE key=?", LOCAL), sstring(LOCAL));
|
auto msg = co_await execute_cql(format("SELECT host_id, cluster_name, data_center, rack FROM system.{} WHERE key=?", LOCAL), sstring(LOCAL));
|
||||||
|
|
||||||
@@ -2304,7 +2348,7 @@ std::vector<schema_ptr> system_keyspace::all_tables(const db::config& cfg) {
|
|||||||
std::copy(schema_tables.begin(), schema_tables.end(), std::back_inserter(r));
|
std::copy(schema_tables.begin(), schema_tables.end(), std::back_inserter(r));
|
||||||
auto auth_tables = system_keyspace::auth_tables();
|
auto auth_tables = system_keyspace::auth_tables();
|
||||||
std::copy(auth_tables.begin(), auth_tables.end(), std::back_inserter(r));
|
std::copy(auth_tables.begin(), auth_tables.end(), std::back_inserter(r));
|
||||||
r.insert(r.end(), { built_indexes(), hints(), batchlog(), paxos(), local(),
|
r.insert(r.end(), { built_indexes(), hints(), batchlog(), batchlog_v2(), paxos(), local(),
|
||||||
peers(), peer_events(), range_xfers(),
|
peers(), peer_events(), range_xfers(),
|
||||||
compactions_in_progress(), compaction_history(),
|
compactions_in_progress(), compaction_history(),
|
||||||
sstable_activity(), size_estimates(), large_partitions(), large_rows(), large_cells(),
|
sstable_activity(), size_estimates(), large_partitions(), large_rows(), large_cells(),
|
||||||
@@ -2318,7 +2362,7 @@ std::vector<schema_ptr> system_keyspace::all_tables(const db::config& cfg) {
|
|||||||
v3::cdc_local(),
|
v3::cdc_local(),
|
||||||
raft(), raft_snapshots(), raft_snapshot_config(), group0_history(), discovery(),
|
raft(), raft_snapshots(), raft_snapshot_config(), group0_history(), discovery(),
|
||||||
topology(), cdc_generations_v3(), topology_requests(), service_levels_v2(), view_build_status_v2(),
|
topology(), cdc_generations_v3(), topology_requests(), service_levels_v2(), view_build_status_v2(),
|
||||||
dicts(), view_building_tasks(), cdc_streams_state(), cdc_streams_history()
|
dicts(), view_building_tasks(), client_routes(), cdc_streams_state(), cdc_streams_history()
|
||||||
});
|
});
|
||||||
|
|
||||||
if (cfg.check_experimental(db::experimental_features_t::feature::BROADCAST_TABLES)) {
|
if (cfg.check_experimental(db::experimental_features_t::feature::BROADCAST_TABLES)) {
|
||||||
@@ -2335,7 +2379,9 @@ std::vector<schema_ptr> system_keyspace::all_tables(const db::config& cfg) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static bool maybe_write_in_user_memory(schema_ptr s) {
|
static bool maybe_write_in_user_memory(schema_ptr s) {
|
||||||
return (s.get() == system_keyspace::batchlog().get()) || (s.get() == system_keyspace::paxos().get())
|
return (s.get() == system_keyspace::batchlog().get())
|
||||||
|
|| (s.get() == system_keyspace::batchlog_v2().get())
|
||||||
|
|| (s.get() == system_keyspace::paxos().get())
|
||||||
|| s == system_keyspace::v3::scylla_views_builds_in_progress();
|
|| s == system_keyspace::v3::scylla_views_builds_in_progress();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3111,7 +3157,10 @@ static bool must_have_tokens(service::node_state nst) {
|
|||||||
// A decommissioning node doesn't have tokens at the end, they are
|
// A decommissioning node doesn't have tokens at the end, they are
|
||||||
// removed during transition to the left_token_ring state.
|
// removed during transition to the left_token_ring state.
|
||||||
case service::node_state::decommissioning: return false;
|
case service::node_state::decommissioning: return false;
|
||||||
case service::node_state::removing: return true;
|
// A removing node might or might not have tokens depending on whether
|
||||||
|
// REMOVENODE_WITH_LEFT_TOKEN_RING feature is enabled. To support both
|
||||||
|
// cases, we allow removing nodes to not have tokens.
|
||||||
|
case service::node_state::removing: return false;
|
||||||
case service::node_state::rebuilding: return true;
|
case service::node_state::rebuilding: return true;
|
||||||
case service::node_state::normal: return true;
|
case service::node_state::normal: return true;
|
||||||
case service::node_state::left: return false;
|
case service::node_state::left: return false;
|
||||||
@@ -3351,6 +3400,12 @@ future<service::topology> system_keyspace::load_topology_state(const std::unorde
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (some_row.has("paused_rf_change_requests")) {
|
||||||
|
for (auto&& v : deserialize_set_column(*topology(), some_row, "paused_rf_change_requests")) {
|
||||||
|
ret.paused_rf_change_requests.insert(value_cast<utils::UUID>(v));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (some_row.has("enabled_features")) {
|
if (some_row.has("enabled_features")) {
|
||||||
ret.enabled_features = decode_features(deserialize_set_column(*topology(), some_row, "enabled_features"));
|
ret.enabled_features = decode_features(deserialize_set_column(*topology(), some_row, "enabled_features"));
|
||||||
}
|
}
|
||||||
@@ -3562,35 +3617,43 @@ system_keyspace::topology_requests_entry system_keyspace::topology_request_row_t
|
|||||||
return entry;
|
return entry;
|
||||||
}
|
}
|
||||||
|
|
||||||
future<system_keyspace::topology_requests_entry> system_keyspace::get_topology_request_entry(utils::UUID id, bool require_entry) {
|
future<system_keyspace::topology_requests_entry> system_keyspace::get_topology_request_entry(utils::UUID id) {
|
||||||
|
auto r = co_await get_topology_request_entry_opt(id);
|
||||||
|
if (!r) {
|
||||||
|
on_internal_error(slogger, format("no entry for request id {}", id));
|
||||||
|
}
|
||||||
|
co_return std::move(*r);
|
||||||
|
}
|
||||||
|
|
||||||
|
future<std::optional<system_keyspace::topology_requests_entry>> system_keyspace::get_topology_request_entry_opt(utils::UUID id) {
|
||||||
auto rs = co_await execute_cql(
|
auto rs = co_await execute_cql(
|
||||||
format("SELECT * FROM system.{} WHERE id = {}", TOPOLOGY_REQUESTS, id));
|
format("SELECT * FROM system.{} WHERE id = {}", TOPOLOGY_REQUESTS, id));
|
||||||
|
|
||||||
if (!rs || rs->empty()) {
|
if (!rs || rs->empty()) {
|
||||||
if (require_entry) {
|
co_return std::nullopt;
|
||||||
on_internal_error(slogger, format("no entry for request id {}", id));
|
|
||||||
} else {
|
|
||||||
co_return topology_requests_entry{
|
|
||||||
.id = utils::null_uuid()
|
|
||||||
};
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto& row = rs->one();
|
const auto& row = rs->one();
|
||||||
co_return topology_request_row_to_entry(id, row);
|
co_return topology_request_row_to_entry(id, row);
|
||||||
}
|
}
|
||||||
|
|
||||||
future<system_keyspace::topology_requests_entries> system_keyspace::get_node_ops_request_entries(db_clock::time_point end_time_limit) {
|
future<system_keyspace::topology_requests_entries> system_keyspace::get_topology_request_entries(std::vector<std::variant<service::topology_request, service::global_topology_request>> request_types, db_clock::time_point end_time_limit) {
|
||||||
|
sstring request_types_str = "";
|
||||||
|
bool first = true;
|
||||||
|
for (const auto& rt : request_types) {
|
||||||
|
if (!std::exchange(first, false)) {
|
||||||
|
request_types_str += ", ";
|
||||||
|
}
|
||||||
|
request_types_str += std::visit([] (auto&& arg) { return fmt::format("'{}'", arg); }, rt);
|
||||||
|
}
|
||||||
|
|
||||||
// Running requests.
|
// Running requests.
|
||||||
auto rs_running = co_await execute_cql(
|
auto rs_running = co_await execute_cql(
|
||||||
format("SELECT * FROM system.{} WHERE done = false AND request_type IN ('{}', '{}', '{}', '{}', '{}') ALLOW FILTERING", TOPOLOGY_REQUESTS,
|
format("SELECT * FROM system.{} WHERE done = false AND request_type IN ({}) ALLOW FILTERING", TOPOLOGY_REQUESTS, request_types_str));
|
||||||
service::topology_request::join, service::topology_request::replace, service::topology_request::rebuild, service::topology_request::leave, service::topology_request::remove));
|
|
||||||
|
|
||||||
|
|
||||||
// Requests which finished after end_time_limit.
|
// Requests which finished after end_time_limit.
|
||||||
auto rs_done = co_await execute_cql(
|
auto rs_done = co_await execute_cql(
|
||||||
format("SELECT * FROM system.{} WHERE end_time > {} AND request_type IN ('{}', '{}', '{}', '{}', '{}') ALLOW FILTERING", TOPOLOGY_REQUESTS, end_time_limit.time_since_epoch().count(),
|
format("SELECT * FROM system.{} WHERE end_time > {} AND request_type IN ({}) ALLOW FILTERING", TOPOLOGY_REQUESTS, end_time_limit.time_since_epoch().count(), request_types_str));
|
||||||
service::topology_request::join, service::topology_request::replace, service::topology_request::rebuild, service::topology_request::leave, service::topology_request::remove));
|
|
||||||
|
|
||||||
topology_requests_entries m;
|
topology_requests_entries m;
|
||||||
for (const auto& row: *rs_done) {
|
for (const auto& row: *rs_done) {
|
||||||
@@ -3608,6 +3671,16 @@ future<system_keyspace::topology_requests_entries> system_keyspace::get_node_ops
|
|||||||
co_return m;
|
co_return m;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
future<system_keyspace::topology_requests_entries> system_keyspace::get_node_ops_request_entries(db_clock::time_point end_time_limit) {
|
||||||
|
return get_topology_request_entries({
|
||||||
|
service::topology_request::join,
|
||||||
|
service::topology_request::replace,
|
||||||
|
service::topology_request::rebuild,
|
||||||
|
service::topology_request::leave,
|
||||||
|
service::topology_request::remove
|
||||||
|
}, end_time_limit);
|
||||||
|
}
|
||||||
|
|
||||||
future<mutation> system_keyspace::get_insert_dict_mutation(
|
future<mutation> system_keyspace::get_insert_dict_mutation(
|
||||||
std::string_view name,
|
std::string_view name,
|
||||||
bytes data,
|
bytes data,
|
||||||
|
|||||||
@@ -163,6 +163,7 @@ public:
|
|||||||
static constexpr auto NAME = "system";
|
static constexpr auto NAME = "system";
|
||||||
static constexpr auto HINTS = "hints";
|
static constexpr auto HINTS = "hints";
|
||||||
static constexpr auto BATCHLOG = "batchlog";
|
static constexpr auto BATCHLOG = "batchlog";
|
||||||
|
static constexpr auto BATCHLOG_V2 = "batchlog_v2";
|
||||||
static constexpr auto PAXOS = "paxos";
|
static constexpr auto PAXOS = "paxos";
|
||||||
static constexpr auto BUILT_INDEXES = "IndexInfo";
|
static constexpr auto BUILT_INDEXES = "IndexInfo";
|
||||||
static constexpr auto LOCAL = "local";
|
static constexpr auto LOCAL = "local";
|
||||||
@@ -198,6 +199,7 @@ public:
|
|||||||
static constexpr auto VIEW_BUILD_STATUS_V2 = "view_build_status_v2";
|
static constexpr auto VIEW_BUILD_STATUS_V2 = "view_build_status_v2";
|
||||||
static constexpr auto DICTS = "dicts";
|
static constexpr auto DICTS = "dicts";
|
||||||
static constexpr auto VIEW_BUILDING_TASKS = "view_building_tasks";
|
static constexpr auto VIEW_BUILDING_TASKS = "view_building_tasks";
|
||||||
|
static constexpr auto CLIENT_ROUTES = "client_routes";
|
||||||
|
|
||||||
// auth
|
// auth
|
||||||
static constexpr auto ROLES = "roles";
|
static constexpr auto ROLES = "roles";
|
||||||
@@ -255,6 +257,7 @@ public:
|
|||||||
|
|
||||||
static schema_ptr hints();
|
static schema_ptr hints();
|
||||||
static schema_ptr batchlog();
|
static schema_ptr batchlog();
|
||||||
|
static schema_ptr batchlog_v2();
|
||||||
static schema_ptr paxos();
|
static schema_ptr paxos();
|
||||||
static schema_ptr built_indexes(); // TODO (from Cassandra): make private
|
static schema_ptr built_indexes(); // TODO (from Cassandra): make private
|
||||||
static schema_ptr raft();
|
static schema_ptr raft();
|
||||||
@@ -274,6 +277,7 @@ public:
|
|||||||
static schema_ptr view_build_status_v2();
|
static schema_ptr view_build_status_v2();
|
||||||
static schema_ptr dicts();
|
static schema_ptr dicts();
|
||||||
static schema_ptr view_building_tasks();
|
static schema_ptr view_building_tasks();
|
||||||
|
static schema_ptr client_routes();
|
||||||
|
|
||||||
// auth
|
// auth
|
||||||
static schema_ptr roles();
|
static schema_ptr roles();
|
||||||
@@ -665,7 +669,9 @@ public:
|
|||||||
|
|
||||||
future<service::topology_request_state> get_topology_request_state(utils::UUID id, bool require_entry);
|
future<service::topology_request_state> get_topology_request_state(utils::UUID id, bool require_entry);
|
||||||
topology_requests_entry topology_request_row_to_entry(utils::UUID id, const cql3::untyped_result_set_row& row);
|
topology_requests_entry topology_request_row_to_entry(utils::UUID id, const cql3::untyped_result_set_row& row);
|
||||||
future<topology_requests_entry> get_topology_request_entry(utils::UUID id, bool require_entry);
|
future<topology_requests_entry> get_topology_request_entry(utils::UUID id);
|
||||||
|
future<std::optional<topology_requests_entry>> get_topology_request_entry_opt(utils::UUID id);
|
||||||
|
future<system_keyspace::topology_requests_entries> get_topology_request_entries(std::vector<std::variant<service::topology_request, service::global_topology_request>> request_types, db_clock::time_point end_time_limit);
|
||||||
future<topology_requests_entries> get_node_ops_request_entries(db_clock::time_point end_time_limit);
|
future<topology_requests_entries> get_node_ops_request_entries(db_clock::time_point end_time_limit);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|||||||
399
db/view/view.cc
399
db/view/view.cc
@@ -1744,6 +1744,115 @@ bool should_generate_view_updates_on_this_shard(const schema_ptr& base, const lo
|
|||||||
&& std::ranges::contains(shards, this_shard_id());
|
&& std::ranges::contains(shards, this_shard_id());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static endpoints_to_update get_view_natural_endpoint_vnodes(
|
||||||
|
locator::host_id me,
|
||||||
|
std::vector<std::reference_wrapper<const locator::node>> base_nodes,
|
||||||
|
std::vector<std::reference_wrapper<const locator::node>> view_nodes,
|
||||||
|
locator::endpoint_dc_rack my_location,
|
||||||
|
const locator::network_topology_strategy* network_topology,
|
||||||
|
replica::cf_stats& cf_stats) {
|
||||||
|
using node_vector = std::vector<std::reference_wrapper<const locator::node>>;
|
||||||
|
node_vector base_endpoints, view_endpoints;
|
||||||
|
auto& my_datacenter = my_location.dc;
|
||||||
|
|
||||||
|
auto process_candidate = [&] (node_vector& nodes, std::reference_wrapper<const locator::node> node) {
|
||||||
|
if (!network_topology || node.get().dc() == my_datacenter) {
|
||||||
|
nodes.emplace_back(node);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
for (auto&& base_node : base_nodes) {
|
||||||
|
process_candidate(base_endpoints, base_node);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto&& view_node : view_nodes) {
|
||||||
|
auto it = std::ranges::find(base_endpoints, view_node.get().host_id(), std::mem_fn(&locator::node::host_id));
|
||||||
|
// If this base replica is also one of the view replicas, we use
|
||||||
|
// ourselves as the view replica.
|
||||||
|
// We don't return an extra endpoint, as it's only needed when
|
||||||
|
// using tablets (so !use_legacy_self_pairing)
|
||||||
|
if (view_node.get().host_id() == me && it != base_endpoints.end()) {
|
||||||
|
return {.natural_endpoint = me};
|
||||||
|
}
|
||||||
|
|
||||||
|
// We have to remove any endpoint which is shared between the base
|
||||||
|
// and the view, as it will select itself and throw off the counts
|
||||||
|
// otherwise.
|
||||||
|
if (it != base_endpoints.end()) {
|
||||||
|
base_endpoints.erase(it);
|
||||||
|
} else if (!network_topology || view_node.get().dc() == my_datacenter) {
|
||||||
|
view_endpoints.push_back(view_node);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
auto base_it = std::ranges::find(base_endpoints, me, std::mem_fn(&locator::node::host_id));
|
||||||
|
if (base_it == base_endpoints.end()) {
|
||||||
|
// This node is not a base replica of this key, so we return empty
|
||||||
|
// FIXME: This case shouldn't happen, and if it happens, a view update
|
||||||
|
// would be lost.
|
||||||
|
++cf_stats.total_view_updates_on_wrong_node;
|
||||||
|
vlogger.warn("Could not find {} in base_endpoints={}", me,
|
||||||
|
base_endpoints | std::views::transform(std::mem_fn(&locator::node::host_id)));
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
size_t idx = base_it - base_endpoints.begin();
|
||||||
|
return {.natural_endpoint = view_endpoints[idx].get().host_id()};
|
||||||
|
}
|
||||||
|
|
||||||
|
static std::optional<locator::host_id> get_unpaired_view_endpoint(
|
||||||
|
std::vector<std::reference_wrapper<const locator::node>> base_nodes,
|
||||||
|
std::vector<std::reference_wrapper<const locator::node>> view_nodes,
|
||||||
|
replica::cf_stats& cf_stats) {
|
||||||
|
std::unordered_set<locator::endpoint_dc_rack> base_dc_racks;
|
||||||
|
for (auto&& base_node : base_nodes) {
|
||||||
|
if (base_dc_racks.contains(base_node.get().dc_rack())) {
|
||||||
|
// We can't do rack-aware pairing if there are multiple replicas in the same rack.
|
||||||
|
++cf_stats.total_view_updates_failed_pairing;
|
||||||
|
vlogger.warn("Can't perform base-view pairing in this topology. There are multiple base table replicas in the same dc/rack({}/{}):",
|
||||||
|
base_node.get().dc(), base_node.get().rack());
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
base_dc_racks.insert(base_node.get().dc_rack());
|
||||||
|
}
|
||||||
|
|
||||||
|
std::unordered_set<locator::endpoint_dc_rack> paired_view_dc_racks;
|
||||||
|
std::unordered_map<locator::endpoint_dc_rack, locator::host_id> unpaired_view_dc_rack_replicas;
|
||||||
|
for (auto&& view_node : view_nodes) {
|
||||||
|
if (paired_view_dc_racks.contains(view_node.get().dc_rack()) || unpaired_view_dc_rack_replicas.contains(view_node.get().dc_rack())) {
|
||||||
|
// We can't do rack-aware pairing if there are multiple replicas in the same rack.
|
||||||
|
++cf_stats.total_view_updates_failed_pairing;
|
||||||
|
vlogger.warn("Can't perform base-view pairing in this topology. There are multiple view table replicas in the same dc/rack({}/{}):",
|
||||||
|
view_node.get().dc(), view_node.get().rack());
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
// Track unpaired replicas in both sets
|
||||||
|
if (base_dc_racks.contains(view_node.get().dc_rack())) {
|
||||||
|
paired_view_dc_racks.insert(view_node.get().dc_rack());
|
||||||
|
} else {
|
||||||
|
unpaired_view_dc_rack_replicas.insert({view_node.get().dc_rack(), view_node.get().host_id()});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (unpaired_view_dc_rack_replicas.size() > 0) {
|
||||||
|
// There are view replicas that can't be paired with any base replica
|
||||||
|
// This can happen as a result of an RF change when the view replica finishes streaming
|
||||||
|
// before the base replica.
|
||||||
|
// Because of this, a view replica might not get paired with any base replica, so we need
|
||||||
|
// to send an additional update to it.
|
||||||
|
++cf_stats.total_view_updates_due_to_replica_count_mismatch;
|
||||||
|
auto extra_replica = unpaired_view_dc_rack_replicas.begin()->second;
|
||||||
|
unpaired_view_dc_rack_replicas.erase(unpaired_view_dc_rack_replicas.begin());
|
||||||
|
if (unpaired_view_dc_rack_replicas.size() > 0) {
|
||||||
|
// We only expect one extra replica to appear due to an RF change. If there's more, that's an error,
|
||||||
|
// but we'll still perform updates to the paired and last replicas to minimize degradation.
|
||||||
|
vlogger.warn("There are too many view endpoints for base-view pairing. View updates may get lost on view_endpoints={}",
|
||||||
|
unpaired_view_dc_rack_replicas | std::views::values);
|
||||||
|
}
|
||||||
|
return extra_replica;
|
||||||
|
}
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
// Calculate the node ("natural endpoint") to which this node should send
|
// Calculate the node ("natural endpoint") to which this node should send
|
||||||
// a view update.
|
// a view update.
|
||||||
//
|
//
|
||||||
@@ -1756,29 +1865,19 @@ bool should_generate_view_updates_on_this_shard(const schema_ptr& base, const lo
|
|||||||
// of this function is to find, assuming that this node is one of the base
|
// of this function is to find, assuming that this node is one of the base
|
||||||
// replicas for a given partition, the paired view replica.
|
// replicas for a given partition, the paired view replica.
|
||||||
//
|
//
|
||||||
// In the past, we used an optimization called "self-pairing" that if a single
|
// When using vnodes, we have an optimization called "self-pairing" - if a single
|
||||||
// node was both a base replica and a view replica for a write, the pairing is
|
// node is both a base replica and a view replica for a write, the pairing is
|
||||||
// modified so that this node would send the update to itself. This self-
|
// modified so that this node sends the update to itself and this node is removed
|
||||||
// pairing optimization could cause the pairing to change after view ranges
|
// from the lists of nodes paired by index. This self-pairing optimization can
|
||||||
// are moved between nodes, so currently we only use it if
|
// cause the pairing to change after view ranges are moved between nodes.
|
||||||
// use_legacy_self_pairing is set to true. When using tablets - where range
|
|
||||||
// movements are common - it is strongly recommended to set it to false.
|
|
||||||
//
|
//
|
||||||
// If the keyspace's replication strategy is a NetworkTopologyStrategy,
|
// If the keyspace's replication strategy is a NetworkTopologyStrategy,
|
||||||
// we pair only nodes in the same datacenter.
|
// we pair only nodes in the same datacenter.
|
||||||
//
|
//
|
||||||
// When use_legacy_self_pairing is enabled, if one of the base replicas
|
// If the table uses tablets, then pairing is rack-aware. In this case, in each
|
||||||
// also happens to be a view replica, it is paired with itself
|
// rack where we have a base replica there is also one replica of each view tablet.
|
||||||
// (with the other nodes paired by order in the list
|
// Therefore, the base replicas are naturally paired with the view replicas that
|
||||||
// after taking this node out).
|
// are in the same rack.
|
||||||
//
|
|
||||||
// If the table uses tablets and the replication strategy is NetworkTopologyStrategy
|
|
||||||
// and the replication factor in the node's datacenter is a multiple of the number
|
|
||||||
// of racks in the datacenter, then pairing is rack-aware. In this case,
|
|
||||||
// all racks have the same number of replicas, and those are never migrated
|
|
||||||
// outside their racks. Therefore, the base replicas are naturally paired with the
|
|
||||||
// view replicas that are in the same rack, based on the ordinal position.
|
|
||||||
// Note that typically, there is a single replica per rack and pairing is trivial.
|
|
||||||
//
|
//
|
||||||
// If the assumption that the given base token belongs to this replica
|
// If the assumption that the given base token belongs to this replica
|
||||||
// does not hold, we return an empty optional.
|
// does not hold, we return an empty optional.
|
||||||
@@ -1806,19 +1905,12 @@ endpoints_to_update get_view_natural_endpoint(
|
|||||||
const locator::abstract_replication_strategy& replication_strategy,
|
const locator::abstract_replication_strategy& replication_strategy,
|
||||||
const dht::token& base_token,
|
const dht::token& base_token,
|
||||||
const dht::token& view_token,
|
const dht::token& view_token,
|
||||||
bool use_legacy_self_pairing,
|
bool use_tablets,
|
||||||
bool use_tablets_rack_aware_view_pairing,
|
|
||||||
replica::cf_stats& cf_stats) {
|
replica::cf_stats& cf_stats) {
|
||||||
auto& topology = base_erm->get_token_metadata_ptr()->get_topology();
|
auto& topology = base_erm->get_token_metadata_ptr()->get_topology();
|
||||||
auto& view_topology = view_erm->get_token_metadata_ptr()->get_topology();
|
auto& view_topology = view_erm->get_token_metadata_ptr()->get_topology();
|
||||||
auto& my_location = topology.get_location(me);
|
auto& my_location = topology.get_location(me);
|
||||||
auto& my_datacenter = my_location.dc;
|
|
||||||
auto* network_topology = dynamic_cast<const locator::network_topology_strategy*>(&replication_strategy);
|
auto* network_topology = dynamic_cast<const locator::network_topology_strategy*>(&replication_strategy);
|
||||||
auto rack_aware_pairing = use_tablets_rack_aware_view_pairing && network_topology;
|
|
||||||
bool simple_rack_aware_pairing = false;
|
|
||||||
using node_vector = std::vector<std::reference_wrapper<const locator::node>>;
|
|
||||||
node_vector orig_base_endpoints, orig_view_endpoints;
|
|
||||||
node_vector base_endpoints, view_endpoints;
|
|
||||||
|
|
||||||
auto resolve = [&] (const locator::topology& topology, const locator::host_id& ep, bool is_view) -> const locator::node& {
|
auto resolve = [&] (const locator::topology& topology, const locator::host_id& ep, bool is_view) -> const locator::node& {
|
||||||
if (auto* np = topology.find_node(ep)) {
|
if (auto* np = topology.find_node(ep)) {
|
||||||
@@ -1829,6 +1921,7 @@ endpoints_to_update get_view_natural_endpoint(
|
|||||||
|
|
||||||
// We need to use get_replicas() for pairing to be stable in case base or view tablet
|
// We need to use get_replicas() for pairing to be stable in case base or view tablet
|
||||||
// is rebuilding a replica which has left the ring. get_natural_endpoints() filters such replicas.
|
// is rebuilding a replica which has left the ring. get_natural_endpoints() filters such replicas.
|
||||||
|
using node_vector = std::vector<std::reference_wrapper<const locator::node>>;
|
||||||
auto base_nodes = base_erm->get_replicas(base_token) | std::views::transform([&] (const locator::host_id& ep) -> const locator::node& {
|
auto base_nodes = base_erm->get_replicas(base_token) | std::views::transform([&] (const locator::host_id& ep) -> const locator::node& {
|
||||||
return resolve(topology, ep, false);
|
return resolve(topology, ep, false);
|
||||||
}) | std::ranges::to<node_vector>();
|
}) | std::ranges::to<node_vector>();
|
||||||
@@ -1852,231 +1945,43 @@ endpoints_to_update get_view_natural_endpoint(
|
|||||||
// note that the recursive call will not recurse again because leaving_base is in base_nodes.
|
// note that the recursive call will not recurse again because leaving_base is in base_nodes.
|
||||||
auto leaving_base = it->get().host_id();
|
auto leaving_base = it->get().host_id();
|
||||||
return get_view_natural_endpoint(leaving_base, base_erm, view_erm, replication_strategy, base_token,
|
return get_view_natural_endpoint(leaving_base, base_erm, view_erm, replication_strategy, base_token,
|
||||||
view_token, use_legacy_self_pairing, use_tablets_rack_aware_view_pairing, cf_stats);
|
view_token, use_tablets, cf_stats);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::function<bool(const locator::node&)> is_candidate;
|
if (!use_tablets) {
|
||||||
if (network_topology) {
|
return get_view_natural_endpoint_vnodes(
|
||||||
is_candidate = [&] (const locator::node& node) { return node.dc() == my_datacenter; };
|
me,
|
||||||
} else {
|
base_nodes,
|
||||||
is_candidate = [&] (const locator::node&) { return true; };
|
view_nodes,
|
||||||
}
|
my_location,
|
||||||
auto process_candidate = [&] (node_vector& nodes, std::reference_wrapper<const locator::node> node) {
|
network_topology,
|
||||||
if (is_candidate(node)) {
|
cf_stats);
|
||||||
nodes.emplace_back(node);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
for (auto&& base_node : base_nodes) {
|
|
||||||
process_candidate(base_endpoints, base_node);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (use_legacy_self_pairing) {
|
std::optional<locator::host_id> paired_replica;
|
||||||
for (auto&& view_node : view_nodes) {
|
for (auto&& view_node : view_nodes) {
|
||||||
auto it = std::ranges::find(base_endpoints, view_node.get().host_id(), std::mem_fn(&locator::node::host_id));
|
if (view_node.get().dc_rack() == my_location) {
|
||||||
// If this base replica is also one of the view replicas, we use
|
paired_replica = view_node.get().host_id();
|
||||||
// ourselves as the view replica.
|
break;
|
||||||
// We don't return an extra endpoint, as it's only needed when
|
|
||||||
// using tablets (so !use_legacy_self_pairing)
|
|
||||||
if (view_node.get().host_id() == me && it != base_endpoints.end()) {
|
|
||||||
return {.natural_endpoint = me};
|
|
||||||
}
|
|
||||||
|
|
||||||
// We have to remove any endpoint which is shared between the base
|
|
||||||
// and the view, as it will select itself and throw off the counts
|
|
||||||
// otherwise.
|
|
||||||
if (it != base_endpoints.end()) {
|
|
||||||
base_endpoints.erase(it);
|
|
||||||
} else if (is_candidate(view_node)) {
|
|
||||||
view_endpoints.push_back(view_node);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for (auto&& view_node : view_nodes) {
|
|
||||||
process_candidate(view_endpoints, view_node);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (paired_replica && base_nodes.size() == view_nodes.size()) {
|
||||||
// Try optimizing for simple rack-aware pairing
|
// We don't need to find any extra replicas, so we can return early
|
||||||
// If the numbers of base and view replica differ, that means an RF change is taking place
|
return {.natural_endpoint = paired_replica};
|
||||||
// and we can't use simple rack-aware pairing.
|
|
||||||
if (rack_aware_pairing && base_endpoints.size() == view_endpoints.size()) {
|
|
||||||
auto dc_rf = network_topology->get_replication_factor(my_datacenter);
|
|
||||||
const auto& racks = topology.get_datacenter_rack_nodes().at(my_datacenter);
|
|
||||||
// Simple rack-aware pairing is possible when the datacenter replication factor
|
|
||||||
// is a multiple of the number of racks in the datacenter.
|
|
||||||
if (dc_rf % racks.size() == 0) {
|
|
||||||
simple_rack_aware_pairing = true;
|
|
||||||
size_t rack_rf = dc_rf / racks.size();
|
|
||||||
// If any rack doesn't have enough nodes to satisfy the per-rack rf
|
|
||||||
// simple rack-aware pairing is disabled.
|
|
||||||
for (const auto& [rack, nodes] : racks) {
|
|
||||||
if (nodes.size() < rack_rf) {
|
|
||||||
simple_rack_aware_pairing = false;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (dc_rf != base_endpoints.size()) {
|
|
||||||
// If the datacenter replication factor is not equal to the number of base replicas,
|
|
||||||
// we're in progress of a RF change and we can't use simple rack-aware pairing.
|
|
||||||
simple_rack_aware_pairing = false;
|
|
||||||
}
|
|
||||||
if (simple_rack_aware_pairing) {
|
|
||||||
std::erase_if(base_endpoints, [&] (const locator::node& node) { return node.dc_rack() != my_location; });
|
|
||||||
std::erase_if(view_endpoints, [&] (const locator::node& node) { return node.dc_rack() != my_location; });
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
orig_base_endpoints = base_endpoints;
|
|
||||||
orig_view_endpoints = view_endpoints;
|
|
||||||
|
|
||||||
// For the complex rack_aware_pairing case, nodes are already filtered by datacenter
|
|
||||||
// Use best-match, for the minimum number of base and view replicas in each rack,
|
|
||||||
// and ordinal match for the rest.
|
|
||||||
std::optional<std::reference_wrapper<const locator::node>> paired_replica;
|
|
||||||
if (rack_aware_pairing && !simple_rack_aware_pairing) {
|
|
||||||
struct indexed_replica {
|
|
||||||
size_t idx;
|
|
||||||
std::reference_wrapper<const locator::node> node;
|
|
||||||
};
|
|
||||||
std::unordered_map<sstring, std::vector<indexed_replica>> base_racks, view_racks;
|
|
||||||
|
|
||||||
// First, index all replicas by rack
|
|
||||||
auto index_replica_set = [] (std::unordered_map<sstring, std::vector<indexed_replica>>& racks, const node_vector& replicas) {
|
|
||||||
size_t idx = 0;
|
|
||||||
for (const auto& r: replicas) {
|
|
||||||
racks[r.get().rack()].emplace_back(idx++, r);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
index_replica_set(base_racks, base_endpoints);
|
|
||||||
index_replica_set(view_racks, view_endpoints);
|
|
||||||
|
|
||||||
// Try optimistically pairing `me` first
|
|
||||||
const auto& my_base_replicas = base_racks[my_location.rack];
|
|
||||||
auto base_it = std::ranges::find(my_base_replicas, me, [] (const indexed_replica& ir) { return ir.node.get().host_id(); });
|
|
||||||
if (base_it == my_base_replicas.end()) {
|
|
||||||
return {};
|
|
||||||
}
|
|
||||||
const auto& my_view_replicas = view_racks[my_location.rack];
|
|
||||||
size_t idx = base_it - my_base_replicas.begin();
|
|
||||||
if (idx < my_view_replicas.size()) {
|
|
||||||
if (orig_view_endpoints.size() <= orig_base_endpoints.size()) {
|
|
||||||
return {.natural_endpoint = my_view_replicas[idx].node.get().host_id()};
|
|
||||||
} else {
|
|
||||||
// If the number of view replicas is larger than the number of base replicas,
|
|
||||||
// we need to find the unpaired view replica, so we can't return yet.
|
|
||||||
paired_replica = my_view_replicas[idx].node;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Collect all unpaired base and view replicas,
|
|
||||||
// where the number of replicas in the base rack is different than the respective view rack
|
|
||||||
std::vector<indexed_replica> unpaired_base_replicas, unpaired_view_replicas;
|
|
||||||
for (const auto& [rack, base_replicas] : base_racks) {
|
|
||||||
const auto& view_replicas = view_racks[rack];
|
|
||||||
for (auto i = view_replicas.size(); i < base_replicas.size(); ++i) {
|
|
||||||
unpaired_base_replicas.emplace_back(base_replicas[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (const auto& [rack, view_replicas] : view_racks) {
|
|
||||||
const auto& base_replicas = base_racks[rack];
|
|
||||||
for (auto i = base_replicas.size(); i < view_replicas.size(); ++i) {
|
|
||||||
unpaired_view_replicas.emplace_back(view_replicas[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Sort by the original ordinality, and copy the sorted results
|
|
||||||
// back into {base,view}_endpoints, for backward compatible processing below.
|
|
||||||
std::ranges::sort(unpaired_base_replicas, std::less(), std::mem_fn(&indexed_replica::idx));
|
|
||||||
base_endpoints.clear();
|
|
||||||
std::ranges::transform(unpaired_base_replicas, std::back_inserter(base_endpoints), std::mem_fn(&indexed_replica::node));
|
|
||||||
|
|
||||||
std::ranges::sort(unpaired_view_replicas, std::less(), std::mem_fn(&indexed_replica::idx));
|
|
||||||
view_endpoints.clear();
|
|
||||||
std::ranges::transform(unpaired_view_replicas, std::back_inserter(view_endpoints), std::mem_fn(&indexed_replica::node));
|
|
||||||
}
|
|
||||||
|
|
||||||
auto base_it = std::ranges::find(base_endpoints, me, std::mem_fn(&locator::node::host_id));
|
|
||||||
if (!paired_replica && base_it == base_endpoints.end()) {
|
|
||||||
// This node is not a base replica of this key, so we return empty
|
|
||||||
// FIXME: This case shouldn't happen, and if it happens, a view update
|
|
||||||
// would be lost.
|
|
||||||
++cf_stats.total_view_updates_on_wrong_node;
|
|
||||||
vlogger.warn("Could not find {} in base_endpoints={}", me,
|
|
||||||
orig_base_endpoints | std::views::transform(std::mem_fn(&locator::node::host_id)));
|
|
||||||
return {};
|
|
||||||
}
|
|
||||||
size_t idx = base_it - base_endpoints.begin();
|
|
||||||
std::optional<std::reference_wrapper<const locator::node>> no_pairing_replica;
|
|
||||||
if (!paired_replica && idx >= view_endpoints.size()) {
|
|
||||||
// There are fewer view replicas than base replicas
|
|
||||||
// FIXME: This might still happen when reducing replication factor with tablets,
|
|
||||||
// see https://github.com/scylladb/scylladb/issues/21492
|
|
||||||
++cf_stats.total_view_updates_failed_pairing;
|
|
||||||
vlogger.warn("Could not pair {}: rack_aware={} base_endpoints={} view_endpoints={}", me,
|
|
||||||
rack_aware_pairing ? (simple_rack_aware_pairing ? "simple" : "complex") : "none",
|
|
||||||
orig_base_endpoints | std::views::transform(std::mem_fn(&locator::node::host_id)),
|
|
||||||
orig_view_endpoints | std::views::transform(std::mem_fn(&locator::node::host_id)));
|
|
||||||
return {};
|
|
||||||
} else if (base_endpoints.size() < view_endpoints.size()) {
|
|
||||||
// There are fewer base replicas than view replicas.
|
|
||||||
// This can happen as a result of an RF change when the view replica finishes streaming
|
|
||||||
// before the base replica.
|
|
||||||
// Because of this, a view replica might not get paired with any base replica, so we need
|
|
||||||
// to send an additional update to it.
|
|
||||||
++cf_stats.total_view_updates_due_to_replica_count_mismatch;
|
|
||||||
no_pairing_replica = view_endpoints.back();
|
|
||||||
if (base_endpoints.size() < view_endpoints.size() - 1) {
|
|
||||||
// We only expect one extra replica to appear due to an RF change. If there's more, that's an error,
|
|
||||||
// but we'll still perform updates to the paired and last replicas to minimize degradation.
|
|
||||||
vlogger.warn("There are too many view endpoints for base-view pairing. View updates may get lost on view_endpoints={}",
|
|
||||||
std::span(view_endpoints.begin() + base_endpoints.size(), view_endpoints.end() - 1) | std::views::transform(std::mem_fn(&locator::node::host_id)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!paired_replica) {
|
if (!paired_replica) {
|
||||||
paired_replica = view_endpoints[idx];
|
// We couldn't find any view replica in our rack
|
||||||
|
++cf_stats.total_view_updates_failed_pairing;
|
||||||
|
vlogger.warn("Could not find a view replica in the same rack as base replica {} for base_endpoints={} view_endpoints={}",
|
||||||
|
me,
|
||||||
|
base_nodes | std::views::transform(std::mem_fn(&locator::node::host_id)),
|
||||||
|
view_nodes | std::views::transform(std::mem_fn(&locator::node::host_id)));
|
||||||
}
|
}
|
||||||
if (!no_pairing_replica && base_nodes.size() < view_nodes.size()) {
|
std::optional<locator::host_id> no_pairing_replica = get_unpaired_view_endpoint(base_nodes, view_nodes, cf_stats);
|
||||||
// This can happen when the view replica with no pairing is in another DC.
|
return {.natural_endpoint = paired_replica,
|
||||||
// We need to send an update to it if there are no base replicas in that DC yet,
|
.endpoint_with_no_pairing = no_pairing_replica};
|
||||||
// as it won't receive updates otherwise.
|
|
||||||
std::unordered_set<sstring> dcs_with_base_replicas;
|
|
||||||
for (const auto& base_node : base_nodes) {
|
|
||||||
dcs_with_base_replicas.insert(base_node.get().dc());
|
|
||||||
}
|
|
||||||
for (const auto& view_node : view_nodes) {
|
|
||||||
if (!dcs_with_base_replicas.contains(view_node.get().dc())) {
|
|
||||||
++cf_stats.total_view_updates_due_to_replica_count_mismatch;
|
|
||||||
no_pairing_replica = view_node;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// https://github.com/scylladb/scylladb/issues/19439
|
|
||||||
// With tablets, a node being replaced might transition to "left" state
|
|
||||||
// but still be kept as a replica.
|
|
||||||
// As of writing this hints are not prepared to handle nodes that are left
|
|
||||||
// but are still replicas. Therefore, there is no other sensible option
|
|
||||||
// right now but to give up attempt to send the update or write a hint
|
|
||||||
// to the paired, permanently down replica.
|
|
||||||
// We use the same workaround for the extra replica.
|
|
||||||
auto return_host_id_if_not_left = [] (const auto& replica) -> std::optional<locator::host_id> {
|
|
||||||
if (!replica) {
|
|
||||||
return std::nullopt;
|
|
||||||
}
|
|
||||||
const auto& node = replica->get();
|
|
||||||
if (!node.left()) {
|
|
||||||
return node.host_id();
|
|
||||||
} else {
|
|
||||||
return std::nullopt;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
return {.natural_endpoint = return_host_id_if_not_left(paired_replica),
|
|
||||||
.endpoint_with_no_pairing = return_host_id_if_not_left(no_pairing_replica)};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static future<> apply_to_remote_endpoints(service::storage_proxy& proxy, locator::effective_replication_map_ptr ermp,
|
static future<> apply_to_remote_endpoints(service::storage_proxy& proxy, locator::effective_replication_map_ptr ermp,
|
||||||
@@ -2136,12 +2041,6 @@ future<> view_update_generator::mutate_MV(
|
|||||||
{
|
{
|
||||||
auto& ks = _db.find_keyspace(base->ks_name());
|
auto& ks = _db.find_keyspace(base->ks_name());
|
||||||
auto& replication = ks.get_replication_strategy();
|
auto& replication = ks.get_replication_strategy();
|
||||||
// We set legacy self-pairing for old vnode-based tables (for backward
|
|
||||||
// compatibility), and unset it for tablets - where range movements
|
|
||||||
// are more frequent and backward compatibility is less important.
|
|
||||||
// TODO: Maybe allow users to set use_legacy_self_pairing explicitly
|
|
||||||
// on a view, like we have the synchronous_updates_flag.
|
|
||||||
bool use_legacy_self_pairing = !ks.uses_tablets();
|
|
||||||
std::unordered_map<table_id, locator::effective_replication_map_ptr> erms;
|
std::unordered_map<table_id, locator::effective_replication_map_ptr> erms;
|
||||||
auto get_erm = [&] (table_id id) {
|
auto get_erm = [&] (table_id id) {
|
||||||
auto it = erms.find(id);
|
auto it = erms.find(id);
|
||||||
@@ -2154,10 +2053,6 @@ future<> view_update_generator::mutate_MV(
|
|||||||
for (const auto& mut : view_updates) {
|
for (const auto& mut : view_updates) {
|
||||||
(void)get_erm(mut.s->id());
|
(void)get_erm(mut.s->id());
|
||||||
}
|
}
|
||||||
// Enable rack-aware view updates pairing for tablets
|
|
||||||
// when the cluster feature is enabled so that all replicas agree
|
|
||||||
// on the pairing algorithm.
|
|
||||||
bool use_tablets_rack_aware_view_pairing = _db.features().tablet_rack_aware_view_pairing && ks.uses_tablets();
|
|
||||||
auto me = base_ermp->get_topology().my_host_id();
|
auto me = base_ermp->get_topology().my_host_id();
|
||||||
static constexpr size_t max_concurrent_updates = 128;
|
static constexpr size_t max_concurrent_updates = 128;
|
||||||
co_await utils::get_local_injector().inject("delay_before_get_view_natural_endpoint", 8000ms);
|
co_await utils::get_local_injector().inject("delay_before_get_view_natural_endpoint", 8000ms);
|
||||||
@@ -2165,7 +2060,7 @@ future<> view_update_generator::mutate_MV(
|
|||||||
auto view_token = dht::get_token(*mut.s, mut.fm.key());
|
auto view_token = dht::get_token(*mut.s, mut.fm.key());
|
||||||
auto view_ermp = erms.at(mut.s->id());
|
auto view_ermp = erms.at(mut.s->id());
|
||||||
auto [target_endpoint, no_pairing_endpoint] = get_view_natural_endpoint(me, base_ermp, view_ermp, replication, base_token, view_token,
|
auto [target_endpoint, no_pairing_endpoint] = get_view_natural_endpoint(me, base_ermp, view_ermp, replication, base_token, view_token,
|
||||||
use_legacy_self_pairing, use_tablets_rack_aware_view_pairing, cf_stats);
|
ks.uses_tablets(), cf_stats);
|
||||||
auto remote_endpoints = view_ermp->get_pending_replicas(view_token);
|
auto remote_endpoints = view_ermp->get_pending_replicas(view_token);
|
||||||
auto memory_units = seastar::make_lw_shared<db::timeout_semaphore_units>(pending_view_update_memory_units.split(memory_usage_of(mut)));
|
auto memory_units = seastar::make_lw_shared<db::timeout_semaphore_units>(pending_view_update_memory_units.split(memory_usage_of(mut)));
|
||||||
if (no_pairing_endpoint) {
|
if (no_pairing_endpoint) {
|
||||||
|
|||||||
@@ -305,8 +305,7 @@ endpoints_to_update get_view_natural_endpoint(
|
|||||||
const locator::abstract_replication_strategy& replication_strategy,
|
const locator::abstract_replication_strategy& replication_strategy,
|
||||||
const dht::token& base_token,
|
const dht::token& base_token,
|
||||||
const dht::token& view_token,
|
const dht::token& view_token,
|
||||||
bool use_legacy_self_pairing,
|
bool use_tablets,
|
||||||
bool use_tablets_basic_rack_aware_view_pairing,
|
|
||||||
replica::cf_stats& cf_stats);
|
replica::cf_stats& cf_stats);
|
||||||
|
|
||||||
/// Verify that the provided keyspace is eligible for storing materialized views.
|
/// Verify that the provided keyspace is eligible for storing materialized views.
|
||||||
|
|||||||
@@ -198,6 +198,7 @@ future<> view_building_worker::register_staging_sstable_tasks(std::vector<sstabl
|
|||||||
|
|
||||||
future<> view_building_worker::run_staging_sstables_registrator() {
|
future<> view_building_worker::run_staging_sstables_registrator() {
|
||||||
while (!_as.abort_requested()) {
|
while (!_as.abort_requested()) {
|
||||||
|
bool sleep = false;
|
||||||
try {
|
try {
|
||||||
auto lock = co_await get_units(_staging_sstables_mutex, 1, _as);
|
auto lock = co_await get_units(_staging_sstables_mutex, 1, _as);
|
||||||
co_await create_staging_sstable_tasks();
|
co_await create_staging_sstable_tasks();
|
||||||
@@ -214,6 +215,14 @@ future<> view_building_worker::run_staging_sstables_registrator() {
|
|||||||
vbw_logger.warn("Got group0_concurrent_modification while creating staging sstable tasks");
|
vbw_logger.warn("Got group0_concurrent_modification while creating staging sstable tasks");
|
||||||
} catch (raft::request_aborted&) {
|
} catch (raft::request_aborted&) {
|
||||||
vbw_logger.warn("Got raft::request_aborted while creating staging sstable tasks");
|
vbw_logger.warn("Got raft::request_aborted while creating staging sstable tasks");
|
||||||
|
} catch (...) {
|
||||||
|
vbw_logger.error("Exception while creating staging sstable tasks: {}", std::current_exception());
|
||||||
|
sleep = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sleep) {
|
||||||
|
vbw_logger.debug("Sleeping after exception.");
|
||||||
|
co_await seastar::sleep_abortable(1s, _as).handle_exception([] (auto x) { return make_ready_future<>(); });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -417,9 +426,12 @@ future<> view_building_worker::check_for_aborted_tasks() {
|
|||||||
|
|
||||||
auto my_host_id = vbw._db.get_token_metadata().get_topology().my_host_id();
|
auto my_host_id = vbw._db.get_token_metadata().get_topology().my_host_id();
|
||||||
auto my_replica = locator::tablet_replica{my_host_id, this_shard_id()};
|
auto my_replica = locator::tablet_replica{my_host_id, this_shard_id()};
|
||||||
auto tasks_map = vbw._state._batch->tasks; // Potentially, we'll remove elements from the map, so we need a copy to iterate over it
|
auto it = vbw._state._batch->tasks.begin();
|
||||||
for (auto& [id, t]: tasks_map) {
|
while (it != vbw._state._batch->tasks.end()) {
|
||||||
auto task_opt = building_state.get_task(t.base_id, my_replica, id);
|
auto id = it->first;
|
||||||
|
auto task_opt = building_state.get_task(it->second.base_id, my_replica, id);
|
||||||
|
|
||||||
|
++it; // Advance the iterator before potentially removing the entry from the map.
|
||||||
if (!task_opt || task_opt->get().aborted) {
|
if (!task_opt || task_opt->get().aborted) {
|
||||||
co_await vbw._state._batch->abort_task(id);
|
co_await vbw._state._batch->abort_task(id);
|
||||||
}
|
}
|
||||||
@@ -449,7 +461,7 @@ static std::unordered_set<table_id> get_ids_of_all_views(replica::database& db,
|
|||||||
}) | std::ranges::to<std::unordered_set>();;
|
}) | std::ranges::to<std::unordered_set>();;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If `state::processing_base_table` is diffrent that the `view_building_state::currently_processed_base_table`,
|
// If `state::processing_base_table` is different that the `view_building_state::currently_processed_base_table`,
|
||||||
// clear the state, save and flush new base table
|
// clear the state, save and flush new base table
|
||||||
future<> view_building_worker::state::update_processing_base_table(replica::database& db, const view_building_state& building_state, abort_source& as) {
|
future<> view_building_worker::state::update_processing_base_table(replica::database& db, const view_building_state& building_state, abort_source& as) {
|
||||||
if (processing_base_table != building_state.currently_processed_base_table) {
|
if (processing_base_table != building_state.currently_processed_base_table) {
|
||||||
@@ -571,8 +583,6 @@ future<> view_building_worker::batch::do_work() {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
_vbw.local()._vb_state_machine.event.broadcast();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
future<> view_building_worker::do_build_range(table_id base_id, std::vector<table_id> views_ids, dht::token last_token, abort_source& as) {
|
future<> view_building_worker::do_build_range(table_id base_id, std::vector<table_id> views_ids, dht::token last_token, abort_source& as) {
|
||||||
@@ -774,13 +784,15 @@ future<std::vector<utils::UUID>> view_building_worker::work_on_tasks(raft::term_
|
|||||||
tasks.insert({id, *task_opt});
|
tasks.insert({id, *task_opt});
|
||||||
}
|
}
|
||||||
#ifdef SEASTAR_DEBUG
|
#ifdef SEASTAR_DEBUG
|
||||||
auto& some_task = tasks.begin()->second;
|
{
|
||||||
for (auto& [_, t]: tasks) {
|
auto& some_task = tasks.begin()->second;
|
||||||
SCYLLA_ASSERT(t.base_id == some_task.base_id);
|
for (auto& [_, t]: tasks) {
|
||||||
SCYLLA_ASSERT(t.last_token == some_task.last_token);
|
SCYLLA_ASSERT(t.base_id == some_task.base_id);
|
||||||
SCYLLA_ASSERT(t.replica == some_task.replica);
|
SCYLLA_ASSERT(t.last_token == some_task.last_token);
|
||||||
SCYLLA_ASSERT(t.type == some_task.type);
|
SCYLLA_ASSERT(t.replica == some_task.replica);
|
||||||
SCYLLA_ASSERT(t.replica.shard == this_shard_id());
|
SCYLLA_ASSERT(t.type == some_task.type);
|
||||||
|
SCYLLA_ASSERT(t.replica.shard == this_shard_id());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -811,25 +823,6 @@ future<std::vector<utils::UUID>> view_building_worker::work_on_tasks(raft::term_
|
|||||||
co_return collect_completed_tasks();
|
co_return collect_completed_tasks();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -749,6 +749,7 @@ class clients_table : public streaming_virtual_table {
|
|||||||
.with_column("ssl_protocol", utf8_type)
|
.with_column("ssl_protocol", utf8_type)
|
||||||
.with_column("username", utf8_type)
|
.with_column("username", utf8_type)
|
||||||
.with_column("scheduling_group", utf8_type)
|
.with_column("scheduling_group", utf8_type)
|
||||||
|
.with_column("client_options", map_type_impl::get_instance(utf8_type, utf8_type, false))
|
||||||
.with_hash_version()
|
.with_hash_version()
|
||||||
.build();
|
.build();
|
||||||
}
|
}
|
||||||
@@ -766,7 +767,7 @@ class clients_table : public streaming_virtual_table {
|
|||||||
|
|
||||||
future<> execute(reader_permit permit, result_collector& result, const query_restrictions& qr) override {
|
future<> execute(reader_permit permit, result_collector& result, const query_restrictions& qr) override {
|
||||||
// Collect
|
// Collect
|
||||||
using client_data_vec = utils::chunked_vector<client_data>;
|
using client_data_vec = utils::chunked_vector<foreign_ptr<std::unique_ptr<client_data>>>;
|
||||||
using shard_client_data = std::vector<client_data_vec>;
|
using shard_client_data = std::vector<client_data_vec>;
|
||||||
std::vector<foreign_ptr<std::unique_ptr<shard_client_data>>> cd_vec;
|
std::vector<foreign_ptr<std::unique_ptr<shard_client_data>>> cd_vec;
|
||||||
cd_vec.resize(smp::count);
|
cd_vec.resize(smp::count);
|
||||||
@@ -806,13 +807,13 @@ class clients_table : public streaming_virtual_table {
|
|||||||
for (unsigned i = 0; i < smp::count; i++) {
|
for (unsigned i = 0; i < smp::count; i++) {
|
||||||
for (auto&& ps_cdc : *cd_vec[i]) {
|
for (auto&& ps_cdc : *cd_vec[i]) {
|
||||||
for (auto&& cd : ps_cdc) {
|
for (auto&& cd : ps_cdc) {
|
||||||
if (cd_map.contains(cd.ip)) {
|
if (cd_map.contains(cd->ip)) {
|
||||||
cd_map[cd.ip].emplace_back(std::move(cd));
|
cd_map[cd->ip].emplace_back(std::move(cd));
|
||||||
} else {
|
} else {
|
||||||
dht::decorated_key key = make_partition_key(cd.ip);
|
dht::decorated_key key = make_partition_key(cd->ip);
|
||||||
if (this_shard_owns(key) && contains_key(qr.partition_range(), key)) {
|
if (this_shard_owns(key) && contains_key(qr.partition_range(), key)) {
|
||||||
ips.insert(decorated_ip{std::move(key), cd.ip});
|
ips.insert(decorated_ip{std::move(key), cd->ip});
|
||||||
cd_map[cd.ip].emplace_back(std::move(cd));
|
cd_map[cd->ip].emplace_back(std::move(cd));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
co_await coroutine::maybe_yield();
|
co_await coroutine::maybe_yield();
|
||||||
@@ -825,39 +826,58 @@ class clients_table : public streaming_virtual_table {
|
|||||||
co_await result.emit_partition_start(dip.key);
|
co_await result.emit_partition_start(dip.key);
|
||||||
auto& clients = cd_map[dip.ip];
|
auto& clients = cd_map[dip.ip];
|
||||||
|
|
||||||
std::ranges::sort(clients, [] (const client_data& a, const client_data& b) {
|
std::ranges::sort(clients, [] (const foreign_ptr<std::unique_ptr<client_data>>& a, const foreign_ptr<std::unique_ptr<client_data>>& b) {
|
||||||
return a.port < b.port || a.client_type_str() < b.client_type_str();
|
return a->port < b->port || a->client_type_str() < b->client_type_str();
|
||||||
});
|
});
|
||||||
|
|
||||||
for (const auto& cd : clients) {
|
for (const auto& cd : clients) {
|
||||||
clustering_row cr(make_clustering_key(cd.port, cd.client_type_str()));
|
clustering_row cr(make_clustering_key(cd->port, cd->client_type_str()));
|
||||||
set_cell(cr.cells(), "shard_id", cd.shard_id);
|
set_cell(cr.cells(), "shard_id", cd->shard_id);
|
||||||
set_cell(cr.cells(), "connection_stage", cd.stage_str());
|
set_cell(cr.cells(), "connection_stage", cd->stage_str());
|
||||||
if (cd.driver_name) {
|
if (cd->driver_name) {
|
||||||
set_cell(cr.cells(), "driver_name", *cd.driver_name);
|
set_cell(cr.cells(), "driver_name", cd->driver_name->key());
|
||||||
}
|
}
|
||||||
if (cd.driver_version) {
|
if (cd->driver_version) {
|
||||||
set_cell(cr.cells(), "driver_version", *cd.driver_version);
|
set_cell(cr.cells(), "driver_version", cd->driver_version->key());
|
||||||
}
|
}
|
||||||
if (cd.hostname) {
|
if (cd->hostname) {
|
||||||
set_cell(cr.cells(), "hostname", *cd.hostname);
|
set_cell(cr.cells(), "hostname", *cd->hostname);
|
||||||
}
|
}
|
||||||
if (cd.protocol_version) {
|
if (cd->protocol_version) {
|
||||||
set_cell(cr.cells(), "protocol_version", *cd.protocol_version);
|
set_cell(cr.cells(), "protocol_version", *cd->protocol_version);
|
||||||
}
|
}
|
||||||
if (cd.ssl_cipher_suite) {
|
if (cd->ssl_cipher_suite) {
|
||||||
set_cell(cr.cells(), "ssl_cipher_suite", *cd.ssl_cipher_suite);
|
set_cell(cr.cells(), "ssl_cipher_suite", *cd->ssl_cipher_suite);
|
||||||
}
|
}
|
||||||
if (cd.ssl_enabled) {
|
if (cd->ssl_enabled) {
|
||||||
set_cell(cr.cells(), "ssl_enabled", *cd.ssl_enabled);
|
set_cell(cr.cells(), "ssl_enabled", *cd->ssl_enabled);
|
||||||
}
|
}
|
||||||
if (cd.ssl_protocol) {
|
if (cd->ssl_protocol) {
|
||||||
set_cell(cr.cells(), "ssl_protocol", *cd.ssl_protocol);
|
set_cell(cr.cells(), "ssl_protocol", *cd->ssl_protocol);
|
||||||
}
|
}
|
||||||
set_cell(cr.cells(), "username", cd.username ? *cd.username : sstring("anonymous"));
|
set_cell(cr.cells(), "username", cd->username ? *cd->username : sstring("anonymous"));
|
||||||
if (cd.scheduling_group_name) {
|
if (cd->scheduling_group_name) {
|
||||||
set_cell(cr.cells(), "scheduling_group", *cd.scheduling_group_name);
|
set_cell(cr.cells(), "scheduling_group", *cd->scheduling_group_name);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
auto map_type = map_type_impl::get_instance(
|
||||||
|
utf8_type,
|
||||||
|
utf8_type,
|
||||||
|
false
|
||||||
|
);
|
||||||
|
|
||||||
|
auto prepare_client_options = [] (const auto& client_options) {
|
||||||
|
map_type_impl::native_type tmp;
|
||||||
|
for (auto& co: client_options) {
|
||||||
|
auto map_element = std::make_pair(data_value(co.key.key()), data_value(co.value.key()));
|
||||||
|
tmp.push_back(std::move(map_element));
|
||||||
|
}
|
||||||
|
return tmp;
|
||||||
|
};
|
||||||
|
|
||||||
|
set_cell(cr.cells(), "client_options",
|
||||||
|
make_map_value(map_type, prepare_client_options(cd->client_options)));
|
||||||
|
|
||||||
co_await result.emit_row(std::move(cr));
|
co_await result.emit_row(std::move(cr));
|
||||||
}
|
}
|
||||||
co_await result.emit_partition_end();
|
co_await result.emit_partition_end();
|
||||||
|
|||||||
2
dist/common/sysconfig/scylla-node-exporter
vendored
2
dist/common/sysconfig/scylla-node-exporter
vendored
@@ -1 +1 @@
|
|||||||
SCYLLA_NODE_EXPORTER_ARGS="--collector.interrupts --no-collector.hwmon --no-collector.bcache --no-collector.btrfs --no-collector.fibrechannel --no-collector.infiniband --no-collector.ipvs --no-collector.nfs --no-collector.nfsd --no-collector.powersupplyclass --no-collector.rapl --no-collector.tapestats --no-collector.thermal_zone --no-collector.udp_queues --no-collector.zfs"
|
SCYLLA_NODE_EXPORTER_ARGS="--collector.interrupts --collector.ethtool.metrics-include='(bw_in_allowance_exceeded|bw_out_allowance_exceeded|conntrack_allowance_exceeded|conntrack_allowance_available|linklocal_allowance_exceeded)' --collector.ethtool --no-collector.hwmon --no-collector.bcache --no-collector.btrfs --no-collector.fibrechannel --no-collector.infiniband --no-collector.ipvs --no-collector.nfs --no-collector.nfsd --no-collector.powersupplyclass --no-collector.rapl --no-collector.tapestats --no-collector.thermal_zone --no-collector.udp_queues --no-collector.zfs"
|
||||||
|
|||||||
@@ -71,7 +71,7 @@ Use "Bash on Ubuntu on Windows" for the same tools and capabilities as on Linux
|
|||||||
|
|
||||||
### Building the Docs
|
### Building the Docs
|
||||||
|
|
||||||
1. Run `make preview` to build the documentation.
|
1. Run `make preview` in the `docs/` directory to build the documentation.
|
||||||
1. Preview the built documentation locally at http://127.0.0.1:5500/.
|
1. Preview the built documentation locally at http://127.0.0.1:5500/.
|
||||||
|
|
||||||
### Cleanup
|
### Cleanup
|
||||||
|
|||||||
@@ -41,6 +41,8 @@ class MetricsProcessor:
|
|||||||
# Get metrics from the file
|
# Get metrics from the file
|
||||||
try:
|
try:
|
||||||
metrics_file = metrics.get_metrics_from_file(relative_path, "scylla_", metrics_info, strict=strict)
|
metrics_file = metrics.get_metrics_from_file(relative_path, "scylla_", metrics_info, strict=strict)
|
||||||
|
except SystemExit:
|
||||||
|
pass
|
||||||
finally:
|
finally:
|
||||||
os.chdir(old_cwd)
|
os.chdir(old_cwd)
|
||||||
if metrics_file:
|
if metrics_file:
|
||||||
|
|||||||
@@ -1,17 +1,17 @@
|
|||||||
# Alternator: DynamoDB API in Scylla
|
# Alternator: DynamoDB API in ScyllaDB
|
||||||
|
|
||||||
## Introduction
|
## Introduction
|
||||||
Alternator is a Scylla feature adding compatibility with Amazon DynamoDB(TM).
|
Alternator is a ScyllaDB feature adding compatibility with Amazon DynamoDB(TM).
|
||||||
DynamoDB's API uses JSON-encoded requests and responses which are sent over
|
DynamoDB's API uses JSON-encoded requests and responses which are sent over
|
||||||
an HTTP or HTTPS transport. It is described in detail in Amazon's [DynamoDB
|
an HTTP or HTTPS transport. It is described in detail in Amazon's [DynamoDB
|
||||||
API Reference](https://docs.aws.amazon.com/amazondynamodb/latest/APIReference/).
|
API Reference](https://docs.aws.amazon.com/amazondynamodb/latest/APIReference/).
|
||||||
|
|
||||||
Our goal is that any application written to use Amazon DynamoDB could
|
Our goal is that any application written to use Amazon DynamoDB could
|
||||||
be run, unmodified, against Scylla with Alternator enabled. Alternator's
|
be run, unmodified, against ScyllaDB with Alternator enabled. Alternator's
|
||||||
compatibility with DynamoDB is fairly complete, but users should be aware
|
compatibility with DynamoDB is fairly complete, but users should be aware
|
||||||
of some differences and some unimplemented features. The extent of
|
of some differences and some unimplemented features. The extent of
|
||||||
Alternator's compatibility with DynamoDB is described in the
|
Alternator's compatibility with DynamoDB is described in the
|
||||||
[Scylla Alternator for DynamoDB users](compatibility.md) document,
|
[ScyllaDB Alternator for DynamoDB users](compatibility.md) document,
|
||||||
which is updated as the work on Alternator progresses and compatibility
|
which is updated as the work on Alternator progresses and compatibility
|
||||||
continues to improve.
|
continues to improve.
|
||||||
|
|
||||||
@@ -19,8 +19,8 @@ Alternator also adds several features and APIs that are not available in
|
|||||||
DynamoDB. These are described in [Alternator-specific APIs](new-apis.md).
|
DynamoDB. These are described in [Alternator-specific APIs](new-apis.md).
|
||||||
|
|
||||||
## Running Alternator
|
## Running Alternator
|
||||||
By default, Scylla does not listen for DynamoDB API requests. To enable
|
By default, ScyllaDB does not listen for DynamoDB API requests. To enable
|
||||||
this API in Scylla you must set at least two configuration options,
|
this API in ScyllaDB you must set at least two configuration options,
|
||||||
**alternator_port** and **alternator_write_isolation**. For example in the
|
**alternator_port** and **alternator_write_isolation**. For example in the
|
||||||
YAML configuration file:
|
YAML configuration file:
|
||||||
```yaml
|
```yaml
|
||||||
@@ -30,7 +30,7 @@ alternator_write_isolation: only_rmw_uses_lwt # or always, forbid or unsafe
|
|||||||
or, equivalently, via command-line arguments: `--alternator-port=8000
|
or, equivalently, via command-line arguments: `--alternator-port=8000
|
||||||
--alternator-write-isolation=only_rmw_uses_lwt.
|
--alternator-write-isolation=only_rmw_uses_lwt.
|
||||||
|
|
||||||
the **alternator_port** option determines on which port Scylla listens for
|
the **alternator_port** option determines on which port ScyllaDB listens for
|
||||||
DynamoDB API requests. By default, it listens on this port on all network
|
DynamoDB API requests. By default, it listens on this port on all network
|
||||||
interfaces. To listen only on a specific interface, configure also the
|
interfaces. To listen only on a specific interface, configure also the
|
||||||
**alternator_address** option.
|
**alternator_address** option.
|
||||||
@@ -41,12 +41,12 @@ Alternator has four different choices
|
|||||||
for the implementation of writes, each with different advantages. You should
|
for the implementation of writes, each with different advantages. You should
|
||||||
carefully consider which of the options makes more sense for your intended
|
carefully consider which of the options makes more sense for your intended
|
||||||
use case and configure alternator_write_isolation accordingly. There is
|
use case and configure alternator_write_isolation accordingly. There is
|
||||||
currently no default for this option: Trying to run Scylla with an Alternator
|
currently no default for this option: Trying to run ScyllaDB with an Alternator
|
||||||
port selected but without configuring write isolation will result in an error message,
|
port selected but without configuring write isolation will result in an error message,
|
||||||
asking you to set it.
|
asking you to set it.
|
||||||
|
|
||||||
In addition to (or instead of) serving HTTP requests on alternator_port,
|
In addition to (or instead of) serving HTTP requests on alternator_port,
|
||||||
Scylla can accept DynamoDB API requests over HTTPS (encrypted), on the port
|
ScyllaDB can accept DynamoDB API requests over HTTPS (encrypted), on the port
|
||||||
specified by **alternator_https_port**. As usual for HTTPS servers, the
|
specified by **alternator_https_port**. As usual for HTTPS servers, the
|
||||||
operator must specify certificate and key files. By default these should
|
operator must specify certificate and key files. By default these should
|
||||||
be placed in `/etc/scylla/scylla.crt` and `/etc/scylla/scylla.key`, but
|
be placed in `/etc/scylla/scylla.crt` and `/etc/scylla/scylla.key`, but
|
||||||
@@ -54,7 +54,7 @@ these default locations can overridden by specifying
|
|||||||
`--alternator-encryption-options keyfile="..."` and
|
`--alternator-encryption-options keyfile="..."` and
|
||||||
`--alternator-encryption-options certificate="..."`.
|
`--alternator-encryption-options certificate="..."`.
|
||||||
|
|
||||||
By default, Scylla saves a snapshot of deleted tables. But Alternator does
|
By default, ScyllaDB saves a snapshot of deleted tables. But Alternator does
|
||||||
not offer an API to restore these snapshots, so these snapshots are not useful
|
not offer an API to restore these snapshots, so these snapshots are not useful
|
||||||
and waste disk space - deleting a table does not recover any disk space.
|
and waste disk space - deleting a table does not recover any disk space.
|
||||||
It is therefore recommended to disable this automatic-snapshotting feature
|
It is therefore recommended to disable this automatic-snapshotting feature
|
||||||
@@ -73,11 +73,11 @@ itself. Instructions, code and examples for doing this can be found in the
|
|||||||
|
|
||||||
This section provides only a very brief introduction to Alternator's
|
This section provides only a very brief introduction to Alternator's
|
||||||
design. A much more detailed document about the features of the DynamoDB
|
design. A much more detailed document about the features of the DynamoDB
|
||||||
API and how they are, or could be, implemented in Scylla can be found in:
|
API and how they are, or could be, implemented in ScyllaDB can be found in:
|
||||||
<https://docs.google.com/document/d/1i4yjF5OSAazAY_-T8CBce9-2ykW4twx_E_Nt2zDoOVs>
|
<https://docs.google.com/document/d/1i4yjF5OSAazAY_-T8CBce9-2ykW4twx_E_Nt2zDoOVs>
|
||||||
|
|
||||||
Almost all of Alternator's source code (except some initialization code)
|
Almost all of Alternator's source code (except some initialization code)
|
||||||
can be found in the alternator/ subdirectory of Scylla's source code.
|
can be found in the alternator/ subdirectory of ScyllaDB's source code.
|
||||||
Extensive functional tests can be found in the test/alternator
|
Extensive functional tests can be found in the test/alternator
|
||||||
subdirectory. These tests are written in Python, and can be run against
|
subdirectory. These tests are written in Python, and can be run against
|
||||||
both Alternator and Amazon's DynamoDB; This allows verifying that
|
both Alternator and Amazon's DynamoDB; This allows verifying that
|
||||||
@@ -85,15 +85,15 @@ Alternator's behavior matches the one observed on DynamoDB.
|
|||||||
See test/alternator/README.md for more information about the tests and
|
See test/alternator/README.md for more information about the tests and
|
||||||
how to run them.
|
how to run them.
|
||||||
|
|
||||||
With Alternator enabled on port 8000 (for example), every Scylla node
|
With Alternator enabled on port 8000 (for example), every ScyllaDB node
|
||||||
listens for DynamoDB API requests on this port. These requests, in
|
listens for DynamoDB API requests on this port. These requests, in
|
||||||
JSON format over HTTP, are parsed and result in calls to internal Scylla
|
JSON format over HTTP, are parsed and result in calls to internal Scylla
|
||||||
C++ functions - there is no CQL generation or parsing involved.
|
C++ functions - there is no CQL generation or parsing involved.
|
||||||
In Scylla terminology, the node receiving the request acts as the
|
In ScyllaDB terminology, the node receiving the request acts as the
|
||||||
*coordinator*, and often passes the request on to one or more other nodes -
|
*coordinator*, and often passes the request on to one or more other nodes -
|
||||||
*replicas* which hold copies of the requested data.
|
*replicas* which hold copies of the requested data.
|
||||||
|
|
||||||
Alternator tables are stored as Scylla tables, each in a separate keyspace.
|
Alternator tables are stored as ScyllaDB tables, each in a separate keyspace.
|
||||||
Each keyspace is initialized when the corresponding Alternator table is
|
Each keyspace is initialized when the corresponding Alternator table is
|
||||||
created (with a CreateTable request). The replication factor (RF) for this
|
created (with a CreateTable request). The replication factor (RF) for this
|
||||||
keyspace is chosen at that point, depending on the size of the cluster:
|
keyspace is chosen at that point, depending on the size of the cluster:
|
||||||
@@ -101,19 +101,19 @@ RF=3 is used on clusters with three or more nodes, and RF=1 is used for
|
|||||||
smaller clusters. Such smaller clusters are, of course, only recommended
|
smaller clusters. Such smaller clusters are, of course, only recommended
|
||||||
for tests because of the risk of data loss.
|
for tests because of the risk of data loss.
|
||||||
|
|
||||||
Each table in Alternator is stored as a Scylla table in a separate
|
Each table in Alternator is stored as a ScyllaDB table in a separate
|
||||||
keyspace. The DynamoDB key columns (hash and sort key) have known types,
|
keyspace. The DynamoDB key columns (hash and sort key) have known types,
|
||||||
and become partition and clustering key columns of the Scylla table.
|
and become partition and clustering key columns of the ScyllaDB table.
|
||||||
All other attributes may be different for each row, so are stored in one
|
All other attributes may be different for each row, so are stored in one
|
||||||
map column in Scylla, and not as separate columns.
|
map column in ScyllaDB, and not as separate columns.
|
||||||
|
|
||||||
DynamoDB supports two consistency levels for reads, "eventual consistency"
|
DynamoDB supports two consistency levels for reads, "eventual consistency"
|
||||||
and "strong consistency". These two modes are implemented using Scylla's CL
|
and "strong consistency". These two modes are implemented using ScyllaDB's CL
|
||||||
(consistency level) feature: All writes are done using the `LOCAL_QUORUM`
|
(consistency level) feature: All writes are done using the `LOCAL_QUORUM`
|
||||||
consistency level, then strongly-consistent reads are done with
|
consistency level, then strongly-consistent reads are done with
|
||||||
`LOCAL_QUORUM`, while eventually-consistent reads are with just `LOCAL_ONE`.
|
`LOCAL_QUORUM`, while eventually-consistent reads are with just `LOCAL_ONE`.
|
||||||
|
|
||||||
In Scylla (and its inspiration, Cassandra), high write performance is
|
In ScyllaDB (and its inspiration, Cassandra), high write performance is
|
||||||
achieved by ensuring that writes do not require reads from disk.
|
achieved by ensuring that writes do not require reads from disk.
|
||||||
The DynamoDB API, however, provides many types of requests that need a read
|
The DynamoDB API, however, provides many types of requests that need a read
|
||||||
before the write (a.k.a. RMW requests - read-modify-write). For example,
|
before the write (a.k.a. RMW requests - read-modify-write). For example,
|
||||||
@@ -121,7 +121,7 @@ a request may copy an existing attribute, increment an attribute,
|
|||||||
be conditional on some expression involving existing values of attribute,
|
be conditional on some expression involving existing values of attribute,
|
||||||
or request that the previous values of attributes be returned. These
|
or request that the previous values of attributes be returned. These
|
||||||
read-modify-write transactions should be _isolated_ from each other, so
|
read-modify-write transactions should be _isolated_ from each other, so
|
||||||
by default Alternator implements every write operation using Scylla's
|
by default Alternator implements every write operation using ScyllaDB's
|
||||||
LWT (lightweight transactions). This default can be overridden on a per-table
|
LWT (lightweight transactions). This default can be overridden on a per-table
|
||||||
basis, by tagging the table as explained above in the "write isolation
|
basis, by tagging the table as explained above in the "write isolation
|
||||||
policies" section.
|
policies" section.
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# ScyllaDB Alternator for DynamoDB users
|
# ScyllaDB Alternator for DynamoDB users
|
||||||
|
|
||||||
Scylla supports the DynamoDB API (this feature is codenamed "Alternator").
|
ScyllaDB supports the DynamoDB API (this feature is codenamed "Alternator").
|
||||||
Our goal is to support any application written for Amazon DynamoDB.
|
Our goal is to support any application written for Amazon DynamoDB.
|
||||||
Nevertheless, there are a few differences between DynamoDB and Scylla, and
|
Nevertheless, there are a few differences between DynamoDB and Scylla, and
|
||||||
and a few DynamoDB features that have not yet been implemented in Scylla.
|
and a few DynamoDB features that have not yet been implemented in Scylla.
|
||||||
@@ -8,16 +8,16 @@ The purpose of this document is to inform users of these differences.
|
|||||||
|
|
||||||
## Provisioning
|
## Provisioning
|
||||||
|
|
||||||
The most obvious difference between DynamoDB and Scylla is that while
|
The most obvious difference between DynamoDB and ScyllaDB is that while
|
||||||
DynamoDB is a shared cloud service, Scylla is a dedicated service running
|
DynamoDB is a shared cloud service, ScyllaDB is a dedicated service running
|
||||||
on your private cluster. Whereas DynamoDB allows you to "provision" the
|
on your private cluster. Whereas DynamoDB allows you to "provision" the
|
||||||
number of requests per second you'll need - or at an extra cost not even
|
number of requests per second you'll need - or at an extra cost not even
|
||||||
provision that - Scylla requires you to provision your cluster. You need
|
provision that - ScyllaDB requires you to provision your cluster. You need
|
||||||
to reason about the number and size of your nodes - not the throughput.
|
to reason about the number and size of your nodes - not the throughput.
|
||||||
|
|
||||||
Moreover, DynamoDB's per-table provisioning (`BillingMode=PROVISIONED`) is
|
Moreover, DynamoDB's per-table provisioning (`BillingMode=PROVISIONED`) is
|
||||||
not yet supported by Scylla. The BillingMode and ProvisionedThroughput options
|
not yet supported by Scylla. The BillingMode and ProvisionedThroughput options
|
||||||
on a table need to be valid but are ignored, and Scylla behaves like DynamoDB's
|
on a table need to be valid but are ignored, and ScyllaDB behaves like DynamoDB's
|
||||||
`BillingMode=PAY_PER_REQUEST`: All requests are accepted without a per-table
|
`BillingMode=PAY_PER_REQUEST`: All requests are accepted without a per-table
|
||||||
throughput cap.
|
throughput cap.
|
||||||
|
|
||||||
@@ -33,7 +33,7 @@ Instructions for doing this can be found in:
|
|||||||
|
|
||||||
## Write isolation policies
|
## Write isolation policies
|
||||||
|
|
||||||
Scylla was designed to optimize the performance of pure write operations -
|
ScyllaDB was designed to optimize the performance of pure write operations -
|
||||||
writes which do not need to read the previous value of the item.
|
writes which do not need to read the previous value of the item.
|
||||||
In CQL, writes which do need the previous value of the item must explicitly
|
In CQL, writes which do need the previous value of the item must explicitly
|
||||||
use the slower LWT ("LightWeight Transaction") feature to be correctly
|
use the slower LWT ("LightWeight Transaction") feature to be correctly
|
||||||
@@ -79,11 +79,11 @@ a _higher_ timestamp - and this will be the "last write" that wins.
|
|||||||
To avoid or mitigate this write reordering issue, users may consider
|
To avoid or mitigate this write reordering issue, users may consider
|
||||||
one or more of the following:
|
one or more of the following:
|
||||||
|
|
||||||
1. Use NTP to keep the clocks on the different Scylla nodes synchronized.
|
1. Use NTP to keep the clocks on the different ScyllaDB nodes synchronized.
|
||||||
If the delay between the two writes is longer than NTP's accuracy,
|
If the delay between the two writes is longer than NTP's accuracy,
|
||||||
they will not be reordered.
|
they will not be reordered.
|
||||||
2. If an application wants to ensure that two specific writes are not
|
2. If an application wants to ensure that two specific writes are not
|
||||||
reordered, it should send both requests to the same Scylla node.
|
reordered, it should send both requests to the same ScyllaDB node.
|
||||||
Care should be taken when using a load balancer - which might redirect
|
Care should be taken when using a load balancer - which might redirect
|
||||||
two requests to two different nodes.
|
two requests to two different nodes.
|
||||||
3. Consider using the `always_use_lwt` write isolation policy.
|
3. Consider using the `always_use_lwt` write isolation policy.
|
||||||
@@ -210,7 +210,7 @@ CREATE SERVICE_LEVEL IF NOT EXISTS oltp WITH SHARES = 1000;
|
|||||||
ATTACH SERVICE_LEVEL olap TO alice;
|
ATTACH SERVICE_LEVEL olap TO alice;
|
||||||
ATTACH SERVICE_LEVEL oltp TO bob;
|
ATTACH SERVICE_LEVEL oltp TO bob;
|
||||||
```
|
```
|
||||||
Note that `alternator_enforce_authorization` has to be enabled in Scylla configuration.
|
Note that `alternator_enforce_authorization` has to be enabled in ScyllaDB configuration.
|
||||||
|
|
||||||
See [Authorization](##Authorization) section to learn more about roles and authorization.
|
See [Authorization](##Authorization) section to learn more about roles and authorization.
|
||||||
See [Workload Prioritization](../features/workload-prioritization)
|
See [Workload Prioritization](../features/workload-prioritization)
|
||||||
@@ -218,11 +218,11 @@ to read about Workload Prioritization in detail.
|
|||||||
|
|
||||||
## Metrics
|
## Metrics
|
||||||
|
|
||||||
Scylla has an advanced and extensive monitoring framework for inspecting
|
ScyllaDB has an advanced and extensive monitoring framework for inspecting
|
||||||
and graphing hundreds of different metrics of Scylla's usage and performance.
|
and graphing hundreds of different metrics of ScyllaDB's usage and performance.
|
||||||
Scylla's monitoring stack, based on Grafana and Prometheus, is described in
|
ScyllaDB's monitoring stack, based on Grafana and Prometheus, is described in
|
||||||
<https://docs.scylladb.com/operating-scylla/monitoring/>.
|
<https://docs.scylladb.com/operating-scylla/monitoring/>.
|
||||||
This monitoring stack is different from DynamoDB's offering - but Scylla's
|
This monitoring stack is different from DynamoDB's offering - but ScyllaDB's
|
||||||
is significantly more powerful and gives the user better insights on
|
is significantly more powerful and gives the user better insights on
|
||||||
the internals of the database and its performance.
|
the internals of the database and its performance.
|
||||||
|
|
||||||
@@ -248,7 +248,7 @@ data in different partition order. Applications mustn't rely on that
|
|||||||
undocumented order.
|
undocumented order.
|
||||||
|
|
||||||
Note that inside each partition, the individual items will be sorted the same
|
Note that inside each partition, the individual items will be sorted the same
|
||||||
in DynamoDB and Scylla - determined by the _sort key_ defined for that table.
|
in DynamoDB and ScyllaDB - determined by the _sort key_ defined for that table.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -274,7 +274,7 @@ is different, or can be configured in Alternator:
|
|||||||
## Experimental API features
|
## Experimental API features
|
||||||
|
|
||||||
Some DynamoDB API features are supported by Alternator, but considered
|
Some DynamoDB API features are supported by Alternator, but considered
|
||||||
**experimental** in this release. An experimental feature in Scylla is a
|
**experimental** in this release. An experimental feature in ScyllaDB is a
|
||||||
feature whose functionality is complete, or mostly complete, but it is not
|
feature whose functionality is complete, or mostly complete, but it is not
|
||||||
as thoroughly tested or optimized as regular features. Also, an experimental
|
as thoroughly tested or optimized as regular features. Also, an experimental
|
||||||
feature's implementation is still subject to change and upgrades may not be
|
feature's implementation is still subject to change and upgrades may not be
|
||||||
@@ -351,8 +351,8 @@ they should be easy to detect. Here is a list of these unimplemented features:
|
|||||||
|
|
||||||
* The on-demand backup APIs are not supported: CreateBackup, DescribeBackup,
|
* The on-demand backup APIs are not supported: CreateBackup, DescribeBackup,
|
||||||
DeleteBackup, ListBackups, RestoreTableFromBackup.
|
DeleteBackup, ListBackups, RestoreTableFromBackup.
|
||||||
For now, users can use Scylla's existing backup solutions such as snapshots
|
For now, users can use ScyllaDB's existing backup solutions such as snapshots
|
||||||
or Scylla Manager.
|
or ScyllaDB Manager.
|
||||||
<https://github.com/scylladb/scylla/issues/5063>
|
<https://github.com/scylladb/scylla/issues/5063>
|
||||||
|
|
||||||
* Continuous backup (the ability to restore any point in time) is also not
|
* Continuous backup (the ability to restore any point in time) is also not
|
||||||
@@ -370,7 +370,7 @@ they should be easy to detect. Here is a list of these unimplemented features:
|
|||||||
<https://github.com/scylladb/scylla/issues/5068>
|
<https://github.com/scylladb/scylla/issues/5068>
|
||||||
|
|
||||||
* DAX (DynamoDB Accelerator), an in-memory cache for DynamoDB, is not
|
* DAX (DynamoDB Accelerator), an in-memory cache for DynamoDB, is not
|
||||||
available in for Alternator. Anyway, it should not be necessary - Scylla's
|
available in for Alternator. Anyway, it should not be necessary - ScyllaDB's
|
||||||
internal cache is already rather advanced and there is no need to place
|
internal cache is already rather advanced and there is no need to place
|
||||||
another cache in front of the it. We wrote more about this here:
|
another cache in front of the it. We wrote more about this here:
|
||||||
<https://www.scylladb.com/2017/07/31/database-caches-not-good/>
|
<https://www.scylladb.com/2017/07/31/database-caches-not-good/>
|
||||||
@@ -384,7 +384,7 @@ they should be easy to detect. Here is a list of these unimplemented features:
|
|||||||
* The PartiQL syntax (SQL-like SELECT/UPDATE/INSERT/DELETE expressions)
|
* The PartiQL syntax (SQL-like SELECT/UPDATE/INSERT/DELETE expressions)
|
||||||
and the operations ExecuteStatement, BatchExecuteStatement and
|
and the operations ExecuteStatement, BatchExecuteStatement and
|
||||||
ExecuteTransaction are not yet supported.
|
ExecuteTransaction are not yet supported.
|
||||||
A user that is interested in an SQL-like syntax can consider using Scylla's
|
A user that is interested in an SQL-like syntax can consider using ScyllaDB's
|
||||||
CQL protocol instead.
|
CQL protocol instead.
|
||||||
This feature was added to DynamoDB in November 2020.
|
This feature was added to DynamoDB in November 2020.
|
||||||
<https://github.com/scylladb/scylla/issues/8787>
|
<https://github.com/scylladb/scylla/issues/8787>
|
||||||
@@ -393,7 +393,7 @@ they should be easy to detect. Here is a list of these unimplemented features:
|
|||||||
which is different from AWS's. In particular, the operations
|
which is different from AWS's. In particular, the operations
|
||||||
DescribeContributorInsights, ListContributorInsights and
|
DescribeContributorInsights, ListContributorInsights and
|
||||||
UpdateContributorInsights that configure Amazon's "CloudWatch Contributor
|
UpdateContributorInsights that configure Amazon's "CloudWatch Contributor
|
||||||
Insights" are not yet supported. Scylla has different ways to retrieve the
|
Insights" are not yet supported. ScyllaDB has different ways to retrieve the
|
||||||
same information, such as which items were accessed most often.
|
same information, such as which items were accessed most often.
|
||||||
<https://github.com/scylladb/scylla/issues/8788>
|
<https://github.com/scylladb/scylla/issues/8788>
|
||||||
|
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ This section will guide you through the steps for setting up the cluster:
|
|||||||
<https://hub.docker.com/r/scylladb/scylla/>, but add to every `docker run`
|
<https://hub.docker.com/r/scylladb/scylla/>, but add to every `docker run`
|
||||||
command a `-p 8000:8000` before the image name and
|
command a `-p 8000:8000` before the image name and
|
||||||
`--alternator-port=8000 --alternator-write-isolation=always` at the end.
|
`--alternator-port=8000 --alternator-write-isolation=always` at the end.
|
||||||
The "alternator-port" option specifies on which port Scylla will listen for
|
The "alternator-port" option specifies on which port ScyllaDB will listen for
|
||||||
the (unencrypted) DynamoDB API, and the "alternator-write-isolation" chooses
|
the (unencrypted) DynamoDB API, and the "alternator-write-isolation" chooses
|
||||||
whether or not Alternator will use LWT for every write.
|
whether or not Alternator will use LWT for every write.
|
||||||
For example,
|
For example,
|
||||||
@@ -24,10 +24,10 @@ This section will guide you through the steps for setting up the cluster:
|
|||||||
By default, ScyllaDB run in this way will not have authentication or
|
By default, ScyllaDB run in this way will not have authentication or
|
||||||
authorization enabled, and any DynamoDB API request will be honored without
|
authorization enabled, and any DynamoDB API request will be honored without
|
||||||
requiring them to be signed appropriately. See the
|
requiring them to be signed appropriately. See the
|
||||||
[Scylla Alternator for DynamoDB users](compatibility.md#authentication-and-authorization)
|
[ScyllaDB Alternator for DynamoDB users](compatibility.md#authentication-and-authorization)
|
||||||
document on how to configure authentication and authorization.
|
document on how to configure authentication and authorization.
|
||||||
|
|
||||||
## Testing Scylla's DynamoDB API support:
|
## Testing ScyllaDB's DynamoDB API support:
|
||||||
### Running AWS Tic Tac Toe demo app to test the cluster:
|
### Running AWS Tic Tac Toe demo app to test the cluster:
|
||||||
1. Follow the instructions on the [AWS github page](https://github.com/awsdocs/amazon-dynamodb-developer-guide/blob/master/doc_source/TicTacToe.Phase1.md)
|
1. Follow the instructions on the [AWS github page](https://github.com/awsdocs/amazon-dynamodb-developer-guide/blob/master/doc_source/TicTacToe.Phase1.md)
|
||||||
2. Enjoy your tic-tac-toe game :-)
|
2. Enjoy your tic-tac-toe game :-)
|
||||||
|
|||||||
@@ -2,9 +2,9 @@
|
|||||||
|
|
||||||
Alternator's primary goal is to be compatible with Amazon DynamoDB(TM)
|
Alternator's primary goal is to be compatible with Amazon DynamoDB(TM)
|
||||||
and its APIs, so that any application written to use Amazon DynamoDB could
|
and its APIs, so that any application written to use Amazon DynamoDB could
|
||||||
be run, unmodified, against Scylla with Alternator enabled. The extent of
|
be run, unmodified, against ScyllaDB with Alternator enabled. The extent of
|
||||||
Alternator's compatibility with DynamoDB is described in the
|
Alternator's compatibility with DynamoDB is described in the
|
||||||
[Scylla Alternator for DynamoDB users](compatibility.md) document.
|
[ScyllaDB Alternator for DynamoDB users](compatibility.md) document.
|
||||||
|
|
||||||
But Alternator also adds several features and APIs that are not available in
|
But Alternator also adds several features and APIs that are not available in
|
||||||
DynamoDB. These Alternator-specific APIs are documented here.
|
DynamoDB. These Alternator-specific APIs are documented here.
|
||||||
@@ -15,7 +15,7 @@ _conditional_ update or an update based on the old value of an attribute.
|
|||||||
The read and the write should be treated as a single transaction - protected
|
The read and the write should be treated as a single transaction - protected
|
||||||
(_isolated_) from other parallel writes to the same item.
|
(_isolated_) from other parallel writes to the same item.
|
||||||
|
|
||||||
Alternator could do this isolation by using Scylla's LWT (lightweight
|
Alternator could do this isolation by using ScyllaDB's LWT (lightweight
|
||||||
transactions) for every write operation, but this significantly slows
|
transactions) for every write operation, but this significantly slows
|
||||||
down writes, and is not necessary for workloads which don't use read-modify-write
|
down writes, and is not necessary for workloads which don't use read-modify-write
|
||||||
(RMW) updates.
|
(RMW) updates.
|
||||||
@@ -41,7 +41,7 @@ isolation policy for a specific table can be overridden by tagging the table
|
|||||||
which need a read before the write. An attempt to use such statements
|
which need a read before the write. An attempt to use such statements
|
||||||
(e.g., UpdateItem with a ConditionExpression) will result in an error.
|
(e.g., UpdateItem with a ConditionExpression) will result in an error.
|
||||||
In this mode, the remaining write requests which are allowed - pure writes
|
In this mode, the remaining write requests which are allowed - pure writes
|
||||||
without a read - are performed using standard Scylla writes, not LWT,
|
without a read - are performed using standard ScyllaDB writes, not LWT,
|
||||||
so they are significantly faster than they would have been in the
|
so they are significantly faster than they would have been in the
|
||||||
`always_use_lwt`, but their isolation is still correct.
|
`always_use_lwt`, but their isolation is still correct.
|
||||||
|
|
||||||
@@ -65,19 +65,19 @@ isolation policy for a specific table can be overridden by tagging the table
|
|||||||
read-modify-write updates. This mode is not recommended for any use case,
|
read-modify-write updates. This mode is not recommended for any use case,
|
||||||
and will likely be removed in the future.
|
and will likely be removed in the future.
|
||||||
|
|
||||||
## Accessing system tables from Scylla
|
## Accessing system tables from ScyllaDB
|
||||||
Scylla exposes lots of useful information via its internal system tables,
|
ScyllaDB exposes lots of useful information via its internal system tables,
|
||||||
which can be found in system keyspaces: 'system', 'system\_auth', etc.
|
which can be found in system keyspaces: 'system', 'system\_auth', etc.
|
||||||
In order to access these tables via the alternator interface,
|
In order to access these tables via the alternator interface,
|
||||||
Scan and Query requests can use a special table name:
|
Scan and Query requests can use a special table name:
|
||||||
`.scylla.alternator.KEYSPACE_NAME.TABLE_NAME`
|
`.scylla.alternator.KEYSPACE_NAME.TABLE_NAME`
|
||||||
which will return results fetched from corresponding Scylla table.
|
which will return results fetched from corresponding ScyllaDB table.
|
||||||
|
|
||||||
This interface can be used only to fetch data from system tables.
|
This interface can be used only to fetch data from system tables.
|
||||||
Attempts to read regular tables via the virtual interface will result
|
Attempts to read regular tables via the virtual interface will result
|
||||||
in an error.
|
in an error.
|
||||||
|
|
||||||
Example: in order to query the contents of Scylla's `system.large_rows`,
|
Example: in order to query the contents of ScyllaDB's `system.large_rows`,
|
||||||
pass `TableName='.scylla.alternator.system.large_rows'` to a Query/Scan
|
pass `TableName='.scylla.alternator.system.large_rows'` to a Query/Scan
|
||||||
request.
|
request.
|
||||||
|
|
||||||
@@ -113,14 +113,14 @@ connection (either active or idle), not necessarily an active request as
|
|||||||
in Alternator.
|
in Alternator.
|
||||||
|
|
||||||
## Service discovery
|
## Service discovery
|
||||||
As explained in [Scylla Alternator for DynamoDB users](compatibility.md),
|
As explained in [ScyllaDB Alternator for DynamoDB users](compatibility.md),
|
||||||
Alternator requires a load-balancer or a client-side load-balancing library
|
Alternator requires a load-balancer or a client-side load-balancing library
|
||||||
to distribute requests between all Scylla nodes. This load-balancer needs
|
to distribute requests between all ScyllaDB nodes. This load-balancer needs
|
||||||
to be able to _discover_ the Scylla nodes. Alternator provides two special
|
to be able to _discover_ the ScyllaDB nodes. Alternator provides two special
|
||||||
requests, `/` and `/localnodes`, to help with this service discovery, which
|
requests, `/` and `/localnodes`, to help with this service discovery, which
|
||||||
we will now explain.
|
we will now explain.
|
||||||
|
|
||||||
Some setups know exactly which Scylla nodes were brought up, so all that
|
Some setups know exactly which ScyllaDB nodes were brought up, so all that
|
||||||
remains is to periodically verify that each node is still functional. The
|
remains is to periodically verify that each node is still functional. The
|
||||||
easiest way to do this is to make an HTTP (or HTTPS) GET request to the node,
|
easiest way to do this is to make an HTTP (or HTTPS) GET request to the node,
|
||||||
with URL `/`. This is a trivial GET request and does **not** need to be
|
with URL `/`. This is a trivial GET request and does **not** need to be
|
||||||
@@ -133,10 +133,10 @@ $ curl http://localhost:8000/
|
|||||||
healthy: localhost:8000
|
healthy: localhost:8000
|
||||||
```
|
```
|
||||||
|
|
||||||
In other setups, the load balancer might not know which Scylla nodes exist.
|
In other setups, the load balancer might not know which ScyllaDB nodes exist.
|
||||||
For example, it may be possible to add or remove Scylla nodes without a
|
For example, it may be possible to add or remove ScyllaDB nodes without a
|
||||||
client-side load balancer knowing. For these setups we have the `/localnodes`
|
client-side load balancer knowing. For these setups we have the `/localnodes`
|
||||||
request that can be used to discover which Scylla nodes exist: A load balancer
|
request that can be used to discover which ScyllaDB nodes exist: A load balancer
|
||||||
that already knows at least one live node can discover the rest by sending
|
that already knows at least one live node can discover the rest by sending
|
||||||
a `/localnodes` request to the known node. It's again an unauthenticated
|
a `/localnodes` request to the known node. It's again an unauthenticated
|
||||||
HTTP (or HTTPS) GET request:
|
HTTP (or HTTPS) GET request:
|
||||||
@@ -160,7 +160,7 @@ list the nodes in a specific _data center_ or _rack_. These options are
|
|||||||
useful for certain use cases:
|
useful for certain use cases:
|
||||||
|
|
||||||
* A `dc` option (e.g., `/localnodes?dc=dc1`) can be passed to list the
|
* A `dc` option (e.g., `/localnodes?dc=dc1`) can be passed to list the
|
||||||
nodes in a specific Scylla data center, not the data center of the node
|
nodes in a specific ScyllaDB data center, not the data center of the node
|
||||||
being contacted. This is useful when a client knows of _some_ Scylla
|
being contacted. This is useful when a client knows of _some_ ScyllaDB
|
||||||
node belonging to an unknown DC, but wants to list the nodes in _its_
|
node belonging to an unknown DC, but wants to list the nodes in _its_
|
||||||
DC, which it knows by name.
|
DC, which it knows by name.
|
||||||
@@ -191,7 +191,7 @@ tells them to.
|
|||||||
|
|
||||||
If you want to influence whether a specific Alternator table is created with tablets or vnodes,
|
If you want to influence whether a specific Alternator table is created with tablets or vnodes,
|
||||||
you can do this by specifying the `system:initial_tablets` tag
|
you can do this by specifying the `system:initial_tablets` tag
|
||||||
(in earlier versions of Scylla the tag was `experimental:initial_tablets`)
|
(in earlier versions of ScyllaDB the tag was `experimental:initial_tablets`)
|
||||||
in the CreateTable operation. The value of this tag can be:
|
in the CreateTable operation. The value of this tag can be:
|
||||||
|
|
||||||
* Any valid integer as the value of this tag enables tablets.
|
* Any valid integer as the value of this tag enables tablets.
|
||||||
|
|||||||
@@ -365,7 +365,7 @@ Modifying a keyspace with tablets enabled is possible and doesn't require any sp
|
|||||||
|
|
||||||
- The replication factor (RF) can be increased or decreased by at most 1 at a time. To reach the desired RF value, modify the RF repeatedly.
|
- The replication factor (RF) can be increased or decreased by at most 1 at a time. To reach the desired RF value, modify the RF repeatedly.
|
||||||
- The ``ALTER`` statement rejects the ``replication_factor`` tag. List the DCs explicitly when altering a keyspace. See :ref:`NetworkTopologyStrategy <replication-strategy>`.
|
- The ``ALTER`` statement rejects the ``replication_factor`` tag. List the DCs explicitly when altering a keyspace. See :ref:`NetworkTopologyStrategy <replication-strategy>`.
|
||||||
- If there's any other ongoing global topology operation, executing the ``ALTER`` statement will fail (with an explicit and specific error) and needs to be repeated.
|
- An RF change cannot be requested while another RF change is pending for the same keyspace. Attempting to execute an ``ALTER`` statement in this scenario will fail with an explicit error. Wait for the ongoing RF change to complete before issuing another ``ALTER`` statement.
|
||||||
- The ``ALTER`` statement may take longer than the regular query timeout, and even if it times out, it will continue to execute in the background.
|
- The ``ALTER`` statement may take longer than the regular query timeout, and even if it times out, it will continue to execute in the background.
|
||||||
- The replication strategy cannot be modified, as keyspaces with tablets only support ``NetworkTopologyStrategy``.
|
- The replication strategy cannot be modified, as keyspaces with tablets only support ``NetworkTopologyStrategy``.
|
||||||
- The ``ALTER`` statement will fail if it would make the keyspace :term:`RF-rack-invalid <RF-rack-valid keyspace>`.
|
- The ``ALTER`` statement will fail if it would make the keyspace :term:`RF-rack-invalid <RF-rack-valid keyspace>`.
|
||||||
@@ -1043,6 +1043,8 @@ The following modes are available:
|
|||||||
* - ``immediate``
|
* - ``immediate``
|
||||||
- Tombstone GC is immediately performed. There is no wait time or repair requirement. This mode is useful for a table that uses the TWCS compaction strategy with no user deletes. After data is expired after TTL, ScyllaDB can perform compaction to drop the expired data immediately.
|
- Tombstone GC is immediately performed. There is no wait time or repair requirement. This mode is useful for a table that uses the TWCS compaction strategy with no user deletes. After data is expired after TTL, ScyllaDB can perform compaction to drop the expired data immediately.
|
||||||
|
|
||||||
|
.. warning:: The ``repair`` mode is not supported for :term:`Colocated Tables <Colocated Table>` in this version.
|
||||||
|
|
||||||
.. _cql-per-table-tablet-options:
|
.. _cql-per-table-tablet-options:
|
||||||
|
|
||||||
Per-table tablet options
|
Per-table tablet options
|
||||||
|
|||||||
@@ -102,6 +102,7 @@ Additional Information
|
|||||||
|
|
||||||
To learn more about TTL, and see a hands-on example, check out `this lesson <https://university.scylladb.com/courses/data-modeling/lessons/advanced-data-modeling/topic/expiring-data-with-ttl-time-to-live/>`_ on ScyllaDB University.
|
To learn more about TTL, and see a hands-on example, check out `this lesson <https://university.scylladb.com/courses/data-modeling/lessons/advanced-data-modeling/topic/expiring-data-with-ttl-time-to-live/>`_ on ScyllaDB University.
|
||||||
|
|
||||||
|
* `Video: Managing data expiration with Time-To-Live <https://www.youtube.com/watch?v=SXkbu7mFHeA>`_
|
||||||
* :doc:`Apache Cassandra Query Language (CQL) Reference </cql/index>`
|
* :doc:`Apache Cassandra Query Language (CQL) Reference </cql/index>`
|
||||||
* :doc:`KB Article:How to Change gc_grace_seconds for a Table </kb/gc-grace-seconds/>`
|
* :doc:`KB Article:How to Change gc_grace_seconds for a Table </kb/gc-grace-seconds/>`
|
||||||
* :doc:`KB Article:Time to Live (TTL) and Compaction </kb/ttl-facts/>`
|
* :doc:`KB Article:Time to Live (TTL) and Compaction </kb/ttl-facts/>`
|
||||||
|
|||||||
@@ -74,6 +74,8 @@ The keys and values are:
|
|||||||
as an indicator to which shard client wants to connect. The desired shard number
|
as an indicator to which shard client wants to connect. The desired shard number
|
||||||
is calculated as: `desired_shard_no = client_port % SCYLLA_NR_SHARDS`.
|
is calculated as: `desired_shard_no = client_port % SCYLLA_NR_SHARDS`.
|
||||||
Its value is a decimal representation of type `uint16_t`, by default `19142`.
|
Its value is a decimal representation of type `uint16_t`, by default `19142`.
|
||||||
|
- `CLIENT_OPTIONS` is a string containing a JSON object representation that
|
||||||
|
contains CQL Driver configuration, e.g. load balancing policy, retry policy, timeouts, etc.
|
||||||
|
|
||||||
Currently, one `SCYLLA_SHARDING_ALGORITHM` is defined,
|
Currently, one `SCYLLA_SHARDING_ALGORITHM` is defined,
|
||||||
`biased-token-round-robin`. To apply the algorithm,
|
`biased-token-round-robin`. To apply the algorithm,
|
||||||
@@ -236,3 +238,26 @@ the same mechanism for other protocol versions, such as CQLv4.
|
|||||||
|
|
||||||
The feature is identified by the `SCYLLA_USE_METADATA_ID` key, which is meant to be sent
|
The feature is identified by the `SCYLLA_USE_METADATA_ID` key, which is meant to be sent
|
||||||
in the SUPPORTED message.
|
in the SUPPORTED message.
|
||||||
|
|
||||||
|
## Sending the CLIENT_ROUTES_CHANGE event
|
||||||
|
|
||||||
|
This extension allows a driver to update its connections when the
|
||||||
|
`system.client_routes` table is modified.
|
||||||
|
|
||||||
|
In some network topologies a specific mapping of addresses and ports is required (e.g.
|
||||||
|
to support Private Link). This mapping can change dynamically even when no nodes are
|
||||||
|
added or removed. The driver must adapt to those changes; otherwise connectivity can be
|
||||||
|
lost.
|
||||||
|
|
||||||
|
The extension is implemented as a new `EVENT` type: `CLIENT_ROUTES_CHANGE`. The event
|
||||||
|
body consists of:
|
||||||
|
- [string] change
|
||||||
|
- [string list] connection_ids
|
||||||
|
- [string list] host_ids
|
||||||
|
|
||||||
|
There is only one change value: `UPDATE_NODES`, which means at least one client route
|
||||||
|
was inserted, updated, or deleted.
|
||||||
|
|
||||||
|
Events already have a subscription mechanism similar to protocol extensions (that is,
|
||||||
|
the driver only receives the events it explicitly subscribed to), so no additional
|
||||||
|
`cql_protocol_extension` key is introduced for this feature.
|
||||||
|
|||||||
@@ -45,22 +45,6 @@ immediately after it's finished.
|
|||||||
|
|
||||||
A flag which determines if a task can be aborted through API.
|
A flag which determines if a task can be aborted through API.
|
||||||
|
|
||||||
# Task timing fields
|
|
||||||
|
|
||||||
Tasks have three timing fields that track different stages of their lifecycle:
|
|
||||||
|
|
||||||
- `creation_time` - When the task was created/queued. This is extracted from the task's
|
|
||||||
UUID (which is a timeuuid) and represents the moment the task request was submitted.
|
|
||||||
- `start_time` - When the task actually began executing. For tasks that are queued, this
|
|
||||||
will be unspecified (equal to epoch) until execution starts. For node operations
|
|
||||||
like decommission, this is set when the request is picked up for execution by the
|
|
||||||
topology coordinator.
|
|
||||||
- `end_time` - When the task completed (successfully or with an error). This is
|
|
||||||
unspecified (equal to epoch) until the task finishes.
|
|
||||||
|
|
||||||
The difference between `creation_time` and `start_time` represents the time a task
|
|
||||||
spent waiting in the queue before execution began.
|
|
||||||
|
|
||||||
# Type vs scope vs kind
|
# Type vs scope vs kind
|
||||||
|
|
||||||
`type` of a task describes what operation is covered by a task,
|
`type` of a task describes what operation is covered by a task,
|
||||||
|
|||||||
@@ -86,6 +86,7 @@ stateDiagram-v2
|
|||||||
de_left_token_ring --> [*]
|
de_left_token_ring --> [*]
|
||||||
}
|
}
|
||||||
state removing {
|
state removing {
|
||||||
|
re_left_token_ring : left_token_ring
|
||||||
re_tablet_draining : tablet_draining
|
re_tablet_draining : tablet_draining
|
||||||
re_tablet_migration : tablet_migration
|
re_tablet_migration : tablet_migration
|
||||||
re_write_both_read_old : write_both_read_old
|
re_write_both_read_old : write_both_read_old
|
||||||
@@ -98,7 +99,8 @@ stateDiagram-v2
|
|||||||
re_tablet_draining --> re_write_both_read_old
|
re_tablet_draining --> re_write_both_read_old
|
||||||
re_write_both_read_old --> re_write_both_read_new: streaming completed
|
re_write_both_read_old --> re_write_both_read_new: streaming completed
|
||||||
re_write_both_read_old --> re_rollback_to_normal: rollback
|
re_write_both_read_old --> re_rollback_to_normal: rollback
|
||||||
re_write_both_read_new --> [*]
|
re_write_both_read_new --> re_left_token_ring
|
||||||
|
re_left_token_ring --> [*]
|
||||||
}
|
}
|
||||||
rebuilding --> normal: streaming completed
|
rebuilding --> normal: streaming completed
|
||||||
decommissioning --> left: operation succeeded
|
decommissioning --> left: operation succeeded
|
||||||
@@ -122,9 +124,10 @@ Note that these are not all states, as there are other states specific to tablet
|
|||||||
Writes to vnodes-based tables are going to both new and old replicas (new replicas means calculated according
|
Writes to vnodes-based tables are going to both new and old replicas (new replicas means calculated according
|
||||||
to modified token ring), reads are using old replicas.
|
to modified token ring), reads are using old replicas.
|
||||||
- `write_both_read_new` - as above, but reads are using new replicas.
|
- `write_both_read_new` - as above, but reads are using new replicas.
|
||||||
- `left_token_ring` - the decommissioning node left the token ring, but we still need to wait until other
|
- `left_token_ring` - the decommissioning or removing node left the token ring, but we still need to wait until other
|
||||||
nodes observe it and stop sending writes to this node. Then, we tell the node to shut down and remove
|
nodes observe it and stop sending writes to this node. For decommission, we tell the node to shut down,
|
||||||
it from group 0. We also use this state to rollback a failed bootstrap or decommission.
|
then remove it from group 0. For removenode, the node is already down, so we skip the shutdown step.
|
||||||
|
We also use this state to rollback a failed bootstrap or decommission.
|
||||||
- `rollback_to_normal` - the decommission or removenode operation failed. Rollback the operation by
|
- `rollback_to_normal` - the decommission or removenode operation failed. Rollback the operation by
|
||||||
moving the node we tried to decommission/remove back to the normal state.
|
moving the node we tried to decommission/remove back to the normal state.
|
||||||
- `lock` - the topology stays in this state until externally changed (to null state), preventing topology
|
- `lock` - the topology stays in this state until externally changed (to null state), preventing topology
|
||||||
@@ -141,7 +144,9 @@ reads that started before this point exist in the system. Finally we remove the
|
|||||||
transitioning state.
|
transitioning state.
|
||||||
|
|
||||||
Decommission, removenode and replace work similarly, except they don't go through
|
Decommission, removenode and replace work similarly, except they don't go through
|
||||||
`commit_cdc_generation`.
|
`commit_cdc_generation`. Both decommission and removenode go through the
|
||||||
|
`left_token_ring` state to run a global barrier ensuring all nodes are aware
|
||||||
|
of the topology change before the operation completes.
|
||||||
|
|
||||||
The state machine may also go only through the `commit_cdc_generation` state
|
The state machine may also go only through the `commit_cdc_generation` state
|
||||||
after getting a request from the user to create a new CDC generation if the
|
after getting a request from the user to create a new CDC generation if the
|
||||||
|
|||||||
@@ -41,12 +41,12 @@ Unless the task was aborted, the worker will eventually reply that the task was
|
|||||||
it temporarily saves list of ids of finished tasks and removes those tasks from group0 state (permanently marking them as finished) in 200ms intervals. (*)
|
it temporarily saves list of ids of finished tasks and removes those tasks from group0 state (permanently marking them as finished) in 200ms intervals. (*)
|
||||||
This batching of removing finished tasks is done in order to reduce number of generated group0 operations.
|
This batching of removing finished tasks is done in order to reduce number of generated group0 operations.
|
||||||
|
|
||||||
On the other hand, view buildind tasks can can also be aborted due to 2 main reasons:
|
On the other hand, view building tasks can also be aborted for two main reasons:
|
||||||
- a keyspace/view was dropped
|
- a keyspace/view was dropped
|
||||||
- tablet operations (see [tablet operations section](#tablet-operations))
|
- tablet operations (see [tablet operations section](#tablet-operations))
|
||||||
In the first case we simply delete relevant view building tasks as they are no longer needed.
|
In the first case we simply delete relevant view building tasks as they are no longer needed.
|
||||||
But if a task needs to be aborted due to tablet operation, we're firstly setting the `aborted` flag to true. We need to do this because we need the task informations
|
But if a task needs to be aborted due to a tablet operation, we first set the `aborted` flag to true. We need to do this because we need the task information
|
||||||
to created a new adjusted tasks (if the operation succeeded) or rollback them (if the operation failed).
|
to create new adjusted tasks (if the operation succeeded) or rollback them (if the operation failed).
|
||||||
Once a task is aborted by setting the flag, this cannot be revoked, so rolling back a task means creating its duplicate and removing the original task.
|
Once a task is aborted by setting the flag, this cannot be revoked, so rolling back a task means creating its duplicate and removing the original task.
|
||||||
|
|
||||||
(*) - Because there is a time gap between when the coordinator learns that a task is finished (from the RPC response) and when the task is marked as completed,
|
(*) - Because there is a time gap between when the coordinator learns that a task is finished (from the RPC response) and when the task is marked as completed,
|
||||||
|
|||||||
@@ -29,9 +29,6 @@ A CDC generation consists of:
|
|||||||
|
|
||||||
This is the mapping used to decide on which stream IDs to use when making writes, as explained in the :doc:`./cdc-streams` document. It is a global property of the cluster: it doesn't depend on the table you're making writes to.
|
This is the mapping used to decide on which stream IDs to use when making writes, as explained in the :doc:`./cdc-streams` document. It is a global property of the cluster: it doesn't depend on the table you're making writes to.
|
||||||
|
|
||||||
.. caution::
|
|
||||||
The tables mentioned in the following sections: ``system_distributed.cdc_generation_timestamps`` and ``system_distributed.cdc_streams_descriptions_v2`` have been introduced in ScyllaDB 4.4. It is highly recommended to upgrade to 4.4 for efficient CDC usage. The last section explains how to run the below examples in ScyllaDB 4.3.
|
|
||||||
|
|
||||||
When CDC generations change
|
When CDC generations change
|
||||||
---------------------------
|
---------------------------
|
||||||
|
|
||||||
|
|||||||
@@ -28,7 +28,8 @@ Incremental Repair is only supported for tables that use the tablets architectur
|
|||||||
Incremental Repair Modes
|
Incremental Repair Modes
|
||||||
------------------------
|
------------------------
|
||||||
|
|
||||||
While incremental repair is the default and recommended mode, you can control its behavior for a given repair operation using the ``incremental_mode`` parameter. This is useful for situations where you might need to force a full data validation.
|
Incremental repair is currently disabled by default. You can control its behavior for a given repair operation using the ``incremental_mode`` parameter.
|
||||||
|
This is useful for enabling incremental repair, or in situations where you might need to force a full data validation.
|
||||||
|
|
||||||
The available modes are:
|
The available modes are:
|
||||||
|
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ This document highlights ScyllaDB's key data modeling features.
|
|||||||
Workload Prioritization </features/workload-prioritization>
|
Workload Prioritization </features/workload-prioritization>
|
||||||
Backup and Restore </features/backup-and-restore>
|
Backup and Restore </features/backup-and-restore>
|
||||||
Incremental Repair </features/incremental-repair/>
|
Incremental Repair </features/incremental-repair/>
|
||||||
|
Vector Search </features/vector-search/>
|
||||||
|
|
||||||
.. panel-box::
|
.. panel-box::
|
||||||
:title: ScyllaDB Features
|
:title: ScyllaDB Features
|
||||||
@@ -43,3 +44,5 @@ This document highlights ScyllaDB's key data modeling features.
|
|||||||
* :doc:`Incremental Repair </features/incremental-repair/>` provides a much more
|
* :doc:`Incremental Repair </features/incremental-repair/>` provides a much more
|
||||||
efficient and lightweight approach to maintaining data consistency by
|
efficient and lightweight approach to maintaining data consistency by
|
||||||
repairing only the data that has changed since the last repair.
|
repairing only the data that has changed since the last repair.
|
||||||
|
* :doc:`Vector Search in ScyllaDB </features/vector-search/>` enables
|
||||||
|
similarity-based queries on vector embeddings.
|
||||||
|
|||||||
55
docs/features/vector-search.rst
Normal file
55
docs/features/vector-search.rst
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
=================================
|
||||||
|
Vector Search in ScyllaDB
|
||||||
|
=================================
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
This feature is currently available only in `ScyllaDB Cloud <https://cloud.docs.scylladb.com/>`_.
|
||||||
|
|
||||||
|
What Is Vector Search
|
||||||
|
-------------------------
|
||||||
|
|
||||||
|
Vector Search enables similarity-based queries over high-dimensional data,
|
||||||
|
such as text, images, audio, or user behavior. Instead of searching for exact
|
||||||
|
matches, it allows applications to find items that are semantically similar to
|
||||||
|
a given input.
|
||||||
|
|
||||||
|
To do this, Vector Search works on vector embeddings, which are numerical
|
||||||
|
representations of data that capture semantic meaning. This enables queries
|
||||||
|
such as:
|
||||||
|
|
||||||
|
* “Find documents similar to this paragraph”
|
||||||
|
* “Find products similar to what the user just viewed”
|
||||||
|
* “Find previous tickets related to this support request”
|
||||||
|
|
||||||
|
Rather than relying on exact values or keywords, Vector Search returns results
|
||||||
|
based on distance or similarity between vectors. This capability is
|
||||||
|
increasingly used in modern workloads such as AI-powered search, recommendation
|
||||||
|
systems, and retrieval-augmented generation (RAG).
|
||||||
|
|
||||||
|
Why Vector Search Matters
|
||||||
|
------------------------------------
|
||||||
|
|
||||||
|
Many applications already rely on ScyllaDB for high throughput, low and
|
||||||
|
predictable latency, and large-scale data storage.
|
||||||
|
|
||||||
|
Vector Search complements these strengths by enabling new classes of workloads,
|
||||||
|
including:
|
||||||
|
|
||||||
|
* Semantic search over text or documents
|
||||||
|
* Recommendations based on user or item similarity
|
||||||
|
* AI and ML applications, including RAG pipelines
|
||||||
|
* Anomaly and pattern detection
|
||||||
|
|
||||||
|
With Vector Search, ScyllaDB can serve as the similarity search backend for
|
||||||
|
AI-driven applications.
|
||||||
|
|
||||||
|
Availability
|
||||||
|
--------------
|
||||||
|
|
||||||
|
Vector Search is currently available only in ScyllaDB Cloud, the fully managed
|
||||||
|
ScyllaDB service.
|
||||||
|
|
||||||
|
|
||||||
|
👉 For details on using Vector Search, refer to the
|
||||||
|
`ScyllaDB Cloud documentation <https://cloud.docs.scylladb.com/stable/vector-search/index.html>`_.
|
||||||
@@ -20,7 +20,10 @@ You can run your ScyllaDB workloads on AWS, GCE, and Azure using a ScyllaDB imag
|
|||||||
Amazon Web Services (AWS)
|
Amazon Web Services (AWS)
|
||||||
-----------------------------
|
-----------------------------
|
||||||
|
|
||||||
The recommended instance types are :ref:`i3en <system-requirements-i3en-instances>`, :ref:`i4i <system-requirements-i4i-instances>`, :ref:`i7i <system-requirements-i7i-instances>`, and :ref:`i7ie <system-requirements-i7ie-instances>`.
|
The recommended instance types are :ref:`i3en <system-requirements-i3en-instances>`,
|
||||||
|
:ref:`i4i <system-requirements-i4i-instances>`, :ref:`i7i <system-requirements-i7i-instances>`,
|
||||||
|
:ref:`i7ie <system-requirements-i7ie-instances>`, :ref:`i8g <system-requirements-i8g-instances>`,
|
||||||
|
and :ref:`i8ge <system-requirements-i8ge-instances>`.
|
||||||
|
|
||||||
.. note::
|
.. note::
|
||||||
|
|
||||||
@@ -195,6 +198,118 @@ All i7i instances have the following specs:
|
|||||||
|
|
||||||
See `Amazon EC2 I7i Instances <https://aws.amazon.com/ec2/instance-types/i7i/>`_ for details.
|
See `Amazon EC2 I7i Instances <https://aws.amazon.com/ec2/instance-types/i7i/>`_ for details.
|
||||||
|
|
||||||
|
|
||||||
|
.. _system-requirements-i8g-instances:
|
||||||
|
|
||||||
|
i8g instances
|
||||||
|
^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
The following i8g instances are supported.
|
||||||
|
|
||||||
|
.. list-table::
|
||||||
|
:widths: 30 20 20 30
|
||||||
|
:header-rows: 1
|
||||||
|
|
||||||
|
* - Model
|
||||||
|
- vCPU
|
||||||
|
- Mem (GiB)
|
||||||
|
- Storage (GB)
|
||||||
|
* - i8g.large
|
||||||
|
- 2
|
||||||
|
- 16
|
||||||
|
- 1 x 468 GB
|
||||||
|
* - i8g.xlarge
|
||||||
|
- 4
|
||||||
|
- 32
|
||||||
|
- 1 x 937 GB
|
||||||
|
* - i8g.2xlarge
|
||||||
|
- 8
|
||||||
|
- 64
|
||||||
|
- 1 x 1,875 GB
|
||||||
|
* - i8g.4xlarge
|
||||||
|
- 16
|
||||||
|
- 128
|
||||||
|
- 1 x 3,750 GB
|
||||||
|
* - i8g.8xlarge
|
||||||
|
- 32
|
||||||
|
- 256
|
||||||
|
- 2 x 3,750 GB
|
||||||
|
* - i8g.12xlarge
|
||||||
|
- 48
|
||||||
|
- 384
|
||||||
|
- 3 x 3,750 GB
|
||||||
|
* - i8g.16xlarge
|
||||||
|
- 64
|
||||||
|
- 512
|
||||||
|
- 4 x 3,750 GB
|
||||||
|
|
||||||
|
All i8g instances have the following specs:
|
||||||
|
|
||||||
|
* Powered by AWS Graviton4 processors
|
||||||
|
* 3rd generation AWS Nitro SSD storage
|
||||||
|
* DDR5-5600 memory for improved throughput
|
||||||
|
* Up to 100 Gbps of networking bandwidth and up to 60 Gbps of bandwidth to
|
||||||
|
Amazon Elastic Block Store (EBS)
|
||||||
|
* Instance sizes offer up to 45 TB of total local NVMe instance storage
|
||||||
|
|
||||||
|
See `Amazon EC2 I8g Instances <https://aws.amazon.com/ec2/instance-types/i8g/>`_ for details.
|
||||||
|
|
||||||
|
.. _system-requirements-i8ge-instances:
|
||||||
|
|
||||||
|
i8ge instances
|
||||||
|
^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
The following i8ge instances are supported.
|
||||||
|
|
||||||
|
.. list-table::
|
||||||
|
:widths: 30 20 20 30
|
||||||
|
:header-rows: 1
|
||||||
|
|
||||||
|
* - Model
|
||||||
|
- vCPU
|
||||||
|
- Mem (GiB)
|
||||||
|
- Storage (GB)
|
||||||
|
* - i8ge.large
|
||||||
|
- 2
|
||||||
|
- 16
|
||||||
|
- 1 x 1,250 GB
|
||||||
|
* - i8ge.xlarge
|
||||||
|
- 4
|
||||||
|
- 32
|
||||||
|
- 1 x 2,500 GB
|
||||||
|
* - i8ge.2xlarge
|
||||||
|
- 8
|
||||||
|
- 64
|
||||||
|
- 2 x 2,500 GB
|
||||||
|
* - i8ge.3xlarge
|
||||||
|
- 12
|
||||||
|
- 96
|
||||||
|
- 1 x 7,500 GB
|
||||||
|
* - i8ge.6xlarge
|
||||||
|
- 24
|
||||||
|
- 192
|
||||||
|
- 2 x 7,500 GB
|
||||||
|
* - i8ge.12xlarge
|
||||||
|
- 48
|
||||||
|
- 384
|
||||||
|
- 4 x 7,500 GB
|
||||||
|
* - i8ge.18xlarge
|
||||||
|
- 72
|
||||||
|
- 576
|
||||||
|
- 6 x 7,500 GB
|
||||||
|
|
||||||
|
All i8ge instances have the following specs:
|
||||||
|
|
||||||
|
* Powered by AWS Graviton4 processors
|
||||||
|
* 3rd generation AWS Nitro SSD storage
|
||||||
|
* DDR5-5600 memory for improved throughput
|
||||||
|
* Up to 300 Gbps of networking bandwidth and up to 60 Gbps of bandwidth to
|
||||||
|
Amazon Elastic Block Store (EBS)
|
||||||
|
* Instance sizes offer up to 120 TB of total local NVMe instance storage
|
||||||
|
|
||||||
|
See `Amazon EC2 I8ge Instances <https://aws.amazon.com/ec2/instance-types/i8ge/>`_ for details.
|
||||||
|
|
||||||
|
|
||||||
Im4gn and Is4gen instances
|
Im4gn and Is4gen instances
|
||||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
ScyllaDB supports Arm-based Im4gn and Is4gen instances. See `Amazon EC2 Im4gn and Is4gen instances <https://aws.amazon.com/ec2/instance-types/i4g/>`_ for specification details.
|
ScyllaDB supports Arm-based Im4gn and Is4gen instances. See `Amazon EC2 Im4gn and Is4gen instances <https://aws.amazon.com/ec2/instance-types/i4g/>`_ for specification details.
|
||||||
|
|||||||
@@ -25,8 +25,7 @@ Getting Started
|
|||||||
:id: "getting-started"
|
:id: "getting-started"
|
||||||
:class: my-panel
|
:class: my-panel
|
||||||
|
|
||||||
* `Install ScyllaDB (Binary Packages, Docker, or EC2) <https://www.scylladb.com/download/#core>`_ - Links to the ScyllaDB Download Center
|
* :doc:`Install ScyllaDB </getting-started/install-scylla/index/>`
|
||||||
|
|
||||||
* :doc:`Configure ScyllaDB </getting-started/system-configuration/>`
|
* :doc:`Configure ScyllaDB </getting-started/system-configuration/>`
|
||||||
* :doc:`Run ScyllaDB in a Shared Environment </getting-started/scylla-in-a-shared-environment>`
|
* :doc:`Run ScyllaDB in a Shared Environment </getting-started/scylla-in-a-shared-environment>`
|
||||||
* :doc:`Create a ScyllaDB Cluster - Single Data Center (DC) </operating-scylla/procedures/cluster-management/create-cluster/>`
|
* :doc:`Create a ScyllaDB Cluster - Single Data Center (DC) </operating-scylla/procedures/cluster-management/create-cluster/>`
|
||||||
|
|||||||
@@ -3,8 +3,7 @@
|
|||||||
ScyllaDB Housekeeping and how to disable it
|
ScyllaDB Housekeeping and how to disable it
|
||||||
============================================
|
============================================
|
||||||
|
|
||||||
It is always recommended to run the latest version of ScyllaDB.
|
It is always recommended to run the latest stable version of ScyllaDB.
|
||||||
The latest stable release version is always available from the `Download Center <https://www.scylladb.com/download/>`_.
|
|
||||||
|
|
||||||
When you install ScyllaDB, it installs by default two services: **scylla-housekeeping-restart** and **scylla-housekeeping-daily**. These services check for the latest ScyllaDB version and prompt the user if they are using a version that is older than what is publicly available.
|
When you install ScyllaDB, it installs by default two services: **scylla-housekeeping-restart** and **scylla-housekeeping-daily**. These services check for the latest ScyllaDB version and prompt the user if they are using a version that is older than what is publicly available.
|
||||||
Information about your ScyllaDB deployment, including the ScyllaDB version currently used, as well as unique user and server identifiers, are collected by a centralized service.
|
Information about your ScyllaDB deployment, including the ScyllaDB version currently used, as well as unique user and server identifiers, are collected by a centralized service.
|
||||||
|
|||||||
@@ -9,6 +9,8 @@ Running ``cluster repair`` on a **single node** synchronizes all data on all nod
|
|||||||
To synchronize all data in clusters that have both tablets-based and vnodes-based keyspaces, run :doc:`nodetool repair -pr </operating-scylla/nodetool-commands/repair/>` on **all**
|
To synchronize all data in clusters that have both tablets-based and vnodes-based keyspaces, run :doc:`nodetool repair -pr </operating-scylla/nodetool-commands/repair/>` on **all**
|
||||||
of the nodes in the cluster, and :doc:`nodetool cluster repair </operating-scylla/nodetool-commands/cluster/repair/>` on **any** of the nodes in the cluster.
|
of the nodes in the cluster, and :doc:`nodetool cluster repair </operating-scylla/nodetool-commands/cluster/repair/>` on **any** of the nodes in the cluster.
|
||||||
|
|
||||||
|
.. warning:: :term:`Colocated Tables <Colocated Table>` cannot be synchronized using cluster repair in this version.
|
||||||
|
|
||||||
To check if a keyspace enables tablets, use:
|
To check if a keyspace enables tablets, use:
|
||||||
|
|
||||||
.. code-block:: cql
|
.. code-block:: cql
|
||||||
@@ -53,13 +55,13 @@ ScyllaDB nodetool cluster repair command supports the following options:
|
|||||||
|
|
||||||
nodetool cluster repair --tablet-tokens 1,10474535988
|
nodetool cluster repair --tablet-tokens 1,10474535988
|
||||||
|
|
||||||
- ``--incremental-mode`` specifies the incremental repair mode. Can be 'disabled', 'incremental', or 'full'. 'incremental': The incremental repair logic is enabled. Unrepaired sstables will be included for repair. Repaired sstables will be skipped. The incremental repair states will be updated after repair. 'full': The incremental repair logic is enabled. Both repaired and unrepaired sstables will be included for repair. The incremental repair states will be updated after repair. 'disabled': The incremental repair logic is disabled completely. The incremental repair states, e.g., repaired_at in sstables and sstables_repaired_at in the system.tablets table, will not be updated after repair. When the option is not provided, it defaults to incremental.
|
- ``--incremental-mode`` specifies the incremental repair mode. Can be 'disabled', 'incremental', or 'full'. 'incremental': The incremental repair logic is enabled. Unrepaired sstables will be included for repair. Repaired sstables will be skipped. The incremental repair states will be updated after repair. 'full': The incremental repair logic is enabled. Both repaired and unrepaired sstables will be included for repair. The incremental repair states will be updated after repair. 'disabled': The incremental repair logic is disabled completely. The incremental repair states, e.g., repaired_at in sstables and sstables_repaired_at in the system.tablets table, will not be updated after repair. When the option is not provided, it defaults to 'disabled'.
|
||||||
|
|
||||||
For example:
|
For example:
|
||||||
|
|
||||||
::
|
::
|
||||||
|
|
||||||
nodetool cluster repair --incremental-mode regular
|
nodetool cluster repair --incremental-mode disabled
|
||||||
|
|
||||||
- ``keyspace`` executes a repair on a specific keyspace. The default is all keyspaces.
|
- ``keyspace`` executes a repair on a specific keyspace. The default is all keyspaces.
|
||||||
|
|
||||||
|
|||||||
@@ -42,21 +42,21 @@ For single list:
|
|||||||
|
|
||||||
.. code-block:: shell
|
.. code-block:: shell
|
||||||
|
|
||||||
task_id type kind scope state sequence_number keyspace table entity shard creation_time start_time end_time
|
task_id type kind scope state sequence_number keyspace table entity shard start_time end_time
|
||||||
5116ddb6-85b5-4c3e-94fb-72128f15d7b4 repair node keyspace done 3 abc 0 2025-01-16T16:12:08Z 2025-01-16T16:12:11Z 2025-01-16T16:12:13Z
|
5116ddb6-85b5-4c3e-94fb-72128f15d7b4 repair node keyspace done 3 abc 0 2025-01-16T16:12:11Z 2025-01-16T16:12:13Z
|
||||||
|
|
||||||
With repetition:
|
With repetition:
|
||||||
|
|
||||||
.. code-block:: shell
|
.. code-block:: shell
|
||||||
|
|
||||||
task_id type kind scope state sequence_number keyspace table entity shard creation_time start_time end_time
|
task_id type kind scope state sequence_number keyspace table entity shard start_time end_time
|
||||||
d8926ee7-0faf-47b7-bfeb-82477e0c7b33 repair node keyspace running 5 abc 0 2025-01-16T16:12:54Z 2025-01-16T16:12:57Z
|
d8926ee7-0faf-47b7-bfeb-82477e0c7b33 repair node keyspace running 5 abc 0 2025-01-16T16:12:57Z
|
||||||
1e028cb8-31a3-45ed-8728-af7a1ab586f6 repair node keyspace done 4 abc 0 2025-01-16T16:12:42Z 2025-01-16T16:12:45Z 2025-01-16T16:12:47Z
|
1e028cb8-31a3-45ed-8728-af7a1ab586f6 repair node keyspace done 4 abc 0 2025-01-16T16:12:45Z 2025-01-16T16:12:47Z
|
||||||
|
|
||||||
task_id type kind scope state sequence_number keyspace table entity shard creation_time start_time end_time
|
task_id type kind scope state sequence_number keyspace table entity shard start_time end_time
|
||||||
1e535f9b-97fa-4788-a956-8f3216a6ea8d repair node keyspace created 6 abc 0 2025-01-16T16:13:02Z
|
1e535f9b-97fa-4788-a956-8f3216a6ea8d repair node keyspace created 6 abc 0
|
||||||
d8926ee7-0faf-47b7-bfeb-82477e0c7b33 repair node keyspace running 5 abc 0 2025-01-16T16:12:54Z 2025-01-16T16:12:57Z
|
d8926ee7-0faf-47b7-bfeb-82477e0c7b33 repair node keyspace running 5 abc 0 2025-01-16T16:12:57Z
|
||||||
1e028cb8-31a3-45ed-8728-af7a1ab586f6 repair node keyspace done 4 abc 0 2025-01-16T16:12:42Z 2025-01-16T16:12:45Z 2025-01-16T16:12:47Z
|
1e028cb8-31a3-45ed-8728-af7a1ab586f6 repair node keyspace done 4 abc 0 2025-01-16T16:12:45Z 2025-01-16T16:12:47Z
|
||||||
|
|
||||||
See also
|
See also
|
||||||
--------
|
--------
|
||||||
|
|||||||
@@ -25,7 +25,6 @@ Example output
|
|||||||
scope: keyspace
|
scope: keyspace
|
||||||
state: running
|
state: running
|
||||||
is_abortable: true
|
is_abortable: true
|
||||||
creation_time: 2024-07-29T15:48:50Z
|
|
||||||
start_time: 2024-07-29T15:48:55Z
|
start_time: 2024-07-29T15:48:55Z
|
||||||
end_time:
|
end_time:
|
||||||
error:
|
error:
|
||||||
|
|||||||
@@ -26,22 +26,22 @@ For single task:
|
|||||||
|
|
||||||
.. code-block:: shell
|
.. code-block:: shell
|
||||||
|
|
||||||
id type kind scope state is_abortable creation_time start_time end_time error parent_id sequence_number shard keyspace table entity progress_units total completed children_ids
|
id type kind scope state is_abortable start_time end_time error parent_id sequence_number shard keyspace table entity progress_units total completed children_ids
|
||||||
be5559ea-bc5a-428c-b8ce-d14eac7a1765 repair node keyspace done true 2024-07-29T16:06:43Z 2024-07-29T16:06:46Z 2024-07-29T16:06:46Z none 1 0 abc ranges 4 4 [{task_id: 542e38cb-9ad4-40aa-9010-de2630004e55, node: 127.0.0.1 }, {task_id: 8974ebcc-1e87-4040-88fe-f2438261f7fb, node: 127.0.0.1 }]
|
be5559ea-bc5a-428c-b8ce-d14eac7a1765 repair node keyspace done true 2024-07-29T16:06:46Z 2024-07-29T16:06:46Z none 1 0 abc ranges 4 4 [{task_id: 542e38cb-9ad4-40aa-9010-de2630004e55, node: 127.0.0.1 }, {task_id: 8974ebcc-1e87-4040-88fe-f2438261f7fb, node: 127.0.0.1 }]
|
||||||
542e38cb-9ad4-40aa-9010-de2630004e55 repair node shard done false 2024-07-29T16:06:43Z 2024-07-29T16:06:46Z 2024-07-29T16:06:46Z be5559ea-bc5a-428c-b8ce-d14eac7a1765 1 0 abc ranges 2 2 []
|
542e38cb-9ad4-40aa-9010-de2630004e55 repair node shard done false 2024-07-29T16:06:46Z 2024-07-29T16:06:46Z be5559ea-bc5a-428c-b8ce-d14eac7a1765 1 0 abc ranges 2 2 []
|
||||||
8974ebcc-1e87-4040-88fe-f2438261f7fb repair node shard done false 2024-07-29T16:06:43Z 2024-07-29T16:06:46Z 2024-07-29T16:06:46Z be5559ea-bc5a-428c-b8ce-d14eac7a1765 1 1 abc ranges 2 2 []
|
8974ebcc-1e87-4040-88fe-f2438261f7fb repair node shard done false 2024-07-29T16:06:46Z 2024-07-29T16:06:46Z be5559ea-bc5a-428c-b8ce-d14eac7a1765 1 1 abc ranges 2 2 []
|
||||||
|
|
||||||
For all tasks:
|
For all tasks:
|
||||||
|
|
||||||
.. code-block:: shell
|
.. code-block:: shell
|
||||||
|
|
||||||
id type kind scope state is_abortable creation_time start_time end_time error parent_id sequence_number shard keyspace table entity progress_units total completed children_ids
|
id type kind scope state is_abortable start_time end_time error parent_id sequence_number shard keyspace table entity progress_units total completed children_ids
|
||||||
16eafb1e-8b2e-48e6-bd7a-432ca3d8b9fc repair node keyspace done true 2024-07-29T16:34:43Z 2024-07-29T16:34:46Z 2024-07-29T16:34:46Z none 1 0 abc ranges 4 4 [{task_id: e0aa1aa4-58ca-4bfb-b3e6-74e5f3a0f6ee, node: 127.0.0.1 }, {task_id: 49eb5797-b67e-46b0-9365-4460f7cf988a, node: 127.0.0.1 }]
|
16eafb1e-8b2e-48e6-bd7a-432ca3d8b9fc repair node keyspace done true 2024-07-29T16:34:46Z 2024-07-29T16:34:46Z none 1 0 abc ranges 4 4 [{task_id: e0aa1aa4-58ca-4bfb-b3e6-74e5f3a0f6ee, node: 127.0.0.1 }, {task_id: 49eb5797-b67e-46b0-9365-4460f7cf988a, node: 127.0.0.1 }]
|
||||||
e0aa1aa4-58ca-4bfb-b3e6-74e5f3a0f6ee repair node shard done false 2024-07-29T16:34:43Z 2024-07-29T16:34:46Z 2024-07-29T16:34:46Z 16eafb1e-8b2e-48e6-bd7a-432ca3d8b9fc 1 0 abc ranges 2 2 []
|
e0aa1aa4-58ca-4bfb-b3e6-74e5f3a0f6ee repair node shard done false 2024-07-29T16:34:46Z 2024-07-29T16:34:46Z 16eafb1e-8b2e-48e6-bd7a-432ca3d8b9fc 1 0 abc ranges 2 2 []
|
||||||
49eb5797-b67e-46b0-9365-4460f7cf988a repair node shard done false 2024-07-29T16:34:43Z 2024-07-29T16:34:46Z 2024-07-29T16:34:46Z 16eafb1e-8b2e-48e6-bd7a-432ca3d8b9fc 1 1 abc ranges 2 2 []
|
49eb5797-b67e-46b0-9365-4460f7cf988a repair node shard done false 2024-07-29T16:34:46Z 2024-07-29T16:34:46Z 16eafb1e-8b2e-48e6-bd7a-432ca3d8b9fc 1 1 abc ranges 2 2 []
|
||||||
82d7b2a4-146e-4a72-ba93-c66d5b4e9867 offstrategy compaction node keyspace done true 2024-07-29T16:34:13Z 2024-07-29T16:34:16Z 2024-07-29T16:34:16Z none 954 0 abc 1 1 [{task_id: 9818277b-238d-4298-a56b-c0d2153bf140, node: 127.0.0.1 }, {task_id: c1eb0701-ad7a-45ff-956f-7b8d671fc5db, node: 127.0.0.1 }
|
82d7b2a4-146e-4a72-ba93-c66d5b4e9867 offstrategy compaction node keyspace done true 2024-07-29T16:34:16Z 2024-07-29T16:34:16Z none 954 0 abc 1 1 [{task_id: 9818277b-238d-4298-a56b-c0d2153bf140, node: 127.0.0.1 }, {task_id: c1eb0701-ad7a-45ff-956f-7b8d671fc5db, node: 127.0.0.1 }
|
||||||
9818277b-238d-4298-a56b-c0d2153bf140 offstrategy compaction node shard done false 2024-07-29T16:34:13Z 2024-07-29T16:34:16Z 2024-07-29T16:34:16Z 82d7b2a4-146e-4a72-ba93-c66d5b4e9867 954 0 abc 1 1 []
|
9818277b-238d-4298-a56b-c0d2153bf140 offstrategy compaction node shard done false 2024-07-29T16:34:16Z 2024-07-29T16:34:16Z 82d7b2a4-146e-4a72-ba93-c66d5b4e9867 954 0 abc 1 1 []
|
||||||
c1eb0701-ad7a-45ff-956f-7b8d671fc5db offstrategy compaction node shard done false 2024-07-29T16:34:13Z 2024-07-29T16:34:16Z 2024-07-29T16:34:16Z 82d7b2a4-146e-4a72-ba93-c66d5b4e9867 954 1 abc 1 1 []
|
c1eb0701-ad7a-45ff-956f-7b8d671fc5db offstrategy compaction node shard done false 2024-07-29T16:34:16Z 2024-07-29T16:34:16Z 82d7b2a4-146e-4a72-ba93-c66d5b4e9867 954 1 abc 1 1 []
|
||||||
|
|
||||||
See also
|
See also
|
||||||
--------
|
--------
|
||||||
|
|||||||
@@ -64,13 +64,12 @@ ADMIN Logs service level operations: create, alter, drop, attach, detach, l
|
|||||||
auditing.
|
auditing.
|
||||||
========= =========================================================================================
|
========= =========================================================================================
|
||||||
|
|
||||||
Note that audit for every DML or QUERY might impact performance and consume a lot of storage.
|
Note that enabling audit may negatively impact performance and audit-to-table may consume extra storage. That's especially true when auditing DML and QUERY categories, which generate a high volume of audit messages.
|
||||||
|
|
||||||
Configuring Audit Storage
|
Configuring Audit Storage
|
||||||
---------------------------
|
---------------------------
|
||||||
|
|
||||||
Auditing messages can be sent to :ref:`Syslog <auditing-syslog-storage>` or stored in a Scylla :ref:`table <auditing-table-storage>`.
|
Auditing messages can be sent to :ref:`Syslog <auditing-syslog-storage>` or stored in a Scylla :ref:`table <auditing-table-storage>` or both.
|
||||||
Currently, auditing messages can only be saved to one location at a time. You cannot log into both a table and the Syslog.
|
|
||||||
|
|
||||||
.. _auditing-syslog-storage:
|
.. _auditing-syslog-storage:
|
||||||
|
|
||||||
@@ -193,6 +192,23 @@ For example:
|
|||||||
2018-03-18 00:00:00+0000 | 10.143.2.108 | 3429b1a5-2a94-11e8-8f4e-000000000001 | DDL | ONE | False | nba | DROP TABLE nba.team_roster ; | 127.0.0.1 | team_roster | Scylla |
|
2018-03-18 00:00:00+0000 | 10.143.2.108 | 3429b1a5-2a94-11e8-8f4e-000000000001 | DDL | ONE | False | nba | DROP TABLE nba.team_roster ; | 127.0.0.1 | team_roster | Scylla |
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
.. _auditing-table-and-syslog-storage:
|
||||||
|
|
||||||
|
Storing Audit Messages in a Table and Syslog Simultaneously
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
**Procedure**
|
||||||
|
|
||||||
|
#. Follow both procedures above, and set the ``audit`` parameter in the ``scylla.yaml`` file to both ``syslog`` and ``table``. You only need to restart ScyllaDB once.
|
||||||
|
|
||||||
|
To enable both syslog and table storage, specify both backends separated by a comma:
|
||||||
|
|
||||||
|
.. code-block:: shell
|
||||||
|
|
||||||
|
audit: "syslog,table"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Handling Audit Failures
|
Handling Audit Failures
|
||||||
---------------------------
|
---------------------------
|
||||||
|
|
||||||
|
|||||||
101
docs/poetry.lock
generated
101
docs/poetry.lock
generated
@@ -2,36 +2,35 @@
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "alabaster"
|
name = "alabaster"
|
||||||
version = "0.7.16"
|
version = "1.0.0"
|
||||||
description = "A light, configurable Sphinx theme"
|
description = "A light, configurable Sphinx theme"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.9"
|
python-versions = ">=3.10"
|
||||||
groups = ["main"]
|
groups = ["main"]
|
||||||
files = [
|
files = [
|
||||||
{file = "alabaster-0.7.16-py3-none-any.whl", hash = "sha256:b46733c07dce03ae4e150330b975c75737fa60f0a7c591b6c8bf4928a28e2c92"},
|
{file = "alabaster-1.0.0-py3-none-any.whl", hash = "sha256:fc6786402dc3fcb2de3cabd5fe455a2db534b371124f1f21de8731783dec828b"},
|
||||||
{file = "alabaster-0.7.16.tar.gz", hash = "sha256:75a8b99c28a5dad50dd7f8ccdd447a121ddb3892da9e53d1ca5cca3106d58d65"},
|
{file = "alabaster-1.0.0.tar.gz", hash = "sha256:c00dca57bca26fa62a6d7d0a9fcce65f3e026e9bfe33e9c538fd3fbb2144fd9e"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "anyio"
|
name = "anyio"
|
||||||
version = "4.11.0"
|
version = "4.12.0"
|
||||||
description = "High-level concurrency and networking framework on top of asyncio or Trio"
|
description = "High-level concurrency and networking framework on top of asyncio or Trio"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.9"
|
python-versions = ">=3.9"
|
||||||
groups = ["main"]
|
groups = ["main"]
|
||||||
files = [
|
files = [
|
||||||
{file = "anyio-4.11.0-py3-none-any.whl", hash = "sha256:0287e96f4d26d4149305414d4e3bc32f0dcd0862365a4bddea19d7a1ec38c4fc"},
|
{file = "anyio-4.12.0-py3-none-any.whl", hash = "sha256:dad2376a628f98eeca4881fc56cd06affd18f659b17a747d3ff0307ced94b1bb"},
|
||||||
{file = "anyio-4.11.0.tar.gz", hash = "sha256:82a8d0b81e318cc5ce71a5f1f8b5c4e63619620b63141ef8c995fa0db95a57c4"},
|
{file = "anyio-4.12.0.tar.gz", hash = "sha256:73c693b567b0c55130c104d0b43a9baf3aa6a31fc6110116509f27bf75e21ec0"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""}
|
exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""}
|
||||||
idna = ">=2.8"
|
idna = ">=2.8"
|
||||||
sniffio = ">=1.1"
|
|
||||||
typing_extensions = {version = ">=4.5", markers = "python_version < \"3.13\""}
|
typing_extensions = {version = ">=4.5", markers = "python_version < \"3.13\""}
|
||||||
|
|
||||||
[package.extras]
|
[package.extras]
|
||||||
trio = ["trio (>=0.31.0)"]
|
trio = ["trio (>=0.31.0) ; python_version < \"3.10\"", "trio (>=0.32.0) ; python_version >= \"3.10\""]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "babel"
|
name = "babel"
|
||||||
@@ -50,14 +49,14 @@ dev = ["backports.zoneinfo ; python_version < \"3.9\"", "freezegun (>=1.0,<2.0)"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "beartype"
|
name = "beartype"
|
||||||
version = "0.22.6"
|
version = "0.22.8"
|
||||||
description = "Unbearably fast near-real-time pure-Python runtime-static type-checker."
|
description = "Unbearably fast near-real-time pure-Python runtime-static type-checker."
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.10"
|
python-versions = ">=3.10"
|
||||||
groups = ["main"]
|
groups = ["main"]
|
||||||
files = [
|
files = [
|
||||||
{file = "beartype-0.22.6-py3-none-any.whl", hash = "sha256:0584bc46a2ea2a871509679278cda992eadde676c01356ab0ac77421f3c9a093"},
|
{file = "beartype-0.22.8-py3-none-any.whl", hash = "sha256:b832882d04e41a4097bab9f63e6992bc6de58c414ee84cba9b45b67314f5ab2e"},
|
||||||
{file = "beartype-0.22.6.tar.gz", hash = "sha256:97fbda69c20b48c5780ac2ca60ce3c1bb9af29b3a1a0216898ffabdd523e48f4"},
|
{file = "beartype-0.22.8.tar.gz", hash = "sha256:b19b21c9359722ee3f7cc433f063b3e13997b27ae8226551ea5062e621f61165"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.extras]
|
[package.extras]
|
||||||
@@ -70,18 +69,18 @@ test-tox-coverage = ["coverage (>=5.5)"]
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "beautifulsoup4"
|
name = "beautifulsoup4"
|
||||||
version = "4.14.2"
|
version = "4.14.3"
|
||||||
description = "Screen-scraping library"
|
description = "Screen-scraping library"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.7.0"
|
python-versions = ">=3.7.0"
|
||||||
groups = ["main"]
|
groups = ["main"]
|
||||||
files = [
|
files = [
|
||||||
{file = "beautifulsoup4-4.14.2-py3-none-any.whl", hash = "sha256:5ef6fa3a8cbece8488d66985560f97ed091e22bbc4e9c2338508a9d5de6d4515"},
|
{file = "beautifulsoup4-4.14.3-py3-none-any.whl", hash = "sha256:0918bfe44902e6ad8d57732ba310582e98da931428d231a5ecb9e7c703a735bb"},
|
||||||
{file = "beautifulsoup4-4.14.2.tar.gz", hash = "sha256:2a98ab9f944a11acee9cc848508ec28d9228abfd522ef0fad6a02a72e0ded69e"},
|
{file = "beautifulsoup4-4.14.3.tar.gz", hash = "sha256:6292b1c5186d356bba669ef9f7f051757099565ad9ada5dd630bd9de5fa7fb86"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
soupsieve = ">1.2"
|
soupsieve = ">=1.6.1"
|
||||||
typing-extensions = ">=4.0.0"
|
typing-extensions = ">=4.0.0"
|
||||||
|
|
||||||
[package.extras]
|
[package.extras]
|
||||||
@@ -802,18 +801,6 @@ files = [
|
|||||||
{file = "shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de"},
|
{file = "shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "sniffio"
|
|
||||||
version = "1.3.1"
|
|
||||||
description = "Sniff out which async library your code is running under"
|
|
||||||
optional = false
|
|
||||||
python-versions = ">=3.7"
|
|
||||||
groups = ["main"]
|
|
||||||
files = [
|
|
||||||
{file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"},
|
|
||||||
{file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "snowballstemmer"
|
name = "snowballstemmer"
|
||||||
version = "3.0.1"
|
version = "3.0.1"
|
||||||
@@ -840,18 +827,18 @@ files = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "sphinx"
|
name = "sphinx"
|
||||||
version = "7.4.7"
|
version = "8.1.3"
|
||||||
description = "Python documentation generator"
|
description = "Python documentation generator"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.9"
|
python-versions = ">=3.10"
|
||||||
groups = ["main"]
|
groups = ["main"]
|
||||||
files = [
|
files = [
|
||||||
{file = "sphinx-7.4.7-py3-none-any.whl", hash = "sha256:c2419e2135d11f1951cd994d6eb18a1835bd8fdd8429f9ca375dc1f3281bd239"},
|
{file = "sphinx-8.1.3-py3-none-any.whl", hash = "sha256:09719015511837b76bf6e03e42eb7595ac8c2e41eeb9c29c5b755c6b677992a2"},
|
||||||
{file = "sphinx-7.4.7.tar.gz", hash = "sha256:242f92a7ea7e6c5b406fdc2615413890ba9f699114a9c09192d7dfead2ee9cfe"},
|
{file = "sphinx-8.1.3.tar.gz", hash = "sha256:43c1911eecb0d3e161ad78611bc905d1ad0e523e4ddc202a58a821773dc4c927"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
alabaster = ">=0.7.14,<0.8.0"
|
alabaster = ">=0.7.14"
|
||||||
babel = ">=2.13"
|
babel = ">=2.13"
|
||||||
colorama = {version = ">=0.4.6", markers = "sys_platform == \"win32\""}
|
colorama = {version = ">=0.4.6", markers = "sys_platform == \"win32\""}
|
||||||
docutils = ">=0.20,<0.22"
|
docutils = ">=0.20,<0.22"
|
||||||
@@ -861,17 +848,17 @@ packaging = ">=23.0"
|
|||||||
Pygments = ">=2.17"
|
Pygments = ">=2.17"
|
||||||
requests = ">=2.30.0"
|
requests = ">=2.30.0"
|
||||||
snowballstemmer = ">=2.2"
|
snowballstemmer = ">=2.2"
|
||||||
sphinxcontrib-applehelp = "*"
|
sphinxcontrib-applehelp = ">=1.0.7"
|
||||||
sphinxcontrib-devhelp = "*"
|
sphinxcontrib-devhelp = ">=1.0.6"
|
||||||
sphinxcontrib-htmlhelp = ">=2.0.0"
|
sphinxcontrib-htmlhelp = ">=2.0.6"
|
||||||
sphinxcontrib-jsmath = "*"
|
sphinxcontrib-jsmath = ">=1.0.1"
|
||||||
sphinxcontrib-qthelp = "*"
|
sphinxcontrib-qthelp = ">=1.0.6"
|
||||||
sphinxcontrib-serializinghtml = ">=1.1.9"
|
sphinxcontrib-serializinghtml = ">=1.1.9"
|
||||||
tomli = {version = ">=2", markers = "python_version < \"3.11\""}
|
tomli = {version = ">=2", markers = "python_version < \"3.11\""}
|
||||||
|
|
||||||
[package.extras]
|
[package.extras]
|
||||||
docs = ["sphinxcontrib-websupport"]
|
docs = ["sphinxcontrib-websupport"]
|
||||||
lint = ["flake8 (>=6.0)", "importlib-metadata (>=6.0)", "mypy (==1.10.1)", "pytest (>=6.0)", "ruff (==0.5.2)", "sphinx-lint (>=0.9)", "tomli (>=2)", "types-docutils (==0.21.0.20240711)", "types-requests (>=2.30.0)"]
|
lint = ["flake8 (>=6.0)", "mypy (==1.11.1)", "pyright (==1.1.384)", "pytest (>=6.0)", "ruff (==0.6.9)", "sphinx-lint (>=0.9)", "tomli (>=2)", "types-Pillow (==10.2.0.20240822)", "types-Pygments (==2.18.0.20240506)", "types-colorama (==0.4.15.20240311)", "types-defusedxml (==0.7.0.20240218)", "types-docutils (==0.21.0.20241005)", "types-requests (==2.32.0.20240914)", "types-urllib3 (==1.26.25.14)"]
|
||||||
test = ["cython (>=3.0)", "defusedxml (>=0.7.1)", "pytest (>=8.0)", "setuptools (>=70.0)", "typing_extensions (>=4.9)"]
|
test = ["cython (>=3.0)", "defusedxml (>=0.7.1)", "pytest (>=8.0)", "setuptools (>=70.0)", "typing_extensions (>=4.9)"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -1001,13 +988,14 @@ test = ["tox"]
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "sphinx-scylladb-markdown"
|
name = "sphinx-scylladb-markdown"
|
||||||
version = "0.1.3"
|
version = "0.1.4"
|
||||||
description = "Sphinx extension for ScyllaDB documentation with enhanced Markdown support through MystParser and recommonmark."
|
description = "Sphinx extension for ScyllaDB documentation with enhanced Markdown support through MystParser and recommonmark."
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = "*"
|
python-versions = "*"
|
||||||
groups = ["main"]
|
groups = ["main"]
|
||||||
files = [
|
files = [
|
||||||
{file = "sphinx_scylladb_markdown-0.1.3-py3-none-any.whl", hash = "sha256:f20160b4aadf4c8cf95637f0a544121954b792914ab6ec05b67cae75e20a5566"},
|
{file = "sphinx_scylladb_markdown-0.1.4-py3-none-any.whl", hash = "sha256:598753e01cf159d4698eb1a707958828446e21749038d3d42c5b9c7e86eda6e4"},
|
||||||
|
{file = "sphinx_scylladb_markdown-0.1.4.tar.gz", hash = "sha256:9db3ae0dcf7c3519262da65e48c7f9e4db0ad1ce9c5f874864ea218f4cbc4c68"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
@@ -1018,14 +1006,14 @@ sphinx-markdown-tables = "0.0.17"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "sphinx-scylladb-theme"
|
name = "sphinx-scylladb-theme"
|
||||||
version = "1.8.9"
|
version = "1.8.10"
|
||||||
description = "A Sphinx Theme for ScyllaDB documentation projects"
|
description = "A Sphinx Theme for ScyllaDB documentation projects"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = "<4.0,>=3.10"
|
python-versions = "<4.0,>=3.10"
|
||||||
groups = ["main"]
|
groups = ["main"]
|
||||||
files = [
|
files = [
|
||||||
{file = "sphinx_scylladb_theme-1.8.9-py3-none-any.whl", hash = "sha256:f8649a7753a29494fd2b417d1cb855035dddb9ebd498ea033fd73f5f9338271e"},
|
{file = "sphinx_scylladb_theme-1.8.10-py3-none-any.whl", hash = "sha256:8b930f33bec7308ccaa92698ebb5ad85059bcbf93a463f92917aeaf473fce632"},
|
||||||
{file = "sphinx_scylladb_theme-1.8.9.tar.gz", hash = "sha256:ab7cda4c10a0d067c5c3a45f7b1f68cb8ebefe135a0be0738bfa282a344769b6"},
|
{file = "sphinx_scylladb_theme-1.8.10.tar.gz", hash = "sha256:8a78a9b692d9a946be2c4a64aa472fd82204cc8ea0b1ee7f60de6db35b356326"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
@@ -1059,24 +1047,25 @@ dev = ["build", "flake8", "pre-commit", "pytest", "sphinx", "sphinx-last-updated
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "sphinx-substitution-extensions"
|
name = "sphinx-substitution-extensions"
|
||||||
version = "2025.1.2"
|
version = "2025.11.17"
|
||||||
description = "Extensions for Sphinx which allow for substitutions."
|
description = "Extensions for Sphinx which allow for substitutions."
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.10"
|
python-versions = ">=3.10"
|
||||||
groups = ["main"]
|
groups = ["main"]
|
||||||
files = [
|
files = [
|
||||||
{file = "sphinx_substitution_extensions-2025.1.2-py2.py3-none-any.whl", hash = "sha256:ff14f40e4393bd7434a196badb8d47983355d9755af884b902e3023fb456b958"},
|
{file = "sphinx_substitution_extensions-2025.11.17-py2.py3-none-any.whl", hash = "sha256:ac18455bdc8324b337b0fe7498c1c0d0b1cb65c74d131459be4dea9edb6abbef"},
|
||||||
{file = "sphinx_substitution_extensions-2025.1.2.tar.gz", hash = "sha256:53b8d394d5098a09aef36bc687fa310aeb28466319d2c750e996e46400fb2474"},
|
{file = "sphinx_substitution_extensions-2025.11.17.tar.gz", hash = "sha256:aae17f8db9efc3d454a304373ae3df763f8739e05e0b98d5381db46f6d250b27"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
beartype = ">=0.18.5"
|
beartype = ">=0.18.5"
|
||||||
docutils = ">=0.19"
|
docutils = ">=0.19"
|
||||||
sphinx = ">=7.3.5"
|
myst-parser = ">=4.0.0"
|
||||||
|
sphinx = ">=8.1.0"
|
||||||
|
|
||||||
[package.extras]
|
[package.extras]
|
||||||
dev = ["actionlint-py (==1.7.5.21)", "check-manifest (==0.50)", "deptry (==0.21.2)", "doc8 (==1.1.2)", "doccmd (==2024.12.26)", "docformatter (==1.7.5)", "interrogate (==1.7.0)", "mypy-strict-kwargs (==2024.12.25)", "mypy[faster-cache] (==1.14.1)", "myst-parser (==4.0.0)", "pre-commit (==4.0.1)", "pyenchant (==3.3.0rc1)", "pylint (==3.3.3)", "pyproject-fmt (==2.5.0)", "pyright (==1.1.391)", "pyroma (==4.2)", "pytest (==8.3.4)", "pytest-cov (==6.0.0)", "ruff (==0.8.4)", "shellcheck-py (==0.10.0.1)", "shfmt-py (==3.7.0.1)", "sphinx-toolbox (==3.8.1)", "sphinx[test] (==8.1.3)", "types-docutils (==0.21.0.20241128)", "vulture (==2.14)", "yamlfix (==1.17.0)"]
|
dev = ["actionlint-py (==1.7.8.24)", "check-manifest (==0.51)", "deptry (==0.24.0)", "doc8 (==2.0.0)", "doccmd (==2025.11.8.1)", "docformatter (==1.7.7)", "interrogate (==1.7.0)", "mypy-strict-kwargs (==2025.4.3)", "mypy[faster-cache] (==1.18.2)", "pre-commit (==4.4.0)", "pylint[spelling] (==4.0.3)", "pyproject-fmt (==2.11.1)", "pyright (==1.1.407)", "pyroma (==5.0)", "pytest (==9.0.1)", "pytest-cov (==7.0.0)", "ruff (==0.14.5)", "shellcheck-py (==0.11.0.1)", "shfmt-py (==3.12.0.2)", "sphinx-lint (==1.0.1)", "sphinx-toolbox (==4.0.0)", "types-docutils (==0.22.2.20251006)", "vulture (==2.14)", "yamlfix (==1.19.0)"]
|
||||||
release = ["check-wheel-contents (==0.6.1)"]
|
release = ["check-wheel-contents (==0.6.3)"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "sphinx-tabs"
|
name = "sphinx-tabs"
|
||||||
@@ -1363,21 +1352,21 @@ files = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "urllib3"
|
name = "urllib3"
|
||||||
version = "2.5.0"
|
version = "2.6.2"
|
||||||
description = "HTTP library with thread-safe connection pooling, file post, and more."
|
description = "HTTP library with thread-safe connection pooling, file post, and more."
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.9"
|
python-versions = ">=3.9"
|
||||||
groups = ["main"]
|
groups = ["main"]
|
||||||
files = [
|
files = [
|
||||||
{file = "urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc"},
|
{file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
|
||||||
{file = "urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760"},
|
{file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.extras]
|
[package.extras]
|
||||||
brotli = ["brotli (>=1.0.9) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\""]
|
brotli = ["brotli (>=1.2.0) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=1.2.0.0) ; platform_python_implementation != \"CPython\""]
|
||||||
h2 = ["h2 (>=4,<5)"]
|
h2 = ["h2 (>=4,<5)"]
|
||||||
socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"]
|
socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"]
|
||||||
zstd = ["zstandard (>=0.18.0)"]
|
zstd = ["backports-zstd (>=1.0.0) ; python_version < \"3.14\""]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "uvicorn"
|
name = "uvicorn"
|
||||||
@@ -1603,4 +1592,4 @@ files = [
|
|||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.1"
|
lock-version = "2.1"
|
||||||
python-versions = "^3.10"
|
python-versions = "^3.10"
|
||||||
content-hash = "74912627a3f424290ed7889451c0bdb1a862ab85b1d07c85f4f3b8c34f32a020"
|
content-hash = "9a17caa38b3c88f3fe3d1a60fdb73a96aa12ff1e30ecb00e2f9249e7ba9f859c"
|
||||||
|
|||||||
@@ -9,13 +9,13 @@ package-mode = false
|
|||||||
python = "^3.10"
|
python = "^3.10"
|
||||||
pygments = "^2.18.0"
|
pygments = "^2.18.0"
|
||||||
redirects_cli ="^0.1.3"
|
redirects_cli ="^0.1.3"
|
||||||
sphinx-scylladb-theme = "^1.8.9"
|
sphinx-scylladb-theme = "^1.8.10"
|
||||||
sphinx-sitemap = "^2.6.0"
|
sphinx-sitemap = "^2.6.0"
|
||||||
sphinx-autobuild = "^2024.4.19"
|
sphinx-autobuild = "^2024.4.19"
|
||||||
Sphinx = "^7.3.7"
|
Sphinx = "^8.0.0"
|
||||||
sphinx-multiversion-scylla = "^0.3.4"
|
sphinx-multiversion-scylla = "^0.3.4"
|
||||||
sphinxcontrib-datatemplates = "^0.9.2"
|
sphinxcontrib-datatemplates = "^0.9.2"
|
||||||
sphinx-scylladb-markdown = "^0.1.2"
|
sphinx-scylladb-markdown = "^0.1.4"
|
||||||
sphinx_collapse ="^0.1.3"
|
sphinx_collapse ="^0.1.3"
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
|
|||||||
@@ -202,3 +202,7 @@ Glossary
|
|||||||
The name comes from two basic operations, multiply (MU) and rotate (R), used in its inner loop.
|
The name comes from two basic operations, multiply (MU) and rotate (R), used in its inner loop.
|
||||||
The MurmurHash3 version used in ScyllaDB originated from `Apache Cassandra <https://commons.apache.org/proper/commons-codec/apidocs/org/apache/commons/codec/digest/MurmurHash3.html>`_, and is **not** identical to the `official MurmurHash3 calculation <https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/utils/MurmurHash.java#L31-L33>`_. More `here <https://github.com/russss/murmur3-cassandra>`_.
|
The MurmurHash3 version used in ScyllaDB originated from `Apache Cassandra <https://commons.apache.org/proper/commons-codec/apidocs/org/apache/commons/codec/digest/MurmurHash3.html>`_, and is **not** identical to the `official MurmurHash3 calculation <https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/utils/MurmurHash.java#L31-L33>`_. More `here <https://github.com/russss/murmur3-cassandra>`_.
|
||||||
|
|
||||||
|
Colocated Table
|
||||||
|
An internal table of a special type in a :doc:`tablets </architecture/tablets>` enabled keyspace that is colocated with another base table, meaning it always has the same tablet replicas as the base table.
|
||||||
|
Current types of colocated tables include CDC log tables, local indexes, and materialized views that have the same partition key as their base table.
|
||||||
|
|
||||||
|
|||||||
@@ -816,7 +816,6 @@ public:
|
|||||||
future<data_sink> wrap_sink(const sstables::sstable& sst, sstables::component_type type, data_sink sink) override {
|
future<data_sink> wrap_sink(const sstables::sstable& sst, sstables::component_type type, data_sink sink) override {
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case sstables::component_type::Scylla:
|
case sstables::component_type::Scylla:
|
||||||
case sstables::component_type::TemporaryScylla:
|
|
||||||
case sstables::component_type::TemporaryTOC:
|
case sstables::component_type::TemporaryTOC:
|
||||||
case sstables::component_type::TOC:
|
case sstables::component_type::TOC:
|
||||||
co_return sink;
|
co_return sink;
|
||||||
@@ -845,7 +844,6 @@ public:
|
|||||||
sstables::component_type type,
|
sstables::component_type type,
|
||||||
data_source src) override {
|
data_source src) override {
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case sstables::component_type::TemporaryScylla:
|
|
||||||
case sstables::component_type::Scylla:
|
case sstables::component_type::Scylla:
|
||||||
case sstables::component_type::TemporaryTOC:
|
case sstables::component_type::TemporaryTOC:
|
||||||
case sstables::component_type::TOC:
|
case sstables::component_type::TOC:
|
||||||
|
|||||||
@@ -176,6 +176,8 @@ public:
|
|||||||
gms::feature rack_list_rf { *this, "RACK_LIST_RF"sv };
|
gms::feature rack_list_rf { *this, "RACK_LIST_RF"sv };
|
||||||
gms::feature driver_service_level { *this, "DRIVER_SERVICE_LEVEL"sv };
|
gms::feature driver_service_level { *this, "DRIVER_SERVICE_LEVEL"sv };
|
||||||
gms::feature strongly_consistent_tables { *this, "STRONGLY_CONSISTENT_TABLES"sv };
|
gms::feature strongly_consistent_tables { *this, "STRONGLY_CONSISTENT_TABLES"sv };
|
||||||
|
gms::feature client_routes { *this, "CLIENT_ROUTES"sv };
|
||||||
|
gms::feature removenode_with_left_token_ring { *this, "REMOVENODE_WITH_LEFT_TOKEN_RING"sv };
|
||||||
public:
|
public:
|
||||||
|
|
||||||
const std::unordered_map<sstring, std::reference_wrapper<feature>>& registered_features() const;
|
const std::unordered_map<sstring, std::reference_wrapper<feature>>& registered_features() const;
|
||||||
|
|||||||
@@ -38,6 +38,7 @@ debian_base_packages=(
|
|||||||
python3-aiohttp
|
python3-aiohttp
|
||||||
python3-pyparsing
|
python3-pyparsing
|
||||||
python3-colorama
|
python3-colorama
|
||||||
|
python3-dev
|
||||||
python3-tabulate
|
python3-tabulate
|
||||||
python3-pytest
|
python3-pytest
|
||||||
python3-pytest-asyncio
|
python3-pytest-asyncio
|
||||||
@@ -65,6 +66,7 @@ debian_base_packages=(
|
|||||||
git-lfs
|
git-lfs
|
||||||
e2fsprogs
|
e2fsprogs
|
||||||
fuse3
|
fuse3
|
||||||
|
libev-dev # for python driver
|
||||||
)
|
)
|
||||||
|
|
||||||
fedora_packages=(
|
fedora_packages=(
|
||||||
@@ -90,6 +92,7 @@ fedora_packages=(
|
|||||||
patchelf
|
patchelf
|
||||||
python3
|
python3
|
||||||
python3-aiohttp
|
python3-aiohttp
|
||||||
|
python3-devel
|
||||||
python3-pip
|
python3-pip
|
||||||
python3-file-magic
|
python3-file-magic
|
||||||
python3-colorama
|
python3-colorama
|
||||||
@@ -154,6 +157,8 @@ fedora_packages=(
|
|||||||
https://github.com/scylladb/cassandra-stress/releases/download/v3.18.1/cassandra-stress-java21-3.18.1-1.noarch.rpm
|
https://github.com/scylladb/cassandra-stress/releases/download/v3.18.1/cassandra-stress-java21-3.18.1-1.noarch.rpm
|
||||||
elfutils
|
elfutils
|
||||||
jq
|
jq
|
||||||
|
|
||||||
|
libev-devel # for python driver
|
||||||
)
|
)
|
||||||
|
|
||||||
fedora_python3_packages=(
|
fedora_python3_packages=(
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user